extrait 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,16 +8,20 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
8
8
  </a>
9
9
  </p>
10
10
 
11
- **Features:**
11
+ ## Features
12
+
12
13
  - Multi-candidate JSON extraction from LLM responses
13
14
  - Automatic repair with jsonrepair
14
15
  - Zod schema validation and coercion
15
16
  - Optional self-healing for validation failures
16
17
  - Streaming support
17
18
  - MCP tools
19
+ - Vector embeddings (OpenAI-compatible + Voyage AI)
18
20
 
19
21
  ## Installation
20
22
 
23
+ Install `extrait` with your preferred package manager.
24
+
21
25
  ```bash
22
26
  bun add extrait
23
27
  # or
@@ -28,56 +32,118 @@ deno add npm:extrait
28
32
 
29
33
  ## Quick Start
30
34
 
35
+ Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
36
+
31
37
  ```typescript
32
38
  import { createLLM, prompt, s } from "extrait";
33
39
  import { z } from "zod";
34
40
 
35
41
  const llm = createLLM({
36
42
  provider: "openai-compatible",
37
- model: "gpt-5-nano",
38
- transport: { apiKey: process.env.LLM_API_KEY },
43
+ model: "mistralai/ministral-3-3b",
44
+ transport: {
45
+ baseURL: "http://localhost:1234/v1",
46
+ apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
47
+ },
39
48
  });
40
49
 
41
- const SummarySchema = s.schema(
42
- "Summary",
50
+ const RecipeSchema = s.schema(
51
+ "Recipe",
43
52
  z.object({
44
- summary: s.string().min(1).describe("One-sentence summary"),
45
- tags: s.array(s.string()).default([]).describe("Keywords"),
53
+ title: s.string().min(1).describe("Short recipe title"),
54
+ ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
46
55
  })
47
56
  );
48
57
 
49
58
  const result = await llm.structured(
50
- SummarySchema,
51
- prompt`Summarize this: """${text}"""`
59
+ RecipeSchema,
60
+ prompt`Extract a simple recipe from this text: """${text}"""`
52
61
  );
53
62
 
54
63
  console.log(result.data);
55
64
  ```
56
65
 
66
+ ## Examples at a Glance
67
+
68
+ These examples cover the most common usage patterns in the repository.
69
+
70
+ - [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
71
+ - [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
72
+ - [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
73
+ - [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
74
+ - [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
75
+ - [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
76
+
77
+ ```bash
78
+ bun run dev simple "Bun.js runtime"
79
+ bun run dev streaming
80
+ bun run dev calculator-tool
81
+ ```
82
+
57
83
  ## API Reference
58
84
 
59
- ### Creating an LLM Client
85
+ The sections below cover the main building blocks of the library.
86
+
87
+ ### Create an LLM Client
88
+
89
+ Use `createLLM()` to configure the provider, model, transport, and client defaults.
60
90
 
61
91
  ```typescript
62
92
  const llm = createLLM({
63
93
  provider: "openai-compatible" | "anthropic-compatible",
64
94
  model: "gpt-5-nano",
95
+ baseURL: "https://api.openai.com", // optional alias for transport.baseURL
96
+ apiKey: process.env.LLM_API_KEY, // optional alias for transport.apiKey
65
97
  transport: {
66
- baseURL: "https://api.openai.com", // optional
67
- apiKey: process.env.LLM_API_KEY, // optional
98
+ baseURL: "https://api.openai.com", // optional
99
+ apiKey: process.env.LLM_API_KEY, // optional
100
+ path: "/v1/chat/completions", // optional; anthropic-compatible usually uses /v1/messages
101
+ headers: { "x-trace-id": "docs-demo" }, // optional extra headers
102
+ defaultBody: { user: "docs-demo" }, // optional provider body defaults
103
+ version: "2023-06-01", // anthropic-compatible only
104
+ fetcher: fetch, // optional custom fetch implementation
68
105
  },
69
106
  defaults: {
70
- mode: "loose" | "strict", // loose allows repair
71
- selfHeal: 0 | 1 | 2, // retry attempts
72
- debug: false, // show repair logs
73
- timeout: { request: 30_000 }, // optional default timeouts
107
+ mode: "loose" | "strict", // loose allows repair
108
+ selfHeal: 1, // optional retry attempts
109
+ debug: false, // optional structured debug output
110
+ systemPrompt: "You are a helpful assistant.",
111
+ timeout: {
112
+ request: 30_000,
113
+ tool: 10_000,
114
+ },
115
+ },
116
+ });
117
+ ```
118
+
119
+ `baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
120
+
121
+ Common setup patterns:
122
+
123
+ ```typescript
124
+ // OpenAI-compatible gateway or local endpoint with top-level aliases
125
+ const llm = createLLM({
126
+ provider: "openai-compatible",
127
+ model: "gpt-4o-mini",
128
+ baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
129
+ apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
130
+ });
131
+
132
+ // Anthropic-compatible endpoint with explicit API version
133
+ const anthropic = createLLM({
134
+ provider: "anthropic-compatible",
135
+ model: "claude-3-5-sonnet-latest",
136
+ transport: {
137
+ baseURL: "https://api.anthropic.com",
138
+ apiKey: process.env.LLM_API_KEY,
139
+ version: "2023-06-01",
74
140
  },
75
141
  });
76
142
  ```
77
143
 
78
144
  ### Defining Schemas
79
145
 
80
- Use the `s` wrapper around Zod for enhanced schema building:
146
+ Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
81
147
 
82
148
  ```typescript
83
149
  import { s } from "extrait";
@@ -114,6 +180,8 @@ const Schema = s.schema(
114
180
 
115
181
  ### Making Structured Calls
116
182
 
183
+ `structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
184
+
117
185
  ```typescript
118
186
  // Simple prompt
119
187
  const result = await llm.structured(
@@ -158,7 +226,7 @@ const result = await llm.structured(
158
226
  },
159
227
  },
160
228
  request: {
161
- signal: abortController.signal, // optional AbortSignal
229
+ signal: AbortSignal.timeout(30_000), // optional AbortSignal
162
230
  },
163
231
  timeout: {
164
232
  request: 30_000, // ms per LLM HTTP request
@@ -170,6 +238,22 @@ const result = await llm.structured(
170
238
 
171
239
  `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
172
240
 
241
+ You can also pass provider request options through `request`:
242
+
243
+ ```typescript
244
+ const result = await llm.structured(
245
+ Schema,
246
+ prompt`Summarize this document: """${text}"""`,
247
+ {
248
+ request: {
249
+ temperature: 0,
250
+ maxTokens: 800,
251
+ body: { user: "demo-user" },
252
+ },
253
+ }
254
+ );
255
+ ```
256
+
173
257
  ### Images (multimodal)
174
258
 
175
259
  Use `images()` to build base64 image content blocks for vision-capable models.
@@ -247,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
247
331
 
248
332
  ### Result Object
249
333
 
334
+ Successful structured calls return validated data plus the raw response and trace metadata.
335
+
250
336
  ```typescript
251
337
  {
252
338
  data: T, // Validated data matching schema
253
339
  raw: string, // Raw LLM response
254
340
  thinkBlocks: ThinkBlock[], // Extracted <think> blocks
255
341
  json: unknown | null, // Parsed JSON before validation
256
- attempts: AttemptTrace[], // Self-heal attempts
342
+ attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
257
343
  usage?: {
258
344
  inputTokens?: number,
259
345
  outputTokens?: number,
@@ -264,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
264
350
  }
265
351
  ```
266
352
 
353
+ Each `attempts` entry includes:
354
+
355
+ ```typescript
356
+ {
357
+ attempt: number,
358
+ selfHeal: boolean,
359
+ via: "complete" | "stream",
360
+ raw: string,
361
+ thinkBlocks: ThinkBlock[],
362
+ json: unknown | null,
363
+ candidates: string[],
364
+ repairLog: string[],
365
+ zodIssues: z.ZodIssue[],
366
+ success: boolean,
367
+ usage?: LLMUsage,
368
+ finishReason?: string,
369
+ parsed: ParseLLMOutputResult<T>,
370
+ }
371
+ ```
372
+
267
373
  ### Error Handling
268
374
 
375
+ Catch `StructuredParseError` when repair and validation still fail.
376
+
269
377
  ```typescript
270
378
  import { StructuredParseError } from "extrait";
271
379
 
@@ -282,8 +390,68 @@ try {
282
390
  }
283
391
  ```
284
392
 
393
+ ### Embeddings
394
+
395
+ Generate vector embeddings using `llm.embed()`. It always returns `number[][]` — one vector per input string.
396
+
397
+ ```typescript
398
+ // Create a dedicated embedder client (recommended)
399
+ const embedder = createLLM({
400
+ provider: "openai-compatible",
401
+ model: "text-embedding-3-small",
402
+ transport: { apiKey: process.env.LLM_API_KEY },
403
+ });
404
+
405
+ // Single string
406
+ const { embeddings, model, usage } = await embedder.embed("Hello world");
407
+ const vector: number[] = embeddings[0];
408
+
409
+ // Multiple strings in one request
410
+ const { embeddings: batch } = await embedder.embed(["text one", "text two", "text three"]);
411
+ // batch[0], batch[1], batch[2] — one vector each
412
+
413
+ // Optional: override model or request extra options per call
414
+ const { embeddings: custom } = await embedder.embed("Hello", {
415
+ model: "text-embedding-ada-002",
416
+ dimensions: 512, // supported by text-embedding-3-* models
417
+ body: { user: "user-id" }, // pass-through to provider
418
+ });
419
+ ```
420
+
421
+ **Result shape:**
422
+
423
+ ```typescript
424
+ {
425
+ embeddings: number[][]; // one vector per input
426
+ model: string;
427
+ usage?: { inputTokens?: number; totalTokens?: number };
428
+ raw?: unknown; // full provider response
429
+ }
430
+ ```
431
+
432
+ **Anthropic / Voyage AI**
433
+
434
+ Anthropic does not provide a native embedding API. Their recommended solution is [Voyage AI](https://www.voyageai.com), which uses the same OpenAI-compatible format:
435
+
436
+ ```typescript
437
+ const embedder = createLLM({
438
+ provider: "openai-compatible",
439
+ model: "voyage-3",
440
+ transport: {
441
+ baseURL: "https://api.voyageai.com",
442
+ apiKey: process.env.LLM_API_KEY,
443
+ },
444
+ });
445
+
446
+ const { embeddings } = await embedder.embed(["query", "document"]);
447
+ ```
448
+
449
+ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive error pointing to Voyage AI.
450
+
285
451
  ### MCP Tools
286
452
 
453
+ Attach MCP clients at request time to let the model call tools during structured generation.
454
+
287
455
  ```typescript
288
456
  import { createMCPClient } from "extrait";
289
457
 
@@ -356,7 +524,7 @@ const llm = createLLM({
356
524
 
357
525
  ## Examples
358
526
 
359
- Run examples with: `bun run dev <example-name>`
527
+ Run repository examples with `bun run dev <example-name>`.
360
528
 
361
529
  Available examples:
362
530
  - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
@@ -370,6 +538,8 @@ Available examples:
370
538
  - `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
371
539
  - `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
372
540
  - `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
541
+ - `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
542
+ - `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
373
543
 
374
544
  Pass arguments after the example name:
375
545
  ```bash
@@ -380,10 +550,13 @@ bun run dev timeout 5000
380
550
  bun run dev simple "Bun.js runtime"
381
551
  bun run dev sentiment-analysis "I love this product."
382
552
  bun run dev multi-step-reasoning "Why is the sky blue?"
553
+ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
383
554
  ```
384
555
 
385
556
  ## Environment Variables
386
557
 
558
+ These environment variables are used across the examples and common client setups.
559
+
387
560
  - `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
388
561
  - `LLM_BASE_URL` - API endpoint (optional)
389
562
  - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
@@ -392,6 +565,8 @@ bun run dev multi-step-reasoning "Why is the sky blue?"
392
565
 
393
566
  ## Testing
394
567
 
568
+ Run the test suite with Bun.
569
+
395
570
  ```bash
396
571
  bun run test
397
572
  ```
@@ -1,8 +1,21 @@
1
1
  import { type ImageInput } from "./image";
2
2
  import type { LLMMessage } from "./types";
3
- export interface ConversationEntry {
4
- role: "user" | "assistant";
3
+ export type ConversationEntry = {
4
+ role: "user";
5
5
  text: string;
6
6
  images?: ImageInput[];
7
- }
7
+ } | {
8
+ role: "assistant";
9
+ text: string;
10
+ images?: ImageInput[];
11
+ } | {
12
+ role: "tool_call";
13
+ id: string;
14
+ name: string;
15
+ arguments?: Record<string, unknown>;
16
+ } | {
17
+ role: "tool_result";
18
+ id: string;
19
+ output: unknown;
20
+ };
8
21
  export declare function conversation(systemPrompt: string, entries: ConversationEntry[]): LLMMessage[];
package/dist/index.cjs CHANGED
@@ -1594,6 +1594,25 @@ function mergeUsage(base, next) {
1594
1594
  }
1595
1595
  return Object.keys(merged).length > 0 ? merged : undefined;
1596
1596
  }
1597
+ function preferLatestUsage(base, next) {
1598
+ if (!base && !next) {
1599
+ return;
1600
+ }
1601
+ const merged = {};
1602
+ if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
1603
+ merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
1604
+ }
1605
+ if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
1606
+ merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
1607
+ }
1608
+ if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
1609
+ merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
1610
+ }
1611
+ if (base?.cost !== undefined || next?.cost !== undefined) {
1612
+ merged.cost = next?.cost ?? base?.cost;
1613
+ }
1614
+ return Object.keys(merged).length > 0 ? merged : undefined;
1615
+ }
1597
1616
  function addOptional(a, b) {
1598
1617
  if (a === undefined && b === undefined) {
1599
1618
  return;
@@ -1606,6 +1625,7 @@ function createOpenAICompatibleAdapter(options) {
1606
1625
  const fetcher = options.fetcher ?? fetch;
1607
1626
  const path = options.path ?? "/v1/chat/completions";
1608
1627
  const responsesPath = options.responsesPath ?? "/v1/responses";
1628
+ const embeddingPath = options.embeddingPath ?? "/v1/embeddings";
1609
1629
  return {
1610
1630
  provider: "openai-compatible",
1611
1631
  model: options.model,
@@ -1657,7 +1677,7 @@ function createOpenAICompatibleAdapter(options) {
1657
1677
  const delta = pickAssistantDelta(json);
1658
1678
  const chunkUsage = pickUsage(json);
1659
1679
  const chunkFinishReason = pickFinishReason(json);
1660
- usage = mergeUsage(usage, chunkUsage);
1680
+ usage = preferLatestUsage(usage, chunkUsage);
1661
1681
  if (chunkFinishReason) {
1662
1682
  finishReason = chunkFinishReason;
1663
1683
  }
@@ -1678,6 +1698,36 @@ function createOpenAICompatibleAdapter(options) {
1678
1698
  const out = { text, usage, finishReason };
1679
1699
  callbacks.onComplete?.(out);
1680
1700
  return out;
1701
+ },
1702
+ async embed(request) {
1703
+ const body = cleanUndefined({
1704
+ ...options.defaultBody,
1705
+ ...request.body,
1706
+ model: request.model ?? options.model,
1707
+ input: request.input,
1708
+ dimensions: request.dimensions,
1709
+ encoding_format: "float"
1710
+ });
1711
+ const response = await fetcher(buildURL(options.baseURL, embeddingPath), {
1712
+ method: "POST",
1713
+ headers: buildHeaders(options),
1714
+ body: JSON.stringify(body)
1715
+ });
1716
+ if (!response.ok) {
1717
+ const message = await response.text();
1718
+ throw new Error(`HTTP ${response.status}: ${message}`);
1719
+ }
1720
+ const json = await response.json();
1721
+ const data = json.data;
1722
+ if (!Array.isArray(data)) {
1723
+ throw new Error("Unexpected embedding response: missing data array");
1724
+ }
1725
+ return {
1726
+ embeddings: data.map((d) => isRecord2(d) && Array.isArray(d.embedding) ? d.embedding : []),
1727
+ model: pickString(json.model) ?? body.model,
1728
+ usage: pickUsage(json),
1729
+ raw: json
1730
+ };
1681
1731
  }
1682
1732
  };
1683
1733
  }
@@ -1962,7 +2012,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1962
2012
  const chunkUsage = pickUsage(json);
1963
2013
  const chunkFinishReason = pickFinishReason(json);
1964
2014
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
1965
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2015
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
1966
2016
  if (chunkFinishReason) {
1967
2017
  roundFinishReason = chunkFinishReason;
1968
2018
  }
@@ -2068,7 +2118,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2068
2118
  const delta = pickResponsesStreamTextDelta(json);
2069
2119
  const chunkUsage = pickResponsesStreamUsage(json);
2070
2120
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2071
- usage = mergeUsage(usage, chunkUsage);
2121
+ usage = preferLatestUsage(usage, chunkUsage);
2072
2122
  if (chunkFinishReason) {
2073
2123
  finishReason = chunkFinishReason;
2074
2124
  }
@@ -2090,7 +2140,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2090
2140
  const out = {
2091
2141
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2092
2142
  raw: finalPayload,
2093
- usage: mergeUsage(usage, pickUsage(finalPayload)),
2143
+ usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2094
2144
  finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2095
2145
  };
2096
2146
  callbacks.onComplete?.(out);
@@ -2153,7 +2203,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2153
2203
  const chunkUsage = pickResponsesStreamUsage(json);
2154
2204
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2155
2205
  collectResponsesStreamToolCalls(json, streamedToolCalls);
2156
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2206
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2157
2207
  if (chunkFinishReason) {
2158
2208
  roundFinishReason = chunkFinishReason;
2159
2209
  }
@@ -2171,9 +2221,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2171
2221
  callbacks.onChunk?.(chunk);
2172
2222
  }
2173
2223
  });
2174
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2175
- const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
2176
- aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
2224
+ const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2225
+ aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
2177
2226
  if (roundFinishReason) {
2178
2227
  finishReason = roundFinishReason;
2179
2228
  } else if (roundPayload) {
@@ -2266,10 +2315,7 @@ function buildResponsesInput(request) {
2266
2315
  return buildMessages(request);
2267
2316
  }
2268
2317
  function toOpenAIMessage(message) {
2269
- return {
2270
- role: message.role,
2271
- content: message.content
2272
- };
2318
+ return { ...message };
2273
2319
  }
2274
2320
  function toResponsesTools(tools) {
2275
2321
  if (!Array.isArray(tools) || tools.length === 0) {
@@ -2716,7 +2762,7 @@ function createAnthropicCompatibleAdapter(options) {
2716
2762
  const delta = pickAnthropicDelta(json);
2717
2763
  const chunkUsage = pickUsage2(json);
2718
2764
  const chunkFinishReason = pickFinishReason2(json);
2719
- usage = mergeUsage(usage, chunkUsage);
2765
+ usage = preferLatestUsage(usage, chunkUsage);
2720
2766
  if (chunkFinishReason) {
2721
2767
  finishReason = chunkFinishReason;
2722
2768
  }
@@ -2737,6 +2783,9 @@ function createAnthropicCompatibleAdapter(options) {
2737
2783
  const out = { text, usage, finishReason };
2738
2784
  callbacks.onComplete?.(out);
2739
2785
  return out;
2786
+ },
2787
+ async embed() {
2788
+ throw new Error("Anthropic does not provide a native embedding API. " + "Use the openai-compatible provider with Voyage AI (https://api.voyageai.com) — " + "Anthropic's recommended embedding solution, which uses the same request format.");
2740
2789
  }
2741
2790
  };
2742
2791
  }
@@ -2910,7 +2959,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
2910
2959
  const chunkUsage = pickUsage2(json);
2911
2960
  const chunkFinishReason = pickFinishReason2(json);
2912
2961
  collectAnthropicStreamToolCalls(json, streamedToolCalls);
2913
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2962
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2914
2963
  if (chunkFinishReason) {
2915
2964
  roundFinishReason = chunkFinishReason;
2916
2965
  }
@@ -3015,6 +3064,23 @@ function toAnthropicInput(messages) {
3015
3064
  continue;
3016
3065
  }
3017
3066
  sawNonSystem = true;
3067
+ if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
3068
+ const parts = [];
3069
+ if (message.content)
3070
+ parts.push({ type: "text", text: message.content });
3071
+ for (const tc of message.tool_calls) {
3072
+ parts.push({ type: "tool_use", id: tc.id, name: tc.function.name, input: JSON.parse(tc.function.arguments) });
3073
+ }
3074
+ normalizedMessages.push({ role: "assistant", content: parts });
3075
+ continue;
3076
+ }
3077
+ if (message.role === "tool") {
3078
+ normalizedMessages.push({
3079
+ role: "user",
3080
+ content: [{ type: "tool_result", tool_use_id: message.tool_call_id, content: message.content }]
3081
+ });
3082
+ continue;
3083
+ }
3018
3084
  normalizedMessages.push({
3019
3085
  role: message.role,
3020
3086
  content: message.content
@@ -4516,7 +4582,7 @@ async function callModel(adapter, options) {
4516
4582
  handleTextDelta(chunk.textDelta);
4517
4583
  }
4518
4584
  if (chunk.usage) {
4519
- latestUsage = mergeUsage2(latestUsage, chunk.usage);
4585
+ latestUsage = preferLatestUsage(latestUsage, chunk.usage);
4520
4586
  }
4521
4587
  if (chunk.finishReason) {
4522
4588
  latestFinishReason = chunk.finishReason;
@@ -4524,7 +4590,7 @@ async function callModel(adapter, options) {
4524
4590
  }
4525
4591
  });
4526
4592
  const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
4527
- const usage = mergeUsage2(latestUsage, response2.usage);
4593
+ const usage = preferLatestUsage(latestUsage, response2.usage);
4528
4594
  const finishReason = response2.finishReason ?? latestFinishReason;
4529
4595
  emitStreamingData(finalText, true, usage, finishReason);
4530
4596
  emitObserve(options.observe, {
@@ -4794,6 +4860,12 @@ function createLLM(config, registry = createDefaultProviderRegistry()) {
4794
4860
  async structured(schema, prompt, options) {
4795
4861
  const merged = mergeStructuredOptions(defaults, options);
4796
4862
  return structured(adapter, schema, prompt, merged);
4863
+ },
4864
+ async embed(input, options = {}) {
4865
+ if (!adapter.embed) {
4866
+ throw new Error(`Provider "${adapter.provider ?? "unknown"}" does not support embeddings.`);
4867
+ }
4868
+ return adapter.embed({ ...options, input });
4797
4869
  }
4798
4870
  };
4799
4871
  }
@@ -4955,10 +5027,32 @@ async function resizeImage(source, size, mimeType) {
4955
5027
  function conversation(systemPrompt, entries) {
4956
5028
  return [
4957
5029
  { role: "system", content: systemPrompt },
4958
- ...entries.map((entry) => ({
4959
- role: entry.role,
4960
- content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
4961
- }))
5030
+ ...entries.map((entry) => {
5031
+ if (entry.role === "tool_call") {
5032
+ return {
5033
+ role: "assistant",
5034
+ content: "",
5035
+ tool_calls: [
5036
+ {
5037
+ id: entry.id,
5038
+ type: "function",
5039
+ function: { name: entry.name, arguments: JSON.stringify(entry.arguments ?? {}) }
5040
+ }
5041
+ ]
5042
+ };
5043
+ }
5044
+ if (entry.role === "tool_result") {
5045
+ return {
5046
+ role: "tool",
5047
+ content: typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output),
5048
+ tool_call_id: entry.id
5049
+ };
5050
+ }
5051
+ return {
5052
+ role: entry.role,
5053
+ content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
5054
+ };
5055
+ })
4962
5056
  ];
4963
5057
  }
4964
5058
  // src/prompt.ts
package/dist/index.d.ts CHANGED
@@ -14,4 +14,4 @@ export { createOpenAICompatibleAdapter, type OpenAICompatibleAdapterOptions, } f
14
14
  export { createAnthropicCompatibleAdapter, DEFAULT_ANTHROPIC_MAX_TOKENS, DEFAULT_ANTHROPIC_VERSION, type AnthropicCompatibleAdapterOptions, } from "./providers/anthropic-compatible";
15
15
  export { DEFAULT_MAX_TOOL_ROUNDS } from "./providers/mcp-runtime";
16
16
  export { createDefaultProviderRegistry, createModelAdapter, createProviderRegistry, registerBuiltinProviders, type BuiltinProviderKind, type ModelAdapterConfig, type ProviderFactory, type ProviderRegistry, type ProviderTransportConfig, } from "./providers/registry";
17
- export type { CandidateDiagnostics, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
17
+ export type { CandidateDiagnostics, EmbeddingRequest, EmbeddingResult, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolCallRef, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
package/dist/index.js CHANGED
@@ -1505,6 +1505,25 @@ function mergeUsage(base, next) {
1505
1505
  }
1506
1506
  return Object.keys(merged).length > 0 ? merged : undefined;
1507
1507
  }
1508
+ function preferLatestUsage(base, next) {
1509
+ if (!base && !next) {
1510
+ return;
1511
+ }
1512
+ const merged = {};
1513
+ if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
1514
+ merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
1515
+ }
1516
+ if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
1517
+ merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
1518
+ }
1519
+ if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
1520
+ merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
1521
+ }
1522
+ if (base?.cost !== undefined || next?.cost !== undefined) {
1523
+ merged.cost = next?.cost ?? base?.cost;
1524
+ }
1525
+ return Object.keys(merged).length > 0 ? merged : undefined;
1526
+ }
1508
1527
  function addOptional(a, b) {
1509
1528
  if (a === undefined && b === undefined) {
1510
1529
  return;
@@ -1517,6 +1536,7 @@ function createOpenAICompatibleAdapter(options) {
1517
1536
  const fetcher = options.fetcher ?? fetch;
1518
1537
  const path = options.path ?? "/v1/chat/completions";
1519
1538
  const responsesPath = options.responsesPath ?? "/v1/responses";
1539
+ const embeddingPath = options.embeddingPath ?? "/v1/embeddings";
1520
1540
  return {
1521
1541
  provider: "openai-compatible",
1522
1542
  model: options.model,
@@ -1568,7 +1588,7 @@ function createOpenAICompatibleAdapter(options) {
1568
1588
  const delta = pickAssistantDelta(json);
1569
1589
  const chunkUsage = pickUsage(json);
1570
1590
  const chunkFinishReason = pickFinishReason(json);
1571
- usage = mergeUsage(usage, chunkUsage);
1591
+ usage = preferLatestUsage(usage, chunkUsage);
1572
1592
  if (chunkFinishReason) {
1573
1593
  finishReason = chunkFinishReason;
1574
1594
  }
@@ -1589,6 +1609,36 @@ function createOpenAICompatibleAdapter(options) {
1589
1609
  const out = { text, usage, finishReason };
1590
1610
  callbacks.onComplete?.(out);
1591
1611
  return out;
1612
+ },
1613
+ async embed(request) {
1614
+ const body = cleanUndefined({
1615
+ ...options.defaultBody,
1616
+ ...request.body,
1617
+ model: request.model ?? options.model,
1618
+ input: request.input,
1619
+ dimensions: request.dimensions,
1620
+ encoding_format: "float"
1621
+ });
1622
+ const response = await fetcher(buildURL(options.baseURL, embeddingPath), {
1623
+ method: "POST",
1624
+ headers: buildHeaders(options),
1625
+ body: JSON.stringify(body)
1626
+ });
1627
+ if (!response.ok) {
1628
+ const message = await response.text();
1629
+ throw new Error(`HTTP ${response.status}: ${message}`);
1630
+ }
1631
+ const json = await response.json();
1632
+ const data = json.data;
1633
+ if (!Array.isArray(data)) {
1634
+ throw new Error("Unexpected embedding response: missing data array");
1635
+ }
1636
+ return {
1637
+ embeddings: data.map((d) => isRecord2(d) && Array.isArray(d.embedding) ? d.embedding : []),
1638
+ model: pickString(json.model) ?? body.model,
1639
+ usage: pickUsage(json),
1640
+ raw: json
1641
+ };
1592
1642
  }
1593
1643
  };
1594
1644
  }
@@ -1873,7 +1923,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1873
1923
  const chunkUsage = pickUsage(json);
1874
1924
  const chunkFinishReason = pickFinishReason(json);
1875
1925
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
1876
- roundUsage = mergeUsage(roundUsage, chunkUsage);
1926
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
1877
1927
  if (chunkFinishReason) {
1878
1928
  roundFinishReason = chunkFinishReason;
1879
1929
  }
@@ -1979,7 +2029,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
1979
2029
  const delta = pickResponsesStreamTextDelta(json);
1980
2030
  const chunkUsage = pickResponsesStreamUsage(json);
1981
2031
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
1982
- usage = mergeUsage(usage, chunkUsage);
2032
+ usage = preferLatestUsage(usage, chunkUsage);
1983
2033
  if (chunkFinishReason) {
1984
2034
  finishReason = chunkFinishReason;
1985
2035
  }
@@ -2001,7 +2051,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2001
2051
  const out = {
2002
2052
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2003
2053
  raw: finalPayload,
2004
- usage: mergeUsage(usage, pickUsage(finalPayload)),
2054
+ usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2005
2055
  finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2006
2056
  };
2007
2057
  callbacks.onComplete?.(out);
@@ -2064,7 +2114,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2064
2114
  const chunkUsage = pickResponsesStreamUsage(json);
2065
2115
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2066
2116
  collectResponsesStreamToolCalls(json, streamedToolCalls);
2067
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2117
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2068
2118
  if (chunkFinishReason) {
2069
2119
  roundFinishReason = chunkFinishReason;
2070
2120
  }
@@ -2082,9 +2132,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2082
2132
  callbacks.onChunk?.(chunk);
2083
2133
  }
2084
2134
  });
2085
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2086
- const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
2087
- aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
2135
+ const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2136
+ aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
2088
2137
  if (roundFinishReason) {
2089
2138
  finishReason = roundFinishReason;
2090
2139
  } else if (roundPayload) {
@@ -2177,10 +2226,7 @@ function buildResponsesInput(request) {
2177
2226
  return buildMessages(request);
2178
2227
  }
2179
2228
  function toOpenAIMessage(message) {
2180
- return {
2181
- role: message.role,
2182
- content: message.content
2183
- };
2229
+ return { ...message };
2184
2230
  }
2185
2231
  function toResponsesTools(tools) {
2186
2232
  if (!Array.isArray(tools) || tools.length === 0) {
@@ -2627,7 +2673,7 @@ function createAnthropicCompatibleAdapter(options) {
2627
2673
  const delta = pickAnthropicDelta(json);
2628
2674
  const chunkUsage = pickUsage2(json);
2629
2675
  const chunkFinishReason = pickFinishReason2(json);
2630
- usage = mergeUsage(usage, chunkUsage);
2676
+ usage = preferLatestUsage(usage, chunkUsage);
2631
2677
  if (chunkFinishReason) {
2632
2678
  finishReason = chunkFinishReason;
2633
2679
  }
@@ -2648,6 +2694,9 @@ function createAnthropicCompatibleAdapter(options) {
2648
2694
  const out = { text, usage, finishReason };
2649
2695
  callbacks.onComplete?.(out);
2650
2696
  return out;
2697
+ },
2698
+ async embed() {
2699
+ throw new Error("Anthropic does not provide a native embedding API. " + "Use the openai-compatible provider with Voyage AI (https://api.voyageai.com) — " + "Anthropic's recommended embedding solution, which uses the same request format.");
2651
2700
  }
2652
2701
  };
2653
2702
  }
@@ -2821,7 +2870,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
2821
2870
  const chunkUsage = pickUsage2(json);
2822
2871
  const chunkFinishReason = pickFinishReason2(json);
2823
2872
  collectAnthropicStreamToolCalls(json, streamedToolCalls);
2824
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2873
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2825
2874
  if (chunkFinishReason) {
2826
2875
  roundFinishReason = chunkFinishReason;
2827
2876
  }
@@ -2926,6 +2975,23 @@ function toAnthropicInput(messages) {
2926
2975
  continue;
2927
2976
  }
2928
2977
  sawNonSystem = true;
2978
+ if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
2979
+ const parts = [];
2980
+ if (message.content)
2981
+ parts.push({ type: "text", text: message.content });
2982
+ for (const tc of message.tool_calls) {
2983
+ parts.push({ type: "tool_use", id: tc.id, name: tc.function.name, input: JSON.parse(tc.function.arguments) });
2984
+ }
2985
+ normalizedMessages.push({ role: "assistant", content: parts });
2986
+ continue;
2987
+ }
2988
+ if (message.role === "tool") {
2989
+ normalizedMessages.push({
2990
+ role: "user",
2991
+ content: [{ type: "tool_result", tool_use_id: message.tool_call_id, content: message.content }]
2992
+ });
2993
+ continue;
2994
+ }
2929
2995
  normalizedMessages.push({
2930
2996
  role: message.role,
2931
2997
  content: message.content
@@ -4427,7 +4493,7 @@ async function callModel(adapter, options) {
4427
4493
  handleTextDelta(chunk.textDelta);
4428
4494
  }
4429
4495
  if (chunk.usage) {
4430
- latestUsage = mergeUsage2(latestUsage, chunk.usage);
4496
+ latestUsage = preferLatestUsage(latestUsage, chunk.usage);
4431
4497
  }
4432
4498
  if (chunk.finishReason) {
4433
4499
  latestFinishReason = chunk.finishReason;
@@ -4435,7 +4501,7 @@ async function callModel(adapter, options) {
4435
4501
  }
4436
4502
  });
4437
4503
  const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
4438
- const usage = mergeUsage2(latestUsage, response2.usage);
4504
+ const usage = preferLatestUsage(latestUsage, response2.usage);
4439
4505
  const finishReason = response2.finishReason ?? latestFinishReason;
4440
4506
  emitStreamingData(finalText, true, usage, finishReason);
4441
4507
  emitObserve(options.observe, {
@@ -4705,6 +4771,12 @@ function createLLM(config, registry = createDefaultProviderRegistry()) {
4705
4771
  async structured(schema, prompt, options) {
4706
4772
  const merged = mergeStructuredOptions(defaults, options);
4707
4773
  return structured(adapter, schema, prompt, merged);
4774
+ },
4775
+ async embed(input, options = {}) {
4776
+ if (!adapter.embed) {
4777
+ throw new Error(`Provider "${adapter.provider ?? "unknown"}" does not support embeddings.`);
4778
+ }
4779
+ return adapter.embed({ ...options, input });
4708
4780
  }
4709
4781
  };
4710
4782
  }
@@ -4870,10 +4942,32 @@ async function resizeImage(source, size, mimeType) {
4870
4942
  function conversation(systemPrompt, entries) {
4871
4943
  return [
4872
4944
  { role: "system", content: systemPrompt },
4873
- ...entries.map((entry) => ({
4874
- role: entry.role,
4875
- content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
4876
- }))
4945
+ ...entries.map((entry) => {
4946
+ if (entry.role === "tool_call") {
4947
+ return {
4948
+ role: "assistant",
4949
+ content: "",
4950
+ tool_calls: [
4951
+ {
4952
+ id: entry.id,
4953
+ type: "function",
4954
+ function: { name: entry.name, arguments: JSON.stringify(entry.arguments ?? {}) }
4955
+ }
4956
+ ]
4957
+ };
4958
+ }
4959
+ if (entry.role === "tool_result") {
4960
+ return {
4961
+ role: "tool",
4962
+ content: typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output),
4963
+ tool_call_id: entry.id
4964
+ };
4965
+ }
4966
+ return {
4967
+ role: entry.role,
4968
+ content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
4969
+ };
4970
+ })
4877
4971
  ];
4878
4972
  }
4879
4973
  // src/prompt.ts
package/dist/llm.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import type { z } from "zod";
2
2
  import { type ModelAdapterConfig, type ProviderRegistry } from "./providers/registry";
3
- import type { LLMAdapter, StructuredCallOptions, StructuredPromptBuilder, StructuredResult } from "./types";
3
+ import type { EmbeddingRequest, EmbeddingResult, LLMAdapter, StructuredCallOptions, StructuredPromptBuilder, StructuredResult } from "./types";
4
4
  export interface CreateLLMOptions extends ModelAdapterConfig {
5
5
  defaults?: StructuredCallOptions<z.ZodTypeAny>;
6
6
  }
@@ -9,5 +9,6 @@ export interface LLMClient {
9
9
  provider?: string;
10
10
  model?: string;
11
11
  structured<TSchema extends z.ZodTypeAny>(schema: TSchema, prompt: StructuredPromptBuilder, options?: StructuredCallOptions<TSchema>): Promise<StructuredResult<z.infer<TSchema>>>;
12
+ embed(input: string | string[], options?: Omit<EmbeddingRequest, "input">): Promise<EmbeddingResult>;
12
13
  }
13
14
  export declare function createLLM(config: CreateLLMOptions, registry?: ProviderRegistry): LLMClient;
@@ -5,6 +5,7 @@ export interface OpenAICompatibleAdapterOptions {
5
5
  apiKey?: string;
6
6
  path?: string;
7
7
  responsesPath?: string;
8
+ embeddingPath?: string;
8
9
  defaultMaxToolRounds?: number;
9
10
  headers?: HTTPHeaders;
10
11
  defaultBody?: Record<string, unknown>;
@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
7
7
  export declare function pickString(value: unknown): string | undefined;
8
8
  export declare function toFiniteNumber(value: unknown): number | undefined;
9
9
  export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
10
+ export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
package/dist/types.d.ts CHANGED
@@ -130,9 +130,18 @@ export interface LLMImageContent {
130
130
  };
131
131
  }
132
132
  export type LLMMessageContent = string | (LLMTextContent | LLMImageContent)[];
133
+ export interface LLMToolCallRef {
134
+ id: string;
135
+ type: "function";
136
+ function: {
137
+ name: string;
138
+ arguments: string;
139
+ };
140
+ }
133
141
  export interface LLMMessage {
134
142
  role: "system" | "user" | "assistant" | "tool";
135
143
  content: LLMMessageContent;
144
+ [key: string]: unknown;
136
145
  }
137
146
  export interface LLMRequest {
138
147
  prompt?: string;
@@ -179,11 +188,24 @@ export interface LLMStreamCallbacks {
179
188
  onChunk?: (chunk: LLMStreamChunk) => void;
180
189
  onComplete?: (response: LLMResponse) => void;
181
190
  }
191
+ export interface EmbeddingRequest {
192
+ input: string | string[];
193
+ model?: string;
194
+ dimensions?: number;
195
+ body?: Record<string, unknown>;
196
+ }
197
+ export interface EmbeddingResult {
198
+ embeddings: number[][];
199
+ model: string;
200
+ usage?: LLMUsage;
201
+ raw?: unknown;
202
+ }
182
203
  export interface LLMAdapter {
183
204
  provider?: string;
184
205
  model?: string;
185
206
  complete(request: LLMRequest): Promise<LLMResponse>;
186
207
  stream?(request: LLMRequest, callbacks?: LLMStreamCallbacks): Promise<LLMResponse>;
208
+ embed?(request: EmbeddingRequest): Promise<EmbeddingResult>;
187
209
  }
188
210
  export interface LLMToolCall {
189
211
  id: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "extrait",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/tterrasson/extrait.git"