extrait 0.5.4 → 0.5.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
8
8
  </a>
9
9
  </p>
10
10
 
11
- **Features:**
11
+ ## Features
12
+
12
13
  - Multi-candidate JSON extraction from LLM responses
13
14
  - Automatic repair with jsonrepair
14
15
  - Zod schema validation and coercion
@@ -19,6 +20,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
19
20
 
20
21
  ## Installation
21
22
 
23
+ Install `extrait` with your preferred package manager.
24
+
22
25
  ```bash
23
26
  bun add extrait
24
27
  # or
@@ -29,56 +32,118 @@ deno add npm:extrait
29
32
 
30
33
  ## Quick Start
31
34
 
35
+ Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
36
+
32
37
  ```typescript
33
38
  import { createLLM, prompt, s } from "extrait";
34
39
  import { z } from "zod";
35
40
 
36
41
  const llm = createLLM({
37
42
  provider: "openai-compatible",
38
- model: "gpt-5-nano",
39
- transport: { apiKey: process.env.LLM_API_KEY },
43
+ model: "mistralai/ministral-3-3b",
44
+ transport: {
45
+ baseURL: "http://localhost:1234/v1",
46
+ apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
47
+ },
40
48
  });
41
49
 
42
- const SummarySchema = s.schema(
43
- "Summary",
50
+ const RecipeSchema = s.schema(
51
+ "Recipe",
44
52
  z.object({
45
- summary: s.string().min(1).describe("One-sentence summary"),
46
- tags: s.array(s.string()).default([]).describe("Keywords"),
53
+ title: s.string().min(1).describe("Short recipe title"),
54
+ ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
47
55
  })
48
56
  );
49
57
 
50
58
  const result = await llm.structured(
51
- SummarySchema,
52
- prompt`Summarize this: """${text}"""`
59
+ RecipeSchema,
60
+ prompt`Extract a simple recipe from this text: """${text}"""`
53
61
  );
54
62
 
55
63
  console.log(result.data);
56
64
  ```
57
65
 
66
+ ## Examples at a Glance
67
+
68
+ These examples cover the most common usage patterns in the repository.
69
+
70
+ - [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
71
+ - [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
72
+ - [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
73
+ - [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
74
+ - [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
75
+ - [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
76
+
77
+ ```bash
78
+ bun run dev simple "Bun.js runtime"
79
+ bun run dev streaming
80
+ bun run dev calculator-tool
81
+ ```
82
+
58
83
  ## API Reference
59
84
 
60
- ### Creating an LLM Client
85
+ The sections below cover the main building blocks of the library.
86
+
87
+ ### Create an LLM Client
88
+
89
+ Use `createLLM()` to configure the provider, model, transport, and client defaults.
61
90
 
62
91
  ```typescript
63
92
  const llm = createLLM({
64
93
  provider: "openai-compatible" | "anthropic-compatible",
65
94
  model: "gpt-5-nano",
95
+ baseURL: "https://api.openai.com", // optional alias for transport.baseURL
96
+ apiKey: process.env.LLM_API_KEY, // optional alias for transport.apiKey
66
97
  transport: {
67
- baseURL: "https://api.openai.com", // optional
68
- apiKey: process.env.LLM_API_KEY, // optional
98
+ baseURL: "https://api.openai.com", // optional
99
+ apiKey: process.env.LLM_API_KEY, // optional
100
+ path: "/v1/chat/completions", // optional; anthropic-compatible usually uses /v1/messages
101
+ headers: { "x-trace-id": "docs-demo" }, // optional extra headers
102
+ defaultBody: { user: "docs-demo" }, // optional provider body defaults
103
+ version: "2023-06-01", // anthropic-compatible only
104
+ fetcher: fetch, // optional custom fetch implementation
69
105
  },
70
106
  defaults: {
71
- mode: "loose" | "strict", // loose allows repair
72
- selfHeal: 0 | 1 | 2, // retry attempts
73
- debug: false, // show repair logs
74
- timeout: { request: 30_000 }, // optional default timeouts
107
+ mode: "loose" | "strict", // loose allows repair
108
+ selfHeal: 1, // optional retry attempts
109
+ debug: false, // optional structured debug output
110
+ systemPrompt: "You are a helpful assistant.",
111
+ timeout: {
112
+ request: 30_000,
113
+ tool: 10_000,
114
+ },
115
+ },
116
+ });
117
+ ```
118
+
119
+ `baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
120
+
121
+ Common setup patterns:
122
+
123
+ ```typescript
124
+ // OpenAI-compatible gateway or local endpoint with top-level aliases
125
+ const llm = createLLM({
126
+ provider: "openai-compatible",
127
+ model: "gpt-4o-mini",
128
+ baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
129
+ apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
130
+ });
131
+
132
+ // Anthropic-compatible endpoint with explicit API version
133
+ const anthropic = createLLM({
134
+ provider: "anthropic-compatible",
135
+ model: "claude-3-5-sonnet-latest",
136
+ transport: {
137
+ baseURL: "https://api.anthropic.com",
138
+ apiKey: process.env.LLM_API_KEY,
139
+ version: "2023-06-01",
75
140
  },
76
141
  });
77
142
  ```
78
143
 
79
144
  ### Defining Schemas
80
145
 
81
- Use the `s` wrapper around Zod for enhanced schema building:
146
+ Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
82
147
 
83
148
  ```typescript
84
149
  import { s } from "extrait";
@@ -115,6 +180,8 @@ const Schema = s.schema(
115
180
 
116
181
  ### Making Structured Calls
117
182
 
183
+ `structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
184
+
118
185
  ```typescript
119
186
  // Simple prompt
120
187
  const result = await llm.structured(
@@ -159,7 +226,7 @@ const result = await llm.structured(
159
226
  },
160
227
  },
161
228
  request: {
162
- signal: abortController.signal, // optional AbortSignal
229
+ signal: AbortSignal.timeout(30_000), // optional AbortSignal
163
230
  },
164
231
  timeout: {
165
232
  request: 30_000, // ms per LLM HTTP request
@@ -171,6 +238,22 @@ const result = await llm.structured(
171
238
 
172
239
  `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
173
240
 
241
+ You can also pass provider request options through `request`:
242
+
243
+ ```typescript
244
+ const result = await llm.structured(
245
+ Schema,
246
+ prompt`Summarize this document: """${text}"""`,
247
+ {
248
+ request: {
249
+ temperature: 0,
250
+ maxTokens: 800,
251
+ body: { user: "demo-user" },
252
+ },
253
+ }
254
+ );
255
+ ```
256
+
174
257
  ### Images (multimodal)
175
258
 
176
259
  Use `images()` to build base64 image content blocks for vision-capable models.
@@ -248,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
248
331
 
249
332
  ### Result Object
250
333
 
334
+ Successful structured calls return validated data plus the raw response and trace metadata.
335
+
251
336
  ```typescript
252
337
  {
253
338
  data: T, // Validated data matching schema
254
339
  raw: string, // Raw LLM response
255
340
  thinkBlocks: ThinkBlock[], // Extracted <think> blocks
256
341
  json: unknown | null, // Parsed JSON before validation
257
- attempts: AttemptTrace[], // Self-heal attempts
342
+ attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
258
343
  usage?: {
259
344
  inputTokens?: number,
260
345
  outputTokens?: number,
@@ -265,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
265
350
  }
266
351
  ```
267
352
 
353
+ Each `attempts` entry includes:
354
+
355
+ ```typescript
356
+ {
357
+ attempt: number,
358
+ selfHeal: boolean,
359
+ via: "complete" | "stream",
360
+ raw: string,
361
+ thinkBlocks: ThinkBlock[],
362
+ json: unknown | null,
363
+ candidates: string[],
364
+ repairLog: string[],
365
+ zodIssues: z.ZodIssue[],
366
+ success: boolean,
367
+ usage?: LLMUsage,
368
+ finishReason?: string,
369
+ parsed: ParseLLMOutputResult<T>,
370
+ }
371
+ ```
372
+
268
373
  ### Error Handling
269
374
 
375
+ Catch `StructuredParseError` when repair and validation still fail.
376
+
270
377
  ```typescript
271
378
  import { StructuredParseError } from "extrait";
272
379
 
@@ -292,7 +399,7 @@ Generate vector embeddings using `llm.embed()`. It always returns `number[][]`
292
399
  const embedder = createLLM({
293
400
  provider: "openai-compatible",
294
401
  model: "text-embedding-3-small",
295
- transport: { apiKey: process.env.OPENAI_API_KEY },
402
+ transport: { apiKey: process.env.LLM_API_KEY },
296
403
  });
297
404
 
298
405
  // Single string
@@ -332,7 +439,7 @@ const embedder = createLLM({
332
439
  model: "voyage-3",
333
440
  transport: {
334
441
  baseURL: "https://api.voyageai.com",
335
- apiKey: process.env.VOYAGE_API_KEY,
442
+ apiKey: process.env.LLM_API_KEY,
336
443
  },
337
444
  });
338
445
 
@@ -343,6 +450,8 @@ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive
343
450
 
344
451
  ### MCP Tools
345
452
 
453
+ Attach MCP clients at request time to let the model call tools during structured generation.
454
+
346
455
  ```typescript
347
456
  import { createMCPClient } from "extrait";
348
457
 
@@ -415,7 +524,7 @@ const llm = createLLM({
415
524
 
416
525
  ## Examples
417
526
 
418
- Run examples with: `bun run dev <example-name>`
527
+ Run repository examples with `bun run dev <example-name>`.
419
528
 
420
529
  Available examples:
421
530
  - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
@@ -429,6 +538,7 @@ Available examples:
429
538
  - `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
430
539
  - `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
431
540
  - `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
541
+ - `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
432
542
  - `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
433
543
 
434
544
  Pass arguments after the example name:
@@ -445,6 +555,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
445
555
 
446
556
  ## Environment Variables
447
557
 
558
+ These environment variables are used across the examples and common client setups.
559
+
448
560
  - `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
449
561
  - `LLM_BASE_URL` - API endpoint (optional)
450
562
  - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
@@ -453,6 +565,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
453
565
 
454
566
  ## Testing
455
567
 
568
+ Run the test suite with Bun.
569
+
456
570
  ```bash
457
571
  bun run test
458
572
  ```
package/dist/index.cjs CHANGED
@@ -1594,6 +1594,25 @@ function mergeUsage(base, next) {
1594
1594
  }
1595
1595
  return Object.keys(merged).length > 0 ? merged : undefined;
1596
1596
  }
1597
+ function preferLatestUsage(base, next) {
1598
+ if (!base && !next) {
1599
+ return;
1600
+ }
1601
+ const merged = {};
1602
+ if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
1603
+ merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
1604
+ }
1605
+ if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
1606
+ merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
1607
+ }
1608
+ if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
1609
+ merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
1610
+ }
1611
+ if (base?.cost !== undefined || next?.cost !== undefined) {
1612
+ merged.cost = next?.cost ?? base?.cost;
1613
+ }
1614
+ return Object.keys(merged).length > 0 ? merged : undefined;
1615
+ }
1597
1616
  function addOptional(a, b) {
1598
1617
  if (a === undefined && b === undefined) {
1599
1618
  return;
@@ -1658,7 +1677,7 @@ function createOpenAICompatibleAdapter(options) {
1658
1677
  const delta = pickAssistantDelta(json);
1659
1678
  const chunkUsage = pickUsage(json);
1660
1679
  const chunkFinishReason = pickFinishReason(json);
1661
- usage = mergeUsage(usage, chunkUsage);
1680
+ usage = preferLatestUsage(usage, chunkUsage);
1662
1681
  if (chunkFinishReason) {
1663
1682
  finishReason = chunkFinishReason;
1664
1683
  }
@@ -1993,7 +2012,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1993
2012
  const chunkUsage = pickUsage(json);
1994
2013
  const chunkFinishReason = pickFinishReason(json);
1995
2014
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
1996
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2015
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
1997
2016
  if (chunkFinishReason) {
1998
2017
  roundFinishReason = chunkFinishReason;
1999
2018
  }
@@ -2099,7 +2118,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2099
2118
  const delta = pickResponsesStreamTextDelta(json);
2100
2119
  const chunkUsage = pickResponsesStreamUsage(json);
2101
2120
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2102
- usage = mergeUsage(usage, chunkUsage);
2121
+ usage = preferLatestUsage(usage, chunkUsage);
2103
2122
  if (chunkFinishReason) {
2104
2123
  finishReason = chunkFinishReason;
2105
2124
  }
@@ -2121,7 +2140,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2121
2140
  const out = {
2122
2141
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2123
2142
  raw: finalPayload,
2124
- usage: mergeUsage(usage, pickUsage(finalPayload)),
2143
+ usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2125
2144
  finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2126
2145
  };
2127
2146
  callbacks.onComplete?.(out);
@@ -2184,7 +2203,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2184
2203
  const chunkUsage = pickResponsesStreamUsage(json);
2185
2204
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2186
2205
  collectResponsesStreamToolCalls(json, streamedToolCalls);
2187
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2206
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2188
2207
  if (chunkFinishReason) {
2189
2208
  roundFinishReason = chunkFinishReason;
2190
2209
  }
@@ -2202,9 +2221,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2202
2221
  callbacks.onChunk?.(chunk);
2203
2222
  }
2204
2223
  });
2205
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2206
- const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
2207
- aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
2224
+ const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2225
+ aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
2208
2226
  if (roundFinishReason) {
2209
2227
  finishReason = roundFinishReason;
2210
2228
  } else if (roundPayload) {
@@ -2744,7 +2762,7 @@ function createAnthropicCompatibleAdapter(options) {
2744
2762
  const delta = pickAnthropicDelta(json);
2745
2763
  const chunkUsage = pickUsage2(json);
2746
2764
  const chunkFinishReason = pickFinishReason2(json);
2747
- usage = mergeUsage(usage, chunkUsage);
2765
+ usage = preferLatestUsage(usage, chunkUsage);
2748
2766
  if (chunkFinishReason) {
2749
2767
  finishReason = chunkFinishReason;
2750
2768
  }
@@ -2941,7 +2959,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
2941
2959
  const chunkUsage = pickUsage2(json);
2942
2960
  const chunkFinishReason = pickFinishReason2(json);
2943
2961
  collectAnthropicStreamToolCalls(json, streamedToolCalls);
2944
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2962
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2945
2963
  if (chunkFinishReason) {
2946
2964
  roundFinishReason = chunkFinishReason;
2947
2965
  }
@@ -4564,7 +4582,7 @@ async function callModel(adapter, options) {
4564
4582
  handleTextDelta(chunk.textDelta);
4565
4583
  }
4566
4584
  if (chunk.usage) {
4567
- latestUsage = mergeUsage2(latestUsage, chunk.usage);
4585
+ latestUsage = preferLatestUsage(latestUsage, chunk.usage);
4568
4586
  }
4569
4587
  if (chunk.finishReason) {
4570
4588
  latestFinishReason = chunk.finishReason;
@@ -4572,7 +4590,7 @@ async function callModel(adapter, options) {
4572
4590
  }
4573
4591
  });
4574
4592
  const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
4575
- const usage = mergeUsage2(latestUsage, response2.usage);
4593
+ const usage = preferLatestUsage(latestUsage, response2.usage);
4576
4594
  const finishReason = response2.finishReason ?? latestFinishReason;
4577
4595
  emitStreamingData(finalText, true, usage, finishReason);
4578
4596
  emitObserve(options.observe, {
package/dist/index.js CHANGED
@@ -1505,6 +1505,25 @@ function mergeUsage(base, next) {
1505
1505
  }
1506
1506
  return Object.keys(merged).length > 0 ? merged : undefined;
1507
1507
  }
1508
+ function preferLatestUsage(base, next) {
1509
+ if (!base && !next) {
1510
+ return;
1511
+ }
1512
+ const merged = {};
1513
+ if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
1514
+ merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
1515
+ }
1516
+ if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
1517
+ merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
1518
+ }
1519
+ if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
1520
+ merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
1521
+ }
1522
+ if (base?.cost !== undefined || next?.cost !== undefined) {
1523
+ merged.cost = next?.cost ?? base?.cost;
1524
+ }
1525
+ return Object.keys(merged).length > 0 ? merged : undefined;
1526
+ }
1508
1527
  function addOptional(a, b) {
1509
1528
  if (a === undefined && b === undefined) {
1510
1529
  return;
@@ -1569,7 +1588,7 @@ function createOpenAICompatibleAdapter(options) {
1569
1588
  const delta = pickAssistantDelta(json);
1570
1589
  const chunkUsage = pickUsage(json);
1571
1590
  const chunkFinishReason = pickFinishReason(json);
1572
- usage = mergeUsage(usage, chunkUsage);
1591
+ usage = preferLatestUsage(usage, chunkUsage);
1573
1592
  if (chunkFinishReason) {
1574
1593
  finishReason = chunkFinishReason;
1575
1594
  }
@@ -1904,7 +1923,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1904
1923
  const chunkUsage = pickUsage(json);
1905
1924
  const chunkFinishReason = pickFinishReason(json);
1906
1925
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
1907
- roundUsage = mergeUsage(roundUsage, chunkUsage);
1926
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
1908
1927
  if (chunkFinishReason) {
1909
1928
  roundFinishReason = chunkFinishReason;
1910
1929
  }
@@ -2010,7 +2029,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2010
2029
  const delta = pickResponsesStreamTextDelta(json);
2011
2030
  const chunkUsage = pickResponsesStreamUsage(json);
2012
2031
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2013
- usage = mergeUsage(usage, chunkUsage);
2032
+ usage = preferLatestUsage(usage, chunkUsage);
2014
2033
  if (chunkFinishReason) {
2015
2034
  finishReason = chunkFinishReason;
2016
2035
  }
@@ -2032,7 +2051,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2032
2051
  const out = {
2033
2052
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2034
2053
  raw: finalPayload,
2035
- usage: mergeUsage(usage, pickUsage(finalPayload)),
2054
+ usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2036
2055
  finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2037
2056
  };
2038
2057
  callbacks.onComplete?.(out);
@@ -2095,7 +2114,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2095
2114
  const chunkUsage = pickResponsesStreamUsage(json);
2096
2115
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2097
2116
  collectResponsesStreamToolCalls(json, streamedToolCalls);
2098
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2117
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2099
2118
  if (chunkFinishReason) {
2100
2119
  roundFinishReason = chunkFinishReason;
2101
2120
  }
@@ -2113,9 +2132,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2113
2132
  callbacks.onChunk?.(chunk);
2114
2133
  }
2115
2134
  });
2116
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2117
- const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
2118
- aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
2135
+ const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2136
+ aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
2119
2137
  if (roundFinishReason) {
2120
2138
  finishReason = roundFinishReason;
2121
2139
  } else if (roundPayload) {
@@ -2655,7 +2673,7 @@ function createAnthropicCompatibleAdapter(options) {
2655
2673
  const delta = pickAnthropicDelta(json);
2656
2674
  const chunkUsage = pickUsage2(json);
2657
2675
  const chunkFinishReason = pickFinishReason2(json);
2658
- usage = mergeUsage(usage, chunkUsage);
2676
+ usage = preferLatestUsage(usage, chunkUsage);
2659
2677
  if (chunkFinishReason) {
2660
2678
  finishReason = chunkFinishReason;
2661
2679
  }
@@ -2852,7 +2870,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
2852
2870
  const chunkUsage = pickUsage2(json);
2853
2871
  const chunkFinishReason = pickFinishReason2(json);
2854
2872
  collectAnthropicStreamToolCalls(json, streamedToolCalls);
2855
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2873
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2856
2874
  if (chunkFinishReason) {
2857
2875
  roundFinishReason = chunkFinishReason;
2858
2876
  }
@@ -4475,7 +4493,7 @@ async function callModel(adapter, options) {
4475
4493
  handleTextDelta(chunk.textDelta);
4476
4494
  }
4477
4495
  if (chunk.usage) {
4478
- latestUsage = mergeUsage2(latestUsage, chunk.usage);
4496
+ latestUsage = preferLatestUsage(latestUsage, chunk.usage);
4479
4497
  }
4480
4498
  if (chunk.finishReason) {
4481
4499
  latestFinishReason = chunk.finishReason;
@@ -4483,7 +4501,7 @@ async function callModel(adapter, options) {
4483
4501
  }
4484
4502
  });
4485
4503
  const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
4486
- const usage = mergeUsage2(latestUsage, response2.usage);
4504
+ const usage = preferLatestUsage(latestUsage, response2.usage);
4487
4505
  const finishReason = response2.finishReason ?? latestFinishReason;
4488
4506
  emitStreamingData(finalText, true, usage, finishReason);
4489
4507
  emitObserve(options.observe, {
@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
7
7
  export declare function pickString(value: unknown): string | undefined;
8
8
  export declare function toFiniteNumber(value: unknown): number | undefined;
9
9
  export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
10
+ export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "extrait",
3
- "version": "0.5.4",
3
+ "version": "0.5.5",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/tterrasson/extrait.git"