extrait 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -22
- package/dist/index.cjs +30 -12
- package/dist/index.js +30 -12
- package/dist/providers/utils.d.ts +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
|
|
|
8
8
|
</a>
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
## Features
|
|
12
|
+
|
|
12
13
|
- Multi-candidate JSON extraction from LLM responses
|
|
13
14
|
- Automatic repair with jsonrepair
|
|
14
15
|
- Zod schema validation and coercion
|
|
@@ -19,6 +20,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
|
|
|
19
20
|
|
|
20
21
|
## Installation
|
|
21
22
|
|
|
23
|
+
Install `extrait` with your preferred package manager.
|
|
24
|
+
|
|
22
25
|
```bash
|
|
23
26
|
bun add extrait
|
|
24
27
|
# or
|
|
@@ -29,56 +32,118 @@ deno add npm:extrait
|
|
|
29
32
|
|
|
30
33
|
## Quick Start
|
|
31
34
|
|
|
35
|
+
Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
|
|
36
|
+
|
|
32
37
|
```typescript
|
|
33
38
|
import { createLLM, prompt, s } from "extrait";
|
|
34
39
|
import { z } from "zod";
|
|
35
40
|
|
|
36
41
|
const llm = createLLM({
|
|
37
42
|
provider: "openai-compatible",
|
|
38
|
-
model: "
|
|
39
|
-
transport: {
|
|
43
|
+
model: "mistralai/ministral-3-3b",
|
|
44
|
+
transport: {
|
|
45
|
+
baseURL: "http://localhost:1234/v1",
|
|
46
|
+
apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
|
|
47
|
+
},
|
|
40
48
|
});
|
|
41
49
|
|
|
42
|
-
const
|
|
43
|
-
"
|
|
50
|
+
const RecipeSchema = s.schema(
|
|
51
|
+
"Recipe",
|
|
44
52
|
z.object({
|
|
45
|
-
|
|
46
|
-
|
|
53
|
+
title: s.string().min(1).describe("Short recipe title"),
|
|
54
|
+
ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
|
|
47
55
|
})
|
|
48
56
|
);
|
|
49
57
|
|
|
50
58
|
const result = await llm.structured(
|
|
51
|
-
|
|
52
|
-
prompt`
|
|
59
|
+
RecipeSchema,
|
|
60
|
+
prompt`Extract a simple recipe from this text: """${text}"""`
|
|
53
61
|
);
|
|
54
62
|
|
|
55
63
|
console.log(result.data);
|
|
56
64
|
```
|
|
57
65
|
|
|
66
|
+
## Examples at a Glance
|
|
67
|
+
|
|
68
|
+
These examples cover the most common usage patterns in the repository.
|
|
69
|
+
|
|
70
|
+
- [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
|
|
71
|
+
- [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
|
|
72
|
+
- [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
|
|
73
|
+
- [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
|
|
74
|
+
- [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
|
|
75
|
+
- [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
bun run dev simple "Bun.js runtime"
|
|
79
|
+
bun run dev streaming
|
|
80
|
+
bun run dev calculator-tool
|
|
81
|
+
```
|
|
82
|
+
|
|
58
83
|
## API Reference
|
|
59
84
|
|
|
60
|
-
|
|
85
|
+
The sections below cover the main building blocks of the library.
|
|
86
|
+
|
|
87
|
+
### Create an LLM Client
|
|
88
|
+
|
|
89
|
+
Use `createLLM()` to configure the provider, model, transport, and client defaults.
|
|
61
90
|
|
|
62
91
|
```typescript
|
|
63
92
|
const llm = createLLM({
|
|
64
93
|
provider: "openai-compatible" | "anthropic-compatible",
|
|
65
94
|
model: "gpt-5-nano",
|
|
95
|
+
baseURL: "https://api.openai.com", // optional alias for transport.baseURL
|
|
96
|
+
apiKey: process.env.LLM_API_KEY, // optional alias for transport.apiKey
|
|
66
97
|
transport: {
|
|
67
|
-
baseURL: "https://api.openai.com",
|
|
68
|
-
apiKey: process.env.LLM_API_KEY,
|
|
98
|
+
baseURL: "https://api.openai.com", // optional
|
|
99
|
+
apiKey: process.env.LLM_API_KEY, // optional
|
|
100
|
+
path: "/v1/chat/completions", // optional; anthropic-compatible usually uses /v1/messages
|
|
101
|
+
headers: { "x-trace-id": "docs-demo" }, // optional extra headers
|
|
102
|
+
defaultBody: { user: "docs-demo" }, // optional provider body defaults
|
|
103
|
+
version: "2023-06-01", // anthropic-compatible only
|
|
104
|
+
fetcher: fetch, // optional custom fetch implementation
|
|
69
105
|
},
|
|
70
106
|
defaults: {
|
|
71
|
-
mode: "loose" | "strict",
|
|
72
|
-
selfHeal:
|
|
73
|
-
debug: false,
|
|
74
|
-
|
|
107
|
+
mode: "loose" | "strict", // loose allows repair
|
|
108
|
+
selfHeal: 1, // optional retry attempts
|
|
109
|
+
debug: false, // optional structured debug output
|
|
110
|
+
systemPrompt: "You are a helpful assistant.",
|
|
111
|
+
timeout: {
|
|
112
|
+
request: 30_000,
|
|
113
|
+
tool: 10_000,
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
});
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
`baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
|
|
120
|
+
|
|
121
|
+
Common setup patterns:
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
// OpenAI-compatible gateway or local endpoint with top-level aliases
|
|
125
|
+
const llm = createLLM({
|
|
126
|
+
provider: "openai-compatible",
|
|
127
|
+
model: "gpt-4o-mini",
|
|
128
|
+
baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
|
|
129
|
+
apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
// Anthropic-compatible endpoint with explicit API version
|
|
133
|
+
const anthropic = createLLM({
|
|
134
|
+
provider: "anthropic-compatible",
|
|
135
|
+
model: "claude-3-5-sonnet-latest",
|
|
136
|
+
transport: {
|
|
137
|
+
baseURL: "https://api.anthropic.com",
|
|
138
|
+
apiKey: process.env.LLM_API_KEY,
|
|
139
|
+
version: "2023-06-01",
|
|
75
140
|
},
|
|
76
141
|
});
|
|
77
142
|
```
|
|
78
143
|
|
|
79
144
|
### Defining Schemas
|
|
80
145
|
|
|
81
|
-
Use the `s` wrapper around Zod for
|
|
146
|
+
Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
|
|
82
147
|
|
|
83
148
|
```typescript
|
|
84
149
|
import { s } from "extrait";
|
|
@@ -115,6 +180,8 @@ const Schema = s.schema(
|
|
|
115
180
|
|
|
116
181
|
### Making Structured Calls
|
|
117
182
|
|
|
183
|
+
`structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
|
|
184
|
+
|
|
118
185
|
```typescript
|
|
119
186
|
// Simple prompt
|
|
120
187
|
const result = await llm.structured(
|
|
@@ -159,7 +226,7 @@ const result = await llm.structured(
|
|
|
159
226
|
},
|
|
160
227
|
},
|
|
161
228
|
request: {
|
|
162
|
-
signal:
|
|
229
|
+
signal: AbortSignal.timeout(30_000), // optional AbortSignal
|
|
163
230
|
},
|
|
164
231
|
timeout: {
|
|
165
232
|
request: 30_000, // ms per LLM HTTP request
|
|
@@ -171,6 +238,22 @@ const result = await llm.structured(
|
|
|
171
238
|
|
|
172
239
|
`prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
|
|
173
240
|
|
|
241
|
+
You can also pass provider request options through `request`:
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
const result = await llm.structured(
|
|
245
|
+
Schema,
|
|
246
|
+
prompt`Summarize this document: """${text}"""`,
|
|
247
|
+
{
|
|
248
|
+
request: {
|
|
249
|
+
temperature: 0,
|
|
250
|
+
maxTokens: 800,
|
|
251
|
+
body: { user: "demo-user" },
|
|
252
|
+
},
|
|
253
|
+
}
|
|
254
|
+
);
|
|
255
|
+
```
|
|
256
|
+
|
|
174
257
|
### Images (multimodal)
|
|
175
258
|
|
|
176
259
|
Use `images()` to build base64 image content blocks for vision-capable models.
|
|
@@ -248,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
|
|
|
248
331
|
|
|
249
332
|
### Result Object
|
|
250
333
|
|
|
334
|
+
Successful structured calls return validated data plus the raw response and trace metadata.
|
|
335
|
+
|
|
251
336
|
```typescript
|
|
252
337
|
{
|
|
253
338
|
data: T, // Validated data matching schema
|
|
254
339
|
raw: string, // Raw LLM response
|
|
255
340
|
thinkBlocks: ThinkBlock[], // Extracted <think> blocks
|
|
256
341
|
json: unknown | null, // Parsed JSON before validation
|
|
257
|
-
attempts:
|
|
342
|
+
attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
|
|
258
343
|
usage?: {
|
|
259
344
|
inputTokens?: number,
|
|
260
345
|
outputTokens?: number,
|
|
@@ -265,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
|
|
|
265
350
|
}
|
|
266
351
|
```
|
|
267
352
|
|
|
353
|
+
Each `attempts` entry includes:
|
|
354
|
+
|
|
355
|
+
```typescript
|
|
356
|
+
{
|
|
357
|
+
attempt: number,
|
|
358
|
+
selfHeal: boolean,
|
|
359
|
+
via: "complete" | "stream",
|
|
360
|
+
raw: string,
|
|
361
|
+
thinkBlocks: ThinkBlock[],
|
|
362
|
+
json: unknown | null,
|
|
363
|
+
candidates: string[],
|
|
364
|
+
repairLog: string[],
|
|
365
|
+
zodIssues: z.ZodIssue[],
|
|
366
|
+
success: boolean,
|
|
367
|
+
usage?: LLMUsage,
|
|
368
|
+
finishReason?: string,
|
|
369
|
+
parsed: ParseLLMOutputResult<T>,
|
|
370
|
+
}
|
|
371
|
+
```
|
|
372
|
+
|
|
268
373
|
### Error Handling
|
|
269
374
|
|
|
375
|
+
Catch `StructuredParseError` when repair and validation still fail.
|
|
376
|
+
|
|
270
377
|
```typescript
|
|
271
378
|
import { StructuredParseError } from "extrait";
|
|
272
379
|
|
|
@@ -292,7 +399,7 @@ Generate vector embeddings using `llm.embed()`. It always returns `number[][]`
|
|
|
292
399
|
const embedder = createLLM({
|
|
293
400
|
provider: "openai-compatible",
|
|
294
401
|
model: "text-embedding-3-small",
|
|
295
|
-
transport: { apiKey: process.env.
|
|
402
|
+
transport: { apiKey: process.env.LLM_API_KEY },
|
|
296
403
|
});
|
|
297
404
|
|
|
298
405
|
// Single string
|
|
@@ -332,7 +439,7 @@ const embedder = createLLM({
|
|
|
332
439
|
model: "voyage-3",
|
|
333
440
|
transport: {
|
|
334
441
|
baseURL: "https://api.voyageai.com",
|
|
335
|
-
apiKey: process.env.
|
|
442
|
+
apiKey: process.env.LLM_API_KEY,
|
|
336
443
|
},
|
|
337
444
|
});
|
|
338
445
|
|
|
@@ -343,6 +450,8 @@ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive
|
|
|
343
450
|
|
|
344
451
|
### MCP Tools
|
|
345
452
|
|
|
453
|
+
Attach MCP clients at request time to let the model call tools during structured generation.
|
|
454
|
+
|
|
346
455
|
```typescript
|
|
347
456
|
import { createMCPClient } from "extrait";
|
|
348
457
|
|
|
@@ -415,7 +524,7 @@ const llm = createLLM({
|
|
|
415
524
|
|
|
416
525
|
## Examples
|
|
417
526
|
|
|
418
|
-
Run examples with
|
|
527
|
+
Run repository examples with `bun run dev <example-name>`.
|
|
419
528
|
|
|
420
529
|
Available examples:
|
|
421
530
|
- `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
|
|
@@ -429,6 +538,7 @@ Available examples:
|
|
|
429
538
|
- `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
|
|
430
539
|
- `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
|
|
431
540
|
- `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
|
|
541
|
+
- `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
|
|
432
542
|
- `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
|
|
433
543
|
|
|
434
544
|
Pass arguments after the example name:
|
|
@@ -445,6 +555,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
|
|
|
445
555
|
|
|
446
556
|
## Environment Variables
|
|
447
557
|
|
|
558
|
+
These environment variables are used across the examples and common client setups.
|
|
559
|
+
|
|
448
560
|
- `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
|
|
449
561
|
- `LLM_BASE_URL` - API endpoint (optional)
|
|
450
562
|
- `LLM_MODEL` - Model name (default: `gpt-5-nano`)
|
|
@@ -453,6 +565,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
|
|
|
453
565
|
|
|
454
566
|
## Testing
|
|
455
567
|
|
|
568
|
+
Run the test suite with Bun.
|
|
569
|
+
|
|
456
570
|
```bash
|
|
457
571
|
bun run test
|
|
458
572
|
```
|
package/dist/index.cjs
CHANGED
|
@@ -1594,6 +1594,25 @@ function mergeUsage(base, next) {
|
|
|
1594
1594
|
}
|
|
1595
1595
|
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1596
1596
|
}
|
|
1597
|
+
function preferLatestUsage(base, next) {
|
|
1598
|
+
if (!base && !next) {
|
|
1599
|
+
return;
|
|
1600
|
+
}
|
|
1601
|
+
const merged = {};
|
|
1602
|
+
if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
|
|
1603
|
+
merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
|
|
1604
|
+
}
|
|
1605
|
+
if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
|
|
1606
|
+
merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
|
|
1607
|
+
}
|
|
1608
|
+
if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
|
|
1609
|
+
merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
|
|
1610
|
+
}
|
|
1611
|
+
if (base?.cost !== undefined || next?.cost !== undefined) {
|
|
1612
|
+
merged.cost = next?.cost ?? base?.cost;
|
|
1613
|
+
}
|
|
1614
|
+
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1615
|
+
}
|
|
1597
1616
|
function addOptional(a, b) {
|
|
1598
1617
|
if (a === undefined && b === undefined) {
|
|
1599
1618
|
return;
|
|
@@ -1658,7 +1677,7 @@ function createOpenAICompatibleAdapter(options) {
|
|
|
1658
1677
|
const delta = pickAssistantDelta(json);
|
|
1659
1678
|
const chunkUsage = pickUsage(json);
|
|
1660
1679
|
const chunkFinishReason = pickFinishReason(json);
|
|
1661
|
-
usage =
|
|
1680
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
1662
1681
|
if (chunkFinishReason) {
|
|
1663
1682
|
finishReason = chunkFinishReason;
|
|
1664
1683
|
}
|
|
@@ -1993,7 +2012,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
|
|
|
1993
2012
|
const chunkUsage = pickUsage(json);
|
|
1994
2013
|
const chunkFinishReason = pickFinishReason(json);
|
|
1995
2014
|
collectOpenAIStreamToolCalls(json, streamedToolCalls);
|
|
1996
|
-
roundUsage =
|
|
2015
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
1997
2016
|
if (chunkFinishReason) {
|
|
1998
2017
|
roundFinishReason = chunkFinishReason;
|
|
1999
2018
|
}
|
|
@@ -2099,7 +2118,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2099
2118
|
const delta = pickResponsesStreamTextDelta(json);
|
|
2100
2119
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2101
2120
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2102
|
-
usage =
|
|
2121
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2103
2122
|
if (chunkFinishReason) {
|
|
2104
2123
|
finishReason = chunkFinishReason;
|
|
2105
2124
|
}
|
|
@@ -2121,7 +2140,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2121
2140
|
const out = {
|
|
2122
2141
|
text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
|
|
2123
2142
|
raw: finalPayload,
|
|
2124
|
-
usage:
|
|
2143
|
+
usage: preferLatestUsage(usage, pickUsage(finalPayload)),
|
|
2125
2144
|
finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
|
|
2126
2145
|
};
|
|
2127
2146
|
callbacks.onComplete?.(out);
|
|
@@ -2184,7 +2203,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2184
2203
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2185
2204
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2186
2205
|
collectResponsesStreamToolCalls(json, streamedToolCalls);
|
|
2187
|
-
roundUsage =
|
|
2206
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2188
2207
|
if (chunkFinishReason) {
|
|
2189
2208
|
roundFinishReason = chunkFinishReason;
|
|
2190
2209
|
}
|
|
@@ -2202,9 +2221,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2202
2221
|
callbacks.onChunk?.(chunk);
|
|
2203
2222
|
}
|
|
2204
2223
|
});
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
|
|
2224
|
+
const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
|
|
2225
|
+
aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
|
|
2208
2226
|
if (roundFinishReason) {
|
|
2209
2227
|
finishReason = roundFinishReason;
|
|
2210
2228
|
} else if (roundPayload) {
|
|
@@ -2744,7 +2762,7 @@ function createAnthropicCompatibleAdapter(options) {
|
|
|
2744
2762
|
const delta = pickAnthropicDelta(json);
|
|
2745
2763
|
const chunkUsage = pickUsage2(json);
|
|
2746
2764
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2747
|
-
usage =
|
|
2765
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2748
2766
|
if (chunkFinishReason) {
|
|
2749
2767
|
finishReason = chunkFinishReason;
|
|
2750
2768
|
}
|
|
@@ -2941,7 +2959,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
|
|
|
2941
2959
|
const chunkUsage = pickUsage2(json);
|
|
2942
2960
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2943
2961
|
collectAnthropicStreamToolCalls(json, streamedToolCalls);
|
|
2944
|
-
roundUsage =
|
|
2962
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2945
2963
|
if (chunkFinishReason) {
|
|
2946
2964
|
roundFinishReason = chunkFinishReason;
|
|
2947
2965
|
}
|
|
@@ -4564,7 +4582,7 @@ async function callModel(adapter, options) {
|
|
|
4564
4582
|
handleTextDelta(chunk.textDelta);
|
|
4565
4583
|
}
|
|
4566
4584
|
if (chunk.usage) {
|
|
4567
|
-
latestUsage =
|
|
4585
|
+
latestUsage = preferLatestUsage(latestUsage, chunk.usage);
|
|
4568
4586
|
}
|
|
4569
4587
|
if (chunk.finishReason) {
|
|
4570
4588
|
latestFinishReason = chunk.finishReason;
|
|
@@ -4572,7 +4590,7 @@ async function callModel(adapter, options) {
|
|
|
4572
4590
|
}
|
|
4573
4591
|
});
|
|
4574
4592
|
const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
|
|
4575
|
-
const usage =
|
|
4593
|
+
const usage = preferLatestUsage(latestUsage, response2.usage);
|
|
4576
4594
|
const finishReason = response2.finishReason ?? latestFinishReason;
|
|
4577
4595
|
emitStreamingData(finalText, true, usage, finishReason);
|
|
4578
4596
|
emitObserve(options.observe, {
|
package/dist/index.js
CHANGED
|
@@ -1505,6 +1505,25 @@ function mergeUsage(base, next) {
|
|
|
1505
1505
|
}
|
|
1506
1506
|
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1507
1507
|
}
|
|
1508
|
+
function preferLatestUsage(base, next) {
|
|
1509
|
+
if (!base && !next) {
|
|
1510
|
+
return;
|
|
1511
|
+
}
|
|
1512
|
+
const merged = {};
|
|
1513
|
+
if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
|
|
1514
|
+
merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
|
|
1515
|
+
}
|
|
1516
|
+
if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
|
|
1517
|
+
merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
|
|
1518
|
+
}
|
|
1519
|
+
if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
|
|
1520
|
+
merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
|
|
1521
|
+
}
|
|
1522
|
+
if (base?.cost !== undefined || next?.cost !== undefined) {
|
|
1523
|
+
merged.cost = next?.cost ?? base?.cost;
|
|
1524
|
+
}
|
|
1525
|
+
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1526
|
+
}
|
|
1508
1527
|
function addOptional(a, b) {
|
|
1509
1528
|
if (a === undefined && b === undefined) {
|
|
1510
1529
|
return;
|
|
@@ -1569,7 +1588,7 @@ function createOpenAICompatibleAdapter(options) {
|
|
|
1569
1588
|
const delta = pickAssistantDelta(json);
|
|
1570
1589
|
const chunkUsage = pickUsage(json);
|
|
1571
1590
|
const chunkFinishReason = pickFinishReason(json);
|
|
1572
|
-
usage =
|
|
1591
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
1573
1592
|
if (chunkFinishReason) {
|
|
1574
1593
|
finishReason = chunkFinishReason;
|
|
1575
1594
|
}
|
|
@@ -1904,7 +1923,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
|
|
|
1904
1923
|
const chunkUsage = pickUsage(json);
|
|
1905
1924
|
const chunkFinishReason = pickFinishReason(json);
|
|
1906
1925
|
collectOpenAIStreamToolCalls(json, streamedToolCalls);
|
|
1907
|
-
roundUsage =
|
|
1926
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
1908
1927
|
if (chunkFinishReason) {
|
|
1909
1928
|
roundFinishReason = chunkFinishReason;
|
|
1910
1929
|
}
|
|
@@ -2010,7 +2029,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2010
2029
|
const delta = pickResponsesStreamTextDelta(json);
|
|
2011
2030
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2012
2031
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2013
|
-
usage =
|
|
2032
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2014
2033
|
if (chunkFinishReason) {
|
|
2015
2034
|
finishReason = chunkFinishReason;
|
|
2016
2035
|
}
|
|
@@ -2032,7 +2051,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2032
2051
|
const out = {
|
|
2033
2052
|
text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
|
|
2034
2053
|
raw: finalPayload,
|
|
2035
|
-
usage:
|
|
2054
|
+
usage: preferLatestUsage(usage, pickUsage(finalPayload)),
|
|
2036
2055
|
finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
|
|
2037
2056
|
};
|
|
2038
2057
|
callbacks.onComplete?.(out);
|
|
@@ -2095,7 +2114,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2095
2114
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2096
2115
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2097
2116
|
collectResponsesStreamToolCalls(json, streamedToolCalls);
|
|
2098
|
-
roundUsage =
|
|
2117
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2099
2118
|
if (chunkFinishReason) {
|
|
2100
2119
|
roundFinishReason = chunkFinishReason;
|
|
2101
2120
|
}
|
|
@@ -2113,9 +2132,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2113
2132
|
callbacks.onChunk?.(chunk);
|
|
2114
2133
|
}
|
|
2115
2134
|
});
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
|
|
2135
|
+
const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
|
|
2136
|
+
aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
|
|
2119
2137
|
if (roundFinishReason) {
|
|
2120
2138
|
finishReason = roundFinishReason;
|
|
2121
2139
|
} else if (roundPayload) {
|
|
@@ -2655,7 +2673,7 @@ function createAnthropicCompatibleAdapter(options) {
|
|
|
2655
2673
|
const delta = pickAnthropicDelta(json);
|
|
2656
2674
|
const chunkUsage = pickUsage2(json);
|
|
2657
2675
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2658
|
-
usage =
|
|
2676
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2659
2677
|
if (chunkFinishReason) {
|
|
2660
2678
|
finishReason = chunkFinishReason;
|
|
2661
2679
|
}
|
|
@@ -2852,7 +2870,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
|
|
|
2852
2870
|
const chunkUsage = pickUsage2(json);
|
|
2853
2871
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2854
2872
|
collectAnthropicStreamToolCalls(json, streamedToolCalls);
|
|
2855
|
-
roundUsage =
|
|
2873
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2856
2874
|
if (chunkFinishReason) {
|
|
2857
2875
|
roundFinishReason = chunkFinishReason;
|
|
2858
2876
|
}
|
|
@@ -4475,7 +4493,7 @@ async function callModel(adapter, options) {
|
|
|
4475
4493
|
handleTextDelta(chunk.textDelta);
|
|
4476
4494
|
}
|
|
4477
4495
|
if (chunk.usage) {
|
|
4478
|
-
latestUsage =
|
|
4496
|
+
latestUsage = preferLatestUsage(latestUsage, chunk.usage);
|
|
4479
4497
|
}
|
|
4480
4498
|
if (chunk.finishReason) {
|
|
4481
4499
|
latestFinishReason = chunk.finishReason;
|
|
@@ -4483,7 +4501,7 @@ async function callModel(adapter, options) {
|
|
|
4483
4501
|
}
|
|
4484
4502
|
});
|
|
4485
4503
|
const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
|
|
4486
|
-
const usage =
|
|
4504
|
+
const usage = preferLatestUsage(latestUsage, response2.usage);
|
|
4487
4505
|
const finishReason = response2.finishReason ?? latestFinishReason;
|
|
4488
4506
|
emitStreamingData(finalText, true, usage, finishReason);
|
|
4489
4507
|
emitObserve(options.observe, {
|
package/dist/providers/utils.d.ts
CHANGED
|
@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
|
|
|
7
7
|
export declare function pickString(value: unknown): string | undefined;
|
|
8
8
|
export declare function toFiniteNumber(value: unknown): number | undefined;
|
|
9
9
|
export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
|
|
10
|
+
export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
|