extrait 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -22
- package/dist/index.cjs +46 -16
- package/dist/index.d.ts +1 -1
- package/dist/index.js +46 -16
- package/dist/providers/utils.d.ts +1 -0
- package/dist/types.d.ts +7 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
|
|
|
8
8
|
</a>
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
## Features
|
|
12
|
+
|
|
12
13
|
- Multi-candidate JSON extraction from LLM responses
|
|
13
14
|
- Automatic repair with jsonrepair
|
|
14
15
|
- Zod schema validation and coercion
|
|
@@ -19,6 +20,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
|
|
|
19
20
|
|
|
20
21
|
## Installation
|
|
21
22
|
|
|
23
|
+
Install `extrait` with your preferred package manager.
|
|
24
|
+
|
|
22
25
|
```bash
|
|
23
26
|
bun add extrait
|
|
24
27
|
# or
|
|
@@ -29,56 +32,118 @@ deno add npm:extrait
|
|
|
29
32
|
|
|
30
33
|
## Quick Start
|
|
31
34
|
|
|
35
|
+
Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
|
|
36
|
+
|
|
32
37
|
```typescript
|
|
33
38
|
import { createLLM, prompt, s } from "extrait";
|
|
34
39
|
import { z } from "zod";
|
|
35
40
|
|
|
36
41
|
const llm = createLLM({
|
|
37
42
|
provider: "openai-compatible",
|
|
38
|
-
model: "
|
|
39
|
-
transport: {
|
|
43
|
+
model: "mistralai/ministral-3-3b",
|
|
44
|
+
transport: {
|
|
45
|
+
baseURL: "http://localhost:1234/v1",
|
|
46
|
+
apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
|
|
47
|
+
},
|
|
40
48
|
});
|
|
41
49
|
|
|
42
|
-
const
|
|
43
|
-
"
|
|
50
|
+
const RecipeSchema = s.schema(
|
|
51
|
+
"Recipe",
|
|
44
52
|
z.object({
|
|
45
|
-
|
|
46
|
-
|
|
53
|
+
title: s.string().min(1).describe("Short recipe title"),
|
|
54
|
+
ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
|
|
47
55
|
})
|
|
48
56
|
);
|
|
49
57
|
|
|
50
58
|
const result = await llm.structured(
|
|
51
|
-
|
|
52
|
-
prompt`
|
|
59
|
+
RecipeSchema,
|
|
60
|
+
prompt`Extract a simple recipe from this text: """${text}"""`
|
|
53
61
|
);
|
|
54
62
|
|
|
55
63
|
console.log(result.data);
|
|
56
64
|
```
|
|
57
65
|
|
|
66
|
+
## Examples at a Glance
|
|
67
|
+
|
|
68
|
+
These examples cover the most common usage patterns in the repository.
|
|
69
|
+
|
|
70
|
+
- [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
|
|
71
|
+
- [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
|
|
72
|
+
- [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
|
|
73
|
+
- [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
|
|
74
|
+
- [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
|
|
75
|
+
- [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
bun run dev simple "Bun.js runtime"
|
|
79
|
+
bun run dev streaming
|
|
80
|
+
bun run dev calculator-tool
|
|
81
|
+
```
|
|
82
|
+
|
|
58
83
|
## API Reference
|
|
59
84
|
|
|
60
|
-
|
|
85
|
+
The sections below cover the main building blocks of the library.
|
|
86
|
+
|
|
87
|
+
### Create an LLM Client
|
|
88
|
+
|
|
89
|
+
Use `createLLM()` to configure the provider, model, transport, and client defaults.
|
|
61
90
|
|
|
62
91
|
```typescript
|
|
63
92
|
const llm = createLLM({
|
|
64
93
|
provider: "openai-compatible" | "anthropic-compatible",
|
|
65
94
|
model: "gpt-5-nano",
|
|
95
|
+
baseURL: "https://api.openai.com", // optional alias for transport.baseURL
|
|
96
|
+
apiKey: process.env.LLM_API_KEY, // optional alias for transport.apiKey
|
|
66
97
|
transport: {
|
|
67
|
-
baseURL: "https://api.openai.com",
|
|
68
|
-
apiKey: process.env.LLM_API_KEY,
|
|
98
|
+
baseURL: "https://api.openai.com", // optional
|
|
99
|
+
apiKey: process.env.LLM_API_KEY, // optional
|
|
100
|
+
path: "/v1/chat/completions", // optional; anthropic-compatible usually uses /v1/messages
|
|
101
|
+
headers: { "x-trace-id": "docs-demo" }, // optional extra headers
|
|
102
|
+
defaultBody: { user: "docs-demo" }, // optional provider body defaults
|
|
103
|
+
version: "2023-06-01", // anthropic-compatible only
|
|
104
|
+
fetcher: fetch, // optional custom fetch implementation
|
|
69
105
|
},
|
|
70
106
|
defaults: {
|
|
71
|
-
mode: "loose" | "strict",
|
|
72
|
-
selfHeal:
|
|
73
|
-
debug: false,
|
|
74
|
-
|
|
107
|
+
mode: "loose" | "strict", // loose allows repair
|
|
108
|
+
selfHeal: 1, // optional retry attempts
|
|
109
|
+
debug: false, // optional structured debug output
|
|
110
|
+
systemPrompt: "You are a helpful assistant.",
|
|
111
|
+
timeout: {
|
|
112
|
+
request: 30_000,
|
|
113
|
+
tool: 10_000,
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
});
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
`baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
|
|
120
|
+
|
|
121
|
+
Common setup patterns:
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
// OpenAI-compatible gateway or local endpoint with top-level aliases
|
|
125
|
+
const llm = createLLM({
|
|
126
|
+
provider: "openai-compatible",
|
|
127
|
+
model: "gpt-4o-mini",
|
|
128
|
+
baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
|
|
129
|
+
apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
// Anthropic-compatible endpoint with explicit API version
|
|
133
|
+
const anthropic = createLLM({
|
|
134
|
+
provider: "anthropic-compatible",
|
|
135
|
+
model: "claude-3-5-sonnet-latest",
|
|
136
|
+
transport: {
|
|
137
|
+
baseURL: "https://api.anthropic.com",
|
|
138
|
+
apiKey: process.env.LLM_API_KEY,
|
|
139
|
+
version: "2023-06-01",
|
|
75
140
|
},
|
|
76
141
|
});
|
|
77
142
|
```
|
|
78
143
|
|
|
79
144
|
### Defining Schemas
|
|
80
145
|
|
|
81
|
-
Use the `s` wrapper around Zod for
|
|
146
|
+
Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
|
|
82
147
|
|
|
83
148
|
```typescript
|
|
84
149
|
import { s } from "extrait";
|
|
@@ -115,6 +180,8 @@ const Schema = s.schema(
|
|
|
115
180
|
|
|
116
181
|
### Making Structured Calls
|
|
117
182
|
|
|
183
|
+
`structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
|
|
184
|
+
|
|
118
185
|
```typescript
|
|
119
186
|
// Simple prompt
|
|
120
187
|
const result = await llm.structured(
|
|
@@ -159,7 +226,7 @@ const result = await llm.structured(
|
|
|
159
226
|
},
|
|
160
227
|
},
|
|
161
228
|
request: {
|
|
162
|
-
signal:
|
|
229
|
+
signal: AbortSignal.timeout(30_000), // optional AbortSignal
|
|
163
230
|
},
|
|
164
231
|
timeout: {
|
|
165
232
|
request: 30_000, // ms per LLM HTTP request
|
|
@@ -171,6 +238,22 @@ const result = await llm.structured(
|
|
|
171
238
|
|
|
172
239
|
`prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
|
|
173
240
|
|
|
241
|
+
You can also pass provider request options through `request`:
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
const result = await llm.structured(
|
|
245
|
+
Schema,
|
|
246
|
+
prompt`Summarize this document: """${text}"""`,
|
|
247
|
+
{
|
|
248
|
+
request: {
|
|
249
|
+
temperature: 0,
|
|
250
|
+
maxTokens: 800,
|
|
251
|
+
body: { user: "demo-user" },
|
|
252
|
+
},
|
|
253
|
+
}
|
|
254
|
+
);
|
|
255
|
+
```
|
|
256
|
+
|
|
174
257
|
### Images (multimodal)
|
|
175
258
|
|
|
176
259
|
Use `images()` to build base64 image content blocks for vision-capable models.
|
|
@@ -248,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
|
|
|
248
331
|
|
|
249
332
|
### Result Object
|
|
250
333
|
|
|
334
|
+
Successful structured calls return validated data plus the raw response and trace metadata.
|
|
335
|
+
|
|
251
336
|
```typescript
|
|
252
337
|
{
|
|
253
338
|
data: T, // Validated data matching schema
|
|
254
339
|
raw: string, // Raw LLM response
|
|
255
340
|
thinkBlocks: ThinkBlock[], // Extracted <think> blocks
|
|
256
341
|
json: unknown | null, // Parsed JSON before validation
|
|
257
|
-
attempts:
|
|
342
|
+
attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
|
|
258
343
|
usage?: {
|
|
259
344
|
inputTokens?: number,
|
|
260
345
|
outputTokens?: number,
|
|
@@ -265,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
|
|
|
265
350
|
}
|
|
266
351
|
```
|
|
267
352
|
|
|
353
|
+
Each `attempts` entry includes:
|
|
354
|
+
|
|
355
|
+
```typescript
|
|
356
|
+
{
|
|
357
|
+
attempt: number,
|
|
358
|
+
selfHeal: boolean,
|
|
359
|
+
via: "complete" | "stream",
|
|
360
|
+
raw: string,
|
|
361
|
+
thinkBlocks: ThinkBlock[],
|
|
362
|
+
json: unknown | null,
|
|
363
|
+
candidates: string[],
|
|
364
|
+
repairLog: string[],
|
|
365
|
+
zodIssues: z.ZodIssue[],
|
|
366
|
+
success: boolean,
|
|
367
|
+
usage?: LLMUsage,
|
|
368
|
+
finishReason?: string,
|
|
369
|
+
parsed: ParseLLMOutputResult<T>,
|
|
370
|
+
}
|
|
371
|
+
```
|
|
372
|
+
|
|
268
373
|
### Error Handling
|
|
269
374
|
|
|
375
|
+
Catch `StructuredParseError` when repair and validation still fail.
|
|
376
|
+
|
|
270
377
|
```typescript
|
|
271
378
|
import { StructuredParseError } from "extrait";
|
|
272
379
|
|
|
@@ -292,7 +399,7 @@ Generate vector embeddings using `llm.embed()`. It always returns `number[][]`
|
|
|
292
399
|
const embedder = createLLM({
|
|
293
400
|
provider: "openai-compatible",
|
|
294
401
|
model: "text-embedding-3-small",
|
|
295
|
-
transport: { apiKey: process.env.
|
|
402
|
+
transport: { apiKey: process.env.LLM_API_KEY },
|
|
296
403
|
});
|
|
297
404
|
|
|
298
405
|
// Single string
|
|
@@ -332,7 +439,7 @@ const embedder = createLLM({
|
|
|
332
439
|
model: "voyage-3",
|
|
333
440
|
transport: {
|
|
334
441
|
baseURL: "https://api.voyageai.com",
|
|
335
|
-
apiKey: process.env.
|
|
442
|
+
apiKey: process.env.LLM_API_KEY,
|
|
336
443
|
},
|
|
337
444
|
});
|
|
338
445
|
|
|
@@ -343,6 +450,8 @@ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive
|
|
|
343
450
|
|
|
344
451
|
### MCP Tools
|
|
345
452
|
|
|
453
|
+
Attach MCP clients at request time to let the model call tools during structured generation.
|
|
454
|
+
|
|
346
455
|
```typescript
|
|
347
456
|
import { createMCPClient } from "extrait";
|
|
348
457
|
|
|
@@ -376,6 +485,14 @@ const result = await llm.structured(
|
|
|
376
485
|
},
|
|
377
486
|
// Optional: transform tool arguments before the tool is called
|
|
378
487
|
transformToolArguments: (args, call) => args,
|
|
488
|
+
// Optional: transform the full MCP call payload, including _meta
|
|
489
|
+
transformToolCallParams: (params, call) => ({
|
|
490
|
+
...params,
|
|
491
|
+
_meta: {
|
|
492
|
+
source: "extrait-docs",
|
|
493
|
+
clientId: call.clientId,
|
|
494
|
+
},
|
|
495
|
+
}),
|
|
379
496
|
// Optional: custom error message when an unknown tool is called
|
|
380
497
|
unknownToolError: (toolName) => `Tool "${toolName}" is not available.`,
|
|
381
498
|
},
|
|
@@ -385,6 +502,18 @@ const result = await llm.structured(
|
|
|
385
502
|
await mcpClient.close?.();
|
|
386
503
|
```
|
|
387
504
|
|
|
505
|
+
`transformToolArguments()` only receives the tool input object. `transformToolCallParams()` runs after it and receives the full `MCPCallToolParams` payload that will be sent to the MCP client:
|
|
506
|
+
|
|
507
|
+
```typescript
|
|
508
|
+
type MCPCallToolParams = {
|
|
509
|
+
name: string;
|
|
510
|
+
arguments?: Record<string, unknown>;
|
|
511
|
+
_meta?: Record<string, unknown>;
|
|
512
|
+
};
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
Use `transformToolCallParams()` when you need to attach MCP-specific metadata, override the final remote tool name, or otherwise change the full request passed to `client.callTool()`. The hook's type is exported as `LLMToolCallParamsTransformer`.
|
|
516
|
+
|
|
388
517
|
### Timeouts
|
|
389
518
|
|
|
390
519
|
Use `timeout` to set per-request and per-tool-call time limits without managing `AbortSignal` manually.
|
|
@@ -415,7 +544,7 @@ const llm = createLLM({
|
|
|
415
544
|
|
|
416
545
|
## Examples
|
|
417
546
|
|
|
418
|
-
Run examples with
|
|
547
|
+
Run repository examples with `bun run dev <example-name>`.
|
|
419
548
|
|
|
420
549
|
Available examples:
|
|
421
550
|
- `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
|
|
@@ -429,6 +558,7 @@ Available examples:
|
|
|
429
558
|
- `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
|
|
430
559
|
- `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
|
|
431
560
|
- `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
|
|
561
|
+
- `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
|
|
432
562
|
- `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
|
|
433
563
|
|
|
434
564
|
Pass arguments after the example name:
|
|
@@ -445,6 +575,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
|
|
|
445
575
|
|
|
446
576
|
## Environment Variables
|
|
447
577
|
|
|
578
|
+
These environment variables are used across the examples and common client setups.
|
|
579
|
+
|
|
448
580
|
- `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
|
|
449
581
|
- `LLM_BASE_URL` - API endpoint (optional)
|
|
450
582
|
- `LLM_MODEL` - Model name (default: `gpt-5-nano`)
|
|
@@ -453,6 +585,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
|
|
|
453
585
|
|
|
454
586
|
## Testing
|
|
455
587
|
|
|
588
|
+
Run the test suite with Bun.
|
|
589
|
+
|
|
456
590
|
```bash
|
|
457
591
|
bun run test
|
|
458
592
|
```
|
package/dist/index.cjs
CHANGED
|
@@ -1317,6 +1317,17 @@ async function executeMCPToolCalls(calls, toolset, context) {
|
|
|
1317
1317
|
remoteName: tool.remoteName,
|
|
1318
1318
|
clientId: tool.clientId
|
|
1319
1319
|
}) : rawArgs;
|
|
1320
|
+
const toolParams = context.request.transformToolCallParams ? await context.request.transformToolCallParams({
|
|
1321
|
+
name: tool.remoteName,
|
|
1322
|
+
arguments: args
|
|
1323
|
+
}, {
|
|
1324
|
+
name: toolName,
|
|
1325
|
+
remoteName: tool.remoteName,
|
|
1326
|
+
clientId: tool.clientId
|
|
1327
|
+
}) : {
|
|
1328
|
+
name: tool.remoteName,
|
|
1329
|
+
arguments: args
|
|
1330
|
+
};
|
|
1320
1331
|
const metadata = {
|
|
1321
1332
|
id: callId,
|
|
1322
1333
|
type: call.type ?? "function",
|
|
@@ -1326,10 +1337,7 @@ async function executeMCPToolCalls(calls, toolset, context) {
|
|
|
1326
1337
|
const startedAt = new Date().toISOString();
|
|
1327
1338
|
const startedAtMs = Date.now();
|
|
1328
1339
|
try {
|
|
1329
|
-
const output = await tool.client.callTool(
|
|
1330
|
-
name: tool.remoteName,
|
|
1331
|
-
arguments: args
|
|
1332
|
-
});
|
|
1340
|
+
const output = await tool.client.callTool(toolParams);
|
|
1333
1341
|
const executionContext = {
|
|
1334
1342
|
callId,
|
|
1335
1343
|
type: call.type ?? "function",
|
|
@@ -1594,6 +1602,25 @@ function mergeUsage(base, next) {
|
|
|
1594
1602
|
}
|
|
1595
1603
|
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1596
1604
|
}
|
|
1605
|
+
function preferLatestUsage(base, next) {
|
|
1606
|
+
if (!base && !next) {
|
|
1607
|
+
return;
|
|
1608
|
+
}
|
|
1609
|
+
const merged = {};
|
|
1610
|
+
if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
|
|
1611
|
+
merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
|
|
1612
|
+
}
|
|
1613
|
+
if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
|
|
1614
|
+
merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
|
|
1615
|
+
}
|
|
1616
|
+
if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
|
|
1617
|
+
merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
|
|
1618
|
+
}
|
|
1619
|
+
if (base?.cost !== undefined || next?.cost !== undefined) {
|
|
1620
|
+
merged.cost = next?.cost ?? base?.cost;
|
|
1621
|
+
}
|
|
1622
|
+
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1623
|
+
}
|
|
1597
1624
|
function addOptional(a, b) {
|
|
1598
1625
|
if (a === undefined && b === undefined) {
|
|
1599
1626
|
return;
|
|
@@ -1658,7 +1685,7 @@ function createOpenAICompatibleAdapter(options) {
|
|
|
1658
1685
|
const delta = pickAssistantDelta(json);
|
|
1659
1686
|
const chunkUsage = pickUsage(json);
|
|
1660
1687
|
const chunkFinishReason = pickFinishReason(json);
|
|
1661
|
-
usage =
|
|
1688
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
1662
1689
|
if (chunkFinishReason) {
|
|
1663
1690
|
finishReason = chunkFinishReason;
|
|
1664
1691
|
}
|
|
@@ -1993,7 +2020,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
|
|
|
1993
2020
|
const chunkUsage = pickUsage(json);
|
|
1994
2021
|
const chunkFinishReason = pickFinishReason(json);
|
|
1995
2022
|
collectOpenAIStreamToolCalls(json, streamedToolCalls);
|
|
1996
|
-
roundUsage =
|
|
2023
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
1997
2024
|
if (chunkFinishReason) {
|
|
1998
2025
|
roundFinishReason = chunkFinishReason;
|
|
1999
2026
|
}
|
|
@@ -2099,7 +2126,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2099
2126
|
const delta = pickResponsesStreamTextDelta(json);
|
|
2100
2127
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2101
2128
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2102
|
-
usage =
|
|
2129
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2103
2130
|
if (chunkFinishReason) {
|
|
2104
2131
|
finishReason = chunkFinishReason;
|
|
2105
2132
|
}
|
|
@@ -2121,7 +2148,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2121
2148
|
const out = {
|
|
2122
2149
|
text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
|
|
2123
2150
|
raw: finalPayload,
|
|
2124
|
-
usage:
|
|
2151
|
+
usage: preferLatestUsage(usage, pickUsage(finalPayload)),
|
|
2125
2152
|
finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
|
|
2126
2153
|
};
|
|
2127
2154
|
callbacks.onComplete?.(out);
|
|
@@ -2184,7 +2211,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2184
2211
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2185
2212
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2186
2213
|
collectResponsesStreamToolCalls(json, streamedToolCalls);
|
|
2187
|
-
roundUsage =
|
|
2214
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2188
2215
|
if (chunkFinishReason) {
|
|
2189
2216
|
roundFinishReason = chunkFinishReason;
|
|
2190
2217
|
}
|
|
@@ -2202,9 +2229,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2202
2229
|
callbacks.onChunk?.(chunk);
|
|
2203
2230
|
}
|
|
2204
2231
|
});
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
|
|
2232
|
+
const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
|
|
2233
|
+
aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
|
|
2208
2234
|
if (roundFinishReason) {
|
|
2209
2235
|
finishReason = roundFinishReason;
|
|
2210
2236
|
} else if (roundPayload) {
|
|
@@ -2744,7 +2770,7 @@ function createAnthropicCompatibleAdapter(options) {
|
|
|
2744
2770
|
const delta = pickAnthropicDelta(json);
|
|
2745
2771
|
const chunkUsage = pickUsage2(json);
|
|
2746
2772
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2747
|
-
usage =
|
|
2773
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2748
2774
|
if (chunkFinishReason) {
|
|
2749
2775
|
finishReason = chunkFinishReason;
|
|
2750
2776
|
}
|
|
@@ -2941,7 +2967,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
|
|
|
2941
2967
|
const chunkUsage = pickUsage2(json);
|
|
2942
2968
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2943
2969
|
collectAnthropicStreamToolCalls(json, streamedToolCalls);
|
|
2944
|
-
roundUsage =
|
|
2970
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2945
2971
|
if (chunkFinishReason) {
|
|
2946
2972
|
roundFinishReason = chunkFinishReason;
|
|
2947
2973
|
}
|
|
@@ -4477,6 +4503,10 @@ async function callModel(adapter, options) {
|
|
|
4477
4503
|
parallelToolCalls: options.request?.parallelToolCalls,
|
|
4478
4504
|
maxToolRounds: options.request?.maxToolRounds,
|
|
4479
4505
|
onToolExecution: options.request?.onToolExecution,
|
|
4506
|
+
transformToolOutput: options.request?.transformToolOutput,
|
|
4507
|
+
transformToolArguments: options.request?.transformToolArguments,
|
|
4508
|
+
transformToolCallParams: options.request?.transformToolCallParams,
|
|
4509
|
+
unknownToolError: options.request?.unknownToolError,
|
|
4480
4510
|
toolDebug: options.request?.toolDebug,
|
|
4481
4511
|
body: options.request?.body,
|
|
4482
4512
|
signal: requestSignal
|
|
@@ -4564,7 +4594,7 @@ async function callModel(adapter, options) {
|
|
|
4564
4594
|
handleTextDelta(chunk.textDelta);
|
|
4565
4595
|
}
|
|
4566
4596
|
if (chunk.usage) {
|
|
4567
|
-
latestUsage =
|
|
4597
|
+
latestUsage = preferLatestUsage(latestUsage, chunk.usage);
|
|
4568
4598
|
}
|
|
4569
4599
|
if (chunk.finishReason) {
|
|
4570
4600
|
latestFinishReason = chunk.finishReason;
|
|
@@ -4572,7 +4602,7 @@ async function callModel(adapter, options) {
|
|
|
4572
4602
|
}
|
|
4573
4603
|
});
|
|
4574
4604
|
const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
|
|
4575
|
-
const usage =
|
|
4605
|
+
const usage = preferLatestUsage(latestUsage, response2.usage);
|
|
4576
4606
|
const finishReason = response2.finishReason ?? latestFinishReason;
|
|
4577
4607
|
emitStreamingData(finalText, true, usage, finishReason);
|
|
4578
4608
|
emitObserve(options.observe, {
|
package/dist/index.d.ts
CHANGED
|
@@ -14,4 +14,4 @@ export { createOpenAICompatibleAdapter, type OpenAICompatibleAdapterOptions, } f
|
|
|
14
14
|
export { createAnthropicCompatibleAdapter, DEFAULT_ANTHROPIC_MAX_TOKENS, DEFAULT_ANTHROPIC_VERSION, type AnthropicCompatibleAdapterOptions, } from "./providers/anthropic-compatible";
|
|
15
15
|
export { DEFAULT_MAX_TOOL_ROUNDS } from "./providers/mcp-runtime";
|
|
16
16
|
export { createDefaultProviderRegistry, createModelAdapter, createProviderRegistry, registerBuiltinProviders, type BuiltinProviderKind, type ModelAdapterConfig, type ProviderFactory, type ProviderRegistry, type ProviderTransportConfig, } from "./providers/registry";
|
|
17
|
-
export type { CandidateDiagnostics, EmbeddingRequest, EmbeddingResult, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolCallRef, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
|
|
17
|
+
export type { CandidateDiagnostics, EmbeddingRequest, EmbeddingResult, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolCallRef, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolCallParamsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
|
package/dist/index.js
CHANGED
|
@@ -1228,6 +1228,17 @@ async function executeMCPToolCalls(calls, toolset, context) {
|
|
|
1228
1228
|
remoteName: tool.remoteName,
|
|
1229
1229
|
clientId: tool.clientId
|
|
1230
1230
|
}) : rawArgs;
|
|
1231
|
+
const toolParams = context.request.transformToolCallParams ? await context.request.transformToolCallParams({
|
|
1232
|
+
name: tool.remoteName,
|
|
1233
|
+
arguments: args
|
|
1234
|
+
}, {
|
|
1235
|
+
name: toolName,
|
|
1236
|
+
remoteName: tool.remoteName,
|
|
1237
|
+
clientId: tool.clientId
|
|
1238
|
+
}) : {
|
|
1239
|
+
name: tool.remoteName,
|
|
1240
|
+
arguments: args
|
|
1241
|
+
};
|
|
1231
1242
|
const metadata = {
|
|
1232
1243
|
id: callId,
|
|
1233
1244
|
type: call.type ?? "function",
|
|
@@ -1237,10 +1248,7 @@ async function executeMCPToolCalls(calls, toolset, context) {
|
|
|
1237
1248
|
const startedAt = new Date().toISOString();
|
|
1238
1249
|
const startedAtMs = Date.now();
|
|
1239
1250
|
try {
|
|
1240
|
-
const output = await tool.client.callTool(
|
|
1241
|
-
name: tool.remoteName,
|
|
1242
|
-
arguments: args
|
|
1243
|
-
});
|
|
1251
|
+
const output = await tool.client.callTool(toolParams);
|
|
1244
1252
|
const executionContext = {
|
|
1245
1253
|
callId,
|
|
1246
1254
|
type: call.type ?? "function",
|
|
@@ -1505,6 +1513,25 @@ function mergeUsage(base, next) {
|
|
|
1505
1513
|
}
|
|
1506
1514
|
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1507
1515
|
}
|
|
1516
|
+
function preferLatestUsage(base, next) {
|
|
1517
|
+
if (!base && !next) {
|
|
1518
|
+
return;
|
|
1519
|
+
}
|
|
1520
|
+
const merged = {};
|
|
1521
|
+
if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
|
|
1522
|
+
merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
|
|
1523
|
+
}
|
|
1524
|
+
if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
|
|
1525
|
+
merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
|
|
1526
|
+
}
|
|
1527
|
+
if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
|
|
1528
|
+
merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
|
|
1529
|
+
}
|
|
1530
|
+
if (base?.cost !== undefined || next?.cost !== undefined) {
|
|
1531
|
+
merged.cost = next?.cost ?? base?.cost;
|
|
1532
|
+
}
|
|
1533
|
+
return Object.keys(merged).length > 0 ? merged : undefined;
|
|
1534
|
+
}
|
|
1508
1535
|
function addOptional(a, b) {
|
|
1509
1536
|
if (a === undefined && b === undefined) {
|
|
1510
1537
|
return;
|
|
@@ -1569,7 +1596,7 @@ function createOpenAICompatibleAdapter(options) {
|
|
|
1569
1596
|
const delta = pickAssistantDelta(json);
|
|
1570
1597
|
const chunkUsage = pickUsage(json);
|
|
1571
1598
|
const chunkFinishReason = pickFinishReason(json);
|
|
1572
|
-
usage =
|
|
1599
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
1573
1600
|
if (chunkFinishReason) {
|
|
1574
1601
|
finishReason = chunkFinishReason;
|
|
1575
1602
|
}
|
|
@@ -1904,7 +1931,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
|
|
|
1904
1931
|
const chunkUsage = pickUsage(json);
|
|
1905
1932
|
const chunkFinishReason = pickFinishReason(json);
|
|
1906
1933
|
collectOpenAIStreamToolCalls(json, streamedToolCalls);
|
|
1907
|
-
roundUsage =
|
|
1934
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
1908
1935
|
if (chunkFinishReason) {
|
|
1909
1936
|
roundFinishReason = chunkFinishReason;
|
|
1910
1937
|
}
|
|
@@ -2010,7 +2037,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2010
2037
|
const delta = pickResponsesStreamTextDelta(json);
|
|
2011
2038
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2012
2039
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2013
|
-
usage =
|
|
2040
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2014
2041
|
if (chunkFinishReason) {
|
|
2015
2042
|
finishReason = chunkFinishReason;
|
|
2016
2043
|
}
|
|
@@ -2032,7 +2059,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
|
|
|
2032
2059
|
const out = {
|
|
2033
2060
|
text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
|
|
2034
2061
|
raw: finalPayload,
|
|
2035
|
-
usage:
|
|
2062
|
+
usage: preferLatestUsage(usage, pickUsage(finalPayload)),
|
|
2036
2063
|
finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
|
|
2037
2064
|
};
|
|
2038
2065
|
callbacks.onComplete?.(out);
|
|
@@ -2095,7 +2122,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2095
2122
|
const chunkUsage = pickResponsesStreamUsage(json);
|
|
2096
2123
|
const chunkFinishReason = pickResponsesStreamFinishReason(json);
|
|
2097
2124
|
collectResponsesStreamToolCalls(json, streamedToolCalls);
|
|
2098
|
-
roundUsage =
|
|
2125
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2099
2126
|
if (chunkFinishReason) {
|
|
2100
2127
|
roundFinishReason = chunkFinishReason;
|
|
2101
2128
|
}
|
|
@@ -2113,9 +2140,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
|
|
|
2113
2140
|
callbacks.onChunk?.(chunk);
|
|
2114
2141
|
}
|
|
2115
2142
|
});
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
|
|
2143
|
+
const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
|
|
2144
|
+
aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
|
|
2119
2145
|
if (roundFinishReason) {
|
|
2120
2146
|
finishReason = roundFinishReason;
|
|
2121
2147
|
} else if (roundPayload) {
|
|
@@ -2655,7 +2681,7 @@ function createAnthropicCompatibleAdapter(options) {
|
|
|
2655
2681
|
const delta = pickAnthropicDelta(json);
|
|
2656
2682
|
const chunkUsage = pickUsage2(json);
|
|
2657
2683
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2658
|
-
usage =
|
|
2684
|
+
usage = preferLatestUsage(usage, chunkUsage);
|
|
2659
2685
|
if (chunkFinishReason) {
|
|
2660
2686
|
finishReason = chunkFinishReason;
|
|
2661
2687
|
}
|
|
@@ -2852,7 +2878,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
|
|
|
2852
2878
|
const chunkUsage = pickUsage2(json);
|
|
2853
2879
|
const chunkFinishReason = pickFinishReason2(json);
|
|
2854
2880
|
collectAnthropicStreamToolCalls(json, streamedToolCalls);
|
|
2855
|
-
roundUsage =
|
|
2881
|
+
roundUsage = preferLatestUsage(roundUsage, chunkUsage);
|
|
2856
2882
|
if (chunkFinishReason) {
|
|
2857
2883
|
roundFinishReason = chunkFinishReason;
|
|
2858
2884
|
}
|
|
@@ -4388,6 +4414,10 @@ async function callModel(adapter, options) {
|
|
|
4388
4414
|
parallelToolCalls: options.request?.parallelToolCalls,
|
|
4389
4415
|
maxToolRounds: options.request?.maxToolRounds,
|
|
4390
4416
|
onToolExecution: options.request?.onToolExecution,
|
|
4417
|
+
transformToolOutput: options.request?.transformToolOutput,
|
|
4418
|
+
transformToolArguments: options.request?.transformToolArguments,
|
|
4419
|
+
transformToolCallParams: options.request?.transformToolCallParams,
|
|
4420
|
+
unknownToolError: options.request?.unknownToolError,
|
|
4391
4421
|
toolDebug: options.request?.toolDebug,
|
|
4392
4422
|
body: options.request?.body,
|
|
4393
4423
|
signal: requestSignal
|
|
@@ -4475,7 +4505,7 @@ async function callModel(adapter, options) {
|
|
|
4475
4505
|
handleTextDelta(chunk.textDelta);
|
|
4476
4506
|
}
|
|
4477
4507
|
if (chunk.usage) {
|
|
4478
|
-
latestUsage =
|
|
4508
|
+
latestUsage = preferLatestUsage(latestUsage, chunk.usage);
|
|
4479
4509
|
}
|
|
4480
4510
|
if (chunk.finishReason) {
|
|
4481
4511
|
latestFinishReason = chunk.finishReason;
|
|
@@ -4483,7 +4513,7 @@ async function callModel(adapter, options) {
|
|
|
4483
4513
|
}
|
|
4484
4514
|
});
|
|
4485
4515
|
const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
|
|
4486
|
-
const usage =
|
|
4516
|
+
const usage = preferLatestUsage(latestUsage, response2.usage);
|
|
4487
4517
|
const finishReason = response2.finishReason ?? latestFinishReason;
|
|
4488
4518
|
emitStreamingData(finalText, true, usage, finishReason);
|
|
4489
4519
|
emitObserve(options.observe, {
|
|
@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
|
|
|
7
7
|
export declare function pickString(value: unknown): string | undefined;
|
|
8
8
|
export declare function toFiniteNumber(value: unknown): number | undefined;
|
|
9
9
|
export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
|
|
10
|
+
export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
|
package/dist/types.d.ts
CHANGED
|
@@ -110,6 +110,7 @@ export interface MCPListToolsResult {
|
|
|
110
110
|
export interface MCPCallToolParams {
|
|
111
111
|
name: string;
|
|
112
112
|
arguments?: Record<string, unknown>;
|
|
113
|
+
_meta?: Record<string, unknown>;
|
|
113
114
|
}
|
|
114
115
|
export interface MCPToolClient {
|
|
115
116
|
id: string;
|
|
@@ -156,6 +157,7 @@ export interface LLMRequest {
|
|
|
156
157
|
onToolExecution?: (execution: LLMToolExecution) => void;
|
|
157
158
|
transformToolOutput?: LLMToolOutputTransformer;
|
|
158
159
|
transformToolArguments?: LLMToolArgumentsTransformer;
|
|
160
|
+
transformToolCallParams?: LLMToolCallParamsTransformer;
|
|
159
161
|
unknownToolError?: (toolName: string) => string;
|
|
160
162
|
toolDebug?: boolean | LLMToolDebugOptions;
|
|
161
163
|
body?: Record<string, unknown>;
|
|
@@ -237,6 +239,11 @@ export type LLMToolArgumentsTransformer = (args: Record<string, unknown>, contex
|
|
|
237
239
|
remoteName: string;
|
|
238
240
|
clientId: string;
|
|
239
241
|
}) => Record<string, unknown> | Promise<Record<string, unknown>>;
|
|
242
|
+
export type LLMToolCallParamsTransformer = (params: MCPCallToolParams, context: {
|
|
243
|
+
name: string;
|
|
244
|
+
remoteName: string;
|
|
245
|
+
clientId: string;
|
|
246
|
+
}) => MCPCallToolParams | Promise<MCPCallToolParams>;
|
|
240
247
|
export interface LLMToolDebugOptions {
|
|
241
248
|
enabled?: boolean;
|
|
242
249
|
logger?: (line: string) => void;
|