extrait 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions released to a supported registry, and reflects the changes between those versions as they appear in the public registry.
package/README.md CHANGED
@@ -8,7 +8,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
   </a>
 </p>
 
-**Features:**
+## Features
+
 - Multi-candidate JSON extraction from LLM responses
 - Automatic repair with jsonrepair
 - Zod schema validation and coercion
@@ -19,6 +20,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
 
 ## Installation
 
+Install `extrait` with your preferred package manager.
+
 ```bash
 bun add extrait
 # or
@@ -29,56 +32,118 @@ deno add npm:extrait
 
 ## Quick Start
 
+Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
+
 ```typescript
 import { createLLM, prompt, s } from "extrait";
 import { z } from "zod";
 
 const llm = createLLM({
   provider: "openai-compatible",
-  model: "gpt-5-nano",
-  transport: { apiKey: process.env.LLM_API_KEY },
+  model: "mistralai/ministral-3-3b",
+  transport: {
+    baseURL: "http://localhost:1234/v1",
+    apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
+  },
 });
 
-const SummarySchema = s.schema(
-  "Summary",
+const RecipeSchema = s.schema(
+  "Recipe",
   z.object({
-    summary: s.string().min(1).describe("One-sentence summary"),
-    tags: s.array(s.string()).default([]).describe("Keywords"),
+    title: s.string().min(1).describe("Short recipe title"),
+    ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
   })
 );
 
 const result = await llm.structured(
-  SummarySchema,
-  prompt`Summarize this: """${text}"""`
+  RecipeSchema,
+  prompt`Extract a simple recipe from this text: """${text}"""`
 );
 
 console.log(result.data);
 ```
 
+## Examples at a Glance
+
+These examples cover the most common usage patterns in the repository.
+
+- [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
+- [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
+- [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
+- [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
+- [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
+- [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
+
+```bash
+bun run dev simple "Bun.js runtime"
+bun run dev streaming
+bun run dev calculator-tool
+```
+
 ## API Reference
 
-### Creating an LLM Client
+The sections below cover the main building blocks of the library.
+
+### Create an LLM Client
+
+Use `createLLM()` to configure the provider, model, transport, and client defaults.
 
 ```typescript
 const llm = createLLM({
   provider: "openai-compatible" | "anthropic-compatible",
   model: "gpt-5-nano",
+  baseURL: "https://api.openai.com", // optional alias for transport.baseURL
+  apiKey: process.env.LLM_API_KEY, // optional alias for transport.apiKey
   transport: {
-    baseURL: "https://api.openai.com", // optional
-    apiKey: process.env.LLM_API_KEY, // optional
+    baseURL: "https://api.openai.com", // optional
+    apiKey: process.env.LLM_API_KEY, // optional
+    path: "/v1/chat/completions", // optional; anthropic-compatible usually uses /v1/messages
+    headers: { "x-trace-id": "docs-demo" }, // optional extra headers
+    defaultBody: { user: "docs-demo" }, // optional provider body defaults
+    version: "2023-06-01", // anthropic-compatible only
+    fetcher: fetch, // optional custom fetch implementation
   },
   defaults: {
-    mode: "loose" | "strict", // loose allows repair
-    selfHeal: 0 | 1 | 2, // retry attempts
-    debug: false, // show repair logs
-    timeout: { request: 30_000 }, // optional default timeouts
+    mode: "loose" | "strict", // loose allows repair
+    selfHeal: 1, // optional retry attempts
+    debug: false, // optional structured debug output
+    systemPrompt: "You are a helpful assistant.",
+    timeout: {
+      request: 30_000,
+      tool: 10_000,
+    },
+  },
+});
+```
+
+`baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
+
+Common setup patterns:
+
+```typescript
+// OpenAI-compatible gateway or local endpoint with top-level aliases
+const llm = createLLM({
+  provider: "openai-compatible",
+  model: "gpt-4o-mini",
+  baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
+  apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
+});
+
+// Anthropic-compatible endpoint with explicit API version
+const anthropic = createLLM({
+  provider: "anthropic-compatible",
+  model: "claude-3-5-sonnet-latest",
+  transport: {
+    baseURL: "https://api.anthropic.com",
+    apiKey: process.env.LLM_API_KEY,
+    version: "2023-06-01",
   },
 });
 ```
 
 ### Defining Schemas
 
-Use the `s` wrapper around Zod for enhanced schema building:
+Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
 
 ```typescript
 import { s } from "extrait";
@@ -115,6 +180,8 @@ const Schema = s.schema(
 
 ### Making Structured Calls
 
+`structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
+
 ```typescript
 // Simple prompt
 const result = await llm.structured(
@@ -159,7 +226,7 @@ const result = await llm.structured(
     },
   },
   request: {
-    signal: abortController.signal, // optional AbortSignal
+    signal: AbortSignal.timeout(30_000), // optional AbortSignal
   },
   timeout: {
     request: 30_000, // ms per LLM HTTP request
@@ -171,6 +238,22 @@ const result = await llm.structured(
 
 `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
 
+You can also pass provider request options through `request`:
+
+```typescript
+const result = await llm.structured(
+  Schema,
+  prompt`Summarize this document: """${text}"""`,
+  {
+    request: {
+      temperature: 0,
+      maxTokens: 800,
+      body: { user: "demo-user" },
+    },
+  }
+);
+```
+
 ### Images (multimodal)
 
 Use `images()` to build base64 image content blocks for vision-capable models.
@@ -248,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
 
 ### Result Object
 
+Successful structured calls return validated data plus the raw response and trace metadata.
+
 ```typescript
 {
   data: T, // Validated data matching schema
   raw: string, // Raw LLM response
   thinkBlocks: ThinkBlock[], // Extracted <think> blocks
   json: unknown | null, // Parsed JSON before validation
-  attempts: AttemptTrace[], // Self-heal attempts
+  attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
  usage?: {
     inputTokens?: number,
     outputTokens?: number,
@@ -265,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
 }
 ```
 
+Each `attempts` entry includes:
+
+```typescript
+{
+  attempt: number,
+  selfHeal: boolean,
+  via: "complete" | "stream",
+  raw: string,
+  thinkBlocks: ThinkBlock[],
+  json: unknown | null,
+  candidates: string[],
+  repairLog: string[],
+  zodIssues: z.ZodIssue[],
+  success: boolean,
+  usage?: LLMUsage,
+  finishReason?: string,
+  parsed: ParseLLMOutputResult<T>,
+}
+```
+
 ### Error Handling
 
+Catch `StructuredParseError` when repair and validation still fail.
+
 ```typescript
 import { StructuredParseError } from "extrait";
 
@@ -292,7 +399,7 @@ Generate vector embeddings using `llm.embed()`. It always returns `number[][]`
 const embedder = createLLM({
   provider: "openai-compatible",
   model: "text-embedding-3-small",
-  transport: { apiKey: process.env.OPENAI_API_KEY },
+  transport: { apiKey: process.env.LLM_API_KEY },
 });
 
 // Single string
@@ -332,7 +439,7 @@ const embedder = createLLM({
   model: "voyage-3",
   transport: {
     baseURL: "https://api.voyageai.com",
-    apiKey: process.env.VOYAGE_API_KEY,
+    apiKey: process.env.LLM_API_KEY,
   },
 });
 
@@ -343,6 +450,8 @@ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive
 
 ### MCP Tools
 
+Attach MCP clients at request time to let the model call tools during structured generation.
+
 ```typescript
 import { createMCPClient } from "extrait";
 
@@ -376,6 +485,14 @@ const result = await llm.structured(
     },
     // Optional: transform tool arguments before the tool is called
     transformToolArguments: (args, call) => args,
+    // Optional: transform the full MCP call payload, including _meta
+    transformToolCallParams: (params, call) => ({
+      ...params,
+      _meta: {
+        source: "extrait-docs",
+        clientId: call.clientId,
+      },
+    }),
     // Optional: custom error message when an unknown tool is called
     unknownToolError: (toolName) => `Tool "${toolName}" is not available.`,
   },
@@ -385,6 +502,18 @@ const result = await llm.structured(
 await mcpClient.close?.();
 ```
 
+`transformToolArguments()` only receives the tool input object. `transformToolCallParams()` runs after it and receives the full `MCPCallToolParams` payload that will be sent to the MCP client:
+
+```typescript
+type MCPCallToolParams = {
+  name: string;
+  arguments?: Record<string, unknown>;
+  _meta?: Record<string, unknown>;
+};
+```
+
+Use `transformToolCallParams()` when you need to attach MCP-specific metadata, override the final remote tool name, or otherwise change the full request passed to `client.callTool()`. This hook is exported as `LLMToolCallParamsTransformer`.
+
 ### Timeouts
 
 Use `timeout` to set per-request and per-tool-call time limits without managing `AbortSignal` manually.
@@ -415,7 +544,7 @@ const llm = createLLM({
 
 ## Examples
 
-Run examples with: `bun run dev <example-name>`
+Run repository examples with `bun run dev <example-name>`.
 
 Available examples:
 - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
@@ -429,6 +558,7 @@ Available examples:
 - `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
 - `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
 - `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
+- `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
 - `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
 
 Pass arguments after the example name:
@@ -445,6 +575,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
 
 ## Environment Variables
 
+These environment variables are used across the examples and common client setups.
+
 - `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
 - `LLM_BASE_URL` - API endpoint (optional)
 - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
@@ -453,6 +585,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
 
 ## Testing
 
+Run the test suite with Bun.
+
 ```bash
 bun run test
 ```
package/dist/index.cjs CHANGED
@@ -1317,6 +1317,17 @@ async function executeMCPToolCalls(calls, toolset, context) {
     remoteName: tool.remoteName,
     clientId: tool.clientId
   }) : rawArgs;
+  const toolParams = context.request.transformToolCallParams ? await context.request.transformToolCallParams({
+    name: tool.remoteName,
+    arguments: args
+  }, {
+    name: toolName,
+    remoteName: tool.remoteName,
+    clientId: tool.clientId
+  }) : {
+    name: tool.remoteName,
+    arguments: args
+  };
   const metadata = {
     id: callId,
     type: call.type ?? "function",
@@ -1326,10 +1337,7 @@ async function executeMCPToolCalls(calls, toolset, context) {
   const startedAt = new Date().toISOString();
   const startedAtMs = Date.now();
   try {
-    const output = await tool.client.callTool({
-      name: tool.remoteName,
-      arguments: args
-    });
+    const output = await tool.client.callTool(toolParams);
     const executionContext = {
       callId,
       type: call.type ?? "function",
@@ -1594,6 +1602,25 @@ function mergeUsage(base, next) {
   }
   return Object.keys(merged).length > 0 ? merged : undefined;
 }
+function preferLatestUsage(base, next) {
+  if (!base && !next) {
+    return;
+  }
+  const merged = {};
+  if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
+    merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
+  }
+  if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
+    merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
+  }
+  if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
+    merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
+  }
+  if (base?.cost !== undefined || next?.cost !== undefined) {
+    merged.cost = next?.cost ?? base?.cost;
+  }
+  return Object.keys(merged).length > 0 ? merged : undefined;
+}
 function addOptional(a, b) {
   if (a === undefined && b === undefined) {
     return;
@@ -1658,7 +1685,7 @@ function createOpenAICompatibleAdapter(options) {
   const delta = pickAssistantDelta(json);
   const chunkUsage = pickUsage(json);
   const chunkFinishReason = pickFinishReason(json);
-  usage = mergeUsage(usage, chunkUsage);
+  usage = preferLatestUsage(usage, chunkUsage);
   if (chunkFinishReason) {
     finishReason = chunkFinishReason;
   }
@@ -1993,7 +2020,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
   const chunkUsage = pickUsage(json);
   const chunkFinishReason = pickFinishReason(json);
   collectOpenAIStreamToolCalls(json, streamedToolCalls);
-  roundUsage = mergeUsage(roundUsage, chunkUsage);
+  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
   if (chunkFinishReason) {
     roundFinishReason = chunkFinishReason;
   }
@@ -2099,7 +2126,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
   const delta = pickResponsesStreamTextDelta(json);
   const chunkUsage = pickResponsesStreamUsage(json);
   const chunkFinishReason = pickResponsesStreamFinishReason(json);
-  usage = mergeUsage(usage, chunkUsage);
+  usage = preferLatestUsage(usage, chunkUsage);
   if (chunkFinishReason) {
     finishReason = chunkFinishReason;
   }
@@ -2121,7 +2148,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
 const out = {
   text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
   raw: finalPayload,
-  usage: mergeUsage(usage, pickUsage(finalPayload)),
+  usage: preferLatestUsage(usage, pickUsage(finalPayload)),
   finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
 };
 callbacks.onComplete?.(out);
@@ -2184,7 +2211,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
   const chunkUsage = pickResponsesStreamUsage(json);
   const chunkFinishReason = pickResponsesStreamFinishReason(json);
   collectResponsesStreamToolCalls(json, streamedToolCalls);
-  roundUsage = mergeUsage(roundUsage, chunkUsage);
+  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
   if (chunkFinishReason) {
     roundFinishReason = chunkFinishReason;
   }
@@ -2202,9 +2229,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
     callbacks.onChunk?.(chunk);
   }
 });
-aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
-const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
-aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
+const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
+aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
 if (roundFinishReason) {
   finishReason = roundFinishReason;
 } else if (roundPayload) {
@@ -2744,7 +2770,7 @@ function createAnthropicCompatibleAdapter(options) {
   const delta = pickAnthropicDelta(json);
   const chunkUsage = pickUsage2(json);
   const chunkFinishReason = pickFinishReason2(json);
-  usage = mergeUsage(usage, chunkUsage);
+  usage = preferLatestUsage(usage, chunkUsage);
   if (chunkFinishReason) {
     finishReason = chunkFinishReason;
   }
@@ -2941,7 +2967,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
   const chunkUsage = pickUsage2(json);
   const chunkFinishReason = pickFinishReason2(json);
   collectAnthropicStreamToolCalls(json, streamedToolCalls);
-  roundUsage = mergeUsage(roundUsage, chunkUsage);
+  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
   if (chunkFinishReason) {
     roundFinishReason = chunkFinishReason;
   }
@@ -4477,6 +4503,10 @@ async function callModel(adapter, options) {
   parallelToolCalls: options.request?.parallelToolCalls,
   maxToolRounds: options.request?.maxToolRounds,
   onToolExecution: options.request?.onToolExecution,
+  transformToolOutput: options.request?.transformToolOutput,
+  transformToolArguments: options.request?.transformToolArguments,
+  transformToolCallParams: options.request?.transformToolCallParams,
+  unknownToolError: options.request?.unknownToolError,
   toolDebug: options.request?.toolDebug,
   body: options.request?.body,
   signal: requestSignal
@@ -4564,7 +4594,7 @@ async function callModel(adapter, options) {
     handleTextDelta(chunk.textDelta);
   }
   if (chunk.usage) {
-    latestUsage = mergeUsage2(latestUsage, chunk.usage);
+    latestUsage = preferLatestUsage(latestUsage, chunk.usage);
   }
   if (chunk.finishReason) {
     latestFinishReason = chunk.finishReason;
@@ -4572,7 +4602,7 @@ async function callModel(adapter, options) {
   }
 });
 const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
-const usage = mergeUsage2(latestUsage, response2.usage);
+const usage = preferLatestUsage(latestUsage, response2.usage);
 const finishReason = response2.finishReason ?? latestFinishReason;
 emitStreamingData(finalText, true, usage, finishReason);
 emitObserve(options.observe, {
package/dist/index.d.ts CHANGED
@@ -14,4 +14,4 @@ export { createOpenAICompatibleAdapter, type OpenAICompatibleAdapterOptions, } f
 export { createAnthropicCompatibleAdapter, DEFAULT_ANTHROPIC_MAX_TOKENS, DEFAULT_ANTHROPIC_VERSION, type AnthropicCompatibleAdapterOptions, } from "./providers/anthropic-compatible";
 export { DEFAULT_MAX_TOOL_ROUNDS } from "./providers/mcp-runtime";
 export { createDefaultProviderRegistry, createModelAdapter, createProviderRegistry, registerBuiltinProviders, type BuiltinProviderKind, type ModelAdapterConfig, type ProviderFactory, type ProviderRegistry, type ProviderTransportConfig, } from "./providers/registry";
-export type { CandidateDiagnostics, EmbeddingRequest, EmbeddingResult, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolCallRef, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
+export type { CandidateDiagnostics, EmbeddingRequest, EmbeddingResult, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolCallRef, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolCallParamsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
package/dist/index.js CHANGED
@@ -1228,6 +1228,17 @@ async function executeMCPToolCalls(calls, toolset, context) {
     remoteName: tool.remoteName,
     clientId: tool.clientId
   }) : rawArgs;
+  const toolParams = context.request.transformToolCallParams ? await context.request.transformToolCallParams({
+    name: tool.remoteName,
+    arguments: args
+  }, {
+    name: toolName,
+    remoteName: tool.remoteName,
+    clientId: tool.clientId
+  }) : {
+    name: tool.remoteName,
+    arguments: args
+  };
   const metadata = {
     id: callId,
     type: call.type ?? "function",
@@ -1237,10 +1248,7 @@ async function executeMCPToolCalls(calls, toolset, context) {
   const startedAt = new Date().toISOString();
   const startedAtMs = Date.now();
   try {
-    const output = await tool.client.callTool({
-      name: tool.remoteName,
-      arguments: args
-    });
+    const output = await tool.client.callTool(toolParams);
     const executionContext = {
       callId,
       type: call.type ?? "function",
@@ -1505,6 +1513,25 @@ function mergeUsage(base, next) {
   }
   return Object.keys(merged).length > 0 ? merged : undefined;
 }
+function preferLatestUsage(base, next) {
+  if (!base && !next) {
+    return;
+  }
+  const merged = {};
+  if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
+    merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
+  }
+  if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
+    merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
+  }
+  if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
+    merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
+  }
+  if (base?.cost !== undefined || next?.cost !== undefined) {
+    merged.cost = next?.cost ?? base?.cost;
+  }
+  return Object.keys(merged).length > 0 ? merged : undefined;
+}
 function addOptional(a, b) {
   if (a === undefined && b === undefined) {
     return;
@@ -1569,7 +1596,7 @@ function createOpenAICompatibleAdapter(options) {
   const delta = pickAssistantDelta(json);
   const chunkUsage = pickUsage(json);
   const chunkFinishReason = pickFinishReason(json);
-  usage = mergeUsage(usage, chunkUsage);
+  usage = preferLatestUsage(usage, chunkUsage);
   if (chunkFinishReason) {
     finishReason = chunkFinishReason;
   }
@@ -1904,7 +1931,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
   const chunkUsage = pickUsage(json);
   const chunkFinishReason = pickFinishReason(json);
   collectOpenAIStreamToolCalls(json, streamedToolCalls);
-  roundUsage = mergeUsage(roundUsage, chunkUsage);
+  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
   if (chunkFinishReason) {
     roundFinishReason = chunkFinishReason;
   }
@@ -2010,7 +2037,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
   const delta = pickResponsesStreamTextDelta(json);
   const chunkUsage = pickResponsesStreamUsage(json);
   const chunkFinishReason = pickResponsesStreamFinishReason(json);
-  usage = mergeUsage(usage, chunkUsage);
+  usage = preferLatestUsage(usage, chunkUsage);
   if (chunkFinishReason) {
     finishReason = chunkFinishReason;
   }
@@ -2032,7 +2059,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
 const out = {
   text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
   raw: finalPayload,
-  usage: mergeUsage(usage, pickUsage(finalPayload)),
+  usage: preferLatestUsage(usage, pickUsage(finalPayload)),
   finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
 };
 callbacks.onComplete?.(out);
@@ -2095,7 +2122,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
   const chunkUsage = pickResponsesStreamUsage(json);
   const chunkFinishReason = pickResponsesStreamFinishReason(json);
   collectResponsesStreamToolCalls(json, streamedToolCalls);
-  roundUsage = mergeUsage(roundUsage, chunkUsage);
+  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
   if (chunkFinishReason) {
     roundFinishReason = chunkFinishReason;
   }
@@ -2113,9 +2140,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
     callbacks.onChunk?.(chunk);
   }
 });
-aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
-const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
-aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
+const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
+aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
 if (roundFinishReason) {
   finishReason = roundFinishReason;
 } else if (roundPayload) {
@@ -2655,7 +2681,7 @@ function createAnthropicCompatibleAdapter(options) {
   const delta = pickAnthropicDelta(json);
   const chunkUsage = pickUsage2(json);
   const chunkFinishReason = pickFinishReason2(json);
-  usage = mergeUsage(usage, chunkUsage);
+  usage = preferLatestUsage(usage, chunkUsage);
   if (chunkFinishReason) {
     finishReason = chunkFinishReason;
   }
@@ -2852,7 +2878,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
   const chunkUsage = pickUsage2(json);
   const chunkFinishReason = pickFinishReason2(json);
   collectAnthropicStreamToolCalls(json, streamedToolCalls);
-  roundUsage = mergeUsage(roundUsage, chunkUsage);
+  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
   if (chunkFinishReason) {
     roundFinishReason = chunkFinishReason;
   }
@@ -4388,6 +4414,10 @@ async function callModel(adapter, options) {
   parallelToolCalls: options.request?.parallelToolCalls,
   maxToolRounds: options.request?.maxToolRounds,
   onToolExecution: options.request?.onToolExecution,
+  transformToolOutput: options.request?.transformToolOutput,
+  transformToolArguments: options.request?.transformToolArguments,
+  transformToolCallParams: options.request?.transformToolCallParams,
+  unknownToolError: options.request?.unknownToolError,
   toolDebug: options.request?.toolDebug,
   body: options.request?.body,
   signal: requestSignal
@@ -4475,7 +4505,7 @@ async function callModel(adapter, options) {
     handleTextDelta(chunk.textDelta);
   }
   if (chunk.usage) {
-    latestUsage = mergeUsage2(latestUsage, chunk.usage);
+    latestUsage = preferLatestUsage(latestUsage, chunk.usage);
   }
   if (chunk.finishReason) {
     latestFinishReason = chunk.finishReason;
@@ -4483,7 +4513,7 @@ async function callModel(adapter, options) {
   }
 });
 const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
-const usage = mergeUsage2(latestUsage, response2.usage);
+const usage = preferLatestUsage(latestUsage, response2.usage);
 const finishReason = response2.finishReason ?? latestFinishReason;
 emitStreamingData(finalText, true, usage, finishReason);
 emitObserve(options.observe, {
@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
 export declare function pickString(value: unknown): string | undefined;
 export declare function toFiniteNumber(value: unknown): number | undefined;
 export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
+export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
package/dist/types.d.ts CHANGED
@@ -110,6 +110,7 @@ export interface MCPListToolsResult {
 export interface MCPCallToolParams {
     name: string;
     arguments?: Record<string, unknown>;
+    _meta?: Record<string, unknown>;
 }
 export interface MCPToolClient {
     id: string;
@@ -156,6 +157,7 @@ export interface LLMRequest {
     onToolExecution?: (execution: LLMToolExecution) => void;
     transformToolOutput?: LLMToolOutputTransformer;
     transformToolArguments?: LLMToolArgumentsTransformer;
+    transformToolCallParams?: LLMToolCallParamsTransformer;
     unknownToolError?: (toolName: string) => string;
     toolDebug?: boolean | LLMToolDebugOptions;
     body?: Record<string, unknown>;
@@ -237,6 +239,11 @@ export type LLMToolArgumentsTransformer = (args: Record<string, unknown>, contex
     remoteName: string;
     clientId: string;
 }) => Record<string, unknown> | Promise<Record<string, unknown>>;
+export type LLMToolCallParamsTransformer = (params: MCPCallToolParams, context: {
+    name: string;
+    remoteName: string;
+    clientId: string;
+}) => MCPCallToolParams | Promise<MCPCallToolParams>;
 export interface LLMToolDebugOptions {
     enabled?: boolean;
     logger?: (line: string) => void;
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "extrait",
-  "version": "0.5.4",
+  "version": "0.5.6",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/tterrasson/extrait.git"