npm - extrait - Versions diffs - 0.5.4 → 0.5.5 - Mend

extrait 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED

@@ -8,7 +8,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
   </a>
 </p>
-**Features:**
+## Features
 - Multi-candidate JSON extraction from LLM responses
 - Automatic repair with jsonrepair
 - Zod schema validation and coercion
@@ -19,6 +20,8 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
 ## Installation
+Install `extrait` with your preferred package manager.
 ```bash
 bun add extrait
 # or
@@ -29,56 +32,118 @@ deno add npm:extrait
 ## Quick Start
+Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
 ```typescript
 import { createLLM, prompt, s } from "extrait";
 import { z } from "zod";
 const llm = createLLM({
   provider: "openai-compatible",
-  model: "gpt-5-nano",
-  transport: { apiKey: process.env.LLM_API_KEY },
+  model: "mistralai/ministral-3-3b",
+  transport: {
+    baseURL: "http://localhost:1234/v1",
+    apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
+  },
 });
-const SummarySchema = s.schema(
-  "Summary",
+const RecipeSchema = s.schema(
+  "Recipe",
   z.object({
-    summary: s.string().min(1).describe("One-sentence summary"),
-    tags: s.array(s.string()).default([]).describe("Keywords"),
+    title: s.string().min(1).describe("Short recipe title"),
+    ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
   })
 );
 const result = await llm.structured(
-  SummarySchema,
-  prompt`Summarize this: """${text}"""`
+  RecipeSchema,
+  prompt`Extract a simple recipe from this text: """${text}"""`
 );
 console.log(result.data);
 ```
+## Examples at a Glance
+These examples cover the most common usage patterns in the repository.
+- [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
+- [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
+- [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
+- [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
+- [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
+- [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
+```bash
+bun run dev simple "Bun.js runtime"
+bun run dev streaming
+bun run dev calculator-tool
+```
 ## API Reference
-### Creating an LLM Client
+The sections below cover the main building blocks of the library.
+### Create an LLM Client
+Use `createLLM()` to configure the provider, model, transport, and client defaults.
 ```typescript
 const llm = createLLM({
   provider: "openai-compatible" | "anthropic-compatible",
   model: "gpt-5-nano",
+  baseURL: "https://api.openai.com",       // optional alias for transport.baseURL
+  apiKey: process.env.LLM_API_KEY,         // optional alias for transport.apiKey
   transport: {
-    baseURL: "https://api.openai.com",   // optional
-    apiKey: process.env.LLM_API_KEY,     // optional
+    baseURL: "https://api.openai.com",     // optional
+    apiKey: process.env.LLM_API_KEY,       // optional
+    path: "/v1/chat/completions",          // optional; anthropic-compatible usually uses /v1/messages
+    headers: { "x-trace-id": "docs-demo" }, // optional extra headers
+    defaultBody: { user: "docs-demo" },    // optional provider body defaults
+    version: "2023-06-01",                 // anthropic-compatible only
+    fetcher: fetch,                        // optional custom fetch implementation
   },
   defaults: {
-    mode: "loose" | "strict",            // loose allows repair
-    selfHeal: 0 | 1 | 2,                 // retry attempts
-    debug: false,                        // show repair logs
-    timeout: { request: 30_000 },        // optional default timeouts
+    mode: "loose" | "strict",             // loose allows repair
+    selfHeal: 1,                          // optional retry attempts
+    debug: false,                         // optional structured debug output
+    systemPrompt: "You are a helpful assistant.",
+    timeout: {
+      request: 30_000,
+      tool: 10_000,
+    },
+  },
+});
+```
+`baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
+Common setup patterns:
+```typescript
+// OpenAI-compatible gateway or local endpoint with top-level aliases
+const llm = createLLM({
+  provider: "openai-compatible",
+  model: "gpt-4o-mini",
+  baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
+  apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
+});
+// Anthropic-compatible endpoint with explicit API version
+const anthropic = createLLM({
+  provider: "anthropic-compatible",
+  model: "claude-3-5-sonnet-latest",
+  transport: {
+    baseURL: "https://api.anthropic.com",
+    apiKey: process.env.LLM_API_KEY,
+    version: "2023-06-01",
   },
 });
 ```
 ### Defining Schemas
-Use the `s` wrapper around Zod for enhanced schema building:
+Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
 ```typescript
 import { s } from "extrait";
@@ -115,6 +180,8 @@ const Schema = s.schema(
 ### Making Structured Calls
+`structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
 ```typescript
 // Simple prompt
 const result = await llm.structured(
@@ -159,7 +226,7 @@ const result = await llm.structured(
       },
     },
     request: {
-      signal: abortController.signal,  // optional AbortSignal
+      signal: AbortSignal.timeout(30_000),  // optional AbortSignal
     },
     timeout: {
       request: 30_000,  // ms per LLM HTTP request
@@ -171,6 +238,22 @@ const result = await llm.structured(
 `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
+You can also pass provider request options through `request`:
+```typescript
+const result = await llm.structured(
+  Schema,
+  prompt`Summarize this document: """${text}"""`,
+  {
+    request: {
+      temperature: 0,
+      maxTokens: 800,
+      body: { user: "demo-user" },
+    },
+  }
+);
+```
 ### Images (multimodal)
 Use `images()` to build base64 image content blocks for vision-capable models.
@@ -248,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
 ### Result Object
+Successful structured calls return validated data plus the raw response and trace metadata.
 ```typescript
 {
   data: T,                      // Validated data matching schema
   raw: string,                  // Raw LLM response
   thinkBlocks: ThinkBlock[],    // Extracted <think> blocks
   json: unknown | null,         // Parsed JSON before validation
-  attempts: AttemptTrace[],     // Self-heal attempts
+  attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
   usage?: {
     inputTokens?: number,
     outputTokens?: number,
@@ -265,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
 }
 ```
+Each `attempts` entry includes:
+```typescript
+{
+  attempt: number,
+  selfHeal: boolean,
+  via: "complete" | "stream",
+  raw: string,
+  thinkBlocks: ThinkBlock[],
+  json: unknown | null,
+  candidates: string[],
+  repairLog: string[],
+  zodIssues: z.ZodIssue[],
+  success: boolean,
+  usage?: LLMUsage,
+  finishReason?: string,
+  parsed: ParseLLMOutputResult<T>,
+}
+```
 ### Error Handling
+Catch `StructuredParseError` when repair and validation still fail.
 ```typescript
 import { StructuredParseError } from "extrait";
@@ -292,7 +399,7 @@ Generate vector embeddings using `llm.embed()`. It always returns `number[][]`
 const embedder = createLLM({
   provider: "openai-compatible",
   model: "text-embedding-3-small",
-  transport: { apiKey: process.env.OPENAI_API_KEY },
+  transport: { apiKey: process.env.LLM_API_KEY },
 });
 // Single string
@@ -332,7 +439,7 @@ const embedder = createLLM({
   model: "voyage-3",
   transport: {
     baseURL: "https://api.voyageai.com",
-    apiKey: process.env.VOYAGE_API_KEY,
+    apiKey: process.env.LLM_API_KEY,
   },
 });
@@ -343,6 +450,8 @@ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive
 ### MCP Tools
+Attach MCP clients at request time to let the model call tools during structured generation.
 ```typescript
 import { createMCPClient } from "extrait";
@@ -415,7 +524,7 @@ const llm = createLLM({
 ## Examples
-Run examples with: `bun run dev <example-name>`
+Run repository examples with `bun run dev <example-name>`.
 Available examples:
 - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
@@ -429,6 +538,7 @@ Available examples:
 - `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
 - `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
 - `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
+- `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
 - `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
 Pass arguments after the example name:
@@ -445,6 +555,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
 ## Environment Variables
+These environment variables are used across the examples and common client setups.
 - `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
 - `LLM_BASE_URL` - API endpoint (optional)
 - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
@@ -453,6 +565,8 @@ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
 ## Testing
+Run the test suite with Bun.
 ```bash
 bun run test
 ```

package/dist/index.cjs CHANGED

@@ -1594,6 +1594,25 @@ function mergeUsage(base, next) {
   }
   return Object.keys(merged).length > 0 ? merged : undefined;
 }
+function preferLatestUsage(base, next) {
+  if (!base && !next) {
+    return;
+  }
+  const merged = {};
+  if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
+    merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
+  }
+  if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
+    merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
+  }
+  if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
+    merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
+  }
+  if (base?.cost !== undefined || next?.cost !== undefined) {
+    merged.cost = next?.cost ?? base?.cost;
+  }
+  return Object.keys(merged).length > 0 ? merged : undefined;
+}
 function addOptional(a, b) {
   if (a === undefined && b === undefined) {
     return;
@@ -1658,7 +1677,7 @@ function createOpenAICompatibleAdapter(options) {
         const delta = pickAssistantDelta(json);
         const chunkUsage = pickUsage(json);
         const chunkFinishReason = pickFinishReason(json);
-        usage = mergeUsage(usage, chunkUsage);
+        usage = preferLatestUsage(usage, chunkUsage);
         if (chunkFinishReason) {
           finishReason = chunkFinishReason;
         }
@@ -1993,7 +2012,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
       const chunkUsage = pickUsage(json);
       const chunkFinishReason = pickFinishReason(json);
       collectOpenAIStreamToolCalls(json, streamedToolCalls);
-      roundUsage = mergeUsage(roundUsage, chunkUsage);
+      roundUsage = preferLatestUsage(roundUsage, chunkUsage);
       if (chunkFinishReason) {
         roundFinishReason = chunkFinishReason;
       }
@@ -2099,7 +2118,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
     const delta = pickResponsesStreamTextDelta(json);
     const chunkUsage = pickResponsesStreamUsage(json);
     const chunkFinishReason = pickResponsesStreamFinishReason(json);
-    usage = mergeUsage(usage, chunkUsage);
+    usage = preferLatestUsage(usage, chunkUsage);
     if (chunkFinishReason) {
       finishReason = chunkFinishReason;
     }
@@ -2121,7 +2140,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
   const out = {
     text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
     raw: finalPayload,
-    usage: mergeUsage(usage, pickUsage(finalPayload)),
+    usage: preferLatestUsage(usage, pickUsage(finalPayload)),
     finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
   };
   callbacks.onComplete?.(out);
@@ -2184,7 +2203,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
       const chunkUsage = pickResponsesStreamUsage(json);
       const chunkFinishReason = pickResponsesStreamFinishReason(json);
       collectResponsesStreamToolCalls(json, streamedToolCalls);
-      roundUsage = mergeUsage(roundUsage, chunkUsage);
+      roundUsage = preferLatestUsage(roundUsage, chunkUsage);
       if (chunkFinishReason) {
         roundFinishReason = chunkFinishReason;
       }
@@ -2202,9 +2221,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
         callbacks.onChunk?.(chunk);
       }
     });
-    aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
-    const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
-    aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
+    const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
+    aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
     if (roundFinishReason) {
       finishReason = roundFinishReason;
     } else if (roundPayload) {
@@ -2744,7 +2762,7 @@ function createAnthropicCompatibleAdapter(options) {
         const delta = pickAnthropicDelta(json);
         const chunkUsage = pickUsage2(json);
         const chunkFinishReason = pickFinishReason2(json);
-        usage = mergeUsage(usage, chunkUsage);
+        usage = preferLatestUsage(usage, chunkUsage);
         if (chunkFinishReason) {
           finishReason = chunkFinishReason;
         }
@@ -2941,7 +2959,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
       const chunkUsage = pickUsage2(json);
       const chunkFinishReason = pickFinishReason2(json);
       collectAnthropicStreamToolCalls(json, streamedToolCalls);
-      roundUsage = mergeUsage(roundUsage, chunkUsage);
+      roundUsage = preferLatestUsage(roundUsage, chunkUsage);
       if (chunkFinishReason) {
         roundFinishReason = chunkFinishReason;
       }
@@ -4564,7 +4582,7 @@ async function callModel(adapter, options) {
           handleTextDelta(chunk.textDelta);
         }
         if (chunk.usage) {
-          latestUsage = mergeUsage2(latestUsage, chunk.usage);
+          latestUsage = preferLatestUsage(latestUsage, chunk.usage);
         }
         if (chunk.finishReason) {
           latestFinishReason = chunk.finishReason;
@@ -4572,7 +4590,7 @@ async function callModel(adapter, options) {
       }
     });
     const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
-    const usage = mergeUsage2(latestUsage, response2.usage);
+    const usage = preferLatestUsage(latestUsage, response2.usage);
     const finishReason = response2.finishReason ?? latestFinishReason;
     emitStreamingData(finalText, true, usage, finishReason);
     emitObserve(options.observe, {

package/dist/index.js CHANGED

@@ -1505,6 +1505,25 @@ function mergeUsage(base, next) {
   }
   return Object.keys(merged).length > 0 ? merged : undefined;
 }
+function preferLatestUsage(base, next) {
+  if (!base && !next) {
+    return;
+  }
+  const merged = {};
+  if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
+    merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
+  }
+  if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
+    merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
+  }
+  if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
+    merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
+  }
+  if (base?.cost !== undefined || next?.cost !== undefined) {
+    merged.cost = next?.cost ?? base?.cost;
+  }
+  return Object.keys(merged).length > 0 ? merged : undefined;
+}
 function addOptional(a, b) {
   if (a === undefined && b === undefined) {
     return;
@@ -1569,7 +1588,7 @@ function createOpenAICompatibleAdapter(options) {
         const delta = pickAssistantDelta(json);
         const chunkUsage = pickUsage(json);
         const chunkFinishReason = pickFinishReason(json);
-        usage = mergeUsage(usage, chunkUsage);
+        usage = preferLatestUsage(usage, chunkUsage);
         if (chunkFinishReason) {
           finishReason = chunkFinishReason;
         }
@@ -1904,7 +1923,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
       const chunkUsage = pickUsage(json);
       const chunkFinishReason = pickFinishReason(json);
       collectOpenAIStreamToolCalls(json, streamedToolCalls);
-      roundUsage = mergeUsage(roundUsage, chunkUsage);
+      roundUsage = preferLatestUsage(roundUsage, chunkUsage);
       if (chunkFinishReason) {
         roundFinishReason = chunkFinishReason;
       }
@@ -2010,7 +2029,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
     const delta = pickResponsesStreamTextDelta(json);
     const chunkUsage = pickResponsesStreamUsage(json);
     const chunkFinishReason = pickResponsesStreamFinishReason(json);
-    usage = mergeUsage(usage, chunkUsage);
+    usage = preferLatestUsage(usage, chunkUsage);
     if (chunkFinishReason) {
       finishReason = chunkFinishReason;
     }
@@ -2032,7 +2051,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
   const out = {
     text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
     raw: finalPayload,
-    usage: mergeUsage(usage, pickUsage(finalPayload)),
+    usage: preferLatestUsage(usage, pickUsage(finalPayload)),
     finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
   };
   callbacks.onComplete?.(out);
@@ -2095,7 +2114,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
       const chunkUsage = pickResponsesStreamUsage(json);
       const chunkFinishReason = pickResponsesStreamFinishReason(json);
       collectResponsesStreamToolCalls(json, streamedToolCalls);
-      roundUsage = mergeUsage(roundUsage, chunkUsage);
+      roundUsage = preferLatestUsage(roundUsage, chunkUsage);
       if (chunkFinishReason) {
         roundFinishReason = chunkFinishReason;
       }
@@ -2113,9 +2132,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
         callbacks.onChunk?.(chunk);
       }
     });
-    aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
-    const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
-    aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
+    const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
+    aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
     if (roundFinishReason) {
       finishReason = roundFinishReason;
     } else if (roundPayload) {
@@ -2655,7 +2673,7 @@ function createAnthropicCompatibleAdapter(options) {
         const delta = pickAnthropicDelta(json);
         const chunkUsage = pickUsage2(json);
         const chunkFinishReason = pickFinishReason2(json);
-        usage = mergeUsage(usage, chunkUsage);
+        usage = preferLatestUsage(usage, chunkUsage);
         if (chunkFinishReason) {
           finishReason = chunkFinishReason;
         }
@@ -2852,7 +2870,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
       const chunkUsage = pickUsage2(json);
       const chunkFinishReason = pickFinishReason2(json);
       collectAnthropicStreamToolCalls(json, streamedToolCalls);
-      roundUsage = mergeUsage(roundUsage, chunkUsage);
+      roundUsage = preferLatestUsage(roundUsage, chunkUsage);
       if (chunkFinishReason) {
         roundFinishReason = chunkFinishReason;
       }
@@ -4475,7 +4493,7 @@ async function callModel(adapter, options) {
           handleTextDelta(chunk.textDelta);
         }
         if (chunk.usage) {
-          latestUsage = mergeUsage2(latestUsage, chunk.usage);
+          latestUsage = preferLatestUsage(latestUsage, chunk.usage);
         }
         if (chunk.finishReason) {
           latestFinishReason = chunk.finishReason;
@@ -4483,7 +4501,7 @@ async function callModel(adapter, options) {
       }
     });
     const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
-    const usage = mergeUsage2(latestUsage, response2.usage);
+    const usage = preferLatestUsage(latestUsage, response2.usage);
     const finishReason = response2.finishReason ?? latestFinishReason;
     emitStreamingData(finalText, true, usage, finishReason);
     emitObserve(options.observe, {

package/dist/providers/utils.d.ts CHANGED

@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
 export declare function pickString(value: unknown): string | undefined;
 export declare function toFiniteNumber(value: unknown): number | undefined;
 export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
+export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "extrait",
-  "version": "0.5.4",
+  "version": "0.5.5",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/tterrasson/extrait.git"