extrait 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,16 +8,20 @@ Structured JSON extraction from LLMs with validation, repair, and streaming.
8
8
  </a>
9
9
  </p>
10
10
 
11
- **Features:**
11
+ ## Features
12
+
12
13
  - Multi-candidate JSON extraction from LLM responses
13
14
  - Automatic repair with jsonrepair
14
15
  - Zod schema validation and coercion
15
16
  - Optional self-healing for validation failures
16
17
  - Streaming support
17
18
  - MCP tools
19
+ - Vector embeddings (OpenAI-compatible + Voyage AI)
18
20
 
19
21
  ## Installation
20
22
 
23
+ Install `extrait` with your preferred package manager.
24
+
21
25
  ```bash
22
26
  bun add extrait
23
27
  # or
@@ -28,56 +32,118 @@ deno add npm:extrait
28
32
 
29
33
  ## Quick Start
30
34
 
35
+ Use a custom OpenAI-compatible transport to point `extrait` at a local endpoint.
36
+
31
37
  ```typescript
32
38
  import { createLLM, prompt, s } from "extrait";
33
39
  import { z } from "zod";
34
40
 
35
41
  const llm = createLLM({
36
42
  provider: "openai-compatible",
37
- model: "gpt-5-nano",
38
- transport: { apiKey: process.env.LLM_API_KEY },
43
+ model: "mistralai/ministral-3-3b",
44
+ transport: {
45
+ baseURL: "http://localhost:1234/v1",
46
+ apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
47
+ },
39
48
  });
40
49
 
41
- const SummarySchema = s.schema(
42
- "Summary",
50
+ const RecipeSchema = s.schema(
51
+ "Recipe",
43
52
  z.object({
44
- summary: s.string().min(1).describe("One-sentence summary"),
45
- tags: s.array(s.string()).default([]).describe("Keywords"),
53
+ title: s.string().min(1).describe("Short recipe title"),
54
+ ingredients: s.array(s.string()).min(1).describe("Ingredient list"),
46
55
  })
47
56
  );
48
57
 
49
58
  const result = await llm.structured(
50
- SummarySchema,
51
- prompt`Summarize this: """${text}"""`
59
+ RecipeSchema,
60
+ prompt`Extract a simple recipe from this text: """${text}"""`
52
61
  );
53
62
 
54
63
  console.log(result.data);
55
64
  ```
56
65
 
66
+ ## Examples at a Glance
67
+
68
+ These examples cover the most common usage patterns in the repository.
69
+
70
+ - [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
71
+ - [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
72
+ - [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
73
+ - [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
74
+ - [`examples/image-analysis.ts`](examples/image-analysis.ts) - Vision input with structured output
75
+ - [`examples/embeddings.ts`](examples/embeddings.ts) - Embeddings and similarity workflows
76
+
77
+ ```bash
78
+ bun run dev simple "Bun.js runtime"
79
+ bun run dev streaming
80
+ bun run dev calculator-tool
81
+ ```
82
+
57
83
  ## API Reference
58
84
 
59
- ### Creating an LLM Client
85
+ The sections below cover the main building blocks of the library.
86
+
87
+ ### Create an LLM Client
88
+
89
+ Use `createLLM()` to configure the provider, model, transport, and client defaults.
60
90
 
61
91
  ```typescript
62
92
  const llm = createLLM({
63
93
  provider: "openai-compatible" | "anthropic-compatible",
64
94
  model: "gpt-5-nano",
95
+ baseURL: "https://api.openai.com", // optional alias for transport.baseURL
96
+ apiKey: process.env.LLM_API_KEY, // optional alias for transport.apiKey
65
97
  transport: {
66
- baseURL: "https://api.openai.com", // optional
67
- apiKey: process.env.LLM_API_KEY, // optional
98
+ baseURL: "https://api.openai.com", // optional
99
+ apiKey: process.env.LLM_API_KEY, // optional
100
+ path: "/v1/chat/completions", // optional; anthropic-compatible usually uses /v1/messages
101
+ headers: { "x-trace-id": "docs-demo" }, // optional extra headers
102
+ defaultBody: { user: "docs-demo" }, // optional provider body defaults
103
+ version: "2023-06-01", // anthropic-compatible only
104
+ fetcher: fetch, // optional custom fetch implementation
68
105
  },
69
106
  defaults: {
70
- mode: "loose" | "strict", // loose allows repair
71
- selfHeal: 0 | 1 | 2, // retry attempts
72
- debug: false, // show repair logs
73
- timeout: { request: 30_000 }, // optional default timeouts
107
+ mode: "loose" | "strict", // loose allows repair
108
+ selfHeal: 1, // optional retry attempts
109
+ debug: false, // optional structured debug output
110
+ systemPrompt: "You are a helpful assistant.",
111
+ timeout: {
112
+ request: 30_000,
113
+ tool: 10_000,
114
+ },
115
+ },
116
+ });
117
+ ```
118
+
119
+ `baseURL` and `apiKey` at the top level are shorthand aliases for `transport.baseURL` and `transport.apiKey`. For request-specific options such as `stream`, `request`, `schemaInstruction`, and parse tuning, see the sections below.
120
+
121
+ Common setup patterns:
122
+
123
+ ```typescript
124
+ // OpenAI-compatible gateway or local endpoint with top-level aliases
125
+ const llm = createLLM({
126
+ provider: "openai-compatible",
127
+ model: "gpt-4o-mini",
128
+ baseURL: process.env.LLM_BASE_URL ?? "http://localhost:1234/v1",
129
+ apiKey: process.env.LLM_API_KEY ?? "local-demo-key",
130
+ });
131
+
132
+ // Anthropic-compatible endpoint with explicit API version
133
+ const anthropic = createLLM({
134
+ provider: "anthropic-compatible",
135
+ model: "claude-3-5-sonnet-latest",
136
+ transport: {
137
+ baseURL: "https://api.anthropic.com",
138
+ apiKey: process.env.LLM_API_KEY,
139
+ version: "2023-06-01",
74
140
  },
75
141
  });
76
142
  ```
77
143
 
78
144
  ### Defining Schemas
79
145
 
80
- Use the `s` wrapper around Zod for enhanced schema building:
146
+ Use the `s` wrapper around Zod for schema names, descriptions, and a more ergonomic authoring flow.
81
147
 
82
148
  ```typescript
83
149
  import { s } from "extrait";
@@ -114,6 +180,8 @@ const Schema = s.schema(
114
180
 
115
181
  ### Making Structured Calls
116
182
 
183
+ `structured()` accepts a schema plus either a tagged prompt, a fluent prompt builder, or a raw message payload.
184
+
117
185
  ```typescript
118
186
  // Simple prompt
119
187
  const result = await llm.structured(
@@ -158,7 +226,7 @@ const result = await llm.structured(
158
226
  },
159
227
  },
160
228
  request: {
161
- signal: abortController.signal, // optional AbortSignal
229
+ signal: AbortSignal.timeout(30_000), // optional AbortSignal
162
230
  },
163
231
  timeout: {
164
232
  request: 30_000, // ms per LLM HTTP request
@@ -170,6 +238,22 @@ const result = await llm.structured(
170
238
 
171
239
  `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
172
240
 
241
+ You can also pass provider request options through `request`:
242
+
243
+ ```typescript
244
+ const result = await llm.structured(
245
+ Schema,
246
+ prompt`Summarize this document: """${text}"""`,
247
+ {
248
+ request: {
249
+ temperature: 0,
250
+ maxTokens: 800,
251
+ body: { user: "demo-user" },
252
+ },
253
+ }
254
+ );
255
+ ```
256
+
173
257
  ### Images (multimodal)
174
258
 
175
259
  Use `images()` to build base64 image content blocks for vision-capable models.
@@ -247,13 +331,15 @@ const messages = conversation("You are a vision assistant.", [
247
331
 
248
332
  ### Result Object
249
333
 
334
+ Successful structured calls return validated data plus the raw response and trace metadata.
335
+
250
336
  ```typescript
251
337
  {
252
338
  data: T, // Validated data matching schema
253
339
  raw: string, // Raw LLM response
254
340
  thinkBlocks: ThinkBlock[], // Extracted <think> blocks
255
341
  json: unknown | null, // Parsed JSON before validation
256
- attempts: AttemptTrace[], // Self-heal attempts
342
+ attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
257
343
  usage?: {
258
344
  inputTokens?: number,
259
345
  outputTokens?: number,
@@ -264,8 +350,30 @@ const messages = conversation("You are a vision assistant.", [
264
350
  }
265
351
  ```
266
352
 
353
+ Each `attempts` entry includes:
354
+
355
+ ```typescript
356
+ {
357
+ attempt: number,
358
+ selfHeal: boolean,
359
+ via: "complete" | "stream",
360
+ raw: string,
361
+ thinkBlocks: ThinkBlock[],
362
+ json: unknown | null,
363
+ candidates: string[],
364
+ repairLog: string[],
365
+ zodIssues: z.ZodIssue[],
366
+ success: boolean,
367
+ usage?: LLMUsage,
368
+ finishReason?: string,
369
+ parsed: ParseLLMOutputResult<T>,
370
+ }
371
+ ```
372
+
267
373
  ### Error Handling
268
374
 
375
+ Catch `StructuredParseError` when repair and validation still fail.
376
+
269
377
  ```typescript
270
378
  import { StructuredParseError } from "extrait";
271
379
 
@@ -282,8 +390,68 @@ try {
282
390
  }
283
391
  ```
284
392
 
393
+ ### Embeddings
394
+
395
+ Generate vector embeddings using `llm.embed()`. It always returns `number[][]` — one vector per input string.
396
+
397
+ ```typescript
398
+ // Create a dedicated embedder client (recommended)
399
+ const embedder = createLLM({
400
+ provider: "openai-compatible",
401
+ model: "text-embedding-3-small",
402
+ transport: { apiKey: process.env.LLM_API_KEY },
403
+ });
404
+
405
+ // Single string
406
+ const { embeddings, model, usage } = await embedder.embed("Hello world");
407
+ const vector: number[] = embeddings[0];
408
+
409
+ // Multiple strings in one request
410
+ const { embeddings: batch } = await embedder.embed(["text one", "text two", "text three"]);
411
+ // batch[0], batch[1], batch[2] — one vector each
412
+
413
+ // Optional: override model or request extra options per call
414
+ const { embeddings: custom } = await embedder.embed("Hello", {
415
+ model: "text-embedding-ada-002",
416
+ dimensions: 512, // supported by text-embedding-3-* models
417
+ body: { user: "user-id" }, // pass-through to provider
418
+ });
419
+ ```
420
+
421
+ **Result shape:**
422
+
423
+ ```typescript
424
+ {
425
+ embeddings: number[][]; // one vector per input
426
+ model: string;
427
+ usage?: { inputTokens?: number; totalTokens?: number };
428
+ raw?: unknown; // full provider response
429
+ }
430
+ ```
431
+
432
+ **Anthropic / Voyage AI**
433
+
434
+ Anthropic does not provide a native embedding API. Their recommended solution is [Voyage AI](https://www.voyageai.com), which uses the same OpenAI-compatible format:
435
+
436
+ ```typescript
437
+ const embedder = createLLM({
438
+ provider: "openai-compatible",
439
+ model: "voyage-3",
440
+ transport: {
441
+ baseURL: "https://api.voyageai.com",
442
+ apiKey: process.env.LLM_API_KEY,
443
+ },
444
+ });
445
+
446
+ const { embeddings } = await embedder.embed(["query", "document"]);
447
+ ```
448
+
449
+ Calling `llm.embed()` on an `anthropic-compatible` adapter throws a descriptive error pointing to Voyage AI.
450
+
285
451
  ### MCP Tools
286
452
 
453
+ Attach MCP clients at request time to let the model call tools during structured generation.
454
+
287
455
  ```typescript
288
456
  import { createMCPClient } from "extrait";
289
457
 
@@ -356,7 +524,7 @@ const llm = createLLM({
356
524
 
357
525
  ## Examples
358
526
 
359
- Run examples with: `bun run dev <example-name>`
527
+ Run repository examples with `bun run dev <example-name>`.
360
528
 
361
529
  Available examples:
362
530
  - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
@@ -370,6 +538,8 @@ Available examples:
370
538
  - `calculator-tool` - MCP tool integration ([calculator-tool.ts](examples/calculator-tool.ts))
371
539
  - `image-analysis` - Multimodal structured extraction from an image file ([image-analysis.ts](examples/image-analysis.ts))
372
540
  - `conversation` - Multi-turn conversation history and inline image messages ([conversation.ts](examples/conversation.ts))
541
+ - `simulated-tools` - Inject fake tool calls/results into conversation context without real execution ([simulated-tools.ts](examples/simulated-tools.ts))
542
+ - `embeddings` - Vector embeddings, cosine similarity, and semantic comparison ([embeddings.ts](examples/embeddings.ts))
373
543
 
374
544
  Pass arguments after the example name:
375
545
  ```bash
@@ -380,10 +550,13 @@ bun run dev timeout 5000
380
550
  bun run dev simple "Bun.js runtime"
381
551
  bun run dev sentiment-analysis "I love this product."
382
552
  bun run dev multi-step-reasoning "Why is the sky blue?"
553
+ bun run dev embeddings "the cat sat on the mat" "a feline rested on the rug"
383
554
  ```
384
555
 
385
556
  ## Environment Variables
386
557
 
558
+ These environment variables are used across the examples and common client setups.
559
+
387
560
  - `LLM_PROVIDER` - `openai-compatible` or `anthropic-compatible`
388
561
  - `LLM_BASE_URL` - API endpoint (optional)
389
562
  - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
@@ -392,6 +565,8 @@ bun run dev multi-step-reasoning "Why is the sky blue?"
392
565
 
393
566
  ## Testing
394
567
 
568
+ Run the test suite with Bun.
569
+
395
570
  ```bash
396
571
  bun run test
397
572
  ```
@@ -1,8 +1,21 @@
1
1
  import { type ImageInput } from "./image";
2
2
  import type { LLMMessage } from "./types";
3
- export interface ConversationEntry {
4
- role: "user" | "assistant";
3
+ export type ConversationEntry = {
4
+ role: "user";
5
5
  text: string;
6
6
  images?: ImageInput[];
7
- }
7
+ } | {
8
+ role: "assistant";
9
+ text: string;
10
+ images?: ImageInput[];
11
+ } | {
12
+ role: "tool_call";
13
+ id: string;
14
+ name: string;
15
+ arguments?: Record<string, unknown>;
16
+ } | {
17
+ role: "tool_result";
18
+ id: string;
19
+ output: unknown;
20
+ };
8
21
  export declare function conversation(systemPrompt: string, entries: ConversationEntry[]): LLMMessage[];
package/dist/index.cjs CHANGED
@@ -1594,6 +1594,25 @@ function mergeUsage(base, next) {
1594
1594
  }
1595
1595
  return Object.keys(merged).length > 0 ? merged : undefined;
1596
1596
  }
1597
+ function preferLatestUsage(base, next) {
1598
+ if (!base && !next) {
1599
+ return;
1600
+ }
1601
+ const merged = {};
1602
+ if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
1603
+ merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
1604
+ }
1605
+ if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
1606
+ merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
1607
+ }
1608
+ if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
1609
+ merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
1610
+ }
1611
+ if (base?.cost !== undefined || next?.cost !== undefined) {
1612
+ merged.cost = next?.cost ?? base?.cost;
1613
+ }
1614
+ return Object.keys(merged).length > 0 ? merged : undefined;
1615
+ }
1597
1616
  function addOptional(a, b) {
1598
1617
  if (a === undefined && b === undefined) {
1599
1618
  return;
@@ -1606,6 +1625,7 @@ function createOpenAICompatibleAdapter(options) {
1606
1625
  const fetcher = options.fetcher ?? fetch;
1607
1626
  const path = options.path ?? "/v1/chat/completions";
1608
1627
  const responsesPath = options.responsesPath ?? "/v1/responses";
1628
+ const embeddingPath = options.embeddingPath ?? "/v1/embeddings";
1609
1629
  return {
1610
1630
  provider: "openai-compatible",
1611
1631
  model: options.model,
@@ -1657,7 +1677,7 @@ function createOpenAICompatibleAdapter(options) {
1657
1677
  const delta = pickAssistantDelta(json);
1658
1678
  const chunkUsage = pickUsage(json);
1659
1679
  const chunkFinishReason = pickFinishReason(json);
1660
- usage = mergeUsage(usage, chunkUsage);
1680
+ usage = preferLatestUsage(usage, chunkUsage);
1661
1681
  if (chunkFinishReason) {
1662
1682
  finishReason = chunkFinishReason;
1663
1683
  }
@@ -1678,6 +1698,36 @@ function createOpenAICompatibleAdapter(options) {
1678
1698
  const out = { text, usage, finishReason };
1679
1699
  callbacks.onComplete?.(out);
1680
1700
  return out;
1701
+ },
1702
+ async embed(request) {
1703
+ const body = cleanUndefined({
1704
+ ...options.defaultBody,
1705
+ ...request.body,
1706
+ model: request.model ?? options.model,
1707
+ input: request.input,
1708
+ dimensions: request.dimensions,
1709
+ encoding_format: "float"
1710
+ });
1711
+ const response = await fetcher(buildURL(options.baseURL, embeddingPath), {
1712
+ method: "POST",
1713
+ headers: buildHeaders(options),
1714
+ body: JSON.stringify(body)
1715
+ });
1716
+ if (!response.ok) {
1717
+ const message = await response.text();
1718
+ throw new Error(`HTTP ${response.status}: ${message}`);
1719
+ }
1720
+ const json = await response.json();
1721
+ const data = json.data;
1722
+ if (!Array.isArray(data)) {
1723
+ throw new Error("Unexpected embedding response: missing data array");
1724
+ }
1725
+ return {
1726
+ embeddings: data.map((d) => isRecord2(d) && Array.isArray(d.embedding) ? d.embedding : []),
1727
+ model: pickString(json.model) ?? body.model,
1728
+ usage: pickUsage(json),
1729
+ raw: json
1730
+ };
1681
1731
  }
1682
1732
  };
1683
1733
  }
@@ -1962,7 +2012,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1962
2012
  const chunkUsage = pickUsage(json);
1963
2013
  const chunkFinishReason = pickFinishReason(json);
1964
2014
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
1965
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2015
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
1966
2016
  if (chunkFinishReason) {
1967
2017
  roundFinishReason = chunkFinishReason;
1968
2018
  }
@@ -2068,7 +2118,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2068
2118
  const delta = pickResponsesStreamTextDelta(json);
2069
2119
  const chunkUsage = pickResponsesStreamUsage(json);
2070
2120
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2071
- usage = mergeUsage(usage, chunkUsage);
2121
+ usage = preferLatestUsage(usage, chunkUsage);
2072
2122
  if (chunkFinishReason) {
2073
2123
  finishReason = chunkFinishReason;
2074
2124
  }
@@ -2090,7 +2140,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2090
2140
  const out = {
2091
2141
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2092
2142
  raw: finalPayload,
2093
- usage: mergeUsage(usage, pickUsage(finalPayload)),
2143
+ usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2094
2144
  finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2095
2145
  };
2096
2146
  callbacks.onComplete?.(out);
@@ -2153,7 +2203,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2153
2203
  const chunkUsage = pickResponsesStreamUsage(json);
2154
2204
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2155
2205
  collectResponsesStreamToolCalls(json, streamedToolCalls);
2156
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2206
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2157
2207
  if (chunkFinishReason) {
2158
2208
  roundFinishReason = chunkFinishReason;
2159
2209
  }
@@ -2171,9 +2221,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2171
2221
  callbacks.onChunk?.(chunk);
2172
2222
  }
2173
2223
  });
2174
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2175
- const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
2176
- aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
2224
+ const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2225
+ aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
2177
2226
  if (roundFinishReason) {
2178
2227
  finishReason = roundFinishReason;
2179
2228
  } else if (roundPayload) {
@@ -2266,10 +2315,7 @@ function buildResponsesInput(request) {
2266
2315
  return buildMessages(request);
2267
2316
  }
2268
2317
  function toOpenAIMessage(message) {
2269
- return {
2270
- role: message.role,
2271
- content: message.content
2272
- };
2318
+ return { ...message };
2273
2319
  }
2274
2320
  function toResponsesTools(tools) {
2275
2321
  if (!Array.isArray(tools) || tools.length === 0) {
@@ -2716,7 +2762,7 @@ function createAnthropicCompatibleAdapter(options) {
2716
2762
  const delta = pickAnthropicDelta(json);
2717
2763
  const chunkUsage = pickUsage2(json);
2718
2764
  const chunkFinishReason = pickFinishReason2(json);
2719
- usage = mergeUsage(usage, chunkUsage);
2765
+ usage = preferLatestUsage(usage, chunkUsage);
2720
2766
  if (chunkFinishReason) {
2721
2767
  finishReason = chunkFinishReason;
2722
2768
  }
@@ -2737,6 +2783,9 @@ function createAnthropicCompatibleAdapter(options) {
2737
2783
  const out = { text, usage, finishReason };
2738
2784
  callbacks.onComplete?.(out);
2739
2785
  return out;
2786
+ },
2787
+ async embed() {
2788
+ throw new Error("Anthropic does not provide a native embedding API. " + "Use the openai-compatible provider with Voyage AI (https://api.voyageai.com) — " + "Anthropic's recommended embedding solution, which uses the same request format.");
2740
2789
  }
2741
2790
  };
2742
2791
  }
@@ -2910,7 +2959,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
2910
2959
  const chunkUsage = pickUsage2(json);
2911
2960
  const chunkFinishReason = pickFinishReason2(json);
2912
2961
  collectAnthropicStreamToolCalls(json, streamedToolCalls);
2913
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2962
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2914
2963
  if (chunkFinishReason) {
2915
2964
  roundFinishReason = chunkFinishReason;
2916
2965
  }
@@ -3015,6 +3064,23 @@ function toAnthropicInput(messages) {
3015
3064
  continue;
3016
3065
  }
3017
3066
  sawNonSystem = true;
3067
+ if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
3068
+ const parts = [];
3069
+ if (message.content)
3070
+ parts.push({ type: "text", text: message.content });
3071
+ for (const tc of message.tool_calls) {
3072
+ parts.push({ type: "tool_use", id: tc.id, name: tc.function.name, input: JSON.parse(tc.function.arguments) });
3073
+ }
3074
+ normalizedMessages.push({ role: "assistant", content: parts });
3075
+ continue;
3076
+ }
3077
+ if (message.role === "tool") {
3078
+ normalizedMessages.push({
3079
+ role: "user",
3080
+ content: [{ type: "tool_result", tool_use_id: message.tool_call_id, content: message.content }]
3081
+ });
3082
+ continue;
3083
+ }
3018
3084
  normalizedMessages.push({
3019
3085
  role: message.role,
3020
3086
  content: message.content
@@ -4516,7 +4582,7 @@ async function callModel(adapter, options) {
4516
4582
  handleTextDelta(chunk.textDelta);
4517
4583
  }
4518
4584
  if (chunk.usage) {
4519
- latestUsage = mergeUsage2(latestUsage, chunk.usage);
4585
+ latestUsage = preferLatestUsage(latestUsage, chunk.usage);
4520
4586
  }
4521
4587
  if (chunk.finishReason) {
4522
4588
  latestFinishReason = chunk.finishReason;
@@ -4524,7 +4590,7 @@ async function callModel(adapter, options) {
4524
4590
  }
4525
4591
  });
4526
4592
  const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
4527
- const usage = mergeUsage2(latestUsage, response2.usage);
4593
+ const usage = preferLatestUsage(latestUsage, response2.usage);
4528
4594
  const finishReason = response2.finishReason ?? latestFinishReason;
4529
4595
  emitStreamingData(finalText, true, usage, finishReason);
4530
4596
  emitObserve(options.observe, {
@@ -4794,6 +4860,12 @@ function createLLM(config, registry = createDefaultProviderRegistry()) {
4794
4860
  async structured(schema, prompt, options) {
4795
4861
  const merged = mergeStructuredOptions(defaults, options);
4796
4862
  return structured(adapter, schema, prompt, merged);
4863
+ },
4864
+ async embed(input, options = {}) {
4865
+ if (!adapter.embed) {
4866
+ throw new Error(`Provider "${adapter.provider ?? "unknown"}" does not support embeddings.`);
4867
+ }
4868
+ return adapter.embed({ ...options, input });
4797
4869
  }
4798
4870
  };
4799
4871
  }
@@ -4955,10 +5027,32 @@ async function resizeImage(source, size, mimeType) {
4955
5027
  function conversation(systemPrompt, entries) {
4956
5028
  return [
4957
5029
  { role: "system", content: systemPrompt },
4958
- ...entries.map((entry) => ({
4959
- role: entry.role,
4960
- content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
4961
- }))
5030
+ ...entries.map((entry) => {
5031
+ if (entry.role === "tool_call") {
5032
+ return {
5033
+ role: "assistant",
5034
+ content: "",
5035
+ tool_calls: [
5036
+ {
5037
+ id: entry.id,
5038
+ type: "function",
5039
+ function: { name: entry.name, arguments: JSON.stringify(entry.arguments ?? {}) }
5040
+ }
5041
+ ]
5042
+ };
5043
+ }
5044
+ if (entry.role === "tool_result") {
5045
+ return {
5046
+ role: "tool",
5047
+ content: typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output),
5048
+ tool_call_id: entry.id
5049
+ };
5050
+ }
5051
+ return {
5052
+ role: entry.role,
5053
+ content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
5054
+ };
5055
+ })
4962
5056
  ];
4963
5057
  }
4964
5058
  // src/prompt.ts
package/dist/index.d.ts CHANGED
@@ -14,4 +14,4 @@ export { createOpenAICompatibleAdapter, type OpenAICompatibleAdapterOptions, } f
14
14
  export { createAnthropicCompatibleAdapter, DEFAULT_ANTHROPIC_MAX_TOKENS, DEFAULT_ANTHROPIC_VERSION, type AnthropicCompatibleAdapterOptions, } from "./providers/anthropic-compatible";
15
15
  export { DEFAULT_MAX_TOOL_ROUNDS } from "./providers/mcp-runtime";
16
16
  export { createDefaultProviderRegistry, createModelAdapter, createProviderRegistry, registerBuiltinProviders, type BuiltinProviderKind, type ModelAdapterConfig, type ProviderFactory, type ProviderRegistry, type ProviderTransportConfig, } from "./providers/registry";
17
- export type { CandidateDiagnostics, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
17
+ export type { CandidateDiagnostics, EmbeddingRequest, EmbeddingResult, LLMImageContent, LLMMessageContent, LLMTextContent, ExtractJsonCandidatesOptions, ExtractionCandidate, ExtractionHeuristicsOptions, ExtractionParseHint, HTTPHeaders, LLMAdapter, LLMMessage, LLMRequest, LLMResponse, LLMStreamCallbacks, LLMStreamChunk, LLMToolCall, LLMToolCallRef, LLMToolDebugOptions, LLMToolExecution, LLMToolOutputTransformer, LLMToolArgumentsTransformer, LLMToolChoice, MCPCallToolParams, MCPListToolsResult, MCPToolClient, MCPToolDescriptor, MCPToolSchema, LLMUsage, MarkdownCodeBlock, MarkdownCodeOptions, ParseLLMOutputOptions, ParseLLMOutputResult, ParseTraceEvent, PipelineError, StructuredAttempt, StructuredCallOptions, StructuredDebugOptions, StructuredError, StructuredMode, StructuredOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptResolver, StructuredPromptValue, StructuredResult, StructuredStreamData, StructuredStreamEvent, StructuredStreamInput, StructuredStreamOptions, StructuredSelfHealInput, StructuredTimeoutOptions, ThinkDiagnostics, ThinkBlock, StructuredTraceEvent, } from "./types";
package/dist/index.js CHANGED
@@ -1505,6 +1505,25 @@ function mergeUsage(base, next) {
1505
1505
  }
1506
1506
  return Object.keys(merged).length > 0 ? merged : undefined;
1507
1507
  }
1508
+ function preferLatestUsage(base, next) {
1509
+ if (!base && !next) {
1510
+ return;
1511
+ }
1512
+ const merged = {};
1513
+ if (base?.inputTokens !== undefined || next?.inputTokens !== undefined) {
1514
+ merged.inputTokens = next?.inputTokens ?? base?.inputTokens;
1515
+ }
1516
+ if (base?.outputTokens !== undefined || next?.outputTokens !== undefined) {
1517
+ merged.outputTokens = next?.outputTokens ?? base?.outputTokens;
1518
+ }
1519
+ if (base?.totalTokens !== undefined || next?.totalTokens !== undefined) {
1520
+ merged.totalTokens = next?.totalTokens ?? base?.totalTokens;
1521
+ }
1522
+ if (base?.cost !== undefined || next?.cost !== undefined) {
1523
+ merged.cost = next?.cost ?? base?.cost;
1524
+ }
1525
+ return Object.keys(merged).length > 0 ? merged : undefined;
1526
+ }
1508
1527
  function addOptional(a, b) {
1509
1528
  if (a === undefined && b === undefined) {
1510
1529
  return;
@@ -1517,6 +1536,7 @@ function createOpenAICompatibleAdapter(options) {
1517
1536
  const fetcher = options.fetcher ?? fetch;
1518
1537
  const path = options.path ?? "/v1/chat/completions";
1519
1538
  const responsesPath = options.responsesPath ?? "/v1/responses";
1539
+ const embeddingPath = options.embeddingPath ?? "/v1/embeddings";
1520
1540
  return {
1521
1541
  provider: "openai-compatible",
1522
1542
  model: options.model,
@@ -1568,7 +1588,7 @@ function createOpenAICompatibleAdapter(options) {
1568
1588
  const delta = pickAssistantDelta(json);
1569
1589
  const chunkUsage = pickUsage(json);
1570
1590
  const chunkFinishReason = pickFinishReason(json);
1571
- usage = mergeUsage(usage, chunkUsage);
1591
+ usage = preferLatestUsage(usage, chunkUsage);
1572
1592
  if (chunkFinishReason) {
1573
1593
  finishReason = chunkFinishReason;
1574
1594
  }
@@ -1589,6 +1609,36 @@ function createOpenAICompatibleAdapter(options) {
1589
1609
  const out = { text, usage, finishReason };
1590
1610
  callbacks.onComplete?.(out);
1591
1611
  return out;
1612
+ },
1613
+ async embed(request) {
1614
+ const body = cleanUndefined({
1615
+ ...options.defaultBody,
1616
+ ...request.body,
1617
+ model: request.model ?? options.model,
1618
+ input: request.input,
1619
+ dimensions: request.dimensions,
1620
+ encoding_format: "float"
1621
+ });
1622
+ const response = await fetcher(buildURL(options.baseURL, embeddingPath), {
1623
+ method: "POST",
1624
+ headers: buildHeaders(options),
1625
+ body: JSON.stringify(body)
1626
+ });
1627
+ if (!response.ok) {
1628
+ const message = await response.text();
1629
+ throw new Error(`HTTP ${response.status}: ${message}`);
1630
+ }
1631
+ const json = await response.json();
1632
+ const data = json.data;
1633
+ if (!Array.isArray(data)) {
1634
+ throw new Error("Unexpected embedding response: missing data array");
1635
+ }
1636
+ return {
1637
+ embeddings: data.map((d) => isRecord2(d) && Array.isArray(d.embedding) ? d.embedding : []),
1638
+ model: pickString(json.model) ?? body.model,
1639
+ usage: pickUsage(json),
1640
+ raw: json
1641
+ };
1592
1642
  }
1593
1643
  };
1594
1644
  }
@@ -1873,7 +1923,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1873
1923
  const chunkUsage = pickUsage(json);
1874
1924
  const chunkFinishReason = pickFinishReason(json);
1875
1925
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
1876
- roundUsage = mergeUsage(roundUsage, chunkUsage);
1926
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
1877
1927
  if (chunkFinishReason) {
1878
1928
  roundFinishReason = chunkFinishReason;
1879
1929
  }
@@ -1979,7 +2029,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
1979
2029
  const delta = pickResponsesStreamTextDelta(json);
1980
2030
  const chunkUsage = pickResponsesStreamUsage(json);
1981
2031
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
1982
- usage = mergeUsage(usage, chunkUsage);
2032
+ usage = preferLatestUsage(usage, chunkUsage);
1983
2033
  if (chunkFinishReason) {
1984
2034
  finishReason = chunkFinishReason;
1985
2035
  }
@@ -2001,7 +2051,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2001
2051
  const out = {
2002
2052
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2003
2053
  raw: finalPayload,
2004
- usage: mergeUsage(usage, pickUsage(finalPayload)),
2054
+ usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2005
2055
  finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2006
2056
  };
2007
2057
  callbacks.onComplete?.(out);
@@ -2064,7 +2114,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2064
2114
  const chunkUsage = pickResponsesStreamUsage(json);
2065
2115
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2066
2116
  collectResponsesStreamToolCalls(json, streamedToolCalls);
2067
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2117
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2068
2118
  if (chunkFinishReason) {
2069
2119
  roundFinishReason = chunkFinishReason;
2070
2120
  }
@@ -2082,9 +2132,8 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2082
2132
  callbacks.onChunk?.(chunk);
2083
2133
  }
2084
2134
  });
2085
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2086
- const payloadUsage = roundPayload ? pickUsage(roundPayload) : undefined;
2087
- aggregatedUsage = mergeUsage(aggregatedUsage, payloadUsage);
2135
+ const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2136
+ aggregatedUsage = mergeUsage(aggregatedUsage, resolvedRoundUsage);
2088
2137
  if (roundFinishReason) {
2089
2138
  finishReason = roundFinishReason;
2090
2139
  } else if (roundPayload) {
@@ -2177,10 +2226,7 @@ function buildResponsesInput(request) {
2177
2226
  return buildMessages(request);
2178
2227
  }
2179
2228
  function toOpenAIMessage(message) {
2180
- return {
2181
- role: message.role,
2182
- content: message.content
2183
- };
2229
+ return { ...message };
2184
2230
  }
2185
2231
  function toResponsesTools(tools) {
2186
2232
  if (!Array.isArray(tools) || tools.length === 0) {
@@ -2627,7 +2673,7 @@ function createAnthropicCompatibleAdapter(options) {
2627
2673
  const delta = pickAnthropicDelta(json);
2628
2674
  const chunkUsage = pickUsage2(json);
2629
2675
  const chunkFinishReason = pickFinishReason2(json);
2630
- usage = mergeUsage(usage, chunkUsage);
2676
+ usage = preferLatestUsage(usage, chunkUsage);
2631
2677
  if (chunkFinishReason) {
2632
2678
  finishReason = chunkFinishReason;
2633
2679
  }
@@ -2648,6 +2694,9 @@ function createAnthropicCompatibleAdapter(options) {
2648
2694
  const out = { text, usage, finishReason };
2649
2695
  callbacks.onComplete?.(out);
2650
2696
  return out;
2697
+ },
2698
+ async embed() {
2699
+ throw new Error("Anthropic does not provide a native embedding API. " + "Use the openai-compatible provider with Voyage AI (https://api.voyageai.com) — " + "Anthropic's recommended embedding solution, which uses the same request format.");
2651
2700
  }
2652
2701
  };
2653
2702
  }
@@ -2821,7 +2870,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
2821
2870
  const chunkUsage = pickUsage2(json);
2822
2871
  const chunkFinishReason = pickFinishReason2(json);
2823
2872
  collectAnthropicStreamToolCalls(json, streamedToolCalls);
2824
- roundUsage = mergeUsage(roundUsage, chunkUsage);
2873
+ roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2825
2874
  if (chunkFinishReason) {
2826
2875
  roundFinishReason = chunkFinishReason;
2827
2876
  }
@@ -2926,6 +2975,23 @@ function toAnthropicInput(messages) {
2926
2975
  continue;
2927
2976
  }
2928
2977
  sawNonSystem = true;
2978
+ if (message.role === "assistant" && Array.isArray(message.tool_calls)) {
2979
+ const parts = [];
2980
+ if (message.content)
2981
+ parts.push({ type: "text", text: message.content });
2982
+ for (const tc of message.tool_calls) {
2983
+ parts.push({ type: "tool_use", id: tc.id, name: tc.function.name, input: JSON.parse(tc.function.arguments) });
2984
+ }
2985
+ normalizedMessages.push({ role: "assistant", content: parts });
2986
+ continue;
2987
+ }
2988
+ if (message.role === "tool") {
2989
+ normalizedMessages.push({
2990
+ role: "user",
2991
+ content: [{ type: "tool_result", tool_use_id: message.tool_call_id, content: message.content }]
2992
+ });
2993
+ continue;
2994
+ }
2929
2995
  normalizedMessages.push({
2930
2996
  role: message.role,
2931
2997
  content: message.content
@@ -4427,7 +4493,7 @@ async function callModel(adapter, options) {
4427
4493
  handleTextDelta(chunk.textDelta);
4428
4494
  }
4429
4495
  if (chunk.usage) {
4430
- latestUsage = mergeUsage2(latestUsage, chunk.usage);
4496
+ latestUsage = preferLatestUsage(latestUsage, chunk.usage);
4431
4497
  }
4432
4498
  if (chunk.finishReason) {
4433
4499
  latestFinishReason = chunk.finishReason;
@@ -4435,7 +4501,7 @@ async function callModel(adapter, options) {
4435
4501
  }
4436
4502
  });
4437
4503
  const finalText = typeof response2.text === "string" && response2.text.length > 0 ? response2.text : streamedRaw;
4438
- const usage = mergeUsage2(latestUsage, response2.usage);
4504
+ const usage = preferLatestUsage(latestUsage, response2.usage);
4439
4505
  const finishReason = response2.finishReason ?? latestFinishReason;
4440
4506
  emitStreamingData(finalText, true, usage, finishReason);
4441
4507
  emitObserve(options.observe, {
@@ -4705,6 +4771,12 @@ function createLLM(config, registry = createDefaultProviderRegistry()) {
4705
4771
  async structured(schema, prompt, options) {
4706
4772
  const merged = mergeStructuredOptions(defaults, options);
4707
4773
  return structured(adapter, schema, prompt, merged);
4774
+ },
4775
+ async embed(input, options = {}) {
4776
+ if (!adapter.embed) {
4777
+ throw new Error(`Provider "${adapter.provider ?? "unknown"}" does not support embeddings.`);
4778
+ }
4779
+ return adapter.embed({ ...options, input });
4708
4780
  }
4709
4781
  };
4710
4782
  }
@@ -4870,10 +4942,32 @@ async function resizeImage(source, size, mimeType) {
4870
4942
  function conversation(systemPrompt, entries) {
4871
4943
  return [
4872
4944
  { role: "system", content: systemPrompt },
4873
- ...entries.map((entry) => ({
4874
- role: entry.role,
4875
- content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
4876
- }))
4945
+ ...entries.map((entry) => {
4946
+ if (entry.role === "tool_call") {
4947
+ return {
4948
+ role: "assistant",
4949
+ content: "",
4950
+ tool_calls: [
4951
+ {
4952
+ id: entry.id,
4953
+ type: "function",
4954
+ function: { name: entry.name, arguments: JSON.stringify(entry.arguments ?? {}) }
4955
+ }
4956
+ ]
4957
+ };
4958
+ }
4959
+ if (entry.role === "tool_result") {
4960
+ return {
4961
+ role: "tool",
4962
+ content: typeof entry.output === "string" ? entry.output : JSON.stringify(entry.output),
4963
+ tool_call_id: entry.id
4964
+ };
4965
+ }
4966
+ return {
4967
+ role: entry.role,
4968
+ content: entry.images && entry.images.length > 0 ? [{ type: "text", text: entry.text }, ...images(entry.images)] : entry.text
4969
+ };
4970
+ })
4877
4971
  ];
4878
4972
  }
4879
4973
  // src/prompt.ts
package/dist/llm.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import type { z } from "zod";
2
2
  import { type ModelAdapterConfig, type ProviderRegistry } from "./providers/registry";
3
- import type { LLMAdapter, StructuredCallOptions, StructuredPromptBuilder, StructuredResult } from "./types";
3
+ import type { EmbeddingRequest, EmbeddingResult, LLMAdapter, StructuredCallOptions, StructuredPromptBuilder, StructuredResult } from "./types";
4
4
  export interface CreateLLMOptions extends ModelAdapterConfig {
5
5
  defaults?: StructuredCallOptions<z.ZodTypeAny>;
6
6
  }
@@ -9,5 +9,6 @@ export interface LLMClient {
9
9
  provider?: string;
10
10
  model?: string;
11
11
  structured<TSchema extends z.ZodTypeAny>(schema: TSchema, prompt: StructuredPromptBuilder, options?: StructuredCallOptions<TSchema>): Promise<StructuredResult<z.infer<TSchema>>>;
12
+ embed(input: string | string[], options?: Omit<EmbeddingRequest, "input">): Promise<EmbeddingResult>;
12
13
  }
13
14
  export declare function createLLM(config: CreateLLMOptions, registry?: ProviderRegistry): LLMClient;
@@ -5,6 +5,7 @@ export interface OpenAICompatibleAdapterOptions {
5
5
  apiKey?: string;
6
6
  path?: string;
7
7
  responsesPath?: string;
8
+ embeddingPath?: string;
8
9
  defaultMaxToolRounds?: number;
9
10
  headers?: HTTPHeaders;
10
11
  defaultBody?: Record<string, unknown>;
@@ -7,3 +7,4 @@ export declare function isRecord(value: unknown): value is Record<string, unknow
7
7
  export declare function pickString(value: unknown): string | undefined;
8
8
  export declare function toFiniteNumber(value: unknown): number | undefined;
9
9
  export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
10
+ export declare function preferLatestUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
package/dist/types.d.ts CHANGED
@@ -130,9 +130,18 @@ export interface LLMImageContent {
130
130
  };
131
131
  }
132
132
  export type LLMMessageContent = string | (LLMTextContent | LLMImageContent)[];
133
+ export interface LLMToolCallRef {
134
+ id: string;
135
+ type: "function";
136
+ function: {
137
+ name: string;
138
+ arguments: string;
139
+ };
140
+ }
133
141
  export interface LLMMessage {
134
142
  role: "system" | "user" | "assistant" | "tool";
135
143
  content: LLMMessageContent;
144
+ [key: string]: unknown;
136
145
  }
137
146
  export interface LLMRequest {
138
147
  prompt?: string;
@@ -179,11 +188,24 @@ export interface LLMStreamCallbacks {
179
188
  onChunk?: (chunk: LLMStreamChunk) => void;
180
189
  onComplete?: (response: LLMResponse) => void;
181
190
  }
191
+ export interface EmbeddingRequest {
192
+ input: string | string[];
193
+ model?: string;
194
+ dimensions?: number;
195
+ body?: Record<string, unknown>;
196
+ }
197
+ export interface EmbeddingResult {
198
+ embeddings: number[][];
199
+ model: string;
200
+ usage?: LLMUsage;
201
+ raw?: unknown;
202
+ }
182
203
  export interface LLMAdapter {
183
204
  provider?: string;
184
205
  model?: string;
185
206
  complete(request: LLMRequest): Promise<LLMResponse>;
186
207
  stream?(request: LLMRequest, callbacks?: LLMStreamCallbacks): Promise<LLMResponse>;
208
+ embed?(request: EmbeddingRequest): Promise<EmbeddingResult>;
187
209
  }
188
210
  export interface LLMToolCall {
189
211
  id: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "extrait",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/tterrasson/extrait.git"