npm - plasalid - Versions diffs - 0.7.9 → 0.8.1 - Mend

plasalid 0.7.9 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/README.md +22 -6
package/dist/ai/agent.d.ts +1 -0
package/dist/ai/agent.js +25 -10
package/dist/ai/provider.d.ts +21 -1
package/dist/ai/providers/anthropic.d.ts +0 -1
package/dist/ai/providers/anthropic.js +2 -3
package/dist/ai/providers/gemini.d.ts +14 -0
package/dist/ai/providers/gemini.js +188 -0
package/dist/ai/providers/index.d.ts +2 -1
package/dist/ai/providers/index.js +23 -8
package/dist/ai/providers/openai-compat.d.ts +6 -1
package/dist/ai/providers/openai-compat.js +48 -104
package/dist/ai/providers/openai-shared.d.ts +26 -0
package/dist/ai/providers/openai-shared.js +118 -0
package/dist/ai/providers/openai.d.ts +27 -3
package/dist/ai/providers/openai.js +142 -91
package/dist/cli/commands/scan.js +78 -10
package/dist/cli/commands/status.js +15 -2
package/dist/cli/ink/ScanDashboard.d.ts +7 -6
package/dist/cli/ink/ScanDashboard.js +14 -6
package/dist/cli/setup.js +175 -119
package/dist/config.d.ts +10 -4
package/dist/config.js +40 -11
package/dist/scanner/clarifier.d.ts +2 -0
package/dist/scanner/clarifier.js +1 -0
package/dist/scanner/concurrency.d.ts +9 -2
package/dist/scanner/concurrency.js +3 -1
package/dist/scanner/engine.d.ts +2 -1
package/dist/scanner/engine.js +21 -3
package/dist/scanner/hooks.d.ts +6 -0
package/dist/scanner/parse.js +28 -16
package/dist/scanner/pdf/pdf.d.ts +3 -2
package/dist/scanner/pdf/pdf.js +11 -1
package/dist/scanner/pdf/rasterize.d.ts +6 -0
package/dist/scanner/pdf/rasterize.js +36 -0
package/dist/scanner/worker.d.ts +6 -0
package/dist/scanner/worker.js +16 -3
package/package.json +2 -1

package/README.md CHANGED Viewed

@@ -47,7 +47,7 @@ We also built strict boundaries around your privacy. The database is encrypted l
 * **Everything runs on your machine.** Your ledger is stored in an AES-256 encrypted SQLite database. There are no cloud aggregators or upstream accounts. No third party ever touches your data.
 * **PII redacted by default.** Your name, phone numbers, and full account details are completely scrubbed before any prompt leaves your hardware.
-* **Bring your own AI.** Choose Anthropic or any OpenAI-compatible local model during setup. If you run a local model, your setup stays 100% private and offline.
+* **Bring your own AI.** Choose Anthropic, OpenAI, Google Gemini, or any OpenAI-compatible local model during setup. If you run a local model, your setup stays 100% private and offline.
 * **A harness layer for AI agents.** The structured ledger acts as your baseline data layer. It is designed to be open and ready for any external tools you want to plug in.
@@ -144,11 +144,27 @@ Plasalid stores everything in `~/.plasalid/`:
 ### Environment Variables
 ```bash
-ANTHROPIC_API_KEY=            # Anthropic API key (required when provider is anthropic)
-PLASALID_MODEL=               # Model name; default for Anthropic: claude-sonnet-4-6
-PLASALID_PROVIDER=            # anthropic | openai-compatible. default: anthropic
-OPENAI_COMPATIBLE_BASE_URL=   # e.g. http://localhost:11434/v1 (ollama)
-OPENAI_COMPATIBLE_API_KEY=    # API key for the OpenAI-compatible server (often unused)
+# Provider selection
+PLASALID_PROVIDER=            # anthropic | openai | gemini | openai-compat (default: anthropic)
+# Anthropic
+ANTHROPIC_API_KEY=            # required when provider is anthropic
+ANTHROPIC_MODEL=              # default: claude-sonnet-4-6
+# OpenAI
+OPENAI_API_KEY=               # required when provider is openai
+OPENAI_MODEL=                 # default: gpt-5.4-mini
+# Google Gemini
+GEMINI_API_KEY=               # required when provider is gemini
+GEMINI_MODEL=                 # default: gemini-2.5-pro
+# OpenAI-compatible (LM Studio, Ollama, vLLM, etc.)
+OPENAI_COMPAT_BASE_URL=       # e.g. http://localhost:1234/v1
+OPENAI_COMPAT_API_KEY=        # often blank for local servers
+OPENAI_COMPAT_MODEL=          # e.g. qwen/qwen3-vl-7b
+# Storage
 PLASALID_DB_ENCRYPTION_KEY=   # DB encryption passphrase
 PLASALID_DB_PATH=             # Default: ~/.plasalid/db.sqlite
 PLASALID_DATA_DIR=            # Default: ~/.plasalid/data

package/dist/ai/agent.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ export type ProgressCallback = (event: {
     toolCount: number;
     elapsedMs: number;
 }) => void;
+export type TruncationReason = "tool_steps" | "max_tokens";
 /**
  * Conversational chat used by the Ink TUI. Reuses conversation_history for context
  * continuity, redacts PII on the way out, restores it on the way in for display.

package/dist/ai/agent.js CHANGED Viewed

@@ -1,15 +1,15 @@
-import { config } from "../config.js";
+import { config, getActiveModel } from "../config.js";
 import { buildChatSystemPrompt, buildScanSystemPrompt, buildClarifySystemPrompt, buildRecordSystemPrompt, } from "./system-prompt.js";
 import { getToolDefinitions, executeTool } from "./tools/index.js";
 import { getConversationHistory, saveMessage } from "./memory.js";
 import { recordQuestion } from "../db/queries/questions.js";
 import { redact, unredact } from "./redactor.js";
-import { createProvider } from "./providers/index.js";
+import { getProvider } from "./providers/index.js";
 import { AbortedError, ApiAuthError, ApiError, RateLimitError, } from "./errors.js";
 export { AbortedError } from "./errors.js";
-const provider = createProvider();
+const provider = getProvider();
 const MAX_TOOL_STEPS = 20;
-async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, onProgress, signal, maxToolSteps, }) {
+async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, onProgress, signal, maxToolSteps, maxOutputTokens, }) {
     const messages = [...initialMessages];
     const useThinking = config.thinkingBudget > 0 && provider.supportsThinking;
     const throwIfAborted = () => {
@@ -17,12 +17,14 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
             throw new AbortedError();
     };
     const stepLimit = maxToolSteps ?? MAX_TOOL_STEPS;
+    const baseMaxTokens = maxOutputTokens ?? 4096;
+    const requestMaxTokens = useThinking ? 16000 : baseMaxTokens;
     const startTime = Date.now();
     let toolCount = 0;
     throwIfAborted();
     let response = await provider.sendMessage({
-        model: config.model,
-        maxTokens: useThinking ? 16000 : 4096,
+        model: getActiveModel(),
+        maxTokens: requestMaxTokens,
         system: systemPrompt,
         tools,
         messages,
@@ -50,8 +52,8 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
         onProgress?.({ phase: "responding", toolCount, elapsedMs: Date.now() - startTime });
         throwIfAborted();
         response = await provider.sendMessage({
-            model: config.model,
-            maxTokens: useThinking ? 16000 : 4096,
+            model: getActiveModel(),
+            maxTokens: requestMaxTokens,
             system: systemPrompt,
             tools,
             messages,
@@ -59,13 +61,23 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
             signal,
         });
     }
-    const truncated = response.stopReason === "tool_use" && toolCount >= stepLimit;
+    let truncated = null;
+    if (response.stopReason === "max_tokens") {
+        truncated = "max_tokens";
+    }
+    else if (response.stopReason === "tool_use" && toolCount >= stepLimit) {
+        truncated = "tool_steps";
+    }
     const textBlocks = response.content.filter((b) => b.type === "text");
     const text = unredact(textBlocks.map(b => b.text).join("\n"));
     return { text, messages, truncated };
 }
 const SCAN_MAX_TOOL_STEPS = 100;
 const RESOLVE_MAX_TOOL_STEPS = 60;
+// Statement pages routinely produce a single batched record_transactions call
+// holding 100+ rows; 4096 tokens cuts those off mid-array. 8192 is the
+// smallest cap that fits a dense page without forcing the agent to chunk.
+const SCAN_MAX_OUTPUT_TOKENS = 8192;
 /**
  * Conversational chat used by the Ink TUI. Reuses conversation_history for context
  * continuity, redacts PII on the way out, restores it on the way in for display.
@@ -138,6 +150,7 @@ export async function runScanAgent(opts) {
         onProgress: opts.onProgress,
         signal: opts.signal,
         maxToolSteps: SCAN_MAX_TOOL_STEPS,
+        maxOutputTokens: SCAN_MAX_OUTPUT_TOKENS,
     });
     if (truncated) {
         recordQuestion(opts.db, {
@@ -146,7 +159,9 @@ export async function runScanAgent(opts) {
             transaction_id: null,
             account_id: null,
             kind: "scan_truncated",
-            prompt: `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
+            prompt: truncated === "max_tokens"
+                ? `Scan hit the output-token budget (${SCAN_MAX_OUTPUT_TOKENS}) mid-response, so the last tool call was cut off. Some transactions may be missing. Re-scan after splitting the PDF further, or raise the budget.`
+                : `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
         });
         if (opts.agentCtx.progress && opts.agentCtx.chunkId) {
             opts.agentCtx.progress.emit({ chunkId: opts.agentCtx.chunkId, kind: "question" });

package/dist/ai/provider.d.ts CHANGED Viewed

@@ -11,6 +11,12 @@ export interface ToolUseBlock {
     id: string;
     name: string;
     input: any;
+    /**
+     * Opaque, vendor-specific signature that some providers (Gemini 2.5+) attach
+     * to function-call parts and require us to echo back on the next turn.
+     * Anthropic and OpenAI ignore it.
+     */
+    thoughtSignature?: string;
 }
 export interface DocumentBlock {
     type: "document";
@@ -21,7 +27,15 @@ export interface DocumentBlock {
     };
     title?: string;
 }
-export type NormalizedContentBlock = TextBlock | ToolUseBlock | DocumentBlock;
+export interface ImageBlock {
+    type: "image";
+    source: {
+        type: "base64";
+        media_type: "image/png" | "image/jpeg";
+        data: string;
+    };
+}
+export type NormalizedContentBlock = TextBlock | ToolUseBlock | DocumentBlock | ImageBlock;
 export interface NormalizedResponse {
     content: NormalizedContentBlock[];
     stopReason: string;
@@ -64,5 +78,11 @@ export interface SendMessageParams {
 export interface Provider {
     name: string;
     supportsThinking: boolean;
+    /**
+     * True for providers that accept PDF document blocks natively. False for
+     * plain OpenAI-compat endpoints — the scanner rasterizes pages to PNG for
+     * those and ships `image_url` parts instead.
+     */
+    acceptsDocuments: boolean;
     sendMessage(params: SendMessageParams): Promise<NormalizedResponse>;
 }

package/dist/ai/providers/anthropic.d.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import type { Provider } from "../provider.js";
 export declare function createAnthropicProvider(opts: {
     apiKey: string;
-    baseURL?: string;
 }): Provider;

package/dist/ai/providers/anthropic.js CHANGED Viewed

@@ -1,12 +1,11 @@
 import Anthropic from "@anthropic-ai/sdk";
 import { classifyProviderError } from "../errors.js";
 export function createAnthropicProvider(opts) {
-    const client = new Anthropic(opts.baseURL
-        ? { apiKey: opts.apiKey, baseURL: opts.baseURL }
-        : { apiKey: opts.apiKey });
+    const client = new Anthropic({ apiKey: opts.apiKey });
     return {
         name: "anthropic",
         supportsThinking: true,
+        acceptsDocuments: true,
         async sendMessage(params) {
             const apiParams = {
                 model: params.model,

package/dist/ai/providers/gemini.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+import type { Provider } from "../provider.js";
+/**
+ * Native Gemini provider that talks to Google's GenAI API. Required because
+ * Gemini's OpenAI-compat shim rejects PDF `file` content parts; the native
+ * API accepts them as `inlineData` with mimeType `application/pdf`.
+ *
+ * supportsThinking is `false` because Gemini 2.5+ runs thinking server-side
+ * automatically, so no client-side budget like Claude's extended thinking is
+ * needed.
+ *
+ * runAgent (ai/agent.js) raises maxTokens for the thinking path only when
+ * `config.thinkingBudget > 0 && provider.supportsThinking`, so thinkingBudget
+ * has no effect on requests sent through this provider: they use the base
+ * maxTokens chosen by the caller.
+ *
+ * @param opts.apiKey API key handed to the GoogleGenAI client.
+ * @returns A Provider named "gemini" that accepts PDF document blocks.
+ */
+export declare function createGeminiProvider(opts: {
+    apiKey: string;
+}): Provider;

package/dist/ai/providers/gemini.js ADDED Viewed

@@ -0,0 +1,188 @@
+import { GoogleGenAI } from "@google/genai";
+import { classifyProviderError } from "../errors.js";
+/**
+ * Native Gemini provider that talks to Google's GenAI API. Required because
+ * Gemini's OpenAI-compat shim rejects PDF `file` content parts; the native
+ * API accepts them as `inlineData` with mimeType `application/pdf`.
+ *
+ * supportsThinking is `false` because Gemini 2.5+ runs thinking server-side
+ * automatically, so no client-side budget like Claude's extended thinking is
+ * needed.
+ *
+ * runAgent (ai/agent.js) raises maxTokens for the thinking path only when
+ * `config.thinkingBudget > 0 && provider.supportsThinking`, so thinkingBudget
+ * has no effect on requests sent through this provider: they use the base
+ * maxTokens chosen by the caller.
+ *
+ * @param opts.apiKey API key handed to the GoogleGenAI client.
+ * @returns A Provider whose sendMessage issues one generateContent call and
+ *   returns the normalized response.
+ */
+export function createGeminiProvider(opts) {
+    const client = new GoogleGenAI({ apiKey: opts.apiKey });
+    return {
+        name: "gemini",
+        supportsThinking: false,
+        acceptsDocuments: true, // document blocks are forwarded as inlineData parts by blocksToParts
+        async sendMessage(params) {
+            try {
+                const response = await client.models.generateContent({
+                    model: params.model,
+                    contents: convertMessages(params.messages),
+                    config: {
+                        systemInstruction: params.system,
+                        tools: convertTools(params.tools), // undefined when the tool list is empty
+                        maxOutputTokens: params.maxTokens,
+                        abortSignal: params.signal,
+                    },
+                });
+                return normalizeResponse(response);
+            }
+            catch (e) {
+                classifyProviderError(e, params.signal); // NOTE(review): presumably always throws; if it can return, sendMessage resolves to undefined — confirm
+            }
+        },
+    };
+}
+function convertMessages(messages) { // Map normalized messages to Gemini `contents` entries: "user" stays "user", every other role is sent as "model".
+    const result = [];
+    for (const msg of messages) {
+        if (msg.role === "user") {
+            if (Array.isArray(msg.content) &&
+                msg.content.length > 0 &&
+                msg.content[0].type === "tool_result") { // tool-result envelope is detected from the first block only
+                const toolResults = msg.content;
+                result.push({
+                    role: "user",
+                    parts: toolResults.map((tr) => ({
+                        functionResponse: {
+                            id: tr.tool_use_id,
+                            name: extractToolName(tr.tool_use_id), // NOTE(review): name is derived from the id text; only reliable for ids shaped `${name}-${index}` — confirm for ids returned by the API
+                            response: { content: tr.content }, // tool output is wrapped under a "content" key
+                        },
+                    })),
+                });
+            }
+            else if (Array.isArray(msg.content)) {
+                result.push({
+                    role: "user",
+                    parts: blocksToParts(msg.content),
+                });
+            }
+            else {
+                result.push({ role: "user", parts: [{ text: msg.content }] }); // non-array content becomes a single text part
+            }
+        }
+        else {
+            if (Array.isArray(msg.content)) {
+                result.push({
+                    role: "model",
+                    parts: blocksToParts(msg.content),
+                });
+            }
+            else {
+                result.push({
+                    role: "model",
+                    parts: [{ text: msg.content }],
+                });
+            }
+        }
+    }
+    return result;
+}
+function blocksToParts(blocks) { // Convert normalized content blocks to Gemini parts; only "text", "document" and "tool_use" blocks produce a part.
+    const parts = [];
+    for (const block of blocks) { // NOTE(review): any other block type (e.g. the "image" block declared in provider.d.ts) matches no branch and is dropped without an error — confirm intended
+        if (block.type === "text") {
+            parts.push({ text: block.text });
+        }
+        else if (block.type === "document") {
+            parts.push({
+                inlineData: { // media type and data are forwarded unchanged
+                    mimeType: block.source.media_type,
+                    data: block.source.data,
+                },
+            });
+        }
+        else if (block.type === "tool_use") {
+            const part = {
+                functionCall: {
+                    id: block.id,
+                    name: block.name,
+                    args: (block.input ?? {}), // null/undefined input is sent as an empty args object
+                },
+            };
+            // Gemini 2.5+ requires thought_signature to be echoed back on every
+            // assistant turn that carries function calls — otherwise the next API
+            // call fails with INVALID_ARGUMENT.
+            if (block.thoughtSignature) {
+                part.thoughtSignature = block.thoughtSignature;
+            }
+            parts.push(part);
+        }
+    }
+    return parts;
+}
+function convertTools(tools) { // Wrap all tool definitions in a single Gemini tool entry holding one functionDeclaration per tool.
+    if (tools.length === 0)
+        return undefined; // no tools: the caller passes `tools: undefined` in the request config
+    return [
+        {
+            functionDeclarations: tools.map((t) => ({
+                name: t.name,
+                description: t.description,
+                // Gemini accepts a raw JSON Schema via parametersJsonSchema; our
+                // ToolDefinition.input_schema is already in that shape, so it goes
+                // through without translation.
+                parametersJsonSchema: t.input_schema,
+            })),
+        },
+    ];
+}
+/**
+ * Recovers a tool name from a tool_use id by dropping everything from the
+ * last "-" onward. normalizeResponse assigns ids of the form
+ * `${name}-${index}` when a function call arrives without an id, embedding
+ * the tool name so that the follow-up functionResponse part can recover it —
+ * Gemini requires a `name` field on every functionResponse, and the tool
+ * result message we receive from the agent only carries the tool_use_id.
+ *
+ * An id that contains no "-", or whose only "-" is its first character, is
+ * returned unchanged.
+ *
+ * NOTE(review): normalizeResponse keeps `part.functionCall.id` whenever the
+ * API supplies one; such ids are not guaranteed to follow
+ * `${name}-${index}`, so the name recovered here may be wrong for them —
+ * confirm against real responses.
+ */
+function extractToolName(toolUseId) {
+    const dash = toolUseId.lastIndexOf("-");
+    return dash > 0 ? toolUseId.slice(0, dash) : toolUseId;
+}
+function normalizeResponse(response) { // Convert a generateContent response into { content, stopReason, usage? }; only the first candidate is read.
+    const candidate = response.candidates?.[0];
+    const content = [];
+    let toolIndex = 0;
+    for (const part of candidate?.content?.parts ?? []) {
+        if (part.thought)
+            continue; // thought parts are not surfaced in the normalized content
+        if (typeof part.text === "string" && part.text.length > 0) { // an empty string falls through to the functionCall check
+            content.push({ type: "text", text: part.text });
+        }
+        else if (part.functionCall) {
+            const name = part.functionCall.name ?? "unknown";
+            content.push({
+                type: "tool_use",
+                id: part.functionCall.id ?? `${name}-${toolIndex}`, // synthesized id embeds the tool name so extractToolName can recover it
+                name,
+                input: part.functionCall.args ?? {},
+                ...(part.thoughtSignature
+                    ? { thoughtSignature: part.thoughtSignature }
+                    : {}),
+            });
+            toolIndex++; // counts function-call parts only
+        }
+    }
+    const hasToolCalls = content.some((b) => b.type === "tool_use");
+    // Read finishReason even when content.parts is missing — that happens when
+    // a thinking model burns the entire output budget on thoughts (parts=[] +
+    // finishReason=MAX_TOKENS). Falling through to "end_turn" would hide that.
+    const stopReason = mapFinishReason(candidate?.finishReason, hasToolCalls);
+    const usage = response.usageMetadata
+        ? {
+            input_tokens: response.usageMetadata.promptTokenCount ?? 0, // missing counts default to 0
+            output_tokens: response.usageMetadata.candidatesTokenCount ?? 0,
+        }
+        : undefined;
+    return { content, stopReason, ...(usage ? { usage } : {}) }; // the usage key is omitted entirely when usageMetadata is absent
+}
+function mapFinishReason(reason, hasToolCalls) { // Map Gemini's finishReason to a normalized stop reason.
+    if (reason === "MAX_TOKENS")
+        return "max_tokens"; // checked first, so it wins even when tool calls are present
+    if (hasToolCalls)
+        return "tool_use";
+    return "end_turn"; // every other finishReason (or none) without tool calls
+}

package/dist/ai/providers/index.d.ts CHANGED Viewed

@@ -1,2 +1,3 @@
 import type { Provider } from "../provider.js";
-export declare function createProvider(): Provider;
+/** Singleton so agent.ts and the scanner share one provider instance. */
+export declare function getProvider(): Provider;

package/dist/ai/providers/index.js CHANGED Viewed

@@ -1,12 +1,27 @@
 import { config } from "../../config.js";
 import { createAnthropicProvider } from "./anthropic.js";
-import { createOpenAICompatibleProvider } from "./openai.js";
-export function createProvider() {
-    if (config.providerType === "openai-compatible") {
-        return createOpenAICompatibleProvider({
-            apiKey: config.openaiCompatibleKey || "openai-compatible",
-            baseURL: config.openaiCompatibleBaseURL,
-        });
+import { createOpenAIProvider } from "./openai.js";
+import { createOpenAICompatProvider } from "./openai-compat.js";
+import { createGeminiProvider } from "./gemini.js";
+let cached = null; // provider instance reused by getProvider(); null until it is first built
+function buildProvider() { // Construct the provider selected by config.providerType.
+    switch (config.providerType) { // NOTE(review): no default case — an unrecognized providerType makes this return undefined, which getProvider then caches and returns; the removed code fell back to Anthropic
+        case "anthropic":
+            return createAnthropicProvider({ apiKey: config.anthropicKey });
+        case "openai":
+            return createOpenAIProvider({ apiKey: config.openaiKey });
+        case "gemini":
+            return createGeminiProvider({ apiKey: config.geminiKey });
+        case "openai-compat":
+            return createOpenAICompatProvider({
+                apiKey: config.openaiCompatKey || "openai-compat", // placeholder key when none is configured
+                baseURL: config.openaiCompatBaseURL,
+            });
     }
-    return createAnthropicProvider({ apiKey: config.anthropicKey });
+}
+/**
+ * Singleton so agent.ts and the scanner share one provider instance.
+ *
+ * The provider is built on the first call and the same value is returned on
+ * every later call; only a `null` cache triggers a build.
+ *
+ * @returns The cached provider for `config.providerType`.
+ */
+export function getProvider() {
+    if (cached === null)
+        cached = buildProvider();
+    return cached;
 }

package/dist/ai/providers/openai-compat.d.ts CHANGED Viewed

@@ -1,5 +1,10 @@
 import type { Provider } from "../provider.js";
-export declare function createOpenAICompatibleProvider(opts: {
+/**
+ * Generic Chat Completions client for LM Studio / Ollama / vLLM / etc.
+ * `file` content parts are an OpenAI-only extension and are rejected here;
+ * the scanner rasterizes PDFs to PNG and we ship `image_url` parts.
+ */
+export declare function createOpenAICompatProvider(opts: {
     apiKey: string;
     baseURL: string;
 }): Provider;

package/dist/ai/providers/openai-compat.js CHANGED Viewed

@@ -1,67 +1,36 @@
 import OpenAI from "openai";
-export function createOpenAICompatibleProvider(opts) {
+import { classifyProviderError } from "../errors.js";
+import { convertAssistantMessage, convertToolResults, convertTools, createCompletionWithTokenFallback, isToolResultEnvelope, normalizeResponse, } from "./openai.js";
+/**
+ * Generic Chat Completions client for LM Studio / Ollama / vLLM / etc.
+ * `file` content parts are an OpenAI-only extension and are rejected here;
+ * the scanner rasterizes PDFs to PNG and we ship `image_url` parts.
+ */
+export function createOpenAICompatProvider(opts) {
     const client = new OpenAI({
         apiKey: opts.apiKey,
         baseURL: opts.baseURL,
     });
     return {
-        name: "openai-compatible",
+        name: "openai-compat",
         supportsThinking: false,
+        acceptsDocuments: false,
         async sendMessage(params) {
-            const messages = convertMessages(params.system, params.messages);
             const tools = convertTools(params.tools);
-            // Try max_tokens first (broadest compat: Ollama, vLLM, older OpenAI models),
-            // fall back to max_completion_tokens if rejected (newer OpenAI models require it)
+            const body = {
+                model: params.model,
+                maxTokens: params.maxTokens,
+                messages: convertMessages(params.system, params.messages),
+                tools: tools.length > 0 ? tools : undefined,
+            };
             let response;
             try {
-                response = await client.chat.completions.create({
-                    model: params.model,
-                    max_tokens: params.maxTokens,
-                    messages,
-                    tools: tools.length > 0 ? tools : undefined,
-                }, { signal: params.signal });
+                response = await createCompletionWithTokenFallback(client, body, { signal: params.signal });
             }
             catch (e) {
-                if (e.status === 400 && e.message?.includes("max_tokens")) {
-                    response = await client.chat.completions.create({
-                        model: params.model,
-                        max_completion_tokens: params.maxTokens,
-                        messages,
-                        tools: tools.length > 0 ? tools : undefined,
-                    }, { signal: params.signal });
-                }
-                else {
-                    throw e;
-                }
-            }
-            const choice = response.choices[0];
-            if (!choice) {
-                return { content: [], stopReason: "end_turn" };
-            }
-            const content = [];
-            if (choice.message.content) {
-                content.push({ type: "text", text: choice.message.content });
+                classifyProviderError(e, params.signal);
             }
-            if (choice.message.tool_calls) {
-                for (const tc of choice.message.tool_calls) {
-                    if (tc.type !== "function")
-                        continue;
-                    content.push({
-                        type: "tool_use",
-                        id: tc.id,
-                        name: tc.function.name,
-                        input: parseArguments(tc.function.arguments),
-                    });
-                }
-            }
-            const hasToolCalls = content.some((b) => b.type === "tool_use");
-            return {
-                content,
-                stopReason: hasToolCalls ? "tool_use" : "end_turn",
-                usage: response.usage
-                    ? { input_tokens: response.usage.prompt_tokens, output_tokens: response.usage.completion_tokens }
-                    : undefined,
-            };
+            return normalizeResponse(response);
         },
     };
 }
@@ -71,25 +40,11 @@ function convertMessages(system, messages) {
     ];
     for (const msg of messages) {
         if (msg.role === "user") {
-            if (Array.isArray(msg.content) &&
-                msg.content.length > 0 &&
-                msg.content[0].type === "tool_result") {
-                const toolResults = msg.content;
-                for (const tr of toolResults) {
-                    result.push({
-                        role: "tool",
-                        tool_call_id: tr.tool_use_id,
-                        content: tr.content,
-                    });
-                }
+            if (isToolResultEnvelope(msg.content)) {
+                result.push(...convertToolResults(msg.content));
             }
             else if (Array.isArray(msg.content)) {
-                // Strip document blocks (OpenAI-compat doesn't accept them); keep text.
-                const text = msg.content
-                    .filter((b) => b.type === "text")
-                    .map((b) => b.text)
-                    .join("\n");
-                result.push({ role: "user", content: text });
+                result.push(buildUserMessage(msg.content));
             }
             else {
                 result.push({ role: "user", content: msg.content });
@@ -97,26 +52,7 @@ function convertMessages(system, messages) {
         }
         else {
             if (Array.isArray(msg.content)) {
-                const blocks = msg.content;
-                const textParts = blocks
-                    .filter((b) => b.type === "text")
-                    .map((b) => b.text)
-                    .join("\n");
-                const toolCalls = blocks
-                    .filter((b) => b.type === "tool_use")
-                    .map((b) => {
-                    const tu = b;
-                    return {
-                        id: tu.id,
-                        type: "function",
-                        function: { name: tu.name, arguments: JSON.stringify(tu.input) },
-                    };
-                });
-                result.push({
-                    role: "assistant",
-                    content: textParts || null,
-                    ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
-                });
+                result.push(convertAssistantMessage(msg.content));
             }
             else {
                 result.push({ role: "assistant", content: msg.content });
@@ -125,23 +61,31 @@ function convertMessages(system, messages) {
     }
     return result;
 }
-function convertTools(tools) {
-    return tools.map((t) => ({
-        type: "function",
-        function: {
-            name: t.name,
-            description: t.description,
-            parameters: t.input_schema,
-        },
-    }));
-}
-function parseArguments(args) {
-    if (typeof args !== "string")
-        return args;
-    try {
-        return JSON.parse(args);
+function buildUserMessage(blocks) { // Build one Chat Completions user message from normalized blocks; throws if any block is a document.
+    for (const block of blocks) {
+        if (block.type === "document") {
+            throw new Error("openai-compat does not accept document blocks. The scanner should rasterize PDFs to images for this provider — this is a bug.");
+        }
+    }
+    const hasImage = blocks.some((b) => b.type === "image");
+    if (!hasImage) { // without images the message content is a plain string, not a parts array
+        const text = blocks
+            .filter((b) => b.type === "text")
+            .map((b) => b.text)
+            .join("\n");
+        return { role: "user", content: text };
     }
-    catch {
-        return {};
+    const parts = [];
+    for (const block of blocks) { // blocks other than "text" and "image" produce no part
+        if (block.type === "text") {
+            parts.push({ type: "text", text: block.text });
+        }
+        else if (block.type === "image") {
+            parts.push({
+                type: "image_url",
+                image_url: { url: `data:${block.source.media_type};base64,${block.source.data}` }, // image is inlined as a base64 data URL
+            });
+        }
     }
+    return { role: "user", content: parts };
 }