plasalid 0.7.8 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,7 +19,7 @@ In US and Europe, the most of financial apps are likely powered by aggregators e
19
19
 
20
20
  Your data is locked in bank silos. Tracking your net worth means logging into half a dozen apps and crunching the numbers manually. This fragmentation creates massive blind spots. Subscriptions are forgotten, strange charges go unnoticed, and planning for big financial goals becomes a guessing game.
21
21
 
22
- **Plasalid is built to fix this AI. Think of it as a personal financial harness**.
22
+ **Plasalid is built to fix this. Think of it as a personal financial AI harness**.
23
23
 
24
24
  You drop your raw financial documents (bank statements, credit card bills, payslips) straight into a folder on your machine. Plasalid parses those files and extracts every transaction, balance, and holding. It transforms a messy pile of PDFs into a clean, double-entry ledger. You only have to build this foundation once. The result is an open, structured backend for your finances, ready to plug into any tool you want.
25
25
 
@@ -47,7 +47,7 @@ We also built strict boundaries around your privacy. The database is encrypted l
47
47
 
48
48
  * **Everything runs on your machine.** Your ledger is stored in an AES-256 encrypted SQLite database. There are no cloud aggregators or upstream accounts. No third party ever touches your data.
49
49
  * **PII redacted by default.** Your name, phone numbers, and full account details are completely scrubbed before any prompt leaves your hardware.
50
- * **Bring your own AI.** Choose Anthropic or any OpenAI-compatible local model during setup. If you run a local model, your setup stays 100% private and offline.
50
+ * **Bring your own AI.** Choose Anthropic, OpenAI, Google Gemini, or any OpenAI-compatible local model during setup. If you run a local model, your setup stays 100% private and offline.
51
51
  * **A harness layer for AI agents.** The structured ledger acts as your baseline data layer. It is designed to be open and ready for any external tools you want to plug in.
52
52
 
53
53
 
@@ -144,11 +144,27 @@ Plasalid stores everything in `~/.plasalid/`:
144
144
  ### Environment Variables
145
145
 
146
146
  ```bash
147
- ANTHROPIC_API_KEY= # Anthropic API key (required when provider is anthropic)
148
- PLASALID_MODEL= # Model name; default for Anthropic: claude-sonnet-4-6
149
- PLASALID_PROVIDER= # anthropic | openai-compatible. default: anthropic
150
- OPENAI_COMPATIBLE_BASE_URL= # e.g. http://localhost:11434/v1 (ollama)
151
- OPENAI_COMPATIBLE_API_KEY= # API key for the OpenAI-compatible server (often unused)
147
+ # Provider selection
148
+ PLASALID_PROVIDER= # anthropic | openai | gemini | openai-compat (default: anthropic)
149
+
150
+ # Anthropic
151
+ ANTHROPIC_API_KEY= # required when provider is anthropic
152
+ ANTHROPIC_MODEL= # default: claude-sonnet-4-6
153
+
154
+ # OpenAI
155
+ OPENAI_API_KEY= # required when provider is openai
156
+ OPENAI_MODEL= # default: gpt-5.4-mini
157
+
158
+ # Google Gemini
159
+ GEMINI_API_KEY= # required when provider is gemini
160
+ GEMINI_MODEL= # default: gemini-3.5-flash
161
+
162
+ # OpenAI-compatible (LM Studio, Ollama, vLLM, etc.)
163
+ OPENAI_COMPAT_BASE_URL= # e.g. http://localhost:1234/v1
164
+ OPENAI_COMPAT_API_KEY= # often blank for local servers
165
+ OPENAI_COMPAT_MODEL= # e.g. qwen/qwen3-vl-7b
166
+
167
+ # Storage
152
168
  PLASALID_DB_ENCRYPTION_KEY= # DB encryption passphrase
153
169
  PLASALID_DB_PATH= # Default: ~/.plasalid/db.sqlite
154
170
  PLASALID_DATA_DIR= # Default: ~/.plasalid/data
@@ -9,6 +9,7 @@ export type ProgressCallback = (event: {
9
9
  toolCount: number;
10
10
  elapsedMs: number;
11
11
  }) => void;
12
+ export type TruncationReason = "tool_steps" | "max_tokens";
12
13
  /**
13
14
  * Conversational chat used by the Ink TUI. Reuses conversation_history for context
14
15
  * continuity, redacts PII on the way out, restores it on the way in for display.
package/dist/ai/agent.js CHANGED
@@ -1,15 +1,15 @@
1
- import { config } from "../config.js";
1
+ import { config, getActiveModel } from "../config.js";
2
2
  import { buildChatSystemPrompt, buildScanSystemPrompt, buildClarifySystemPrompt, buildRecordSystemPrompt, } from "./system-prompt.js";
3
3
  import { getToolDefinitions, executeTool } from "./tools/index.js";
4
4
  import { getConversationHistory, saveMessage } from "./memory.js";
5
5
  import { recordQuestion } from "../db/queries/questions.js";
6
6
  import { redact, unredact } from "./redactor.js";
7
- import { createProvider } from "./providers/index.js";
7
+ import { getProvider } from "./providers/index.js";
8
8
  import { AbortedError, ApiAuthError, ApiError, RateLimitError, } from "./errors.js";
9
9
  export { AbortedError } from "./errors.js";
10
- const provider = createProvider();
10
+ const provider = getProvider();
11
11
  const MAX_TOOL_STEPS = 20;
12
- async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, onProgress, signal, maxToolSteps, }) {
12
+ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, onProgress, signal, maxToolSteps, maxOutputTokens, }) {
13
13
  const messages = [...initialMessages];
14
14
  const useThinking = config.thinkingBudget > 0 && provider.supportsThinking;
15
15
  const throwIfAborted = () => {
@@ -17,12 +17,14 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
17
17
  throw new AbortedError();
18
18
  };
19
19
  const stepLimit = maxToolSteps ?? MAX_TOOL_STEPS;
20
+ const baseMaxTokens = maxOutputTokens ?? 4096;
21
+ const requestMaxTokens = useThinking ? 16000 : baseMaxTokens;
20
22
  const startTime = Date.now();
21
23
  let toolCount = 0;
22
24
  throwIfAborted();
23
25
  let response = await provider.sendMessage({
24
- model: config.model,
25
- maxTokens: useThinking ? 16000 : 4096,
26
+ model: getActiveModel(),
27
+ maxTokens: requestMaxTokens,
26
28
  system: systemPrompt,
27
29
  tools,
28
30
  messages,
@@ -50,8 +52,8 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
50
52
  onProgress?.({ phase: "responding", toolCount, elapsedMs: Date.now() - startTime });
51
53
  throwIfAborted();
52
54
  response = await provider.sendMessage({
53
- model: config.model,
54
- maxTokens: useThinking ? 16000 : 4096,
55
+ model: getActiveModel(),
56
+ maxTokens: requestMaxTokens,
55
57
  system: systemPrompt,
56
58
  tools,
57
59
  messages,
@@ -59,13 +61,23 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
59
61
  signal,
60
62
  });
61
63
  }
62
- const truncated = response.stopReason === "tool_use" && toolCount >= stepLimit;
64
+ let truncated = null;
65
+ if (response.stopReason === "max_tokens") {
66
+ truncated = "max_tokens";
67
+ }
68
+ else if (response.stopReason === "tool_use" && toolCount >= stepLimit) {
69
+ truncated = "tool_steps";
70
+ }
63
71
  const textBlocks = response.content.filter((b) => b.type === "text");
64
72
  const text = unredact(textBlocks.map(b => b.text).join("\n"));
65
73
  return { text, messages, truncated };
66
74
  }
67
75
  const SCAN_MAX_TOOL_STEPS = 100;
68
76
  const RESOLVE_MAX_TOOL_STEPS = 60;
77
+ // Statement pages routinely produce a single batched record_transactions call
78
+ // holding 100+ rows; 4096 tokens cuts those off mid-array. 8192 is the
79
+ // smallest cap that fits a dense page without forcing the agent to chunk.
80
+ const SCAN_MAX_OUTPUT_TOKENS = 8192;
69
81
  /**
70
82
  * Conversational chat used by the Ink TUI. Reuses conversation_history for context
71
83
  * continuity, redacts PII on the way out, restores it on the way in for display.
@@ -138,6 +150,7 @@ export async function runScanAgent(opts) {
138
150
  onProgress: opts.onProgress,
139
151
  signal: opts.signal,
140
152
  maxToolSteps: SCAN_MAX_TOOL_STEPS,
153
+ maxOutputTokens: SCAN_MAX_OUTPUT_TOKENS,
141
154
  });
142
155
  if (truncated) {
143
156
  recordQuestion(opts.db, {
@@ -146,7 +159,9 @@ export async function runScanAgent(opts) {
146
159
  transaction_id: null,
147
160
  account_id: null,
148
161
  kind: "scan_truncated",
149
- prompt: `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
162
+ prompt: truncated === "max_tokens"
163
+ ? `Scan hit the output-token budget (${SCAN_MAX_OUTPUT_TOKENS}) mid-response, so the last tool call was cut off. Some transactions may be missing. Re-scan after splitting the PDF further, or raise the budget.`
164
+ : `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
150
165
  });
151
166
  if (opts.agentCtx.progress && opts.agentCtx.chunkId) {
152
167
  opts.agentCtx.progress.emit({ chunkId: opts.agentCtx.chunkId, kind: "question" });
@@ -11,6 +11,12 @@ export interface ToolUseBlock {
11
11
  id: string;
12
12
  name: string;
13
13
  input: any;
14
+ /**
15
+ * Opaque, vendor-specific signature that some providers (Gemini 2.5+) attach
16
+ * to function-call parts and require us to echo back on the next turn.
17
+ * Anthropic and OpenAI ignore it.
18
+ */
19
+ thoughtSignature?: string;
14
20
  }
15
21
  export interface DocumentBlock {
16
22
  type: "document";
@@ -21,7 +27,15 @@ export interface DocumentBlock {
21
27
  };
22
28
  title?: string;
23
29
  }
24
- export type NormalizedContentBlock = TextBlock | ToolUseBlock | DocumentBlock;
30
+ export interface ImageBlock {
31
+ type: "image";
32
+ source: {
33
+ type: "base64";
34
+ media_type: "image/png" | "image/jpeg";
35
+ data: string;
36
+ };
37
+ }
38
+ export type NormalizedContentBlock = TextBlock | ToolUseBlock | DocumentBlock | ImageBlock;
25
39
  export interface NormalizedResponse {
26
40
  content: NormalizedContentBlock[];
27
41
  stopReason: string;
@@ -64,5 +78,11 @@ export interface SendMessageParams {
64
78
  export interface Provider {
65
79
  name: string;
66
80
  supportsThinking: boolean;
81
+ /**
82
+ * True for providers that accept PDF document blocks natively. False for
83
+ * plain OpenAI-compat endpoints — the scanner rasterizes pages to PNG for
84
+ * those and ships `image_url` parts instead.
85
+ */
86
+ acceptsDocuments: boolean;
67
87
  sendMessage(params: SendMessageParams): Promise<NormalizedResponse>;
68
88
  }
@@ -1,5 +1,4 @@
1
1
  import type { Provider } from "../provider.js";
2
2
  export declare function createAnthropicProvider(opts: {
3
3
  apiKey: string;
4
- baseURL?: string;
5
4
  }): Provider;
@@ -1,12 +1,11 @@
1
1
  import Anthropic from "@anthropic-ai/sdk";
2
2
  import { classifyProviderError } from "../errors.js";
3
3
  export function createAnthropicProvider(opts) {
4
- const client = new Anthropic(opts.baseURL
5
- ? { apiKey: opts.apiKey, baseURL: opts.baseURL }
6
- : { apiKey: opts.apiKey });
4
+ const client = new Anthropic({ apiKey: opts.apiKey });
7
5
  return {
8
6
  name: "anthropic",
9
7
  supportsThinking: true,
8
+ acceptsDocuments: true,
10
9
  async sendMessage(params) {
11
10
  const apiParams = {
12
11
  model: params.model,
@@ -0,0 +1,14 @@
1
+ import type { Provider } from "../provider.js";
2
+ /**
3
+ * Native Gemini provider that talks to Google's GenAI API. Required because
4
+ * Gemini's OpenAI-compat shim rejects PDF `file` content parts; the native
5
+ * API accepts them as `inlineData` with mimeType `application/pdf`.
6
+ *
7
+ * supportsThinking is `false` because Gemini 2.5+ runs thinking server-side
8
+ * automatically — we don't need a client-side budget like Claude's extended
9
+ * thinking. Because this flag is false, the agent never switches to its raised
10
+ * thinking-path maxTokens for Gemini; requests keep the base output cap.
11
+ */
12
+ export declare function createGeminiProvider(opts: {
13
+ apiKey: string;
14
+ }): Provider;
@@ -0,0 +1,188 @@
1
+ import { GoogleGenAI } from "@google/genai";
2
+ import { classifyProviderError } from "../errors.js";
3
+ /**
4
+ * Native Gemini provider that talks to Google's GenAI API. Required because
5
+ * Gemini's OpenAI-compat shim rejects PDF `file` content parts; the native
6
+ * API accepts them as `inlineData` with mimeType `application/pdf`.
7
+ *
8
+ * supportsThinking is `false` because Gemini 2.5+ runs thinking server-side
9
+ * automatically — we don't need a client-side budget like Claude's extended
10
+ * thinking. Because this flag is false, the agent never switches to its raised
11
+ * thinking-path maxTokens for Gemini; requests keep the base output cap.
12
+ */
13
+ export function createGeminiProvider(opts) {
14
+ const client = new GoogleGenAI({ apiKey: opts.apiKey });
15
+ return {
16
+ name: "gemini",
17
+ supportsThinking: false,
18
+ acceptsDocuments: true,
19
+ async sendMessage(params) {
20
+ try {
21
+ const response = await client.models.generateContent({
22
+ model: params.model,
23
+ contents: convertMessages(params.messages),
24
+ config: {
25
+ systemInstruction: params.system,
26
+ tools: convertTools(params.tools),
27
+ maxOutputTokens: params.maxTokens,
28
+ abortSignal: params.signal,
29
+ },
30
+ });
31
+ return normalizeResponse(response);
32
+ }
33
+ catch (e) {
34
+ classifyProviderError(e, params.signal);
35
+ }
36
+ },
37
+ };
38
+ }
39
+ function convertMessages(messages) {
40
+ const result = [];
41
+ for (const msg of messages) {
42
+ if (msg.role === "user") {
43
+ if (Array.isArray(msg.content) &&
44
+ msg.content.length > 0 &&
45
+ msg.content[0].type === "tool_result") {
46
+ const toolResults = msg.content;
47
+ result.push({
48
+ role: "user",
49
+ parts: toolResults.map((tr) => ({
50
+ functionResponse: {
51
+ id: tr.tool_use_id,
52
+ name: extractToolName(tr.tool_use_id),
53
+ response: { content: tr.content },
54
+ },
55
+ })),
56
+ });
57
+ }
58
+ else if (Array.isArray(msg.content)) {
59
+ result.push({
60
+ role: "user",
61
+ parts: blocksToParts(msg.content),
62
+ });
63
+ }
64
+ else {
65
+ result.push({ role: "user", parts: [{ text: msg.content }] });
66
+ }
67
+ }
68
+ else {
69
+ if (Array.isArray(msg.content)) {
70
+ result.push({
71
+ role: "model",
72
+ parts: blocksToParts(msg.content),
73
+ });
74
+ }
75
+ else {
76
+ result.push({
77
+ role: "model",
78
+ parts: [{ text: msg.content }],
79
+ });
80
+ }
81
+ }
82
+ }
83
+ return result;
84
+ }
85
+ function blocksToParts(blocks) {
86
+ const parts = [];
87
+ for (const block of blocks) {
88
+ if (block.type === "text") {
89
+ parts.push({ text: block.text });
90
+ }
91
+ else if (block.type === "document") {
92
+ parts.push({
93
+ inlineData: {
94
+ mimeType: block.source.media_type,
95
+ data: block.source.data,
96
+ },
97
+ });
98
+ }
99
+ else if (block.type === "tool_use") {
100
+ const part = {
101
+ functionCall: {
102
+ id: block.id,
103
+ name: block.name,
104
+ args: (block.input ?? {}),
105
+ },
106
+ };
107
+ // Gemini 2.5+ requires thought_signature to be echoed back on every
108
+ // assistant turn that carries function calls — otherwise the next API
109
+ // call fails with INVALID_ARGUMENT.
110
+ if (block.thoughtSignature) {
111
+ part.thoughtSignature = block.thoughtSignature;
112
+ }
113
+ parts.push(part);
114
+ }
115
+ }
116
+ return parts;
117
+ }
118
+ function convertTools(tools) {
119
+ if (tools.length === 0)
120
+ return undefined;
121
+ return [
122
+ {
123
+ functionDeclarations: tools.map((t) => ({
124
+ name: t.name,
125
+ description: t.description,
126
+ // Gemini accepts a raw JSON Schema via parametersJsonSchema; our
127
+ // ToolDefinition.input_schema is already in that shape, so it goes
128
+ // through without translation.
129
+ parametersJsonSchema: t.input_schema,
130
+ })),
131
+ },
132
+ ];
133
+ }
134
+ /**
135
+ * When Gemini omits a function-call id, normalizeResponse synthesizes one as
136
+ * `${name}-${index}`, embedding the tool name so that the follow-up
137
+ * functionResponse part can recover it — Gemini requires a `name` field on
138
+ * every functionResponse, and the tool result message we receive from the
139
+ * agent only carries the tool_use_id. NOTE(review): a model-supplied id is sliced at its last "-" too — confirm that still yields the tool name.
140
+ */
141
+ function extractToolName(toolUseId) {
142
+ const dash = toolUseId.lastIndexOf("-");
143
+ return dash > 0 ? toolUseId.slice(0, dash) : toolUseId;
144
+ }
145
+ function normalizeResponse(response) {
146
+ const candidate = response.candidates?.[0];
147
+ const content = [];
148
+ let toolIndex = 0;
149
+ for (const part of candidate?.content?.parts ?? []) {
150
+ if (part.thought)
151
+ continue;
152
+ if (typeof part.text === "string" && part.text.length > 0) {
153
+ content.push({ type: "text", text: part.text });
154
+ }
155
+ else if (part.functionCall) {
156
+ const name = part.functionCall.name ?? "unknown";
157
+ content.push({
158
+ type: "tool_use",
159
+ id: part.functionCall.id ?? `${name}-${toolIndex}`,
160
+ name,
161
+ input: part.functionCall.args ?? {},
162
+ ...(part.thoughtSignature
163
+ ? { thoughtSignature: part.thoughtSignature }
164
+ : {}),
165
+ });
166
+ toolIndex++;
167
+ }
168
+ }
169
+ const hasToolCalls = content.some((b) => b.type === "tool_use");
170
+ // Read finishReason even when content.parts is missing — that happens when
171
+ // a thinking model burns the entire output budget on thoughts (parts=[] +
172
+ // finishReason=MAX_TOKENS). Falling through to "end_turn" would hide that.
173
+ const stopReason = mapFinishReason(candidate?.finishReason, hasToolCalls);
174
+ const usage = response.usageMetadata
175
+ ? {
176
+ input_tokens: response.usageMetadata.promptTokenCount ?? 0,
177
+ output_tokens: response.usageMetadata.candidatesTokenCount ?? 0,
178
+ }
179
+ : undefined;
180
+ return { content, stopReason, ...(usage ? { usage } : {}) };
181
+ }
182
+ function mapFinishReason(reason, hasToolCalls) {
183
+ if (reason === "MAX_TOKENS")
184
+ return "max_tokens";
185
+ if (hasToolCalls)
186
+ return "tool_use";
187
+ return "end_turn";
188
+ }
@@ -1,2 +1,3 @@
1
1
  import type { Provider } from "../provider.js";
2
- export declare function createProvider(): Provider;
2
+ /** Singleton so agent.ts and the scanner share one provider instance. */
3
+ export declare function getProvider(): Provider;
@@ -1,12 +1,27 @@
1
1
  import { config } from "../../config.js";
2
2
  import { createAnthropicProvider } from "./anthropic.js";
3
- import { createOpenAICompatibleProvider } from "./openai.js";
4
- export function createProvider() {
5
- if (config.providerType === "openai-compatible") {
6
- return createOpenAICompatibleProvider({
7
- apiKey: config.openaiCompatibleKey || "openai-compatible",
8
- baseURL: config.openaiCompatibleBaseURL,
9
- });
3
+ import { createOpenAIProvider } from "./openai.js";
4
+ import { createOpenAICompatProvider } from "./openai-compat.js";
5
+ import { createGeminiProvider } from "./gemini.js";
6
+ let cached = null;
7
+ function buildProvider() {
8
+ switch (config.providerType) {
9
+ case "anthropic":
10
+ return createAnthropicProvider({ apiKey: config.anthropicKey });
11
+ case "openai":
12
+ return createOpenAIProvider({ apiKey: config.openaiKey });
13
+ case "gemini":
14
+ return createGeminiProvider({ apiKey: config.geminiKey });
15
+ case "openai-compat":
16
+ return createOpenAICompatProvider({
17
+ apiKey: config.openaiCompatKey || "openai-compat",
18
+ baseURL: config.openaiCompatBaseURL,
19
+ });
10
20
  }
11
- return createAnthropicProvider({ apiKey: config.anthropicKey });
21
+ }
22
+ /** Singleton so agent.ts and the scanner share one provider instance. */
23
+ export function getProvider() {
24
+ if (cached === null)
25
+ cached = buildProvider();
26
+ return cached;
12
27
  }
@@ -1,5 +1,10 @@
1
1
  import type { Provider } from "../provider.js";
2
- export declare function createOpenAICompatibleProvider(opts: {
2
+ /**
3
+ * Generic Chat Completions client for LM Studio / Ollama / vLLM / etc.
4
+ * `file` content parts are an OpenAI-only extension and are rejected here;
5
+ * the scanner rasterizes PDFs to PNG and we ship `image_url` parts.
6
+ */
7
+ export declare function createOpenAICompatProvider(opts: {
3
8
  apiKey: string;
4
9
  baseURL: string;
5
10
  }): Provider;
@@ -1,67 +1,36 @@
1
1
  import OpenAI from "openai";
2
- export function createOpenAICompatibleProvider(opts) {
2
+ import { classifyProviderError } from "../errors.js";
3
+ import { convertAssistantMessage, convertToolResults, convertTools, createCompletionWithTokenFallback, isToolResultEnvelope, normalizeResponse, } from "./openai.js";
4
+ /**
5
+ * Generic Chat Completions client for LM Studio / Ollama / vLLM / etc.
6
+ * `file` content parts are an OpenAI-only extension and are rejected here;
7
+ * the scanner rasterizes PDFs to PNG and we ship `image_url` parts.
8
+ */
9
+ export function createOpenAICompatProvider(opts) {
3
10
  const client = new OpenAI({
4
11
  apiKey: opts.apiKey,
5
12
  baseURL: opts.baseURL,
6
13
  });
7
14
  return {
8
- name: "openai-compatible",
15
+ name: "openai-compat",
9
16
  supportsThinking: false,
17
+ acceptsDocuments: false,
10
18
  async sendMessage(params) {
11
- const messages = convertMessages(params.system, params.messages);
12
19
  const tools = convertTools(params.tools);
13
- // Try max_tokens first (broadest compat: Ollama, vLLM, older OpenAI models),
14
- // fall back to max_completion_tokens if rejected (newer OpenAI models require it)
20
+ const body = {
21
+ model: params.model,
22
+ maxTokens: params.maxTokens,
23
+ messages: convertMessages(params.system, params.messages),
24
+ tools: tools.length > 0 ? tools : undefined,
25
+ };
15
26
  let response;
16
27
  try {
17
- response = await client.chat.completions.create({
18
- model: params.model,
19
- max_tokens: params.maxTokens,
20
- messages,
21
- tools: tools.length > 0 ? tools : undefined,
22
- }, { signal: params.signal });
28
+ response = await createCompletionWithTokenFallback(client, body, { signal: params.signal });
23
29
  }
24
30
  catch (e) {
25
- if (e.status === 400 && e.message?.includes("max_tokens")) {
26
- response = await client.chat.completions.create({
27
- model: params.model,
28
- max_completion_tokens: params.maxTokens,
29
- messages,
30
- tools: tools.length > 0 ? tools : undefined,
31
- }, { signal: params.signal });
32
- }
33
- else {
34
- throw e;
35
- }
36
- }
37
- const choice = response.choices[0];
38
- if (!choice) {
39
- return { content: [], stopReason: "end_turn" };
40
- }
41
- const content = [];
42
- if (choice.message.content) {
43
- content.push({ type: "text", text: choice.message.content });
31
+ classifyProviderError(e, params.signal);
44
32
  }
45
- if (choice.message.tool_calls) {
46
- for (const tc of choice.message.tool_calls) {
47
- if (tc.type !== "function")
48
- continue;
49
- content.push({
50
- type: "tool_use",
51
- id: tc.id,
52
- name: tc.function.name,
53
- input: parseArguments(tc.function.arguments),
54
- });
55
- }
56
- }
57
- const hasToolCalls = content.some((b) => b.type === "tool_use");
58
- return {
59
- content,
60
- stopReason: hasToolCalls ? "tool_use" : "end_turn",
61
- usage: response.usage
62
- ? { input_tokens: response.usage.prompt_tokens, output_tokens: response.usage.completion_tokens }
63
- : undefined,
64
- };
33
+ return normalizeResponse(response);
65
34
  },
66
35
  };
67
36
  }
@@ -71,25 +40,11 @@ function convertMessages(system, messages) {
71
40
  ];
72
41
  for (const msg of messages) {
73
42
  if (msg.role === "user") {
74
- if (Array.isArray(msg.content) &&
75
- msg.content.length > 0 &&
76
- msg.content[0].type === "tool_result") {
77
- const toolResults = msg.content;
78
- for (const tr of toolResults) {
79
- result.push({
80
- role: "tool",
81
- tool_call_id: tr.tool_use_id,
82
- content: tr.content,
83
- });
84
- }
43
+ if (isToolResultEnvelope(msg.content)) {
44
+ result.push(...convertToolResults(msg.content));
85
45
  }
86
46
  else if (Array.isArray(msg.content)) {
87
- // Strip document blocks (OpenAI-compat doesn't accept them); keep text.
88
- const text = msg.content
89
- .filter((b) => b.type === "text")
90
- .map((b) => b.text)
91
- .join("\n");
92
- result.push({ role: "user", content: text });
47
+ result.push(buildUserMessage(msg.content));
93
48
  }
94
49
  else {
95
50
  result.push({ role: "user", content: msg.content });
@@ -97,26 +52,7 @@ function convertMessages(system, messages) {
97
52
  }
98
53
  else {
99
54
  if (Array.isArray(msg.content)) {
100
- const blocks = msg.content;
101
- const textParts = blocks
102
- .filter((b) => b.type === "text")
103
- .map((b) => b.text)
104
- .join("\n");
105
- const toolCalls = blocks
106
- .filter((b) => b.type === "tool_use")
107
- .map((b) => {
108
- const tu = b;
109
- return {
110
- id: tu.id,
111
- type: "function",
112
- function: { name: tu.name, arguments: JSON.stringify(tu.input) },
113
- };
114
- });
115
- result.push({
116
- role: "assistant",
117
- content: textParts || null,
118
- ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
119
- });
55
+ result.push(convertAssistantMessage(msg.content));
120
56
  }
121
57
  else {
122
58
  result.push({ role: "assistant", content: msg.content });
@@ -125,23 +61,31 @@ function convertMessages(system, messages) {
125
61
  }
126
62
  return result;
127
63
  }
128
- function convertTools(tools) {
129
- return tools.map((t) => ({
130
- type: "function",
131
- function: {
132
- name: t.name,
133
- description: t.description,
134
- parameters: t.input_schema,
135
- },
136
- }));
137
- }
138
- function parseArguments(args) {
139
- if (typeof args !== "string")
140
- return args;
141
- try {
142
- return JSON.parse(args);
64
+ function buildUserMessage(blocks) {
65
+ for (const block of blocks) {
66
+ if (block.type === "document") {
67
+ throw new Error("openai-compat does not accept document blocks. The scanner should rasterize PDFs to images for this provider — this is a bug.");
68
+ }
69
+ }
70
+ const hasImage = blocks.some((b) => b.type === "image");
71
+ if (!hasImage) {
72
+ const text = blocks
73
+ .filter((b) => b.type === "text")
74
+ .map((b) => b.text)
75
+ .join("\n");
76
+ return { role: "user", content: text };
143
77
  }
144
- catch {
145
- return {};
78
+ const parts = [];
79
+ for (const block of blocks) {
80
+ if (block.type === "text") {
81
+ parts.push({ type: "text", text: block.text });
82
+ }
83
+ else if (block.type === "image") {
84
+ parts.push({
85
+ type: "image_url",
86
+ image_url: { url: `data:${block.source.media_type};base64,${block.source.data}` },
87
+ });
88
+ }
146
89
  }
90
+ return { role: "user", content: parts };
147
91
  }