@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,1004 @@
1
+ /**
2
+ * Anthropic Messages Transport — SSE streaming for Claude API.
3
+ *
4
+ * Aligned with CC (claude-code-haha) src/services/api/claude.ts:
5
+ * - cache_control ephemeral injection on system prompt blocks
6
+ * - ensureToolResultPairing() conversation repair before every request
7
+ * - Retry with exponential backoff on transient errors (429/529/overloaded)
8
+ * - Non-streaming fallback when stream errors out
9
+ * - 90s idle watchdog timeout for silently dropped connections
10
+ * - Adaptive/budget thinking with temperature omit
11
+ * - Cache token extraction with >0 guard (CC updateUsage parity)
12
+ * - signature_delta handling for thinking blocks
13
+ */
14
+ import { MEDIA_MAX_UPLOAD_SIZE } from "../constants.js";
15
+ import { isLocalUrl, resolveMediaUrlViaUpload } from "./media-resolve.js";
16
+ import { DEFAULT_MAX_RETRIES, STREAM_IDLE_TIMEOUT_MS, TRANSIENT_STATUS_CODES, retryDelay, retrySleep, } from "../retry.js";
17
+ // ── Constants ────────────────────────────────────────────────────────────────
18
/** Overall request timeout (ms) for the streaming call when `config.timeoutMs` is not supplied. */
const DEFAULT_TIMEOUT_MS = 180_000;
/** Overall request timeout (ms) applied to the non-streaming fallback request. */
const NON_STREAMING_TIMEOUT_MS = 300_000;
// NOTE(review): not referenced in the visible part of this file; presumably the
// max_tokens cap applied by adjustParamsForNonStreaming — confirm.
const MAX_NON_STREAMING_TOKENS = 64_000;
// CC SYNTHETIC_TOOL_RESULT_PLACEHOLDER parity
// NOTE(review): ensureToolResultPairing spells out this same text inline rather
// than referencing this constant.
const SYNTHETIC_TOOL_RESULT_CONTENT = "[Tool execution failed; output not available during conversation recovery]";
// ── Beta headers (CC parity for 3P callers) ──────────────────────────────────
// Only universally-applicable betas that any 3P API caller can use.
// 1P-only betas (claude-code-20250219, afk-mode, cli-internal, redact-thinking) are omitted.
// Each value below is sent verbatim as one entry of the `anthropic-beta` request header.
const BETA_INTERLEAVED_THINKING = "interleaved-thinking-2025-05-14";
const BETA_CONTEXT_1M = "context-1m-2025-08-07";
const BETA_TOKEN_EFFICIENT_TOOLS = "token-efficient-tools-2026-03-28";
const BETA_PROMPT_CACHING_SCOPE = "prompt-caching-scope-2026-01-05";
const BETA_PDF_SUPPORT = "pdfs-2024-09-25";
31
/**
 * Report whether a model accepts Adaptive Thinking (`thinking.type: "adaptive"`).
 *
 * The check is a case-insensitive substring match against the Opus 4.6,
 * Opus 4.7 and Sonnet 4.6 family markers. Any other model (Haiku, older
 * releases) yields `false`, in which case the caller falls back to
 * `type: "enabled"` with an explicit `budget_tokens`.
 *
 * @param {string} model - Model identifier as sent in the request.
 * @returns {boolean} `true` when the identifier contains one of the markers.
 */
function supportsAdaptiveThinking(model) {
    const normalized = model.toLowerCase();
    const adaptiveFamilies = ["opus-4-6", "opus-4-7", "sonnet-4-6"];
    return adaptiveFamilies.some((marker) => normalized.includes(marker));
}
41
/**
 * Split a comma-separated beta list into trimmed, non-empty entries.
 *
 * @param {string | undefined} raw - Raw value of the `ANTHROPIC_BETAS` env var.
 * @returns {string[]} Entries in their original order (may contain duplicates).
 */
function parseEnvBetas(raw) {
    if (!raw) {
        return [];
    }
    return raw.split(",").map((s) => s.trim()).filter(Boolean);
}
/**
 * Build the list of `anthropic-beta` header values for a request.
 * CC parity: only send betas the model actually supports.
 *
 * Built-in betas are added only when the model id starts with "claude":
 * other providers reusing this transport (DeepSeek, Qwen, MiniMax) do not
 * support them and may answer with HTTP 400. Betas listed in the
 * `ANTHROPIC_BETAS` environment variable are always appended (advanced
 * override), and the result never contains the same beta twice.
 *
 * @param {string} model - Model identifier; matched case-insensitively.
 * @param {boolean} hasThinking - Whether the request enables reasoning.
 * @param {boolean} hasTools - Whether the request carries tool definitions.
 * @returns {string[]} Beta names in send order.
 */
function buildBetaHeaders(model, hasThinking, hasTools) {
    const betas = [];
    const ml = model.toLowerCase();
    if (ml.startsWith("claude")) {
        // 1M context models: claude-opus-4-6/4-7, claude-sonnet-4-6
        const is1mModel = ml.includes("opus-4-6") || ml.includes("opus-4-7")
            || ml.includes("sonnet-4-6");
        if (is1mModel) {
            betas.push(BETA_CONTEXT_1M);
        }
        // PDF support: always send for models with vision
        betas.push(BETA_PDF_SUPPORT);
        // Interleaved thinking: supported by all thinking-capable Claude models (Opus 4+, Sonnet 3.7+)
        if (hasThinking && !ml.includes("haiku")) {
            betas.push(BETA_INTERLEAVED_THINKING);
        }
        // Token-efficient tools: reduces tool schema token overhead (~4.5% savings)
        if (hasTools) {
            betas.push(BETA_TOKEN_EFFICIENT_TOOLS);
        }
        // Prompt caching scope: always send — header is a no-op without scope field
        betas.push(BETA_PROMPT_CACHING_SCOPE);
    }
    // User-specified additional betas via env, de-duplicated against everything
    // already collected (including earlier env entries).
    for (const beta of parseEnvBetas(process.env.ANTHROPIC_BETAS)) {
        if (!betas.includes(beta)) {
            betas.push(beta);
        }
    }
    return betas;
}
90
+ // ── Transport class ──────────────────────────────────────────────────────────
91
/**
 * Streaming transport for Anthropic-Messages-compatible endpoints.
 *
 * `stream()` repairs and converts the conversation, builds the request body
 * (tools, sampling, thinking), then streams SSE with retry on transient
 * failures and a final non-streaming fallback. All output is emitted as
 * provider-neutral chunks (`usage`, `delta`, `reasoning_delta`,
 * `reasoning_block_complete`, `tool_call_delta`, `done`).
 */
export class AnthropicMessagesTransport {
    baseUrl;
    apiVersion;
    timeoutMs;
    streamIdleTimeoutMs;
    enablePromptCaching;
    maxRetries;
    omitZeroTemperature;
    quirks;
    fileUploadAdapter;
    /**
     * @param {object} config
     * @param {string} config.baseUrl - Required; trailing slashes are stripped.
     * @param {string} [config.apiVersion] - `anthropic-version` header, default "2023-06-01".
     * @param {number} [config.timeoutMs] - Overall streaming request timeout.
     * @param {number} [config.streamIdleTimeoutMs] - Max silence between chunks.
     * @param {boolean} [config.enablePromptCaching] - Default true.
     * @param {number} [config.maxRetries]
     * @param {boolean} [config.omitZeroTemperature] - Drop `temperature: 0` from the body.
     * @param {object} [config.quirks] - Provider-specific behavior switches.
     * @param {object} [config.fileUploadAdapter] - Passed to local-media resolution.
     * @throws {Error} When `baseUrl` is empty or missing.
     */
    constructor(config) {
        if (!config.baseUrl) {
            throw new Error("AnthropicMessagesTransport: baseUrl is required (got empty or undefined)");
        }
        this.baseUrl = config.baseUrl.replace(/\/+$/, "");
        this.apiVersion = config.apiVersion ?? "2023-06-01";
        this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        this.streamIdleTimeoutMs = config.streamIdleTimeoutMs ?? STREAM_IDLE_TIMEOUT_MS;
        this.enablePromptCaching = config.enablePromptCaching ?? true;
        this.maxRetries = config.maxRetries ?? DEFAULT_MAX_RETRIES;
        this.omitZeroTemperature = config.omitZeroTemperature ?? false;
        this.quirks = config.quirks ?? {};
        this.fileUploadAdapter = config.fileUploadAdapter;
    }
    /**
     * Send a request and yield normalized chunks as they arrive.
     *
     * Transient HTTP statuses and idle timeouts are retried with backoff up to
     * `maxRetries`; after the last attempt a non-streaming request is tried
     * unless `request.streamRequired` is set. Context-window overflow and all
     * other errors are rethrown immediately.
     *
     * @param {object} request - Model, messages, tools, sampling and reasoning options.
     * @param {string} apiKey - Sent as `x-api-key`.
     * @param {AbortSignal} [signal] - Caller cancellation.
     * @throws {Error} The last streaming error when every attempt fails.
     */
    async *stream(request, apiKey, signal) {
        // Note: DeepSeek prefix completion requires OpenAI Chat Completions
        // format (base_url /beta), NOT Anthropic Messages format.
        // The Anthropic API at /anthropic does not support prefix: true.
        const url = `${this.baseUrl}/v1/messages`;
        // CC parity: ensureToolResultPairing before converting messages
        const repairedMessages = ensureToolResultPairing(request.messages);
        // Pre-resolve local media URLs for Anthropic (localhost not reachable from API)
        const resolvedMessages = await resolveMessagesMediaForAnthropic(repairedMessages, this.fileUploadAdapter, apiKey, signal);
        // Convert OpenAI-style messages → Anthropic format
        const { system, messages } = convertMessages(resolvedMessages, this.quirks, request.model);
        // CC parity: system prompt as block array with cache_control on last block
        const systemParam = buildSystemParam(system, this.enablePromptCaching);
        const hasThinking = !!request.reasoning;
        const maxTokens = request.maxTokens ?? 8192;
        // DeepSeek thinking models share max_tokens between thinking + text output.
        // Enforce minimum 4096 when thinking is enabled to prevent thinking exhausting
        // the entire budget and producing empty text responses.
        const effectiveMaxTokens = hasThinking ? Math.max(maxTokens, 4096) : maxTokens;
        const body = {
            model: request.model,
            messages,
            max_tokens: effectiveMaxTokens,
            stream: true,
        };
        if (systemParam) {
            body.system = systemParam;
        }
        if (request.tools && request.tools.length > 0) {
            const tools = request.tools.map(convertToolDef);
            // Active caching — cache_control on last tool in the prefix layer.
            // Prefix order: tools → system → messages. Mark last tool for caching.
            // Use session-scoped ephemeral (no global scope) to prevent cross-session cache leakage.
            // CC separates static/dynamic prompt with SYSTEM_PROMPT_DYNAMIC_BOUNDARY for global scope;
            // without that boundary, global scope can leak session context.
            if (this.enablePromptCaching && tools.length > 0) {
                tools[tools.length - 1].cache_control = { type: "ephemeral" };
            }
            body.tools = tools;
            if (request.toolChoice) {
                body.tool_choice =
                    request.toolChoice === "auto"
                        ? { type: "auto" }
                        : request.toolChoice === "required"
                            ? { type: "any" }
                            : { type: "none" };
            }
        }
        // CC parity: temperature must be undefined when thinking is enabled
        if (!hasThinking && request.temperature !== undefined) {
            // MiniMax rejects temperature=0; range is (0.0, 1.0]
            if (!(this.omitZeroTemperature && request.temperature === 0)) {
                body.temperature = request.temperature;
            }
        }
        // Anthropic API: top_p (supported by Anthropic + MiniMax)
        if (!hasThinking && request.topP !== undefined) {
            body.top_p = request.topP;
        }
        // CC parity: adaptive vs budget thinking
        if (hasThinking) {
            if (this.quirks.useEffortInsteadOfBudget) {
                // DeepSeek: budget_tokens is ignored, use output_config.effort instead.
                // DeepSeek only supports "high" and "max"; lower levels map to "high".
                body.thinking = { type: "enabled" };
                const cap = this.quirks.maxReasoningEffort ?? "high";
                const effortInput = request.reasoning?.effort ?? "high";
                // Map: "low"/"medium" → "high", "high" → "high", anything above → cap.
                // "high" must stay "high" even when the cap is "max".
                const deepseekEffort = effortInput === "low" || effortInput === "medium" || effortInput === "high"
                    ? "high"
                    : cap;
                body.output_config = { effort: deepseekEffort };
                // DeepSeek KV Cache: set user_id for session-level cache isolation.
                // Server-side KV cache hit rate improves when same user_id sees repeated
                // prefixes (system prompt, conversation history). Price: 1/50 of input.
                // Use a hash of the system prompt as stable session identifier.
                if (system) {
                    let h = 0x811c9dc5; // FNV-1a seed
                    for (let i = 0; i < system.length; i++) {
                        h ^= system.charCodeAt(i);
                        h = Math.imul(h, 0x01000193);
                    }
                    body.metadata = { user_id: `qa-${(h >>> 0).toString(36)}` };
                }
            }
            else if (supportsAdaptiveThinking(request.model)) {
                // Adaptive Thinking: Opus 4.6/4.7 and Sonnet 4.6 — model self-adjusts
                // thinking depth. Zero thinking for simple queries, deep thinking for
                // complex ones. This is the CC (claude-code-haha) default mode.
                body.thinking = { type: "adaptive" };
            }
            else {
                // Extended Thinking: Haiku 4.5 and older models — explicit budget control.
                const budget = mapReasoningEffortToBudget(request.reasoning.effort, request.maxTokens);
                body.thinking = {
                    type: "enabled",
                    // Clamp against the max_tokens actually sent (which may have been
                    // raised to 4096 above), not the caller's smaller request value;
                    // otherwise a small maxTokens yields a budget below the API minimum.
                    budget_tokens: Math.min(effectiveMaxTokens - 1, budget),
                };
            }
        }
        else if (this.quirks.supportsThinkingParam && this.quirks.disableThinkingByDefault) {
            body.thinking = { type: "disabled" };
        }
        // CC parity: beta headers for model-specific capabilities
        const betaHeaders = buildBetaHeaders(request.model, hasThinking, (request.tools?.length ?? 0) > 0);
        const headers = {
            "Content-Type": "application/json",
            "x-api-key": apiKey,
            "anthropic-version": this.apiVersion,
        };
        if (betaHeaders.length > 0) {
            headers["anthropic-beta"] = betaHeaders.join(",");
        }
        // Retry loop with exponential backoff (CC withRetry parity)
        let lastError = null;
        for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
            if (signal?.aborted) {
                throw new Error("Request aborted");
            }
            if (attempt > 0 && lastError) {
                await retrySleep(retryDelay(attempt), signal);
            }
            try {
                yield* this.streamWithWatchdog(url, headers, body, signal);
                return; // Success
            }
            catch (err) {
                lastError = err instanceof Error ? err : new Error(String(err));
                // Parse HTTP status from error message for retry decision
                const status = parseHttpStatus(lastError.message);
                const isTransient = status !== null && TRANSIENT_STATUS_CODES.has(status);
                const isIdleTimeout = lastError.message.includes("Stream idle timeout");
                const isOverflow = lastError.message.includes("model_context_window_exceeded")
                    || lastError.message.includes("context_length_exceeded");
                if (isOverflow) {
                    // CC parity: context window overflow — unrecoverable, don't retry
                    throw lastError;
                }
                if (!isTransient && !isIdleTimeout) {
                    // Non-transient error — don't retry
                    throw lastError;
                }
                if (attempt === this.maxRetries) {
                    // Last attempt — try non-streaming fallback (CC parity)
                    // Skip fallback for stream-required models (QwQ, Omni) that reject non-streaming
                    if ((isIdleTimeout || isTransient) && !request.streamRequired) {
                        try {
                            yield* this.nonStreamingFallback(url, headers, body, signal);
                            return;
                        }
                        catch {
                            // Surface the original streaming failure, not the fallback's.
                            throw lastError;
                        }
                    }
                    throw lastError;
                }
                // Will retry
            }
        }
        if (lastError) {
            throw lastError;
        }
    }
    /**
     * Stream with idle watchdog timer (CC parity: 90s default).
     *
     * The watchdog is armed once response headers arrive and re-armed on every
     * chunk. When it fires it aborts the request so a pending body read is
     * actually interrupted; the resulting failure is reported as a
     * "Stream idle timeout" error, which `stream()` treats as retryable.
     *
     * @param {string} url
     * @param {Record<string, string>} headers
     * @param {object} body - JSON-serializable request body.
     * @param {AbortSignal} [signal] - Caller cancellation.
     * @throws {Error} On non-2xx status (with `.status`), missing body, or idle timeout.
     */
    async *streamWithWatchdog(url, headers, body, signal) {
        const timeoutSignal = AbortSignal.timeout(this.timeoutMs);
        // Dedicated controller so the watchdog can cancel a stalled body read.
        const idleController = new AbortController();
        const combinedSignal = AbortSignal.any(signal
            ? [signal, timeoutSignal, idleController.signal]
            : [timeoutSignal, idleController.signal]);
        const response = await fetch(url, {
            method: "POST",
            headers,
            body: JSON.stringify(body),
            signal: combinedSignal,
        });
        if (!response.ok) {
            const errorBody = await response.text().catch(() => "");
            const err = new Error(`Anthropic API error ${response.status}: ${errorBody.slice(0, 500)}`);
            err.status = response.status;
            throw err;
        }
        if (!response.body) {
            throw new Error("Anthropic API returned no response body");
        }
        // CC parity: idle watchdog — abort if no chunks for streamIdleTimeoutMs
        let idleTimer = null;
        let idleAborted = false;
        const resetIdleTimer = () => {
            if (idleTimer !== null) {
                clearTimeout(idleTimer);
            }
            idleTimer = setTimeout(() => {
                idleAborted = true;
                idleController.abort();
            }, this.streamIdleTimeoutMs);
        };
        const clearIdleTimer = () => {
            if (idleTimer !== null) {
                clearTimeout(idleTimer);
                idleTimer = null;
            }
        };
        resetIdleTimer(); // Prime before stream starts
        try {
            yield* this.parseSSEStream(response.body, resetIdleTimer, () => idleAborted);
        }
        catch (err) {
            if (idleAborted) {
                // The abort we triggered surfaces as a generic abort error; translate
                // it so the retry logic recognizes the idle timeout.
                throw new Error("Stream idle timeout - no chunks received", { cause: err });
            }
            throw err;
        }
        finally {
            clearIdleTimer();
        }
        // CC parity: if watchdog fired and loop exited cleanly, throw
        if (idleAborted) {
            throw new Error("Stream idle timeout - no chunks received");
        }
    }
    /**
     * Non-streaming fallback (CC executeNonStreamingRequest parity).
     * Used when streaming fails after all retries. The body is re-sent with
     * `stream: false` after passing through `adjustParamsForNonStreaming`.
     *
     * @param {string} url
     * @param {Record<string, string>} headers
     * @param {object} body - The streaming request body (not mutated).
     * @param {AbortSignal} [signal] - Caller cancellation.
     * @throws {Error} On non-2xx status (with `.status`).
     */
    async *nonStreamingFallback(url, headers, body, signal) {
        const nonStreamBody = adjustParamsForNonStreaming({ ...body, stream: false });
        const timeoutSignal = AbortSignal.timeout(NON_STREAMING_TIMEOUT_MS);
        const combinedSignal = signal
            ? AbortSignal.any([signal, timeoutSignal])
            : timeoutSignal;
        const response = await fetch(url, {
            method: "POST",
            headers,
            body: JSON.stringify(nonStreamBody),
            signal: combinedSignal,
        });
        if (!response.ok) {
            const errorBody = await response.text().catch(() => "");
            const err = new Error(`Anthropic API error ${response.status}: ${errorBody.slice(0, 500)}`);
            err.status = response.status;
            throw err;
        }
        const result = await response.json();
        yield* this.mapNonStreamingResponse(result);
    }
    /**
     * Convert a non-streaming API response to LLMChunk sequence:
     * one `usage` chunk (if present), one chunk per content block, then `done`
     * when a stop reason is present.
     *
     * @param {object} result - Parsed JSON response body.
     */
    *mapNonStreamingResponse(result) {
        // Usage
        const usage = result.usage;
        if (usage) {
            yield {
                type: "usage",
                promptTokens: usage.input_tokens ?? 0,
                completionTokens: usage.output_tokens ?? 0,
                cacheReadTokens: usage.cache_read_input_tokens > 0
                    ? usage.cache_read_input_tokens
                    : undefined,
                cacheCreationTokens: usage.cache_creation_input_tokens > 0
                    ? usage.cache_creation_input_tokens
                    : undefined,
                reasoningTokens: usage.reasoning_input_tokens > 0
                    ? usage.reasoning_input_tokens
                    : undefined,
            };
        }
        // Content blocks
        const content = result.content;
        if (Array.isArray(content)) {
            let toolIndex = 0;
            for (const block of content) {
                const blockType = block.type;
                if (blockType === "text") {
                    yield { type: "delta", text: block.text };
                }
                else if (blockType === "thinking") {
                    yield { type: "reasoning_delta", text: block.thinking };
                    // Same passback contract as the streaming path: the signature
                    // travels in its own chunk, the text was already emitted above.
                    if (typeof block.signature === "string") {
                        yield {
                            type: "reasoning_block_complete",
                            thinking: "",
                            signature: block.signature,
                        };
                    }
                }
                else if (blockType === "tool_use") {
                    yield {
                        type: "tool_call_delta",
                        index: toolIndex++,
                        id: block.id,
                        name: block.name,
                        arguments: JSON.stringify(block.input ?? {}),
                    };
                }
            }
        }
        // Stop reason
        const stopReason = result.stop_reason;
        if (stopReason) {
            yield { type: "done", finishReason: mapAnthropicStopReason(stopReason) };
        }
    }
    /**
     * Decode an SSE byte stream line by line and yield the mapped chunks.
     *
     * @param {AsyncIterable<Uint8Array>} body - Response body.
     * @param {() => void} resetIdleTimer - Called on every received chunk.
     * @param {() => boolean} isIdleAborted - When true, parsing stops.
     */
    async *parseSSEStream(body, resetIdleTimer, isIdleAborted) {
        const decoder = new TextDecoder();
        let buffer = "";
        let currentEvent = "";
        // Track active content blocks for tool_use accumulation + signature
        const blocks = new Map();
        // CC parity: accumulated usage with >0 guard
        const accUsage = {
            inputTokens: 0,
            outputTokens: 0,
            cacheReadTokens: 0,
            cacheCreationTokens: 0,
            reasoningTokens: 0,
        };
        for await (const raw of body) {
            // CC parity: reset idle watchdog on every chunk
            resetIdleTimer();
            if (isIdleAborted()) {
                break;
            }
            buffer += decoder.decode(raw, { stream: true });
            let newlineIdx;
            while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
                const line = buffer.slice(0, newlineIdx).trim();
                buffer = buffer.slice(newlineIdx + 1);
                if (!line) {
                    // Blank line terminates the current SSE event.
                    currentEvent = "";
                    continue;
                }
                // SSE spec: field name is before colon, value after colon with optional leading space.
                // Some providers (Qwen) omit the space: "event:message_start" vs "event: message_start"
                if (line.startsWith("event:")) {
                    currentEvent = line.slice(6).trimStart();
                    continue;
                }
                if (!line.startsWith("data:")) {
                    continue;
                }
                const data = line.slice(5).trimStart();
                let parsed;
                try {
                    parsed = JSON.parse(data);
                }
                catch {
                    // Non-JSON payloads (keep-alives, sentinels) carry nothing to map.
                    continue;
                }
                // The payload repeats the event name in its `type` field; use it
                // when a provider sends `data:` lines without an `event:` line.
                const eventName = currentEvent
                    || (typeof parsed?.type === "string" ? parsed.type : "");
                yield* this.mapEvent(eventName, parsed, blocks, accUsage);
            }
        }
    }
    /**
     * Map one decoded SSE event to zero or more chunks.
     *
     * @param {string} event - SSE event name.
     * @param {object} data - Parsed `data:` payload.
     * @param {Map<number, object>} blocks - Open content blocks by index (mutated).
     * @param {object} accUsage - Running usage totals (mutated).
     * @throws {Error} On an `error` event.
     */
    *mapEvent(event, data, blocks, accUsage) {
        switch (event) {
            case "message_start": {
                const msg = data.message;
                const usage = msg?.usage;
                if (usage) {
                    // CC parity: update accumulated usage with >0 guard
                    updateAccumulatedUsage(accUsage, usage);
                    yield usageChunkFromAccumulated(accUsage);
                }
                break;
            }
            case "content_block_start": {
                const index = data.index;
                const block = data.content_block;
                if (!block) {
                    break;
                }
                const blockType = block.type;
                blocks.set(index, {
                    type: blockType,
                    id: block.id,
                    name: block.name,
                    // CC parity: initialize signature for thinking blocks
                    signature: blockType === "thinking" ? "" : undefined,
                });
                if (blockType === "tool_use") {
                    yield {
                        type: "tool_call_delta",
                        index,
                        id: block.id,
                        name: block.name,
                        arguments: "",
                    };
                }
                break;
            }
            case "content_block_delta": {
                const index = data.index;
                const delta = data.delta;
                if (!delta) {
                    break;
                }
                const deltaType = delta.type;
                if (deltaType === "text_delta") {
                    yield { type: "delta", text: delta.text };
                }
                else if (deltaType === "input_json_delta") {
                    yield {
                        type: "tool_call_delta",
                        index,
                        arguments: delta.partial_json,
                    };
                }
                else if (deltaType === "thinking_delta") {
                    yield {
                        type: "reasoning_delta",
                        text: delta.thinking,
                    };
                }
                else if (deltaType === "signature_delta") {
                    // CC parity: store signature on thinking block
                    const blockInfo = blocks.get(index);
                    if (blockInfo && typeof delta.signature === "string") {
                        blockInfo.signature = delta.signature;
                    }
                }
                break;
            }
            case "content_block_stop": {
                const index = data.index;
                const stoppedBlock = blocks.get(index);
                // Emit complete thinking block with signature for passback
                if (stoppedBlock?.type === "thinking" && typeof stoppedBlock.signature === "string") {
                    yield {
                        type: "reasoning_block_complete",
                        thinking: "", // text already streamed via reasoning_delta
                        signature: stoppedBlock.signature,
                    };
                }
                blocks.delete(index);
                break;
            }
            case "message_delta": {
                const delta = data.delta;
                const usage = data.usage;
                if (usage) {
                    // CC parity: update with >0 guard (prevent delta zero overwrite)
                    updateAccumulatedUsage(accUsage, usage);
                    yield usageChunkFromAccumulated(accUsage);
                }
                if (delta?.stop_reason) {
                    yield { type: "done", finishReason: mapAnthropicStopReason(delta.stop_reason) };
                }
                break;
            }
            case "message_stop":
                break;
            case "error": {
                const error = data.error;
                throw new Error(`Anthropic stream error: ${error?.message ?? JSON.stringify(data)}`);
            }
        }
    }
}
/**
 * Build a `usage` chunk from the running totals; cache and reasoning counts
 * are reported as `undefined` while they are still zero.
 *
 * @param {object} accUsage - Running usage totals kept by parseSSEStream.
 * @returns {object} A `usage` chunk.
 */
function usageChunkFromAccumulated(accUsage) {
    return {
        type: "usage",
        promptTokens: accUsage.inputTokens,
        completionTokens: accUsage.outputTokens,
        cacheReadTokens: accUsage.cacheReadTokens > 0 ? accUsage.cacheReadTokens : undefined,
        cacheCreationTokens: accUsage.cacheCreationTokens > 0 ? accUsage.cacheCreationTokens : undefined,
        reasoningTokens: accUsage.reasoningTokens > 0 ? accUsage.reasoningTokens : undefined,
    };
}
570
/**
 * Fold one `usage` payload into the running totals (CC updateUsage parity).
 *
 * Input-side counters are overwritten only by strictly positive values, so a
 * later event that reports an explicit 0 cannot erase a real count.
 * `output_tokens` is taken whenever it is present, because 0 at
 * message_start is a legitimate value.
 *
 * @param {object} acc - Running totals; mutated in place.
 * @param {object} usage - `usage` object from a stream event.
 */
function updateAccumulatedUsage(acc, usage) {
    // [wire field, accumulator field] pairs that share the >0 guard.
    const positiveOnly = [
        ["input_tokens", "inputTokens"],
        ["cache_read_input_tokens", "cacheReadTokens"],
        ["cache_creation_input_tokens", "cacheCreationTokens"],
        // DeepSeek / Anthropic reasoning tokens (thinking budget consumption)
        ["reasoning_input_tokens", "reasoningTokens"],
    ];
    for (const [wireField, accField] of positiveOnly) {
        const incoming = usage[wireField];
        if (incoming > 0) {
            acc[accField] = incoming;
        }
    }
    const reportedOutput = usage.output_tokens;
    if (reportedOutput !== undefined && reportedOutput !== null) {
        acc.outputTokens = reportedOutput;
    }
}
588
+ // ── Stop reason normalization (Anthropic → OpenAI standard) ──────────────────
589
/**
 * Translate an Anthropic `stop_reason` into the OpenAI-style finish reason.
 * Values without a known translation are returned unchanged.
 *
 * @param {string} reason - `stop_reason` from the API.
 * @returns {string} "stop", "tool_calls", "length", or the input itself.
 */
function mapAnthropicStopReason(reason) {
    if (reason === "end_turn" || reason === "stop_sequence") {
        return "stop";
    }
    if (reason === "tool_use") {
        return "tool_calls";
    }
    return reason === "max_tokens" ? "length" : reason;
}
602
+ // ── Reasoning effort → thinking budget (CC paramsFromContext parity) ─────────
603
/**
 * Report whether tool_reference blocks may be placed in tool_result content
 * for this model. CC parity: only Haiku models lack support, so the answer is
 * `false` exactly when the identifier contains "haiku" (any letter case).
 *
 * @param {string} model - Model identifier.
 * @returns {boolean}
 */
function modelSupportsToolReference(model) {
    const haikuPosition = String(model).search(/haiku/i);
    return haikuPosition === -1;
}
610
/** Static thinking budgets (tokens) per effort level. */
const THINKING_BUDGET = {
    high: 16000,
    medium: 8000,
    low: 4000,
};
/**
 * Translate a reasoning effort level into a thinking `budget_tokens` value.
 *
 * When `maxTokens` is above 16384 the budget scales with it ("high": half,
 * "medium": a quarter, each with a floor equal to the static default and a
 * ceiling of `maxTokens - 1`; "low": the smaller of 4000 and `maxTokens - 1`).
 * Otherwise the static table is used. Unrecognized efforts get the static
 * "high" budget.
 *
 * @param {string} effort - "low" | "medium" | "high" (other values tolerated).
 * @param {number} [maxTokens] - Requested output limit, if any.
 * @returns {number} Thinking budget in tokens.
 */
function mapReasoningEffortToBudget(effort, maxTokens) {
    const canScale = Boolean(maxTokens) && maxTokens > 16384;
    if (!canScale) {
        return THINKING_BUDGET[effort] ?? THINKING_BUDGET.high;
    }
    // Dynamic budget based on model's actual max output (CC parity)
    const ceiling = maxTokens - 1;
    if (effort === "high") {
        const half = Math.floor(maxTokens * 0.5);
        return Math.min(ceiling, Math.max(16000, half));
    }
    if (effort === "medium") {
        const quarter = Math.floor(maxTokens * 0.25);
        return Math.min(ceiling, Math.max(8000, quarter));
    }
    if (effort === "low") {
        return Math.min(ceiling, 4000);
    }
    return THINKING_BUDGET.high;
}
632
+ // ── System prompt → cache_control block array (CC buildSystemPromptBlocks) ───
633
/**
 * Produce the `system` request parameter.
 *
 * With caching enabled the prompt is wrapped in a single text block carrying
 * a session-scoped `ephemeral` cache_control marker (CC parity: exactly one
 * marker, on the last block; no global scope to avoid cross-session cache
 * leakage). With caching disabled the prompt string is passed through.
 *
 * @param {string | undefined} system - System prompt text.
 * @param {boolean} enableCaching - Whether to attach the cache marker.
 * @returns {string | object[] | undefined} `undefined` when there is no prompt.
 */
function buildSystemParam(system, enableCaching) {
    if (!system) {
        return undefined;
    }
    if (enableCaching) {
        const cachedBlock = {
            type: "text",
            text: system,
            cache_control: { type: "ephemeral" },
        };
        return [cachedBlock];
    }
    return system;
}
652
+ // ── ensureToolResultPairing (CC conversation repair, pre-request) ────────────
653
/**
 * Repair tool_use/tool_result mismatches before sending to API.
 * CC parity: fixes these classes of problems:
 * 1. Orphaned tool_results → stripped. A result is orphaned when its ID was
 *    never requested, when it does not sit in the contiguous run of tool
 *    messages directly after the assistant turn that requested it, or when
 *    that call has already been answered (duplicate result).
 * 2. Missing tool_results (tool_use with no result in that run) → a synthetic
 *    error result is inserted right after the assistant turn.
 * 3. Duplicate tool_use IDs → cross-message dedup (first occurrence wins).
 *
 * The input array and its messages are not mutated; an assistant message is
 * shallow-copied only when some of its tool calls were removed.
 *
 * @param {object[]} messages - OpenAI-style messages (`role`, `tool_calls`, `tool_call_id`, …).
 * @returns {object[]} A new, repaired message list.
 */
function ensureToolResultPairing(messages) {
    const result = [];
    const allSeenToolUseIds = new Set();
    // IDs from the most recent assistant turn that still accept exactly one
    // real result. Cleared by any non-tool message.
    let awaitingResultIds = new Set();
    for (let i = 0; i < messages.length; i++) {
        const msg = messages[i];
        if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
            // Dedup tool_use IDs across messages (CC allSeenToolUseIds parity)
            const dedupedCalls = msg.tool_calls.filter((tc) => {
                if (allSeenToolUseIds.has(tc.id)) {
                    return false;
                }
                allSeenToolUseIds.add(tc.id);
                return true;
            });
            // Look ahead: results present in the run of tool messages that follows.
            const answeredIds = new Set();
            for (let j = i + 1; j < messages.length && messages[j].role === "tool"; j++) {
                const answeredId = messages[j].tool_call_id;
                if (answeredId) {
                    answeredIds.add(answeredId);
                }
            }
            // Push the assistant message (possibly with deduped tool calls)
            result.push(dedupedCalls.length === msg.tool_calls.length
                ? msg
                : { ...msg, tool_calls: dedupedCalls });
            awaitingResultIds = new Set();
            for (const tc of dedupedCalls) {
                if (answeredIds.has(tc.id)) {
                    awaitingResultIds.add(tc.id);
                    continue;
                }
                // CC parity: inject synthetic error tool_results for missing ones
                result.push({
                    role: "tool",
                    tool_call_id: tc.id,
                    name: tc.function?.name,
                    content: "[Tool execution failed; output not available during conversation recovery]",
                });
            }
        }
        else if (msg.role === "tool") {
            // Keep the result only if the preceding assistant turn is still
            // waiting for it; delete() makes a second result for the same call
            // an orphan. CC parity: strip orphaned tool_results silently.
            if (awaitingResultIds.delete(msg.tool_call_id ?? "")) {
                result.push(msg);
            }
        }
        else {
            // Any other message ends the result run of the previous assistant turn.
            awaitingResultIds = new Set();
            result.push(msg);
        }
    }
    return result;
}
716
+ // ── Local media resolution for Anthropic ─────────────────────────────────────
717
/**
 * Pre-resolve local media URLs for Anthropic.
 *
 * Each local entry in `imageUrls` (user and tool messages) or `fileIds`
 * (user messages only) is replaced by whatever `resolveMediaUrlViaUpload`
 * returns for it. Messages with nothing to resolve are returned as the same
 * object; changed messages are shallow copies. Messages are processed
 * concurrently, and within one message images are resolved before files.
 *
 * NOTE(review): an earlier comment mentioned a base64 data-URL fallback with
 * a size limit. In this function a missing adapter or API key is an error, so
 * any such fallback would have to live inside `resolveMediaUrlViaUpload`.
 * Confirm against that helper.
 *
 * @param {Array<object>} messages - Chat messages to inspect.
 * @param {object | undefined} uploadAdapter - File upload adapter.
 * @param {string | undefined} apiKey - Credential forwarded to the upload helper.
 * @param {AbortSignal | undefined} signal - Cancellation signal forwarded to the helper.
 * @returns {Promise<Array<object>>} The original array when nothing is local,
 *   otherwise a new array.
 * @throws {Error} When a user/tool message has local media but the adapter or
 *   API key is missing.
 */
async function resolveMessagesMediaForAnthropic(messages, uploadAdapter, apiKey, signal) {
    const hasLocalImage = (m) => m.imageUrls?.some(isLocalUrl);
    const hasLocalFile = (m) => m.fileIds?.some((f) => isLocalUrl(f.id));
    // Fast path: no message references local media at all.
    if (!messages.some((m) => hasLocalImage(m) || hasLocalFile(m))) {
        return messages;
    }
    const canUpload = Boolean(uploadAdapter) && Boolean(apiKey);
    const upload = (localUrl) => resolveMediaUrlViaUpload(localUrl, { uploadAdapter, apiKey, signal });
    const resolveOne = async (msg) => {
        // Only user messages and tool result messages carry resolvable media.
        const isCandidate = msg.role === "user" || msg.role === "tool";
        if (!isCandidate) {
            return msg;
        }
        const patch = {};
        if (hasLocalImage(msg)) {
            if (!canUpload) {
                throw new Error("FileUploadAdapter required for local image URLs. Configure OSS_ACCESS_KEY_ID/OSS_ACCESS_KEY_SECRET or QLOGICAGENT_HUB_URL.");
            }
            const pendingImages = msg.imageUrls.map((url) => (isLocalUrl(url) ? upload(url) : url));
            patch.imageUrls = await Promise.all(pendingImages);
        }
        // File attachments are resolved for user messages only.
        if (msg.role === "user" && hasLocalFile(msg)) {
            if (!canUpload) {
                throw new Error("FileUploadAdapter required for local file URLs. Configure OSS_ACCESS_KEY_ID/OSS_ACCESS_KEY_SECRET or QLOGICAGENT_HUB_URL.");
            }
            const pendingFiles = msg.fileIds.map(async (f) => {
                if (!isLocalUrl(f.id)) {
                    return f;
                }
                const resolved = await upload(f.id);
                return { ...f, id: resolved };
            });
            patch.fileIds = await Promise.all(pendingFiles);
        }
        const changed = Object.keys(patch).length > 0;
        return changed ? { ...msg, ...patch } : msg;
    };
    return Promise.all(messages.map((msg) => resolveOne(msg)));
}
753
+ // ── Message format conversion (OpenAI → Anthropic) ───────────────────────────
754
/**
 * Split a base64 data URL into its MIME type and payload.
 * @param {string} url - Candidate `data:<mime>;base64,<payload>` URL.
 * @returns {{ mimeType: string, data: string } | null} `null` when malformed.
 */
function splitBase64DataUrlForAnthropic(url) {
    const match = /^data:([^;]+);base64,(.+)$/.exec(url);
    return match ? { mimeType: match[1], data: match[2] } : null;
}
/**
 * Turn one media URL into an Anthropic `image` (or `document`) content block.
 * @param {string} url - Data URL or remote URL.
 * @param {boolean} allowDocuments - When true, PDFs become `document` blocks;
 *   when false every URL is emitted as an `image` block.
 * @returns {object | null} The block, or `null` for a malformed data URL.
 */
function mediaUrlToAnthropicBlock(url, allowDocuments) {
    if (url.startsWith("data:")) {
        const parsed = splitBase64DataUrlForAnthropic(url);
        if (!parsed) {
            return null;
        }
        const isPdf = allowDocuments && parsed.mimeType === "application/pdf";
        return {
            type: isPdf ? "document" : "image",
            source: { type: "base64", media_type: parsed.mimeType, data: parsed.data },
        };
    }
    const isPdfUrl = allowDocuments && (url.endsWith(".pdf") || url.includes(".pdf?"));
    return { type: isPdfUrl ? "document" : "image", source: { type: "url", url } };
}
/**
 * Turn one file attachment into a content block: PDFs become `document`
 * blocks, everything else (and anything over the upload size limit) becomes a
 * text annotation so the agent processes it with tools.
 * @param {{ id: string, mimeType?: string, size?: number }} f - Attachment.
 * @returns {object | null} The block, or `null` for a malformed PDF data URL.
 */
function fileRefToAnthropicBlock(f) {
    const mime = f.mimeType || "";
    if (f.size != null && f.size > MEDIA_MAX_UPLOAD_SIZE) {
        const sizeLabel = `${(f.size / (1024 * 1024)).toFixed(1)}MB`;
        return {
            type: "text",
            text: `[Attached: ${f.id} (${mime || "unknown"}, ${sizeLabel}) — file too large for direct vision, use tools to process]`,
        };
    }
    if (mime === "application/pdf" || f.id.endsWith(".pdf")) {
        if (!f.id.startsWith("data:")) {
            return { type: "document", source: { type: "url", url: f.id } };
        }
        const parsed = splitBase64DataUrlForAnthropic(f.id);
        return parsed
            ? { type: "document", source: { type: "base64", media_type: "application/pdf", data: parsed.data } }
            : null;
    }
    return { type: "text", text: mime ? `[Attached: ${f.id} (${mime})]` : `[Attached: ${f.id}]` };
}
/**
 * Build the content block array for a user message that carries media.
 * @param {object} msg - User message.
 * @param {boolean} hasImages - Whether `msg.imageUrls` should be emitted.
 * @param {boolean} hasFiles - Whether `msg.fileIds` should be emitted.
 * @returns {Array<object>} Content blocks (possibly empty).
 */
function buildAnthropicUserContent(msg, hasImages, hasFiles) {
    const content = [];
    if (msg.content) {
        content.push({ type: "text", text: msg.content });
    }
    if (hasImages) {
        const multiImage = msg.imageUrls.length > 1;
        msg.imageUrls.forEach((url, imgIdx) => {
            const block = mediaUrlToAnthropicBlock(url, true);
            // A malformed data URL yields no block; skip its label as well so
            // the model never sees an "[Image N]" marker with nothing behind it.
            if (!block) {
                return;
            }
            // CC parity: label each image when several are present.
            if (multiImage) {
                content.push({ type: "text", text: `[Image ${imgIdx + 1}]` });
            }
            content.push(block);
        });
    }
    if (hasFiles) {
        for (const f of msg.fileIds) {
            const block = fileRefToAnthropicBlock(f);
            if (block) {
                content.push(block);
            }
        }
    }
    return content;
}
/**
 * Decode the arguments of a tool call into the object `tool_use.input` needs.
 * @param {unknown} rawArguments - Usually a JSON string.
 * @returns {object} The decoded plain object, or `{}` when the arguments are
 *   unparsable or decode to anything that is not a plain object.
 */
function parseToolCallInputForAnthropic(rawArguments) {
    let parsed = rawArguments;
    if (typeof rawArguments === "string") {
        try {
            parsed = JSON.parse(rawArguments);
        }
        catch {
            // Deliberate best effort: malformed arguments degrade to an empty
            // input instead of failing the whole request.
            return {};
        }
    }
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
}
/**
 * Convert one assistant message.
 * @param {object} msg - Assistant message (`content`, `tool_calls`, `thinkingBlocks`).
 * @param {object} quirks - Provider quirks; `filterThinkingBlocks` drops thinking passback.
 * @returns {{ role: "assistant", content: string | Array<object> }}
 */
function buildAnthropicAssistantMessage(msg, quirks) {
    // Provider-aware filtering (CC/altcode parity): some providers do not
    // accept thinking blocks in passback. Blocks without a signature are
    // always skipped, because an empty signature causes an API 400.
    const thinking = [];
    if (!quirks.filterThinkingBlocks && msg.thinkingBlocks) {
        for (const tb of msg.thinkingBlocks) {
            if (tb.signature) {
                thinking.push({ type: "thinking", thinking: tb.thinking, signature: tb.signature });
            }
        }
    }
    const toolCalls = msg.tool_calls && msg.tool_calls.length > 0 ? msg.tool_calls : [];
    if (toolCalls.length === 0 && thinking.length === 0) {
        // Nothing needs the block-array form. This also avoids emitting an
        // empty content array when every thinking block was unsigned.
        return { role: "assistant", content: msg.content ?? "" };
    }
    // Thinking blocks must come first, then text, then tool_use blocks.
    const content = [...thinking];
    if (msg.content) {
        content.push({ type: "text", text: msg.content });
    }
    for (const tc of toolCalls) {
        content.push({
            type: "tool_use",
            id: tc.id,
            name: tc.function.name,
            input: parseToolCallInputForAnthropic(tc.function.arguments),
        });
    }
    return { role: "assistant", content };
}
/**
 * Convert one `tool` message into a `tool_result` content block.
 * @param {object} msg - Tool message.
 * @param {string | undefined} model - Model id used to gate tool_reference blocks.
 * @returns {object} The `tool_result` block.
 */
function buildAnthropicToolResultBlock(msg, model) {
    const content = msg.content ?? "";
    // AP2: prefer the explicit is_error field, fall back to the content-prefix
    // heuristic for backward compatibility.
    const isError = msg.is_error === true
        || content.startsWith("Error:") || content === SYNTHETIC_TOOL_RESULT_CONTENT;
    // CC parity: when the message carries toolReferences, emit tool_reference
    // blocks instead of plain text (skipped for errors and for models that
    // modelSupportsToolReference rejects).
    const toolRefs = msg.toolReferences;
    const useToolReference = toolRefs && toolRefs.length > 0 && !isError
        && (!model || modelSupportsToolReference(model));
    const imgUrls = msg.imageUrls;
    let toolResultContent = content;
    if (useToolReference) {
        toolResultContent = toolRefs.map((name) => ({ type: "tool_reference", tool_name: name }));
    }
    else if (imgUrls && imgUrls.length > 0 && !isError) {
        // Images returned by a tool go in as content blocks alongside the text.
        const blocks = [];
        if (content) {
            blocks.push({ type: "text", text: content });
        }
        for (const url of imgUrls) {
            const block = mediaUrlToAnthropicBlock(url, false);
            if (block) {
                blocks.push(block);
            }
        }
        // Keep the plain string if nothing usable was produced.
        if (blocks.length > 0) {
            toolResultContent = blocks;
        }
    }
    return {
        type: "tool_result",
        tool_use_id: msg.tool_call_id ?? "",
        content: toolResultContent,
        ...(isError && { is_error: true }),
    };
}
/**
 * Convert OpenAI-style chat messages into the Anthropic Messages format.
 *
 * - `system` messages are concatenated (blank-line separated) into `system`.
 * - `user` messages become plain strings, or content block arrays when they
 *   carry images or file attachments.
 * - `assistant` messages become block arrays when they carry signed thinking
 *   blocks or tool calls; tool-call arguments that do not decode to a plain
 *   object are replaced by `{}`.
 * - `tool` messages become `tool_result` blocks. Consecutive ones are merged
 *   into the preceding user message when that message already has array
 *   content, so all results for one assistant turn share one user message.
 * - Messages with any other role are ignored.
 *
 * @param {Array<object>} messages - OpenAI-style messages.
 * @param {object} [quirks={}] - Provider quirks (`filterImageBlocks`,
 *   `filterThinkingBlocks`).
 * @param {string} [model] - Model id; gates tool_reference blocks.
 * @returns {{ system: string | undefined, messages: Array<object> }}
 */
function convertMessages(messages, quirks = {}, model) {
    let system;
    const out = [];
    for (const msg of messages) {
        if (msg.role === "system") {
            system = system ? `${system}\n\n${msg.content ?? ""}` : (msg.content ?? "");
        }
        else if (msg.role === "user") {
            const hasImages = Boolean(msg.imageUrls && msg.imageUrls.length > 0 && !quirks.filterImageBlocks);
            const hasFiles = Boolean(msg.fileIds && msg.fileIds.length > 0);
            const blocks = hasImages || hasFiles ? buildAnthropicUserContent(msg, hasImages, hasFiles) : [];
            // Fall back to the string form when no block was produced.
            out.push({ role: "user", content: blocks.length > 0 ? blocks : (msg.content ?? "") });
        }
        else if (msg.role === "assistant") {
            out.push(buildAnthropicAssistantMessage(msg, quirks));
        }
        else if (msg.role === "tool") {
            const block = buildAnthropicToolResultBlock(msg, model);
            const prev = out[out.length - 1];
            if (prev && prev.role === "user" && Array.isArray(prev.content)) {
                // Merge into the existing user content block array.
                prev.content.push(block);
            }
            else {
                out.push({ role: "user", content: [block] });
            }
        }
    }
    return { system, messages: out };
}
976
/**
 * Map an OpenAI-style function tool definition onto the Anthropic tool shape.
 *
 * @param {{ function: { name: string, description?: string, parameters?: object } }} tool
 *   Tool definition in `{ function: {...} }` form.
 * @returns {{ name: string, description: (string | undefined), input_schema: object }}
 *   Anthropic tool entry. A missing (`null`/`undefined`) parameter schema is
 *   replaced by an empty object schema.
 */
function convertToolDef(tool) {
    const { name, description, parameters } = tool.function;
    const input_schema = parameters ?? { type: "object", properties: {} };
    return { name, description, input_schema };
}
983
+ // ── Helpers ──────────────────────────────────────────────────────────────────
984
/**
 * Extract the HTTP status code embedded in an error message.
 *
 * @param {string} message - Error text, expected to contain
 *   "Anthropic API error NNN" somewhere.
 * @returns {number | null} The three-digit status as a number, or `null` when
 *   the message does not contain that phrase.
 */
function parseHttpStatus(message) {
    const statusPattern = /Anthropic API error (\d{3})/;
    const [, statusText] = statusPattern.exec(message) ?? [];
    if (statusText === undefined) {
        return null;
    }
    return Number.parseInt(statusText, 10);
}
988
/**
 * CC adjustParamsForNonStreaming parity: clamp request parameters for a
 * non-streaming call.
 *
 * `max_tokens` (8192 when it is not a number) is capped at
 * MAX_NON_STREAMING_TOKENS, and an enabled thinking budget is re-constrained
 * so that it stays strictly below the capped `max_tokens`. The input object
 * is not mutated.
 *
 * NOTE(review): the earlier comment described the cap as 64K; the constant is
 * defined elsewhere in the file. Confirm its value there.
 *
 * @param {object} params - Request parameters (`max_tokens`, `thinking`, ...).
 * @returns {object} A shallow copy of `params` with the adjusted fields.
 */
function adjustParamsForNonStreaming(params) {
    const requestedMax = typeof params.max_tokens === "number" ? params.max_tokens : 8192;
    const ceiling = Math.min(requestedMax, MAX_NON_STREAMING_TOKENS);
    const { thinking } = params;
    const result = { ...params, max_tokens: ceiling };
    // Only an enabled thinking config with a non-zero budget needs clamping.
    if (thinking?.type !== "enabled" || !thinking.budget_tokens) {
        return result;
    }
    const clampedBudget = Math.min(thinking.budget_tokens, ceiling - 1);
    result.thinking = { ...thinking, budget_tokens: clampedBudget };
    return result;
}