@xiaozhiclaw/provider-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
- package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
- package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
- package/dist/adapters/gemini-file-upload-adapter.js +92 -0
- package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
- package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
- package/dist/adapters/index.d.ts +10 -0
- package/dist/adapters/index.js +10 -0
- package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
- package/dist/adapters/openai-file-upload-adapter.js +56 -0
- package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
- package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
- package/dist/builtin-providers.d.ts +8 -0
- package/dist/builtin-providers.js +2237 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.js +1 -0
- package/dist/credentials.d.ts +1 -0
- package/dist/credentials.js +8 -0
- package/dist/debug-transport.d.ts +12 -0
- package/dist/debug-transport.js +99 -0
- package/dist/errors.d.ts +11 -0
- package/dist/errors.js +12 -0
- package/dist/events.d.ts +48 -0
- package/dist/events.js +1 -0
- package/dist/file-upload-service.d.ts +68 -0
- package/dist/file-upload-service.js +110 -0
- package/dist/gemini-schema-utils.d.ts +17 -0
- package/dist/gemini-schema-utils.js +76 -0
- package/dist/index.d.ts +37 -0
- package/dist/index.js +33 -0
- package/dist/llm-client.d.ts +43 -0
- package/dist/llm-client.js +217 -0
- package/dist/media-client.d.ts +42 -0
- package/dist/media-client.js +174 -0
- package/dist/media-transport.d.ts +176 -0
- package/dist/media-transport.js +16 -0
- package/dist/media.d.ts +2 -0
- package/dist/media.js +1 -0
- package/dist/model-detection.d.ts +22 -0
- package/dist/model-detection.js +28 -0
- package/dist/paths.d.ts +2 -0
- package/dist/paths.js +11 -0
- package/dist/provider-def.d.ts +220 -0
- package/dist/provider-def.js +9 -0
- package/dist/provider-registry.d.ts +51 -0
- package/dist/provider-registry.js +130 -0
- package/dist/provider-tool-api.d.ts +44 -0
- package/dist/provider-tool-api.js +9 -0
- package/dist/provider-variant-resolver.d.ts +35 -0
- package/dist/provider-variant-resolver.js +174 -0
- package/dist/retry.d.ts +37 -0
- package/dist/retry.js +71 -0
- package/dist/transport.d.ts +281 -0
- package/dist/transport.js +27 -0
- package/dist/transports/anthropic-messages.d.ts +65 -0
- package/dist/transports/anthropic-messages.js +1004 -0
- package/dist/transports/gemini-cache-api.d.ts +86 -0
- package/dist/transports/gemini-cache-api.js +141 -0
- package/dist/transports/gemini-file-api.d.ts +90 -0
- package/dist/transports/gemini-file-api.js +164 -0
- package/dist/transports/gemini-generatecontent.d.ts +56 -0
- package/dist/transports/gemini-generatecontent.js +688 -0
- package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
- package/dist/transports/gemini-lyria-realtime.js +295 -0
- package/dist/transports/gemini-media.d.ts +53 -0
- package/dist/transports/gemini-media.js +383 -0
- package/dist/transports/media-resolve.d.ts +50 -0
- package/dist/transports/media-resolve.js +91 -0
- package/dist/transports/minimax-media.d.ts +56 -0
- package/dist/transports/minimax-media.js +433 -0
- package/dist/transports/openai-chat.d.ts +81 -0
- package/dist/transports/openai-chat.js +782 -0
- package/dist/transports/openai-media.d.ts +24 -0
- package/dist/transports/openai-media.js +118 -0
- package/dist/transports/openai-responses.d.ts +63 -0
- package/dist/transports/openai-responses.js +778 -0
- package/dist/transports/qwen-media.d.ts +59 -0
- package/dist/transports/qwen-media.js +411 -0
- package/dist/transports/realtime-transport.d.ts +183 -0
- package/dist/transports/realtime-transport.js +332 -0
- package/dist/transports/volcengine-grounding.d.ts +58 -0
- package/dist/transports/volcengine-grounding.js +69 -0
- package/dist/transports/volcengine-media.d.ts +94 -0
- package/dist/transports/volcengine-media.js +801 -0
- package/dist/transports/volcengine-responses.d.ts +64 -0
- package/dist/transports/volcengine-responses.js +797 -0
- package/dist/transports/zhipu-media.d.ts +82 -0
- package/dist/transports/zhipu-media.js +522 -0
- package/dist/transports/zhipu-tool-api.d.ts +35 -0
- package/dist/transports/zhipu-tool-api.js +126 -0
- package/dist/wire-types.d.ts +51 -0
- package/dist/wire-types.js +1 -0
- package/package.json +33 -0
|
@@ -0,0 +1,1004 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Messages Transport — SSE streaming for Claude API.
|
|
3
|
+
*
|
|
4
|
+
* Aligned with CC (claude-code-haha) src/services/api/claude.ts:
|
|
5
|
+
* - cache_control ephemeral injection on system prompt blocks
|
|
6
|
+
* - ensureToolResultPairing() conversation repair before every request
|
|
7
|
+
* - Retry with exponential backoff on transient errors (429/529/overloaded)
|
|
8
|
+
* - Non-streaming fallback when stream errors out
|
|
9
|
+
* - 90s idle watchdog timeout for silently dropped connections
|
|
10
|
+
* - Adaptive/budget thinking with temperature omit
|
|
11
|
+
* - Cache token extraction with >0 guard (CC updateUsage parity)
|
|
12
|
+
* - signature_delta handling for thinking blocks
|
|
13
|
+
*/
|
|
14
|
+
import { MEDIA_MAX_UPLOAD_SIZE } from "../constants.js";
|
|
15
|
+
import { isLocalUrl, resolveMediaUrlViaUpload } from "./media-resolve.js";
|
|
16
|
+
import { DEFAULT_MAX_RETRIES, STREAM_IDLE_TIMEOUT_MS, TRANSIENT_STATUS_CODES, retryDelay, retrySleep, } from "../retry.js";
|
|
17
|
+
// ── Constants ────────────────────────────────────────────────────────────────
|
|
18
|
+
// Default overall deadline (ms) for one streaming request; used when the
// transport config does not supply `timeoutMs`.
const DEFAULT_TIMEOUT_MS = 3 * 60 * 1000;
// Deadline (ms) for the non-streaming fallback request.
const NON_STREAMING_TIMEOUT_MS = 5 * 60 * 1000;
// Token ceiling for non-streaming requests.
// NOTE(review): presumably consumed by adjustParamsForNonStreaming — confirm.
const MAX_NON_STREAMING_TOKENS = 64 * 1000;
// CC SYNTHETIC_TOOL_RESULT_PLACEHOLDER parity.
// NOTE(review): presumably the content of tool_result blocks synthesized by
// ensureToolResultPairing — confirm against that helper.
const SYNTHETIC_TOOL_RESULT_CONTENT = "[Tool execution failed; output not available during conversation recovery]";
|
|
23
|
+
// ── Beta headers (CC parity for 3P callers) ──────────────────────────────────
|
|
24
|
+
// Values for the `anthropic-beta` request header.
// Only betas that any third-party API caller can use are listed here; the
// first-party-only ones (claude-code-20250219, afk-mode, cli-internal,
// redact-thinking) are deliberately left out.

// Sent when thinking is requested on a non-Haiku Claude model.
const BETA_INTERLEAVED_THINKING = "interleaved-thinking-2025-05-14";
// Sent for the model ids treated as 1M-context models.
const BETA_CONTEXT_1M = "context-1m-2025-08-07";
// Sent when the request carries tool definitions.
const BETA_TOKEN_EFFICIENT_TOOLS = "token-efficient-tools-2026-03-28";
// Always sent for Claude models.
const BETA_PROMPT_CACHING_SCOPE = "prompt-caching-scope-2026-01-05";
// Always sent for Claude models.
const BETA_PDF_SUPPORT = "pdfs-2024-09-25";
|
|
31
|
+
/**
 * Decide whether a model should be sent `thinking: { type: "adaptive" }`.
 *
 * The check is a case-insensitive substring match on the model id: it is true
 * for ids containing "opus-4-6", "opus-4-7" or "sonnet-4-6". Every other model
 * (Haiku and older models, per the original note) must use
 * `type: "enabled"` together with `budget_tokens`.
 *
 * @param {string} model - Model identifier.
 * @returns {boolean} True when the id matches one of the adaptive-thinking families.
 */
function supportsAdaptiveThinking(model) {
    const normalized = model.toLowerCase();
    const adaptiveFamilies = ["opus-4-6", "opus-4-7", "sonnet-4-6"];
    return adaptiveFamilies.some((family) => normalized.includes(family));
}
|
|
41
|
+
/**
 * Build the list of `anthropic-beta` values for a request.
 *
 * Built-in betas are only added when the lower-cased model id starts with
 * "claude": non-Claude providers that reuse this transport (DeepSeek, Qwen,
 * MiniMax) do not support them and may answer with HTTP 400. Entries from the
 * `ANTHROPIC_BETAS` environment variable (comma separated) are appended for
 * every model as an advanced override.
 *
 * The returned list never contains duplicates, whichever branch produced it.
 *
 * @param {string} model - Model identifier.
 * @param {boolean} hasThinking - Whether the request enables thinking.
 * @param {boolean} hasTools - Whether the request carries tool definitions.
 * @returns {string[]} Beta identifiers in insertion order (built-ins first, then env entries).
 */
function buildBetaHeaders(model, hasThinking, hasTools) {
    const ml = model.toLowerCase();
    const betas = [];
    // Append a beta once; keeps the header free of repeated values.
    const addBeta = (beta) => {
        if (!betas.includes(beta)) {
            betas.push(beta);
        }
    };
    if (ml.startsWith("claude")) {
        // 1M context models: claude-opus-4-6/4-7, claude-sonnet-4-6
        const is1mModel = ml.includes("opus-4-6") || ml.includes("opus-4-7")
            || ml.includes("sonnet-4-6");
        if (is1mModel) {
            addBeta(BETA_CONTEXT_1M);
        }
        // PDF support: always sent for Claude models.
        addBeta(BETA_PDF_SUPPORT);
        // Interleaved thinking: skipped for Haiku models.
        if (hasThinking && !ml.includes("haiku")) {
            addBeta(BETA_INTERLEAVED_THINKING);
        }
        // Token-efficient tools: only relevant when tools are present.
        if (hasTools) {
            addBeta(BETA_TOKEN_EFFICIENT_TOOLS);
        }
        // Prompt caching scope: always sent; the header is a no-op without a scope field.
        addBeta(BETA_PROMPT_CACHING_SCOPE);
    }
    // User-specified additional betas via env; blank entries are ignored.
    const envBetas = process.env.ANTHROPIC_BETAS;
    if (envBetas) {
        for (const entry of envBetas.split(",")) {
            const beta = entry.trim();
            if (beta) {
                addBeta(beta);
            }
        }
    }
    return betas;
}
|
|
90
|
+
// ── Transport class ──────────────────────────────────────────────────────────
|
|
91
|
+
/**
 * Transport for Anthropic Messages-style endpoints (`POST {baseUrl}/v1/messages`).
 *
 * `stream()` builds the request body and headers, then runs
 * `streamWithWatchdog()` inside a retry loop; on the final transient failure it
 * tries `nonStreamingFallback()`. Results are surfaced as plain chunk objects
 * whose `type` is one of "usage", "delta", "reasoning_delta",
 * "tool_call_delta", "reasoning_block_complete" or "done".
 */
export class AnthropicMessagesTransport {
    baseUrl;
    apiVersion;
    timeoutMs;
    streamIdleTimeoutMs;
    enablePromptCaching;
    maxRetries;
    omitZeroTemperature;
    quirks;
    fileUploadAdapter;
    /**
     * @param {object} config
     * @param {string} config.baseUrl - Required; trailing slashes are stripped.
     * @param {string} [config.apiVersion] - `anthropic-version` header, default "2023-06-01".
     * @param {number} [config.timeoutMs] - Overall streaming deadline, default DEFAULT_TIMEOUT_MS.
     * @param {number} [config.streamIdleTimeoutMs] - Max gap between chunks, default STREAM_IDLE_TIMEOUT_MS.
     * @param {boolean} [config.enablePromptCaching] - Default true.
     * @param {number} [config.maxRetries] - Default DEFAULT_MAX_RETRIES.
     * @param {boolean} [config.omitZeroTemperature] - Drop `temperature` when it is exactly 0; default false.
     * @param {object} [config.quirks] - Provider-specific switches, default `{}`.
     * @param {*} [config.fileUploadAdapter] - Passed through to media resolution.
     * @throws {Error} When `config.baseUrl` is empty or missing.
     */
    constructor(config) {
        if (!config.baseUrl) {
            throw new Error("AnthropicMessagesTransport: baseUrl is required (got empty or undefined)");
        }
        this.baseUrl = config.baseUrl.replace(/\/+$/, "");
        this.apiVersion = config.apiVersion ?? "2023-06-01";
        this.timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
        this.streamIdleTimeoutMs = config.streamIdleTimeoutMs ?? STREAM_IDLE_TIMEOUT_MS;
        this.enablePromptCaching = config.enablePromptCaching ?? true;
        this.maxRetries = config.maxRetries ?? DEFAULT_MAX_RETRIES;
        this.omitZeroTemperature = config.omitZeroTemperature ?? false;
        this.quirks = config.quirks ?? {};
        this.fileUploadAdapter = config.fileUploadAdapter;
    }
    /**
     * Send a chat request and yield response chunks.
     *
     * Transient HTTP statuses and idle timeouts are retried up to `maxRetries`
     * times; after the last attempt a non-streaming request is tried unless
     * `request.streamRequired` is set. Context-window overflow and every other
     * non-transient error are rethrown immediately.
     *
     * @param {object} request - Reads `messages`, `model`, `maxTokens`, `tools`,
     *   `toolChoice`, `temperature`, `topP`, `reasoning`, `streamRequired`.
     * @param {string} apiKey - Sent as the `x-api-key` header.
     * @param {AbortSignal} [signal] - Caller cancellation.
     * @throws {Error} "Request aborted" when `signal` is already aborted before an attempt.
     */
    async *stream(request, apiKey, signal) {
        // Note: DeepSeek prefix completion requires OpenAI Chat Completions
        // format (base_url /beta), NOT Anthropic Messages format.
        // The Anthropic API at /anthropic does not support prefix: true.
        const url = `${this.baseUrl}/v1/messages`;
        // CC parity: ensureToolResultPairing before converting messages
        const repairedMessages = ensureToolResultPairing(request.messages);
        // Pre-resolve local media URLs for Anthropic (localhost not reachable from API)
        const resolvedMessages = await resolveMessagesMediaForAnthropic(repairedMessages, this.fileUploadAdapter, apiKey, signal);
        // Convert OpenAI-style messages -> Anthropic format
        const { system, messages } = convertMessages(resolvedMessages, this.quirks, request.model);
        // CC parity: system prompt as block array with cache_control on last block
        const systemParam = buildSystemParam(system, this.enablePromptCaching);
        const hasThinking = !!request.reasoning;
        const maxTokens = request.maxTokens ?? 8192;
        const body = {
            model: request.model,
            messages,
            // DeepSeek thinking models share max_tokens between thinking + text output.
            // Enforce minimum 4096 when thinking is enabled to prevent thinking exhausting
            // the entire budget and producing empty text responses.
            max_tokens: hasThinking ? Math.max(maxTokens, 4096) : maxTokens,
            stream: true,
        };
        if (systemParam) {
            body.system = systemParam;
        }
        if (request.tools && request.tools.length > 0) {
            const tools = request.tools.map(convertToolDef);
            // Active caching: cache_control on the last tool in the prefix layer.
            // Prefix order: tools -> system -> messages. Mark last tool for caching.
            // Use session-scoped ephemeral (no global scope) to prevent cross-session cache leakage.
            // CC separates static/dynamic prompt with SYSTEM_PROMPT_DYNAMIC_BOUNDARY for global scope;
            // without that boundary, global scope can leak session context.
            if (this.enablePromptCaching && tools.length > 0) {
                tools[tools.length - 1].cache_control = { type: "ephemeral" };
            }
            body.tools = tools;
            if (request.toolChoice) {
                // Any value other than "auto" / "required" is sent as "none".
                body.tool_choice =
                    request.toolChoice === "auto"
                        ? { type: "auto" }
                        : request.toolChoice === "required"
                            ? { type: "any" }
                            : { type: "none" };
            }
        }
        // CC parity: temperature must be undefined when thinking is enabled
        if (!hasThinking && request.temperature !== undefined) {
            // MiniMax rejects temperature=0; range is (0.0, 1.0]
            if (!(this.omitZeroTemperature && request.temperature === 0)) {
                body.temperature = request.temperature;
            }
        }
        // Anthropic API: top_p (supported by Anthropic + MiniMax)
        if (!hasThinking && request.topP !== undefined) {
            body.top_p = request.topP;
        }
        // CC parity: adaptive vs budget thinking
        if (hasThinking) {
            if (this.quirks.useEffortInsteadOfBudget) {
                // DeepSeek: budget_tokens is ignored, use output_config.effort instead.
                // DeepSeek only supports "high" and "max"; lower levels map to "high".
                body.thinking = { type: "enabled" };
                const cap = this.quirks.maxReasoningEffort ?? "high";
                const effortInput = request.reasoning?.effort ?? "high";
                // Actual mapping: "low"/"medium" -> "high"; every other value
                // (including "high") -> cap.
                // NOTE(review): the original comment claimed "high" stays "high";
                // with a cap of "max" this code sends "max" for "high". Confirm intent.
                const deepseekEffort = effortInput === "low" || effortInput === "medium"
                    ? "high"
                    : cap;
                body.output_config = { effort: deepseekEffort };
                // DeepSeek KV Cache: set user_id for session-level cache isolation.
                // Server-side KV cache hit rate improves when same user_id sees repeated
                // prefixes (system prompt, conversation history).
                // Use a hash of the system prompt as stable session identifier.
                if (system) {
                    let h = 0x811c9dc5; // FNV-1a seed
                    for (let i = 0; i < system.length; i++) {
                        h ^= system.charCodeAt(i);
                        h = Math.imul(h, 0x01000193);
                    }
                    body.metadata = { user_id: `qa-${(h >>> 0).toString(36)}` };
                }
            }
            else if (supportsAdaptiveThinking(request.model)) {
                // Adaptive thinking: the model self-adjusts thinking depth.
                // This is the CC (claude-code-haha) default mode.
                body.thinking = { type: "adaptive" };
            }
            else {
                // Extended thinking with an explicit budget (Haiku 4.5 and older models).
                // Anthropic rejects budgets below 1024 tokens. max_tokens is already
                // floored at 4096 above, so raising a tiny budget to 1024 always
                // stays below max_tokens.
                const minThinkingBudget = 1024;
                const budget = mapReasoningEffortToBudget(request.reasoning.effort, request.maxTokens);
                body.thinking = {
                    type: "enabled",
                    budget_tokens: Math.max(minThinkingBudget, Math.min(maxTokens - 1, budget)),
                };
            }
        }
        else if (this.quirks.supportsThinkingParam && this.quirks.disableThinkingByDefault) {
            body.thinking = { type: "disabled" };
        }
        // CC parity: beta headers for model-specific capabilities
        const betaHeaders = buildBetaHeaders(request.model, hasThinking, (request.tools?.length ?? 0) > 0);
        const headers = {
            "Content-Type": "application/json",
            "x-api-key": apiKey,
            "anthropic-version": this.apiVersion,
        };
        if (betaHeaders.length > 0) {
            headers["anthropic-beta"] = betaHeaders.join(",");
        }
        // Retry loop with exponential backoff (CC withRetry parity)
        let lastError = null;
        for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
            if (signal?.aborted) {
                throw new Error("Request aborted");
            }
            if (attempt > 0 && lastError) {
                await retrySleep(retryDelay(attempt), signal);
            }
            try {
                yield* this.streamWithWatchdog(url, headers, body, signal);
                return; // Success
            }
            catch (err) {
                lastError = err instanceof Error ? err : new Error(String(err));
                // Parse HTTP status from error message for retry decision
                const status = parseHttpStatus(lastError.message);
                const isTransient = status !== null && TRANSIENT_STATUS_CODES.has(status);
                const isIdleTimeout = lastError.message.includes("Stream idle timeout");
                const isOverflow = lastError.message.includes("model_context_window_exceeded")
                    || lastError.message.includes("context_length_exceeded");
                if (isOverflow) {
                    // CC parity: context window overflow is unrecoverable, don't retry
                    throw lastError;
                }
                if (!isTransient && !isIdleTimeout) {
                    // Non-transient error: don't retry
                    throw lastError;
                }
                if (attempt === this.maxRetries) {
                    // Last attempt: try non-streaming fallback (CC parity)
                    // Skip fallback for stream-required models (QwQ, Omni) that reject non-streaming
                    if ((isIdleTimeout || isTransient) && !request.streamRequired) {
                        try {
                            yield* this.nonStreamingFallback(url, headers, body, signal);
                            return;
                        }
                        catch {
                            // Deliberate: report the streaming failure, not the fallback's.
                            throw lastError;
                        }
                    }
                    throw lastError;
                }
                // Will retry
            }
        }
        if (lastError) {
            throw lastError;
        }
    }
    /**
     * Perform one streaming request guarded by an idle watchdog.
     *
     * The watchdog timer is re-armed for every received chunk. When it fires it
     * aborts the in-flight request, so a connection that silently stops
     * delivering data is interrupted after `streamIdleTimeoutMs` instead of
     * blocking until the overall `timeoutMs` deadline.
     *
     * @param {string} url - Endpoint to POST to.
     * @param {object} headers - Request headers.
     * @param {object} body - JSON-serializable request body.
     * @param {AbortSignal} [signal] - Caller cancellation.
     * @throws {Error} "Anthropic API error <status>: ..." (with `.status`) on a non-2xx response.
     * @throws {Error} "Anthropic API returned no response body" when the body is missing.
     * @throws {Error} "Stream idle timeout - no chunks received" when the watchdog fires.
     */
    async *streamWithWatchdog(url, headers, body, signal) {
        // Dedicated controller so the watchdog can cancel the request itself.
        const idleController = new AbortController();
        const abortSources = [AbortSignal.timeout(this.timeoutMs), idleController.signal];
        if (signal) {
            abortSources.push(signal);
        }
        const response = await fetch(url, {
            method: "POST",
            headers,
            body: JSON.stringify(body),
            signal: AbortSignal.any(abortSources),
        });
        if (!response.ok) {
            const errorBody = await response.text().catch(() => "");
            const err = new Error(`Anthropic API error ${response.status}: ${errorBody.slice(0, 500)}`);
            err.status = response.status;
            throw err;
        }
        if (!response.body) {
            throw new Error("Anthropic API returned no response body");
        }
        // CC parity: idle watchdog aborts if no chunks arrive for streamIdleTimeoutMs
        let idleTimer = null;
        let idleAborted = false;
        const resetIdleTimer = () => {
            if (idleTimer !== null) {
                clearTimeout(idleTimer);
            }
            idleTimer = setTimeout(() => {
                idleAborted = true;
                // Unblocks the pending body read; surfaced below as an idle timeout.
                idleController.abort();
            }, this.streamIdleTimeoutMs);
        };
        const clearIdleTimer = () => {
            if (idleTimer !== null) {
                clearTimeout(idleTimer);
                idleTimer = null;
            }
        };
        resetIdleTimer(); // Prime before stream starts
        try {
            yield* this.parseSSEStream(response.body, resetIdleTimer, () => idleAborted);
        }
        catch (err) {
            if (idleAborted) {
                // Translate the watchdog-triggered abort into the message the retry loop matches on.
                throw new Error("Stream idle timeout - no chunks received", { cause: err });
            }
            throw err;
        }
        finally {
            clearIdleTimer();
        }
        // Watchdog fired but the parse loop exited without throwing.
        if (idleAborted) {
            throw new Error("Stream idle timeout - no chunks received");
        }
    }
    /**
     * Non-streaming fallback (CC executeNonStreamingRequest parity).
     * Sends the same body with `stream: false`, after passing it through
     * `adjustParamsForNonStreaming`, and replays the JSON response as chunks.
     *
     * @param {string} url - Endpoint to POST to.
     * @param {object} headers - Request headers.
     * @param {object} body - Streaming request body to derive the fallback body from.
     * @param {AbortSignal} [signal] - Caller cancellation.
     * @throws {Error} "Anthropic API error <status>: ..." (with `.status`) on a non-2xx response.
     */
    async *nonStreamingFallback(url, headers, body, signal) {
        const nonStreamBody = adjustParamsForNonStreaming({ ...body, stream: false });
        const timeoutSignal = AbortSignal.timeout(NON_STREAMING_TIMEOUT_MS);
        const combinedSignal = signal
            ? AbortSignal.any([signal, timeoutSignal])
            : timeoutSignal;
        const response = await fetch(url, {
            method: "POST",
            headers,
            body: JSON.stringify(nonStreamBody),
            signal: combinedSignal,
        });
        if (!response.ok) {
            const errorBody = await response.text().catch(() => "");
            const err = new Error(`Anthropic API error ${response.status}: ${errorBody.slice(0, 500)}`);
            err.status = response.status;
            throw err;
        }
        const result = await response.json();
        yield* this.mapNonStreamingResponse(result);
    }
    /**
     * Convert a non-streaming API response to a chunk sequence:
     * one "usage" chunk (if `result.usage` exists), one chunk per text /
     * thinking / tool_use content block, then "done" (if `stop_reason` exists).
     *
     * @param {object} result - Parsed JSON response.
     */
    *mapNonStreamingResponse(result) {
        // Usage
        const usage = result.usage;
        if (usage) {
            yield {
                type: "usage",
                promptTokens: usage.input_tokens ?? 0,
                completionTokens: usage.output_tokens ?? 0,
                cacheReadTokens: usage.cache_read_input_tokens > 0
                    ? usage.cache_read_input_tokens
                    : undefined,
                cacheCreationTokens: usage.cache_creation_input_tokens > 0
                    ? usage.cache_creation_input_tokens
                    : undefined,
                reasoningTokens: usage.reasoning_input_tokens > 0
                    ? usage.reasoning_input_tokens
                    : undefined,
            };
        }
        // Content blocks
        const content = result.content;
        if (Array.isArray(content)) {
            // Tool calls are numbered independently of their position in `content`.
            let toolIndex = 0;
            for (const block of content) {
                const blockType = block.type;
                if (blockType === "text") {
                    yield { type: "delta", text: block.text };
                }
                else if (blockType === "thinking") {
                    yield { type: "reasoning_delta", text: block.thinking };
                }
                else if (blockType === "tool_use") {
                    yield {
                        type: "tool_call_delta",
                        index: toolIndex++,
                        id: block.id,
                        name: block.name,
                        arguments: JSON.stringify(block.input ?? {}),
                    };
                }
            }
        }
        // Stop reason
        const stopReason = result.stop_reason;
        if (stopReason) {
            yield { type: "done", finishReason: mapAnthropicStopReason(stopReason) };
        }
    }
    /**
     * Parse a server-sent-event byte stream and yield mapped chunks.
     *
     * Lines are split on "\n" and trimmed. `event:` lines set the current event
     * name, a blank line clears it, and `data:` lines are JSON-parsed and handed
     * to `mapEvent`. Lines whose payload is not valid JSON are skipped.
     *
     * @param {AsyncIterable<Uint8Array>} body - Response body.
     * @param {() => void} resetIdleTimer - Called for every received chunk.
     * @param {() => boolean} isIdleAborted - True once the idle watchdog has fired.
     */
    async *parseSSEStream(body, resetIdleTimer, isIdleAborted) {
        const decoder = new TextDecoder();
        let buffer = "";
        let currentEvent = "";
        // Track active content blocks for tool_use accumulation + signature
        const blocks = new Map();
        // CC parity: accumulated usage with >0 guard
        const accUsage = {
            inputTokens: 0,
            outputTokens: 0,
            cacheReadTokens: 0,
            cacheCreationTokens: 0,
            reasoningTokens: 0,
        };
        for await (const raw of body) {
            // A chunk that arrives after the watchdog fired is discarded; the
            // caller reports the idle timeout.
            if (isIdleAborted()) {
                break;
            }
            // CC parity: reset idle watchdog on every chunk
            resetIdleTimer();
            buffer += decoder.decode(raw, { stream: true });
            let newlineIdx;
            while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
                const line = buffer.slice(0, newlineIdx).trim();
                buffer = buffer.slice(newlineIdx + 1);
                if (!line) {
                    currentEvent = "";
                    continue;
                }
                // SSE spec: field name is before colon, value after colon with optional leading space.
                // Some providers (Qwen) omit the space: "event:message_start" vs "event: message_start"
                if (line.startsWith("event:")) {
                    currentEvent = line.slice(6).trimStart();
                    continue;
                }
                if (!line.startsWith("data:")) {
                    continue;
                }
                const data = line.slice(5).trimStart();
                let parsed;
                try {
                    parsed = JSON.parse(data);
                }
                catch {
                    // Non-JSON data payloads are ignored.
                    continue;
                }
                yield* this.mapEvent(currentEvent, parsed, blocks, accUsage);
            }
        }
    }
    /**
     * Map one SSE event to zero or more chunks.
     *
     * @param {string} event - SSE event name.
     * @param {object} data - Parsed `data:` payload.
     * @param {Map<number, object>} blocks - Open content blocks keyed by index (mutated).
     * @param {object} accUsage - Running usage totals (mutated).
     * @throws {Error} "Anthropic stream error: ..." for an "error" event.
     */
    *mapEvent(event, data, blocks, accUsage) {
        // Snapshot of the running totals; zero cache/reasoning counts are reported as undefined.
        const usageChunk = () => ({
            type: "usage",
            promptTokens: accUsage.inputTokens,
            completionTokens: accUsage.outputTokens,
            cacheReadTokens: accUsage.cacheReadTokens > 0 ? accUsage.cacheReadTokens : undefined,
            cacheCreationTokens: accUsage.cacheCreationTokens > 0 ? accUsage.cacheCreationTokens : undefined,
            reasoningTokens: accUsage.reasoningTokens > 0 ? accUsage.reasoningTokens : undefined,
        });
        switch (event) {
            case "message_start": {
                const usage = data.message?.usage;
                if (usage) {
                    // CC parity: update accumulated usage with >0 guard
                    updateAccumulatedUsage(accUsage, usage);
                    yield usageChunk();
                }
                break;
            }
            case "content_block_start": {
                const index = data.index;
                const block = data.content_block;
                if (!block) {
                    break;
                }
                const blockType = block.type;
                blocks.set(index, {
                    type: blockType,
                    id: block.id,
                    name: block.name,
                    // CC parity: initialize signature for thinking blocks
                    signature: blockType === "thinking" ? "" : undefined,
                });
                if (blockType === "tool_use") {
                    // Announce the tool call; its arguments follow as input_json_delta events.
                    yield {
                        type: "tool_call_delta",
                        index,
                        id: block.id,
                        name: block.name,
                        arguments: "",
                    };
                }
                break;
            }
            case "content_block_delta": {
                const index = data.index;
                const delta = data.delta;
                if (!delta) {
                    break;
                }
                const deltaType = delta.type;
                if (deltaType === "text_delta") {
                    yield { type: "delta", text: delta.text };
                }
                else if (deltaType === "input_json_delta") {
                    yield {
                        type: "tool_call_delta",
                        index,
                        arguments: delta.partial_json,
                    };
                }
                else if (deltaType === "thinking_delta") {
                    yield {
                        type: "reasoning_delta",
                        text: delta.thinking,
                    };
                }
                else if (deltaType === "signature_delta") {
                    // CC parity: store signature on thinking block
                    const blockInfo = blocks.get(index);
                    if (blockInfo && typeof delta.signature === "string") {
                        blockInfo.signature = delta.signature;
                    }
                }
                break;
            }
            case "content_block_stop": {
                const index = data.index;
                const stoppedBlock = blocks.get(index);
                // Emit complete thinking block with signature for passback
                if (stoppedBlock?.type === "thinking" && typeof stoppedBlock.signature === "string") {
                    yield {
                        type: "reasoning_block_complete",
                        thinking: "", // text already streamed via reasoning_delta
                        signature: stoppedBlock.signature,
                    };
                }
                blocks.delete(index);
                break;
            }
            case "message_delta": {
                const delta = data.delta;
                const usage = data.usage;
                if (usage) {
                    // CC parity: update with >0 guard (prevent delta zero overwrite)
                    updateAccumulatedUsage(accUsage, usage);
                    yield usageChunk();
                }
                if (delta?.stop_reason) {
                    yield { type: "done", finishReason: mapAnthropicStopReason(delta.stop_reason) };
                }
                break;
            }
            case "message_stop":
                break;
            case "error": {
                const error = data.error;
                throw new Error(`Anthropic stream error: ${error?.message ?? JSON.stringify(data)}`);
            }
        }
    }
}
|
|
570
|
+
/**
 * Merge a wire-format `usage` object into the running totals (CC updateUsage parity).
 *
 * Input, cache-read, cache-creation and reasoning counts are only taken when
 * the incoming value is greater than zero, so a later event carrying an
 * explicit 0 cannot wipe a real count. `output_tokens` is taken whenever it is
 * not null/undefined, because 0 at message_start is legitimate.
 *
 * @param {object} acc - Totals to mutate: inputTokens, outputTokens,
 *   cacheReadTokens, cacheCreationTokens, reasoningTokens.
 * @param {object} usage - Usage payload from the API.
 * @returns {void}
 */
function updateAccumulatedUsage(acc, usage) {
    // Copy usage[wireKey] into acc[accKey] only for strictly positive values.
    const takeIfPositive = (accKey, wireKey) => {
        const incoming = usage[wireKey];
        if (incoming > 0) {
            acc[accKey] = incoming;
        }
    };
    takeIfPositive("inputTokens", "input_tokens");
    acc.outputTokens = usage.output_tokens ?? acc.outputTokens;
    takeIfPositive("cacheReadTokens", "cache_read_input_tokens");
    takeIfPositive("cacheCreationTokens", "cache_creation_input_tokens");
    // DeepSeek / Anthropic reasoning tokens (thinking budget consumption)
    takeIfPositive("reasoningTokens", "reasoning_input_tokens");
}
|
|
588
|
+
// ── Stop reason normalization (Anthropic → OpenAI standard) ──────────────────
|
|
589
|
+
/**
 * Translate an Anthropic `stop_reason` into the OpenAI-style finish reason.
 *
 * "end_turn" and "stop_sequence" become "stop", "tool_use" becomes
 * "tool_calls", "max_tokens" becomes "length". Any other value is returned
 * unchanged.
 *
 * @param {string} reason - Stop reason reported by the API.
 * @returns {string} Normalized finish reason.
 */
function mapAnthropicStopReason(reason) {
    const normalized = new Map([
        ["end_turn", "stop"],
        ["stop_sequence", "stop"],
        ["tool_use", "tool_calls"],
        ["max_tokens", "length"],
    ]);
    if (normalized.has(reason)) {
        return normalized.get(reason);
    }
    return reason;
}
|
|
602
|
+
// ── Reasoning effort → thinking budget (CC paramsFromContext parity) ─────────
|
|
603
|
+
/**
 * Decide whether tool_reference blocks may be placed in tool_result content.
 *
 * CC parity: Haiku models do not support them; every other model name is
 * treated as supporting them.
 *
 * @param {string} model - Model identifier; matched case-insensitively.
 * @returns {boolean} false when the name contains "haiku", true otherwise.
 */
function modelSupportsToolReference(model) {
    const haikuPattern = /haiku/i;
    return haikuPattern.exec(model) === null;
}
|
|
610
|
+
/** Static thinking budgets (tokens) per reasoning-effort level. */
const THINKING_BUDGET = {
    high: 16000,
    medium: 8000,
    low: 4000,
};
/**
 * Map a reasoning effort level to a thinking budget_tokens value.
 *
 * When `maxTokens` is above 16384 the budget scales with it and is always
 * capped at `maxTokens - 1` (CC parity: getMaxThinkingTokensForModel() =
 * maxOutput - 1). Otherwise the static THINKING_BUDGET table is used.
 * Unrecognized effort values resolve to the static "high" budget on both paths.
 *
 * NOTE(review): on the static path the result is not clamped against a small
 * `maxTokens`; presumably the caller keeps budget_tokens below max_tokens —
 * confirm.
 *
 * @param {string} effort - "high", "medium" or "low".
 * @param {number} [maxTokens] - Maximum output tokens, when known.
 * @returns {number} Thinking budget in tokens.
 */
function mapReasoningEffortToBudget(effort, maxTokens) {
    if (maxTokens && maxTokens > 16384) {
        // Dynamic budget based on the model's actual max output (CC parity).
        const ceiling = maxTokens - 1;
        switch (effort) {
            case "high": return Math.min(ceiling, Math.max(16000, Math.floor(maxTokens * 0.5)));
            case "medium": return Math.min(ceiling, Math.max(8000, Math.floor(maxTokens * 0.25)));
            case "low": return Math.min(ceiling, 4000);
            default: return THINKING_BUDGET.high;
        }
    }
    // Own-property lookup only: a plain `THINKING_BUDGET[effort]` would resolve
    // inherited keys such as "constructor" or "toString" to functions.
    const isKnownEffort = Object.prototype.hasOwnProperty.call(THINKING_BUDGET, effort);
    return isKnownEffort ? THINKING_BUDGET[effort] : THINKING_BUDGET.high;
}
|
|
632
|
+
// ── System prompt → cache_control block array (CC buildSystemPromptBlocks) ───
|
|
633
|
+
/**
 * Produce the `system` request parameter, optionally marked for prompt caching.
 *
 * With caching enabled the prompt is wrapped in a single text block carrying
 * an ephemeral cache_control marker (CC parity: exactly one marker per request,
 * placed on the last block; no global scope).
 *
 * @param {string | undefined} system - System prompt text.
 * @param {boolean} enableCaching - Whether to emit the cache_control block form.
 * @returns {string | Array<object> | undefined} undefined when there is no
 *   prompt, the plain string when caching is off, otherwise a one-element
 *   block array.
 */
function buildSystemParam(system, enableCaching) {
    if (system && enableCaching) {
        const cachedBlock = {
            type: "text",
            text: system,
            cache_control: { type: "ephemeral" },
        };
        return [cachedBlock];
    }
    return system ? system : undefined;
}
|
|
652
|
+
// ── ensureToolResultPairing (CC conversation repair, pre-request) ────────────
|
|
653
|
+
/**
 * Repair tool_use / tool_result mismatches before sending to the API.
 *
 * CC parity — fixes these classes of problems:
 *   1. Orphaned tool_results (no matching tool_use) → stripped.
 *   2. Missing tool_results (tool_use with no result in the run of "tool"
 *      messages directly after the assistant turn) → synthetic error inserted
 *      right after the assistant message.
 *   3. Duplicate tool_use IDs → deduplicated across messages (first one wins).
 *   4. Surplus tool_results for an ID that has already been answered (a late
 *      real result arriving after a synthetic one, a repeated result, or a
 *      result that belonged to a deduplicated tool_use) → stripped, so every
 *      tool_use ends up with exactly one result.
 *
 * The input array and its messages are not mutated; an assistant message is
 * shallow-copied only when some of its tool calls were removed.
 *
 * @param {Array<object>} messages - OpenAI-style messages (role, content,
 *   tool_calls, tool_call_id).
 * @returns {Array<object>} New, repaired message list.
 */
function ensureToolResultPairing(messages) {
    const repaired = [];
    const seenToolUseIds = new Set();
    // IDs that already have a result (real or synthetic) in `repaired`.
    const answeredToolUseIds = new Set();
    for (let i = 0; i < messages.length; i++) {
        const msg = messages[i];
        if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
            // Dedup tool_use IDs across messages (CC allSeenToolUseIds parity).
            const uniqueCalls = msg.tool_calls.filter((tc) => {
                if (seenToolUseIds.has(tc.id))
                    return false;
                seenToolUseIds.add(tc.id);
                return true;
            });
            // Look ahead over the contiguous run of tool messages for results.
            const followingResultIds = new Set();
            for (let j = i + 1; j < messages.length && messages[j].role === "tool"; j++) {
                const resultId = messages[j].tool_call_id;
                if (resultId)
                    followingResultIds.add(resultId);
            }
            // Keep the original object when nothing was removed.
            repaired.push(uniqueCalls.length === msg.tool_calls.length
                ? msg
                : { ...msg, tool_calls: uniqueCalls });
            // CC parity: inject synthetic error tool_results for missing ones.
            for (const tc of uniqueCalls) {
                if (followingResultIds.has(tc.id))
                    continue;
                answeredToolUseIds.add(tc.id);
                repaired.push({
                    role: "tool",
                    tool_call_id: tc.id,
                    name: tc.function?.name,
                    content: "[Tool execution failed; output not available during conversation recovery]",
                });
            }
        }
        else if (msg.role === "tool") {
            const id = msg.tool_call_id ?? "";
            // Keep only the first result for a known tool_use; orphans and
            // surplus results are stripped silently (CC parity).
            if (seenToolUseIds.has(id) && !answeredToolUseIds.has(id)) {
                answeredToolUseIds.add(id);
                repaired.push(msg);
            }
        }
        else {
            repaired.push(msg);
        }
    }
    return repaired;
}
|
|
716
|
+
// ── Local media resolution for Anthropic ─────────────────────────────────────
|
|
717
|
+
/**
 * Pre-resolve local media URLs for Anthropic.
 *
 * Every local entry in `imageUrls` (user and tool messages) and every local
 * `fileIds[].id` (user messages only) is replaced by the value returned from
 * resolveMediaUrlViaUpload. Messages that need no change are returned as the
 * same object; when no message references local media at all, the input array
 * itself is returned.
 * NOTE(review): what resolveMediaUrlViaUpload returns (public URL vs. data
 * URL) is decided inside that helper, not here — confirm there.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @param {object} [uploadAdapter] - File upload adapter handed to the resolver.
 * @param {string} [apiKey] - Credential handed to the resolver.
 * @param {AbortSignal} [signal] - Forwarded to the resolver.
 * @returns {Promise<Array<object>>} Messages with local media references resolved.
 * @throws {Error} (as a rejection) when local media is present but
 *   `uploadAdapter` or `apiKey` is missing.
 */
async function resolveMessagesMediaForAnthropic(messages, uploadAdapter, apiKey, signal) {
    const hasLocalImage = (m) => m.imageUrls?.some(isLocalUrl);
    const hasLocalFile = (m) => m.fileIds?.some((f) => isLocalUrl(f.id));
    if (!messages.some((m) => hasLocalImage(m) || hasLocalFile(m))) {
        return messages;
    }
    const canUpload = Boolean(uploadAdapter && apiKey);
    const upload = (url) => resolveMediaUrlViaUpload(url, { uploadAdapter, apiKey, signal });
    const resolveMessage = async (msg) => {
        // Only user messages and tool result messages carry resolvable media.
        if (msg.role !== "user" && msg.role !== "tool") {
            return msg;
        }
        const patch = {};
        if (hasLocalImage(msg)) {
            if (!canUpload) {
                throw new Error("FileUploadAdapter required for local image URLs. Configure OSS_ACCESS_KEY_ID/OSS_ACCESS_KEY_SECRET or QLOGICAGENT_HUB_URL.");
            }
            const pendingImages = msg.imageUrls.map((url) => (isLocalUrl(url) ? upload(url) : Promise.resolve(url)));
            patch.imageUrls = await Promise.all(pendingImages);
        }
        if (msg.role === "user" && hasLocalFile(msg)) {
            if (!canUpload) {
                throw new Error("FileUploadAdapter required for local file URLs. Configure OSS_ACCESS_KEY_ID/OSS_ACCESS_KEY_SECRET or QLOGICAGENT_HUB_URL.");
            }
            const pendingFiles = msg.fileIds.map(async (f) => {
                if (!isLocalUrl(f.id)) {
                    return f;
                }
                const resolvedId = await upload(f.id);
                return { ...f, id: resolvedId };
            });
            patch.fileIds = await Promise.all(pendingFiles);
        }
        if (Object.keys(patch).length === 0) {
            return msg;
        }
        return { ...msg, ...patch };
    };
    return Promise.all(messages.map((msg) => resolveMessage(msg)));
}
|
|
753
|
+
// ── Message format conversion (OpenAI → Anthropic) ───────────────────────────
|
|
754
|
+
/**
 * Convert OpenAI-style chat messages into the Anthropic Messages API shape.
 *
 * - "system" messages are concatenated (blank-line separated) into `system`.
 * - "user" messages become a plain string, or a content block array when they
 *   carry images (unless `quirks.filterImageBlocks`) or file attachments.
 * - "assistant" messages become a block array (signed thinking blocks first,
 *   then text, then tool_use blocks) when they have tool calls or thinking
 *   blocks; otherwise a plain string.
 * - "tool" messages become tool_result blocks inside a user message, merged
 *   into the preceding user message when that one already has block content.
 * - Messages with any other role are dropped.
 *
 * @param {Array<object>} messages - OpenAI-style messages.
 * @param {object} [quirks] - Provider quirks; reads `filterImageBlocks` and
 *   `filterThinkingBlocks`.
 * @param {string} [model] - Model name, used to decide tool_reference support.
 * @returns {{ system: (string | undefined), messages: Array<object> }}
 */
function convertMessages(messages, quirks = {}, model) {
    // Non-global regex, so reusing it across exec() calls is stateless.
    const base64DataUrl = /^data:([^;]+);base64,(.+)$/;

    /** Anthropic base64 source for a data: URL, or null when it is not base64-encoded. */
    const toBase64Source = (url) => {
        const parts = base64DataUrl.exec(url);
        return parts ? { type: "base64", media_type: parts[1], data: parts[2] } : null;
    };

    /** Block for one user imageUrls entry: PDFs become "document", the rest "image"; null when unusable. */
    const userMediaBlock = (url) => {
        if (url.startsWith("data:")) {
            const source = toBase64Source(url);
            if (!source)
                return null;
            return { type: source.media_type === "application/pdf" ? "document" : "image", source };
        }
        const isPdfUrl = url.endsWith(".pdf") || url.includes(".pdf?");
        return { type: isPdfUrl ? "document" : "image", source: { type: "url", url } };
    };

    /** Image block for one tool-result imageUrls entry; null for a non-base64 data: URL. */
    const toolImageBlock = (url) => {
        if (!url.startsWith("data:"))
            return { type: "image", source: { type: "url", url } };
        const source = toBase64Source(url);
        return source ? { type: "image", source } : null;
    };

    /** Block for one fileIds entry: PDF → document, oversized/other → text annotation; null when unusable. */
    const fileBlock = (f) => {
        const mime = f.mimeType || "";
        // Size-aware: oversized files always degrade to a text annotation (agent uses tools).
        if (f.size != null && f.size > MEDIA_MAX_UPLOAD_SIZE) {
            const sizeLabel = `${(f.size / (1024 * 1024)).toFixed(1)}MB`;
            return { type: "text", text: `[Attached: ${f.id} (${mime || "unknown"}, ${sizeLabel}) — file too large for direct vision, use tools to process]` };
        }
        if (mime === "application/pdf" || f.id.endsWith(".pdf")) {
            if (!f.id.startsWith("data:"))
                return { type: "document", source: { type: "url", url: f.id } };
            const source = toBase64Source(f.id);
            return source ? { type: "document", source: { ...source, media_type: "application/pdf" } } : null;
        }
        // Non-PDF files: annotate as text so the agent uses tools.
        return { type: "text", text: mime ? `[Attached: ${f.id} (${mime})]` : `[Attached: ${f.id}]` };
    };

    /**
     * Append thinking blocks that carry a signature.
     * CC parity (stripSignatureBlocks): blocks with an empty/missing signature
     * are skipped — DeepSeek may not emit signature_delta, and sending an
     * empty signature causes API 400.
     */
    const pushSignedThinking = (target, blocks) => {
        for (const tb of blocks) {
            if (!tb.signature)
                continue;
            target.push({ type: "thinking", thinking: tb.thinking, signature: tb.signature });
        }
    };

    /**
     * Parse tool-call arguments into a tool_use input object.
     * Unparseable JSON and anything that is not a plain object (null,
     * primitives, arrays) deliberately degrade to {} because tool_use.input
     * has to be an object.
     */
    const parseToolInput = (rawArguments) => {
        try {
            const parsed = JSON.parse(rawArguments);
            const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
            return isPlainObject ? parsed : {};
        }
        catch {
            return {};
        }
    };

    /** Content for a user message: plain string, or block array when media is attached. */
    const buildUserContent = (msg) => {
        const hasImages = msg.imageUrls && msg.imageUrls.length > 0 && !quirks.filterImageBlocks;
        const hasFiles = msg.fileIds && msg.fileIds.length > 0;
        if (!hasImages && !hasFiles)
            return msg.content ?? "";
        // Vision + PDF + documents: build content block array.
        const content = [];
        if (msg.content) {
            content.push({ type: "text", text: msg.content });
        }
        if (hasImages) {
            const multiImage = msg.imageUrls.length > 1;
            msg.imageUrls.forEach((url, imgIdx) => {
                // CC parity: inject [Image N] label before each image when multiple images are present.
                if (multiImage) {
                    content.push({ type: "text", text: `[Image ${imgIdx + 1}]` });
                }
                const block = userMediaBlock(url);
                if (block)
                    content.push(block);
            });
        }
        if (hasFiles) {
            for (const f of msg.fileIds) {
                const block = fileBlock(f);
                if (block)
                    content.push(block);
            }
        }
        return content;
    };

    /** Content for an assistant message: plain string, or block array with thinking/text/tool_use. */
    const buildAssistantContent = (msg) => {
        // Provider-aware thinking block filtering (CC/altcode parity):
        // some providers (GLM) don't support thinking blocks in passback.
        const thinkingBlocks = quirks.filterThinkingBlocks ? undefined : msg.thinkingBlocks;
        const hasToolCalls = msg.tool_calls && msg.tool_calls.length > 0;
        const hasThinking = thinkingBlocks && thinkingBlocks.length > 0;
        if (!hasToolCalls && !hasThinking)
            return msg.content ?? "";
        const content = [];
        // Thinking blocks must appear first (DeepSeek/Claude passback requirement).
        if (thinkingBlocks)
            pushSignedThinking(content, thinkingBlocks);
        if (msg.content) {
            content.push({ type: "text", text: msg.content });
        }
        if (hasToolCalls) {
            for (const tc of msg.tool_calls) {
                content.push({
                    type: "tool_use",
                    id: tc.id,
                    name: tc.function.name,
                    input: parseToolInput(tc.function.arguments),
                });
            }
        }
        return content;
    };

    /** tool_result block for one tool message. */
    const buildToolResultBlock = (msg) => {
        const text = msg.content ?? "";
        // AP2: prefer explicit is_error field from buildToolResultMessage,
        // fall back to content-prefix heuristic for backward compat.
        const isError = msg.is_error === true
            || text.startsWith("Error:") || text === SYNTHETIC_TOOL_RESULT_CONTENT;
        // CC parity: when the message carries toolReferences (from tool_search),
        // use tool_reference blocks instead of plain text.
        const toolRefs = msg.toolReferences;
        const useToolReference = toolRefs && toolRefs.length > 0 && !isError
            && (!model || modelSupportsToolReference(model));
        // Image URLs from tool results (e.g. a read tool reading an image file).
        const imgUrls = msg.imageUrls;
        let toolResultContent;
        if (useToolReference) {
            toolResultContent = toolRefs.map((name) => ({ type: "tool_reference", tool_name: name }));
        }
        else if (imgUrls && imgUrls.length > 0 && !isError) {
            // Include images as content blocks alongside text.
            const blocks = [];
            if (text)
                blocks.push({ type: "text", text });
            for (const url of imgUrls) {
                const imageBlock = toolImageBlock(url);
                if (imageBlock)
                    blocks.push(imageBlock);
            }
            toolResultContent = blocks;
        }
        else {
            toolResultContent = text;
        }
        return {
            type: "tool_result",
            tool_use_id: msg.tool_call_id ?? "",
            content: toolResultContent,
            ...(isError && { is_error: true }),
        };
    };

    let system;
    const out = [];
    for (const msg of messages) {
        switch (msg.role) {
            case "system":
                system = system ? `${system}\n\n${msg.content ?? ""}` : (msg.content ?? "");
                break;
            case "user":
                out.push({ role: "user", content: buildUserContent(msg) });
                break;
            case "assistant":
                out.push({ role: "assistant", content: buildAssistantContent(msg) });
                break;
            case "tool": {
                // CC parity (normalizeMessagesForAPI): merge consecutive tool_result
                // messages into one user message — the Anthropic API requires all
                // tool_results for one assistant turn in a single user message.
                const block = buildToolResultBlock(msg);
                const prev = out[out.length - 1];
                if (prev && prev.role === "user" && Array.isArray(prev.content)) {
                    prev.content.push(block);
                }
                else {
                    out.push({ role: "user", content: [block] });
                }
                break;
            }
            default:
                // Other roles have no Anthropic equivalent here and are dropped.
                break;
        }
    }
    return { system, messages: out };
}
|
|
976
|
+
/**
 * Convert an OpenAI-style function tool definition to the Anthropic tool shape.
 *
 * @param {object} tool - Tool definition; reads `tool.function.name`,
 *   `.description` and `.parameters`.
 * @returns {{ name: *, description: *, input_schema: object }} Anthropic tool
 *   entry; a missing parameter schema becomes an empty object schema.
 */
function convertToolDef(tool) {
    const { name, description, parameters } = tool.function;
    const input_schema = parameters ?? { type: "object", properties: {} };
    return { name, description, input_schema };
}
|
|
983
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
984
|
+
/**
 * Extract the HTTP status code from an "Anthropic API error NNN" message.
 *
 * @param {string} message - Error message text.
 * @returns {number | null} The three-digit status as a number, or null when
 *   the message does not contain the expected prefix.
 */
function parseHttpStatus(message) {
    const statusMatch = /Anthropic API error (\d{3})/.exec(message);
    if (statusMatch === null) {
        return null;
    }
    const [, statusDigits] = statusMatch;
    return Number.parseInt(statusDigits, 10);
}
|
|
988
|
+
/**
 * Clamp request parameters for a non-streaming call.
 *
 * CC adjustParamsForNonStreaming parity: `max_tokens` (8192 when it is not a
 * number) is capped at MAX_NON_STREAMING_TOKENS, and an enabled thinking
 * budget is re-constrained so that budget_tokens stays below the capped
 * max_tokens. The input object is not mutated.
 *
 * @param {object} params - Request parameters; reads `max_tokens` and `thinking`.
 * @returns {object} Shallow copy of `params` with the adjusted values.
 */
function adjustParamsForNonStreaming(params) {
    const requestedMax = typeof params.max_tokens === "number" ? params.max_tokens : 8192;
    const tokenCap = Math.min(requestedMax, MAX_NON_STREAMING_TOKENS);
    const result = { ...params, max_tokens: tokenCap };
    const { thinking } = result;
    const hasBudget = thinking?.type === "enabled" && thinking.budget_tokens;
    if (hasBudget) {
        const budget_tokens = Math.min(thinking.budget_tokens, tokenCap - 1);
        result.thinking = { ...thinking, budget_tokens };
    }
    return result;
}
|