openclaw-freerouter 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/LICENSE +21 -0
- package/README.md +103 -0
- package/index.ts +63 -0
- package/openclaw.plugin.json +25 -0
- package/package.json +37 -0
- package/src/auth.ts +128 -0
- package/src/config.ts +220 -0
- package/src/logger.ts +32 -0
- package/src/models.ts +142 -0
- package/src/provider.ts +676 -0
- package/src/router/config.ts +242 -0
- package/src/router/index.ts +114 -0
- package/src/router/rules.ts +299 -0
- package/src/router/selector.ts +117 -0
- package/src/router/types.ts +84 -0
- package/src/server.ts +381 -0
package/src/provider.ts
ADDED
@@ -0,0 +1,676 @@
/**
 * ClawRouter Provider: handles forwarding to backend APIs.
 * Supports: Anthropic Messages API, OpenAI-compatible (Kimi, OpenAI).
 * Zero external deps; uses native fetch + streams.
 */

import { getAuth } from "./auth.js";
import { getConfig, toInternalApiType, supportsAdaptiveThinking as configSupportsAdaptive, getThinkingBudget } from "./config.js";
import { logger } from "./logger.js";
import type { IncomingMessage, ServerResponse } from "node:http";

// --- Timeout Configuration ---
const TIER_TIMEOUTS: Record<string, number> = {
  SIMPLE: 30_000,
  MEDIUM: 60_000,
  COMPLEX: 120_000,
  REASONING: 120_000,
  EXPLICIT: 120_000,
};
const STREAM_STALL_TIMEOUT = 30_000;

function getTierTimeout(tier: string): number {
  return TIER_TIMEOUTS[tier] ?? 60_000;
}

export class TimeoutError extends Error {
  constructor(message: string) {
    super(message);
    this.name = "TimeoutError";
  }
}

// Provider configs loaded from openclaw.json
export type ProviderConfig = {
  baseUrl: string;
  api: "anthropic-messages" | "openai-completions";
  headers?: Record<string, string>;
};

// OpenAI tool types
export type OpenAIFunction = {
  name: string;
  description?: string;
  parameters?: Record<string, unknown>;
};

export type OpenAITool = {
  type: "function";
  function: OpenAIFunction;
};

export type OpenAIToolCall = {
  id: string;
  type: "function";
  function: { name: string; arguments: string };
};

// OpenAI-format message
export type ChatMessage = {
  role: "system" | "user" | "assistant" | "developer" | "tool";
  content: string | null | Array<{ type: string; text?: string; image_url?: { url: string } }>;
  tool_calls?: OpenAIToolCall[];
  tool_call_id?: string;
  name?: string;
};

export type ChatRequest = {
  model: string;
  messages: ChatMessage[];
  max_tokens?: number;
  temperature?: number;
  stream?: boolean;
  top_p?: number;
  stop?: string[];
  tools?: OpenAITool[];
  tool_choice?: unknown;
};

// Provider configs are loaded from freerouter.config.json via getProviderConfig()

/**
 * Get provider config from the loaded config file.
 */
function getProviderConfig(provider: string): ProviderConfig | undefined {
  const cfg = getConfig();
  const entry = cfg.providers[provider];
  if (!entry) return undefined;
  return {
    baseUrl: entry.baseUrl,
    api: toInternalApiType(entry.api),
    headers: entry.headers,
  };
}

/**
 * Parse a routed model ID like "anthropic/claude-opus-4-6" into provider + model parts.
 */
export function parseModelId(modelId: string): { provider: string; model: string } {
  const slash = modelId.indexOf("/");
  if (slash === -1) return { provider: "anthropic", model: modelId };
  return { provider: modelId.slice(0, slash), model: modelId.slice(slash + 1) };
}
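// For example (model IDs illustrative):
//   parseModelId("kimi/kimi-k2")    -> { provider: "kimi", model: "kimi-k2" }
//   parseModelId("claude-opus-4-6") -> { provider: "anthropic", model: "claude-opus-4-6" } (no slash defaults to anthropic)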

/**
 * Check if a model supports adaptive thinking (Opus 4.6+).
 */
function supportsAdaptiveThinking(modelId: string): boolean {
  return modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
}

/**
 * Get thinking config based on tier and model.
 */
function getThinkingConfig(tier: string, modelId: string): { type: string; budget_tokens?: number; effort?: string } | undefined {
  if (supportsAdaptiveThinking(modelId) && (tier === "COMPLEX" || tier === "REASONING")) {
    return { type: "adaptive" };
  }
  if (tier === "MEDIUM") {
    return { type: "enabled", budget_tokens: 4096 };
  }
  return undefined;
}
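// For example (anyModelId stands for any model without adaptive-thinking support):
//   getThinkingConfig("COMPLEX", "claude-opus-4-6") -> { type: "adaptive" }
//   getThinkingConfig("MEDIUM", anyModelId)         -> { type: "enabled", budget_tokens: 4096 }
//   getThinkingConfig("SIMPLE", anyModelId)         -> undefined (no thinking requested)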

/** Convert OpenAI tools to Anthropic tools format */
function convertToolsToAnthropic(tools: OpenAITool[]): Array<{ name: string; description?: string; input_schema: Record<string, unknown> }> {
  return tools.map(t => ({
    name: t.function.name,
    ...(t.function.description ? { description: t.function.description } : {}),
    input_schema: t.function.parameters ?? { type: "object", properties: {} },
  }));
}

/** Convert OpenAI tool_choice to Anthropic tool_choice format */
function convertToolChoiceToAnthropic(toolChoice: unknown): { type: string; name?: string } | undefined {
  if (toolChoice === "none") return { type: "none" };
  if (toolChoice === "auto" || toolChoice === undefined) return { type: "auto" };
  if (toolChoice === "required") return { type: "any" };
  if (typeof toolChoice === "object" && toolChoice !== null) {
    const tc = toolChoice as { type?: string; function?: { name: string } };
    if (tc.function?.name) return { type: "tool", name: tc.function.name };
  }
  return { type: "auto" };
}
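// Mapping summary ("get_weather" is a hypothetical tool name):
//   "none"                                -> { type: "none" }
//   "auto" or undefined                   -> { type: "auto" }
//   "required"                            -> { type: "any" }
//   { function: { name: "get_weather" } } -> { type: "tool", name: "get_weather" }
//   anything else                         -> { type: "auto" }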

/**
 * Convert OpenAI messages array to Anthropic messages format.
 * Handles system extraction, tool_calls, tool results, and content merging.
 */
function convertMessagesToAnthropic(
  openaiMessages: ChatMessage[]
): { system: string; messages: Array<{ role: string; content: unknown }> } {
  let systemContent = "";
  const messages: Array<{ role: string; content: unknown }> = [];

  for (let i = 0; i < openaiMessages.length; i++) {
    const msg = openaiMessages[i];

    // Extract system/developer messages
    if (msg.role === "system" || msg.role === "developer") {
      const text = typeof msg.content === "string"
        ? msg.content
        : (msg.content ?? []).filter((b: any) => b.type === "text").map((b: any) => b.text).join("\n");
      systemContent += (systemContent ? "\n" : "") + text;
      continue;
    }

    // Tool role -> tool_result content block (wrapped in user message)
    if (msg.role === "tool") {
      const toolResult = {
        type: "tool_result" as const,
        tool_use_id: msg.tool_call_id ?? "",
        content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content ?? ""),
      };
      // Merge with previous user message if it only has tool_results
      const last = messages[messages.length - 1];
      if (last && last.role === "user" && Array.isArray(last.content) &&
          (last.content as any[]).every((b: any) => b.type === "tool_result")) {
        (last.content as any[]).push(toolResult);
      } else {
        messages.push({ role: "user", content: [toolResult] });
      }
      continue;
    }

    // Assistant with tool_calls -> content blocks with tool_use
    if (msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0) {
      const contentBlocks: Array<Record<string, unknown>> = [];
      // Include text content first
      if (msg.content) {
        const text = typeof msg.content === "string" ? msg.content
          : msg.content.filter((b: any) => b.type === "text").map((b: any) => b.text).join("");
        if (text) contentBlocks.push({ type: "text", text });
      }
      // Add tool_use blocks
      for (const tc of msg.tool_calls) {
        let input: unknown = {};
        try { input = JSON.parse(tc.function.arguments); } catch { input = {}; }
        contentBlocks.push({ type: "tool_use", id: tc.id, name: tc.function.name, input });
      }
      messages.push({ role: "assistant", content: contentBlocks });
      continue;
    }

    // Regular user/assistant messages
    const text = typeof msg.content === "string"
      ? msg.content
      : (msg.content ?? []).filter((b: any) => b.type === "text").map((b: any) => b.text).join("\n");
    messages.push({ role: msg.role === "assistant" ? "assistant" : "user", content: text || "" });
  }

  return { system: systemContent, messages };
}
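// Sketch of the transformation (message contents illustrative):
//   input:  [{ role: "system", content: "Be brief." },
//            { role: "user", content: "hi" },
//            { role: "tool", tool_call_id: "call_1", content: "42" }]
//   output: system = "Be brief."
//           messages = [{ role: "user", content: "hi" },
//                       { role: "user", content: [{ type: "tool_result", tool_use_id: "call_1", content: "42" }] }]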

/**
 * Read a stream with stall detection. Aborts if no data arrives for STREAM_STALL_TIMEOUT.
 */
async function readStreamWithStallDetection(
  reader: ReadableStreamDefaultReader<Uint8Array>,
  onChunk: (value: Uint8Array) => void,
  abortController: AbortController,
): Promise<void> {
  while (true) {
    let stallTimerId: ReturnType<typeof setTimeout> | undefined;
    const stallPromise = new Promise<never>((_, reject) => {
      stallTimerId = setTimeout(() => {
        abortController.abort();
        reject(new TimeoutError(`Stream stalled: no data for ${STREAM_STALL_TIMEOUT / 1000}s`));
      }, STREAM_STALL_TIMEOUT);
    });

    try {
      const result = await Promise.race([reader.read(), stallPromise]);
      clearTimeout(stallTimerId);
      const { done, value } = result as ReadableStreamReadResult<Uint8Array>;
      if (done) break;
      if (value) onChunk(value);
    } catch (err) {
      clearTimeout(stallTimerId);
      throw err;
    }
  }
}
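// Note: callers share one AbortController between fetch() and this loop, so a stall
// both rejects the pending read (TimeoutError) and cancels the underlying HTTP request.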

/**
 * Forward a chat request to Anthropic Messages API, streaming back as OpenAI SSE.
 */
async function forwardToAnthropic(
  req: ChatRequest,
  modelName: string,
  tier: string,
  res: ServerResponse,
  stream: boolean,
): Promise<void> {
  const auth = getAuth("anthropic");
  if (!auth?.token) throw new Error("No Anthropic auth token");

  const config = getProviderConfig("anthropic");
  if (!config) throw new Error("Anthropic provider not configured");
  const { system: systemContent, messages } = convertMessagesToAnthropic(req.messages);

  const isOAuth = auth.token!.startsWith("sk-ant-oat");
  const thinkingConfig = getThinkingConfig(tier, modelName);
  const maxTokens = req.max_tokens ?? 4096;

  const body: Record<string, unknown> = {
    model: modelName,
    messages,
    max_tokens: (thinkingConfig?.type === "enabled" && thinkingConfig.budget_tokens) ? maxTokens + thinkingConfig.budget_tokens : maxTokens,
    stream: stream,
  };

  // Add tools if present
  if (req.tools && req.tools.length > 0) {
    body.tools = convertToolsToAnthropic(req.tools);
    if (req.tool_choice !== undefined) {
      body.tool_choice = convertToolChoiceToAnthropic(req.tool_choice);
    }
  }

  // System prompt
  if (isOAuth) {
    const systemBlocks: Array<{ type: string; text: string; cache_control?: { type: string } }> = [
      {
        type: "text",
        text: "You are Claude Code, Anthropic's official CLI for Claude.",
        cache_control: { type: "ephemeral" },
      },
    ];
    if (systemContent) {
      systemBlocks.push({ type: "text", text: systemContent, cache_control: { type: "ephemeral" } });
    }
    body.system = systemBlocks;
  } else if (systemContent) {
    body.system = systemContent;
  }

  if (thinkingConfig) {
    if (thinkingConfig.type === "adaptive") {
      body.thinking = { type: "adaptive" };
    } else {
      body.thinking = { type: "enabled", budget_tokens: thinkingConfig.budget_tokens };
    }
  }

  if (req.temperature !== undefined && !thinkingConfig) {
    body.temperature = req.temperature;
  }

  const url = `${config.baseUrl}/v1/messages`;
  const timeoutMs = getTierTimeout(tier);
  logger.info(`-> Anthropic: ${modelName} (tier=${tier}, thinking=${thinkingConfig?.type ?? "off"}, stream=${stream}, tools=${req.tools?.length ?? 0}, timeout=${timeoutMs / 1000}s)`);

  const authHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    "anthropic-version": "2023-06-01",
    "accept": "application/json",
  };

  if (isOAuth) {
    authHeaders["Authorization"] = `Bearer ${auth.token}`;
    authHeaders["anthropic-beta"] = "claude-code-20250219,oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14,interleaved-thinking-2025-05-14";
    authHeaders["user-agent"] = "claude-cli/2.1.2 (external, cli)";
    authHeaders["x-app"] = "cli";
    authHeaders["anthropic-dangerous-direct-browser-access"] = "true";
  } else {
    authHeaders["x-api-key"] = auth.token!;
  }

  // Timeout via AbortController
  const abortController = new AbortController();
  const timeoutId = setTimeout(() => abortController.abort(), timeoutMs);

  let response: Response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers: authHeaders,
      body: JSON.stringify(body),
      signal: abortController.signal,
    });
  } catch (err: any) {
    clearTimeout(timeoutId);
    if (err?.name === "AbortError" || abortController.signal.aborted) {
      logger.error(`\u23f1 TIMEOUT: Anthropic ${modelName} after ${timeoutMs / 1000}s (tier=${tier})`);
      throw new TimeoutError(`Anthropic request timed out after ${timeoutMs / 1000}s (model=${modelName}, tier=${tier})`);
    }
    throw err;
  }

  if (!response.ok) {
    clearTimeout(timeoutId);
    const errText = await response.text();
    logger.error(`Anthropic ${response.status}: ${errText}`);
    throw new Error(`Anthropic API error ${response.status}: ${errText}`);
  }

  if (!stream) {
    clearTimeout(timeoutId);
    const data = await response.json() as {
      content: Array<{ type: string; text?: string; thinking?: string; id?: string; name?: string; input?: unknown }>;
      usage?: { input_tokens: number; output_tokens: number };
      model: string;
      stop_reason?: string;
    };

    const textContent = data.content
      .filter((b) => b.type === "text")
      .map((b) => b.text ?? "")
      .join("");

    // Convert tool_use blocks to OpenAI tool_calls
    const toolUseBlocks = data.content.filter((b) => b.type === "tool_use");
    const toolCalls: OpenAIToolCall[] = toolUseBlocks.map((b, idx) => ({
      id: b.id ?? `call_${Date.now()}_${idx}`,
      type: "function" as const,
      function: { name: b.name ?? "", arguments: JSON.stringify(b.input ?? {}) },
    }));

    const finishReason = data.stop_reason === "tool_use" ? "tool_calls"
      : data.stop_reason === "end_turn" ? "stop"
      : (data.stop_reason ?? "stop");

    const message: Record<string, unknown> = {
      role: "assistant",
      content: textContent || (toolCalls.length > 0 ? null : ""),
    };
    if (toolCalls.length > 0) message.tool_calls = toolCalls;

    const openaiResponse = {
      id: `chatcmpl-${Date.now()}`,
      object: "chat.completion",
      created: Math.floor(Date.now() / 1000),
      model: `freerouter/${modelName}`,
      choices: [{ index: 0, message, finish_reason: finishReason }],
      usage: {
        prompt_tokens: data.usage?.input_tokens ?? 0,
        completion_tokens: data.usage?.output_tokens ?? 0,
        total_tokens: (data.usage?.input_tokens ?? 0) + (data.usage?.output_tokens ?? 0),
      },
    };

    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify(openaiResponse));
    return;
  }

  // Streaming: convert Anthropic SSE to OpenAI SSE format
  res.writeHead(200, {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
  });

  clearTimeout(timeoutId); // Stall detection takes over for streaming
  const reader = response.body?.getReader();
  if (!reader) throw new Error("No response body");

  const decoder = new TextDecoder();
  let buffer = "";
  let insideThinking = false;
  // Track tool_use streaming state
  let currentBlockType: string | null = null;
  let currentToolIndex = -1;
  let stopReason: string | null = null;

  const makeChunk = (delta: Record<string, unknown>, finish: string | null = null) => ({
    id: `chatcmpl-${Date.now()}`,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1000),
    model: `freerouter/${modelName}`,
    choices: [{ index: 0, delta, finish_reason: finish }],
  });

  try {
    await readStreamWithStallDetection(reader, (value) => {
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";

      for (const line of lines) {
        if (!line.startsWith("data: ")) continue;
        const jsonStr = line.slice(6).trim();
        if (jsonStr === "[DONE]" || !jsonStr) continue;

        try {
          const event = JSON.parse(jsonStr);

          if (event.type === "content_block_start") {
            const block = event.content_block;
            if (block?.type === "thinking") {
              insideThinking = true;
              currentBlockType = "thinking";
            } else if (block?.type === "tool_use") {
              insideThinking = false;
              currentBlockType = "tool_use";
              currentToolIndex++;
              // Emit first tool_calls chunk with id and function name
              const chunk = makeChunk({
                tool_calls: [{
                  index: currentToolIndex,
                  id: block.id,
                  type: "function",
                  function: { name: block.name, arguments: "" },
                }],
              });
              res.write(`data: ${JSON.stringify(chunk)}\n\n`);
            } else {
              insideThinking = false;
              currentBlockType = block?.type ?? "text";
            }
            continue;
          }

          if (event.type === "content_block_stop") {
            insideThinking = false;
            currentBlockType = null;
            continue;
          }

          if (event.type === "content_block_delta") {
            if (insideThinking) continue;

            // Handle tool_use argument streaming
            if (currentBlockType === "tool_use" && event.delta?.type === "input_json_delta") {
              const chunk = makeChunk({
                tool_calls: [{
                  index: currentToolIndex,
                  function: { arguments: event.delta.partial_json ?? "" },
                }],
              });
              res.write(`data: ${JSON.stringify(chunk)}\n\n`);
              continue;
            }

            // Regular text delta
            const text = event.delta?.text;
            if (text) {
              res.write(`data: ${JSON.stringify(makeChunk({ content: text }))}\n\n`);
            }
          }

          if (event.type === "message_delta") {
            stopReason = event.delta?.stop_reason ?? null;
          }

          if (event.type === "message_stop") {
            const finish = stopReason === "tool_use" ? "tool_calls" : "stop";
            res.write(`data: ${JSON.stringify(makeChunk({}, finish))}\n\n`);
          }
        } catch {
          // skip unparseable lines
        }
      }
    }, abortController);
  } catch (err) {
    if (err instanceof TimeoutError) {
      logger.error(`\u23f1 STREAM STALL: Anthropic ${modelName} - ${(err as Error).message}`);
    }
    throw err;
  } finally {
    if (!res.writableEnded) {
      res.write("data: [DONE]\n\n");
      res.end();
    }
  }
}
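// Illustrative SSE translation performed above (payloads abridged):
//   in:  data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hi"}}
//   out: data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}],...}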

/**
 * Forward a chat request to OpenAI-compatible API (Kimi), streaming back as-is.
 */
async function forwardToOpenAI(
  req: ChatRequest,
  provider: string,
  modelName: string,
  tier: string,
  res: ServerResponse,
  stream: boolean,
): Promise<void> {
  const auth = getAuth(provider);
  if (!auth?.apiKey) throw new Error(`No API key for ${provider}`);

  const config = getProviderConfig(provider);
  if (!config) throw new Error(`Unknown provider: ${provider}`);

  const body: Record<string, unknown> = {
    model: modelName,
    messages: req.messages,
    stream: stream,
  };

  if (req.max_tokens) body.max_tokens = req.max_tokens;
  if (req.temperature !== undefined) body.temperature = req.temperature;
  if (req.top_p !== undefined) body.top_p = req.top_p;

  const url = `${config.baseUrl}/chat/completions`;
  logger.info(`-> ${provider}: ${modelName} (tier=${tier}, stream=${stream})`);

  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${auth.apiKey}`,
    ...config.headers,
  };

  const timeoutMs = getTierTimeout(tier);
  const abortController = new AbortController();
  const timeoutId = setTimeout(() => abortController.abort(), timeoutMs);

  let response: Response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: abortController.signal,
    });
  } catch (err: any) {
    clearTimeout(timeoutId);
    if (err?.name === "AbortError" || abortController.signal.aborted) {
      logger.error(`\u23f1 TIMEOUT: ${provider} ${modelName} after ${timeoutMs / 1000}s (tier=${tier})`);
      throw new TimeoutError(`${provider} request timed out after ${timeoutMs / 1000}s (model=${modelName}, tier=${tier})`);
    }
    throw err;
  }

  if (!response.ok) {
    clearTimeout(timeoutId);
    const errText = await response.text();
    logger.error(`${provider} ${response.status}: ${errText}`);
    throw new Error(`${provider} API error ${response.status}: ${errText}`);
  }

  clearTimeout(timeoutId);

  if (!stream) {
    const data = await response.json() as Record<string, unknown>;
    if (data.model) data.model = `freerouter/${modelName}`;
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify(data));
    return;
  }

  // Streaming: pass through SSE with model name rewrite
  res.writeHead(200, {
    "Content-Type": "text/event-stream",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
  });

  const reader = response.body?.getReader();
  if (!reader) throw new Error("No response body");

  const decoder = new TextDecoder();
  let buffer = "";

  try {
    await readStreamWithStallDetection(reader, (value) => {
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";

      for (const line of lines) {
        if (line.startsWith("data: ")) {
          const jsonStr = line.slice(6).trim();
          if (jsonStr === "[DONE]") {
            res.write("data: [DONE]\n\n");
            continue;
          }
          try {
            const chunk = JSON.parse(jsonStr);
            if (chunk.model) chunk.model = `freerouter/${modelName}`;
            res.write(`data: ${JSON.stringify(chunk)}\n\n`);
          } catch {
            res.write(line + "\n");
          }
        } else if (line.trim()) {
          res.write(line + "\n");
        } else {
          res.write("\n");
        }
      }
    }, abortController);
  } catch (err) {
    if (err instanceof TimeoutError) {
      logger.error(`\u23f1 STREAM STALL: ${provider} ${modelName} - ${(err as Error).message}`);
    }
    throw err;
  } finally {
    if (!res.writableEnded) {
      res.write("data: [DONE]\n\n");
      res.end();
    }
  }
}
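// Unlike the Anthropic path, no format conversion happens here; the only rewrite is the
// model field, e.g. {"model":"kimi-k2",...} -> {"model":"freerouter/kimi-k2",...} (ID illustrative).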

/**
 * Forward a chat completion request to the appropriate backend.
 */
export async function forwardRequest(
  chatReq: ChatRequest,
  routedModel: string,
  tier: string,
  res: ServerResponse,
  stream: boolean,
): Promise<void> {
  const { provider, model } = parseModelId(routedModel);

  const providerConfig = getProviderConfig(provider);
  if (!providerConfig) {
    throw new Error(`Unsupported provider: ${provider}`);
  }

  if (providerConfig.api === "anthropic-messages") {
    await forwardToAnthropic(chatReq, model, tier, res, stream);
  } else {
    await forwardToOpenAI(chatReq, provider, model, tier, res, stream);
  }
}
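
// Minimal wiring sketch (illustrative; the actual HTTP entry point is src/server.ts):
//
//   import { forwardRequest, TimeoutError } from "./provider.js";
//
//   // after the router has picked a model and tier for an incoming chat request:
//   try {
//     await forwardRequest(chatReq, "anthropic/claude-opus-4-6", "COMPLEX", res, true);
//   } catch (err) {
//     if (err instanceof TimeoutError) {
//       // map to a 504 if headers are not sent yet; otherwise the SSE stream was already closed
//     }
//   }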