@khanglvm/llm-router 1.3.1 → 2.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/README.md +337 -41
- package/package.json +19 -3
- package/src/cli/router-module.js +7331 -3805
- package/src/cli/wrangler-toml.js +1 -1
- package/src/cli-entry.js +162 -24
- package/src/node/amp-client-config.js +426 -0
- package/src/node/coding-tool-config.js +763 -0
- package/src/node/config-store.js +49 -18
- package/src/node/instance-state.js +213 -12
- package/src/node/listen-port.js +5 -37
- package/src/node/local-server-settings.js +122 -0
- package/src/node/local-server.js +3 -2
- package/src/node/provider-probe.js +13 -0
- package/src/node/start-command.js +282 -40
- package/src/node/startup-manager.js +64 -29
- package/src/node/web-command.js +106 -0
- package/src/node/web-console-assets.js +26 -0
- package/src/node/web-console-client.js +56 -0
- package/src/node/web-console-dev-assets.js +258 -0
- package/src/node/web-console-server.js +3146 -0
- package/src/node/web-console-styles.generated.js +1 -0
- package/src/node/web-console-ui/config-editor-utils.js +616 -0
- package/src/node/web-console-ui/lib/utils.js +6 -0
- package/src/node/web-console-ui/rate-limit-utils.js +144 -0
- package/src/node/web-console-ui/select-search-utils.js +36 -0
- package/src/runtime/codex-request-transformer.js +46 -5
- package/src/runtime/codex-response-transformer.js +268 -35
- package/src/runtime/config.js +1394 -35
- package/src/runtime/handler/amp-gemini.js +913 -0
- package/src/runtime/handler/amp-response.js +308 -0
- package/src/runtime/handler/amp.js +290 -0
- package/src/runtime/handler/auth.js +17 -2
- package/src/runtime/handler/provider-call.js +168 -50
- package/src/runtime/handler/provider-translation.js +937 -26
- package/src/runtime/handler/request.js +149 -6
- package/src/runtime/handler/route-debug.js +22 -1
- package/src/runtime/handler.js +449 -9
- package/src/runtime/subscription-auth.js +1 -6
- package/src/shared/local-router-defaults.js +62 -0
- package/src/translator/index.js +3 -1
- package/src/translator/request/openai-to-claude.js +217 -6
- package/src/translator/response/openai-to-claude.js +206 -58
|
@@ -10,7 +10,15 @@ import {
|
|
|
10
10
|
import { claudeToOpenAINonStreamResponse } from "../../translator/response/claude-to-openai.js";
|
|
11
11
|
import { shouldRetryStatus } from "./fallback.js";
|
|
12
12
|
import { jsonResponse, passthroughResponseWithCors } from "./http.js";
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
convertClaudeNonStreamToOpenAIResponses,
|
|
15
|
+
convertOpenAINonStreamToClaude,
|
|
16
|
+
handleClaudeStreamToOpenAI,
|
|
17
|
+
handleClaudeStreamToOpenAIResponses,
|
|
18
|
+
handleOpenAIStreamToClaude,
|
|
19
|
+
normalizeClaudePassthroughStream
|
|
20
|
+
} from "./provider-translation.js";
|
|
21
|
+
import { maybeRewriteAmpClientResponse } from "./amp-response.js";
|
|
14
22
|
import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
|
|
15
23
|
import { applyReasoningEffortMapping } from "./reasoning-effort.js";
|
|
16
24
|
import { resolveUpstreamTimeoutMs } from "./request.js";
|
|
@@ -22,6 +30,7 @@ import {
|
|
|
22
30
|
extractCodexFinalResponse,
|
|
23
31
|
handleCodexStreamToOpenAI
|
|
24
32
|
} from "../codex-response-transformer.js";
|
|
33
|
+
import { toBoolean } from "./utils.js";
|
|
25
34
|
|
|
26
35
|
async function toProviderError(response) {
|
|
27
36
|
const raw = await response.text();
|
|
@@ -76,38 +85,45 @@ async function adaptProviderResponse({
|
|
|
76
85
|
translate,
|
|
77
86
|
sourceFormat,
|
|
78
87
|
targetFormat,
|
|
79
|
-
fallbackModel
|
|
88
|
+
fallbackModel,
|
|
89
|
+
requestKind,
|
|
90
|
+
requestBody,
|
|
91
|
+
clientType
|
|
80
92
|
}) {
|
|
93
|
+
const buildSuccessResponse = async (resultResponse) => ({
|
|
94
|
+
ok: true,
|
|
95
|
+
status: 200,
|
|
96
|
+
retryable: false,
|
|
97
|
+
response: await maybeRewriteAmpClientResponse(resultResponse, {
|
|
98
|
+
clientType,
|
|
99
|
+
requestBody,
|
|
100
|
+
stream
|
|
101
|
+
})
|
|
102
|
+
});
|
|
103
|
+
|
|
81
104
|
if (stream) {
|
|
82
105
|
if (!translate) {
|
|
83
|
-
return {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
};
|
|
106
|
+
return buildSuccessResponse(
|
|
107
|
+
sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.CLAUDE
|
|
108
|
+
? normalizeClaudePassthroughStream(response)
|
|
109
|
+
: passthroughResponseWithCors(response, {
|
|
110
|
+
"Content-Type": "text/event-stream",
|
|
111
|
+
"Cache-Control": "no-cache",
|
|
112
|
+
Connection: "keep-alive"
|
|
113
|
+
})
|
|
114
|
+
);
|
|
93
115
|
}
|
|
94
116
|
|
|
95
117
|
if (sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.OPENAI) {
|
|
96
|
-
return {
|
|
97
|
-
ok: true,
|
|
98
|
-
status: 200,
|
|
99
|
-
retryable: false,
|
|
100
|
-
response: handleOpenAIStreamToClaude(response)
|
|
101
|
-
};
|
|
118
|
+
return buildSuccessResponse(handleOpenAIStreamToClaude(response));
|
|
102
119
|
}
|
|
103
120
|
|
|
104
121
|
if (sourceFormat === FORMATS.OPENAI && targetFormat === FORMATS.CLAUDE) {
|
|
105
|
-
return {
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
};
|
|
122
|
+
return buildSuccessResponse(
|
|
123
|
+
requestKind === "responses"
|
|
124
|
+
? handleClaudeStreamToOpenAIResponses(response, requestBody, fallbackModel)
|
|
125
|
+
: handleClaudeStreamToOpenAI(response)
|
|
126
|
+
);
|
|
111
127
|
}
|
|
112
128
|
|
|
113
129
|
return {
|
|
@@ -126,12 +142,7 @@ async function adaptProviderResponse({
|
|
|
126
142
|
}
|
|
127
143
|
|
|
128
144
|
if (!translate) {
|
|
129
|
-
return {
|
|
130
|
-
ok: true,
|
|
131
|
-
status: 200,
|
|
132
|
-
retryable: false,
|
|
133
|
-
response: passthroughResponseWithCors(response)
|
|
134
|
-
};
|
|
145
|
+
return buildSuccessResponse(passthroughResponseWithCors(response));
|
|
135
146
|
}
|
|
136
147
|
|
|
137
148
|
const raw = await response.text();
|
|
@@ -152,21 +163,15 @@ async function adaptProviderResponse({
|
|
|
152
163
|
}
|
|
153
164
|
|
|
154
165
|
if (sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.OPENAI) {
|
|
155
|
-
return {
|
|
156
|
-
ok: true,
|
|
157
|
-
status: 200,
|
|
158
|
-
retryable: false,
|
|
159
|
-
response: jsonResponse(convertOpenAINonStreamToClaude(parsed, fallbackModel))
|
|
160
|
-
};
|
|
166
|
+
return buildSuccessResponse(jsonResponse(convertOpenAINonStreamToClaude(parsed, fallbackModel)));
|
|
161
167
|
}
|
|
162
168
|
|
|
163
169
|
if (sourceFormat === FORMATS.OPENAI && targetFormat === FORMATS.CLAUDE) {
|
|
164
|
-
return {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
};
|
|
170
|
+
return buildSuccessResponse(
|
|
171
|
+
requestKind === "responses"
|
|
172
|
+
? jsonResponse(convertClaudeNonStreamToOpenAIResponses(parsed, requestBody, fallbackModel))
|
|
173
|
+
: jsonResponse(claudeToOpenAINonStreamResponse(parsed))
|
|
174
|
+
);
|
|
170
175
|
}
|
|
171
176
|
|
|
172
177
|
return {
|
|
@@ -184,13 +189,43 @@ async function adaptProviderResponse({
|
|
|
184
189
|
};
|
|
185
190
|
}
|
|
186
191
|
|
|
192
|
+
function isProviderDebugEnabled(env = {}) {
|
|
193
|
+
return toBoolean(
|
|
194
|
+
env?.LLM_ROUTER_DEBUG_ROUTING,
|
|
195
|
+
toBoolean(env?.LLM_ROUTER_DEBUG, false)
|
|
196
|
+
);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
function extractToolTypes(body) {
|
|
200
|
+
const tools = Array.isArray(body?.tools) ? body.tools : [];
|
|
201
|
+
return [...new Set(
|
|
202
|
+
tools
|
|
203
|
+
.map((tool) => String(tool?.type || "").trim())
|
|
204
|
+
.filter(Boolean)
|
|
205
|
+
)];
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
function logToolRouting({ env, clientType, candidate, originalBody, providerBody, sourceFormat, targetFormat } = {}) {
|
|
209
|
+
if (!isProviderDebugEnabled(env)) return;
|
|
210
|
+
|
|
211
|
+
const originalToolTypes = extractToolTypes(originalBody);
|
|
212
|
+
const providerToolTypes = extractToolTypes(providerBody);
|
|
213
|
+
if (originalToolTypes.length === 0 && providerToolTypes.length === 0) return;
|
|
214
|
+
|
|
215
|
+
console.warn(
|
|
216
|
+
`[llm-router] provider tool routing client=${clientType || "default"} candidate=${candidate?.providerId || "unknown"}/${candidate?.modelId || "unknown"} source=${sourceFormat} target=${targetFormat} original=${originalToolTypes.join(",") || "none"} upstream=${providerToolTypes.join(",") || "none"}`
|
|
217
|
+
);
|
|
218
|
+
}
|
|
219
|
+
|
|
187
220
|
export async function makeProviderCall({
|
|
188
221
|
body,
|
|
189
222
|
sourceFormat,
|
|
190
223
|
stream,
|
|
191
224
|
candidate,
|
|
225
|
+
requestKind,
|
|
192
226
|
requestHeaders,
|
|
193
|
-
env
|
|
227
|
+
env,
|
|
228
|
+
clientType
|
|
194
229
|
}) {
|
|
195
230
|
const provider = candidate.provider;
|
|
196
231
|
const targetFormat = candidate.targetFormat;
|
|
@@ -232,6 +267,15 @@ export async function makeProviderCall({
|
|
|
232
267
|
targetModel: candidate.backend,
|
|
233
268
|
requestHeaders
|
|
234
269
|
});
|
|
270
|
+
logToolRouting({
|
|
271
|
+
env,
|
|
272
|
+
clientType,
|
|
273
|
+
candidate,
|
|
274
|
+
originalBody: body,
|
|
275
|
+
providerBody,
|
|
276
|
+
sourceFormat,
|
|
277
|
+
targetFormat
|
|
278
|
+
});
|
|
235
279
|
|
|
236
280
|
if (isSubscriptionProvider(provider)) {
|
|
237
281
|
const subscriptionType = String(provider?.subscriptionType || provider?.subscription_type || "").trim().toLowerCase();
|
|
@@ -270,10 +314,62 @@ export async function makeProviderCall({
|
|
|
270
314
|
translate,
|
|
271
315
|
sourceFormat,
|
|
272
316
|
targetFormat,
|
|
273
|
-
fallbackModel
|
|
317
|
+
fallbackModel,
|
|
318
|
+
requestKind,
|
|
319
|
+
requestBody: body,
|
|
320
|
+
clientType
|
|
274
321
|
});
|
|
275
322
|
}
|
|
276
323
|
|
|
324
|
+
if (requestKind === "responses") {
|
|
325
|
+
if (stream) {
|
|
326
|
+
return {
|
|
327
|
+
ok: true,
|
|
328
|
+
status: 200,
|
|
329
|
+
retryable: false,
|
|
330
|
+
response: await maybeRewriteAmpClientResponse(
|
|
331
|
+
passthroughResponseWithCors(subscriptionResult.response, {
|
|
332
|
+
"Content-Type": "text/event-stream",
|
|
333
|
+
"Cache-Control": "no-cache",
|
|
334
|
+
Connection: "keep-alive"
|
|
335
|
+
}),
|
|
336
|
+
{
|
|
337
|
+
clientType,
|
|
338
|
+
requestBody: body,
|
|
339
|
+
stream
|
|
340
|
+
}
|
|
341
|
+
)
|
|
342
|
+
};
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
const parsedSubscriptionResponse = await extractCodexFinalResponse(subscriptionResult.response);
|
|
346
|
+
if (!parsedSubscriptionResponse) {
|
|
347
|
+
return {
|
|
348
|
+
ok: false,
|
|
349
|
+
status: 502,
|
|
350
|
+
retryable: true,
|
|
351
|
+
response: jsonResponse({
|
|
352
|
+
type: "error",
|
|
353
|
+
error: {
|
|
354
|
+
type: "api_error",
|
|
355
|
+
message: "Subscription provider stream did not contain a completed response payload."
|
|
356
|
+
}
|
|
357
|
+
}, 502)
|
|
358
|
+
};
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
return {
|
|
362
|
+
ok: true,
|
|
363
|
+
status: 200,
|
|
364
|
+
retryable: false,
|
|
365
|
+
response: await maybeRewriteAmpClientResponse(jsonResponse(parsedSubscriptionResponse), {
|
|
366
|
+
clientType,
|
|
367
|
+
requestBody: body,
|
|
368
|
+
stream
|
|
369
|
+
})
|
|
370
|
+
};
|
|
371
|
+
}
|
|
372
|
+
|
|
277
373
|
if (stream) {
|
|
278
374
|
const openAIStreamResponse = handleCodexStreamToOpenAI(subscriptionResult.response, {
|
|
279
375
|
fallbackModel
|
|
@@ -283,14 +379,22 @@ export async function makeProviderCall({
|
|
|
283
379
|
ok: true,
|
|
284
380
|
status: 200,
|
|
285
381
|
retryable: false,
|
|
286
|
-
response: handleOpenAIStreamToClaude(openAIStreamResponse)
|
|
382
|
+
response: await maybeRewriteAmpClientResponse(handleOpenAIStreamToClaude(openAIStreamResponse), {
|
|
383
|
+
clientType,
|
|
384
|
+
requestBody: body,
|
|
385
|
+
stream
|
|
386
|
+
})
|
|
287
387
|
};
|
|
288
388
|
}
|
|
289
389
|
return {
|
|
290
390
|
ok: true,
|
|
291
391
|
status: 200,
|
|
292
392
|
retryable: false,
|
|
293
|
-
response: openAIStreamResponse
|
|
393
|
+
response: await maybeRewriteAmpClientResponse(openAIStreamResponse, {
|
|
394
|
+
clientType,
|
|
395
|
+
requestBody: body,
|
|
396
|
+
stream
|
|
397
|
+
})
|
|
294
398
|
};
|
|
295
399
|
}
|
|
296
400
|
|
|
@@ -318,7 +422,14 @@ export async function makeProviderCall({
|
|
|
318
422
|
ok: true,
|
|
319
423
|
status: 200,
|
|
320
424
|
retryable: false,
|
|
321
|
-
response: jsonResponse(convertOpenAINonStreamToClaude(openAINonStreamResponse, fallbackModel))
|
|
425
|
+
response: await maybeRewriteAmpClientResponse(
|
|
426
|
+
jsonResponse(convertOpenAINonStreamToClaude(openAINonStreamResponse, fallbackModel)),
|
|
427
|
+
{
|
|
428
|
+
clientType,
|
|
429
|
+
requestBody: body,
|
|
430
|
+
stream
|
|
431
|
+
}
|
|
432
|
+
)
|
|
322
433
|
};
|
|
323
434
|
}
|
|
324
435
|
|
|
@@ -326,11 +437,15 @@ export async function makeProviderCall({
|
|
|
326
437
|
ok: true,
|
|
327
438
|
status: 200,
|
|
328
439
|
retryable: false,
|
|
329
|
-
response: jsonResponse(openAINonStreamResponse)
|
|
440
|
+
response: await maybeRewriteAmpClientResponse(jsonResponse(openAINonStreamResponse), {
|
|
441
|
+
clientType,
|
|
442
|
+
requestBody: body,
|
|
443
|
+
stream
|
|
444
|
+
})
|
|
330
445
|
};
|
|
331
446
|
}
|
|
332
447
|
|
|
333
|
-
const providerUrl = resolveProviderUrl(provider, targetFormat);
|
|
448
|
+
const providerUrl = resolveProviderUrl(provider, targetFormat, requestKind);
|
|
334
449
|
const headers = mergeCachingHeaders(
|
|
335
450
|
buildProviderHeaders(provider, env, targetFormat),
|
|
336
451
|
requestHeaders,
|
|
@@ -405,6 +520,9 @@ export async function makeProviderCall({
|
|
|
405
520
|
translate,
|
|
406
521
|
sourceFormat,
|
|
407
522
|
targetFormat,
|
|
408
|
-
fallbackModel: candidate.backend
|
|
523
|
+
fallbackModel: candidate.backend,
|
|
524
|
+
requestKind,
|
|
525
|
+
requestBody: body,
|
|
526
|
+
clientType
|
|
409
527
|
});
|
|
410
528
|
}
|