@khanglvm/llm-router 1.3.1 → 2.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/CHANGELOG.md +39 -0
  2. package/README.md +337 -41
  3. package/package.json +19 -3
  4. package/src/cli/router-module.js +7331 -3805
  5. package/src/cli/wrangler-toml.js +1 -1
  6. package/src/cli-entry.js +162 -24
  7. package/src/node/amp-client-config.js +426 -0
  8. package/src/node/coding-tool-config.js +763 -0
  9. package/src/node/config-store.js +49 -18
  10. package/src/node/instance-state.js +213 -12
  11. package/src/node/listen-port.js +5 -37
  12. package/src/node/local-server-settings.js +122 -0
  13. package/src/node/local-server.js +3 -2
  14. package/src/node/provider-probe.js +13 -0
  15. package/src/node/start-command.js +282 -40
  16. package/src/node/startup-manager.js +64 -29
  17. package/src/node/web-command.js +106 -0
  18. package/src/node/web-console-assets.js +26 -0
  19. package/src/node/web-console-client.js +56 -0
  20. package/src/node/web-console-dev-assets.js +258 -0
  21. package/src/node/web-console-server.js +3146 -0
  22. package/src/node/web-console-styles.generated.js +1 -0
  23. package/src/node/web-console-ui/config-editor-utils.js +616 -0
  24. package/src/node/web-console-ui/lib/utils.js +6 -0
  25. package/src/node/web-console-ui/rate-limit-utils.js +144 -0
  26. package/src/node/web-console-ui/select-search-utils.js +36 -0
  27. package/src/runtime/codex-request-transformer.js +46 -5
  28. package/src/runtime/codex-response-transformer.js +268 -35
  29. package/src/runtime/config.js +1394 -35
  30. package/src/runtime/handler/amp-gemini.js +913 -0
  31. package/src/runtime/handler/amp-response.js +308 -0
  32. package/src/runtime/handler/amp.js +290 -0
  33. package/src/runtime/handler/auth.js +17 -2
  34. package/src/runtime/handler/provider-call.js +168 -50
  35. package/src/runtime/handler/provider-translation.js +937 -26
  36. package/src/runtime/handler/request.js +149 -6
  37. package/src/runtime/handler/route-debug.js +22 -1
  38. package/src/runtime/handler.js +449 -9
  39. package/src/runtime/subscription-auth.js +1 -6
  40. package/src/shared/local-router-defaults.js +62 -0
  41. package/src/translator/index.js +3 -1
  42. package/src/translator/request/openai-to-claude.js +217 -6
  43. package/src/translator/response/openai-to-claude.js +206 -58
@@ -10,7 +10,15 @@ import {
10
10
  import { claudeToOpenAINonStreamResponse } from "../../translator/response/claude-to-openai.js";
11
11
  import { shouldRetryStatus } from "./fallback.js";
12
12
  import { jsonResponse, passthroughResponseWithCors } from "./http.js";
13
- import { convertOpenAINonStreamToClaude, handleClaudeStreamToOpenAI, handleOpenAIStreamToClaude } from "./provider-translation.js";
13
+ import {
14
+ convertClaudeNonStreamToOpenAIResponses,
15
+ convertOpenAINonStreamToClaude,
16
+ handleClaudeStreamToOpenAI,
17
+ handleClaudeStreamToOpenAIResponses,
18
+ handleOpenAIStreamToClaude,
19
+ normalizeClaudePassthroughStream
20
+ } from "./provider-translation.js";
21
+ import { maybeRewriteAmpClientResponse } from "./amp-response.js";
14
22
  import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
15
23
  import { applyReasoningEffortMapping } from "./reasoning-effort.js";
16
24
  import { resolveUpstreamTimeoutMs } from "./request.js";
@@ -22,6 +30,7 @@ import {
22
30
  extractCodexFinalResponse,
23
31
  handleCodexStreamToOpenAI
24
32
  } from "../codex-response-transformer.js";
33
+ import { toBoolean } from "./utils.js";
25
34
 
26
35
  async function toProviderError(response) {
27
36
  const raw = await response.text();
@@ -76,38 +85,45 @@ async function adaptProviderResponse({
76
85
  translate,
77
86
  sourceFormat,
78
87
  targetFormat,
79
- fallbackModel
88
+ fallbackModel,
89
+ requestKind,
90
+ requestBody,
91
+ clientType
80
92
  }) {
93
+ const buildSuccessResponse = async (resultResponse) => ({
94
+ ok: true,
95
+ status: 200,
96
+ retryable: false,
97
+ response: await maybeRewriteAmpClientResponse(resultResponse, {
98
+ clientType,
99
+ requestBody,
100
+ stream
101
+ })
102
+ });
103
+
81
104
  if (stream) {
82
105
  if (!translate) {
83
- return {
84
- ok: true,
85
- status: 200,
86
- retryable: false,
87
- response: passthroughResponseWithCors(response, {
88
- "Content-Type": "text/event-stream",
89
- "Cache-Control": "no-cache",
90
- Connection: "keep-alive"
91
- })
92
- };
106
+ return buildSuccessResponse(
107
+ sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.CLAUDE
108
+ ? normalizeClaudePassthroughStream(response)
109
+ : passthroughResponseWithCors(response, {
110
+ "Content-Type": "text/event-stream",
111
+ "Cache-Control": "no-cache",
112
+ Connection: "keep-alive"
113
+ })
114
+ );
93
115
  }
94
116
 
95
117
  if (sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.OPENAI) {
96
- return {
97
- ok: true,
98
- status: 200,
99
- retryable: false,
100
- response: handleOpenAIStreamToClaude(response)
101
- };
118
+ return buildSuccessResponse(handleOpenAIStreamToClaude(response));
102
119
  }
103
120
 
104
121
  if (sourceFormat === FORMATS.OPENAI && targetFormat === FORMATS.CLAUDE) {
105
- return {
106
- ok: true,
107
- status: 200,
108
- retryable: false,
109
- response: handleClaudeStreamToOpenAI(response)
110
- };
122
+ return buildSuccessResponse(
123
+ requestKind === "responses"
124
+ ? handleClaudeStreamToOpenAIResponses(response, requestBody, fallbackModel)
125
+ : handleClaudeStreamToOpenAI(response)
126
+ );
111
127
  }
112
128
 
113
129
  return {
@@ -126,12 +142,7 @@ async function adaptProviderResponse({
126
142
  }
127
143
 
128
144
  if (!translate) {
129
- return {
130
- ok: true,
131
- status: 200,
132
- retryable: false,
133
- response: passthroughResponseWithCors(response)
134
- };
145
+ return buildSuccessResponse(passthroughResponseWithCors(response));
135
146
  }
136
147
 
137
148
  const raw = await response.text();
@@ -152,21 +163,15 @@ async function adaptProviderResponse({
152
163
  }
153
164
 
154
165
  if (sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.OPENAI) {
155
- return {
156
- ok: true,
157
- status: 200,
158
- retryable: false,
159
- response: jsonResponse(convertOpenAINonStreamToClaude(parsed, fallbackModel))
160
- };
166
+ return buildSuccessResponse(jsonResponse(convertOpenAINonStreamToClaude(parsed, fallbackModel)));
161
167
  }
162
168
 
163
169
  if (sourceFormat === FORMATS.OPENAI && targetFormat === FORMATS.CLAUDE) {
164
- return {
165
- ok: true,
166
- status: 200,
167
- retryable: false,
168
- response: jsonResponse(claudeToOpenAINonStreamResponse(parsed))
169
- };
170
+ return buildSuccessResponse(
171
+ requestKind === "responses"
172
+ ? jsonResponse(convertClaudeNonStreamToOpenAIResponses(parsed, requestBody, fallbackModel))
173
+ : jsonResponse(claudeToOpenAINonStreamResponse(parsed))
174
+ );
170
175
  }
171
176
 
172
177
  return {
@@ -184,13 +189,43 @@ async function adaptProviderResponse({
184
189
  };
185
190
  }
186
191
 
192
+ function isProviderDebugEnabled(env = {}) {
193
+ return toBoolean(
194
+ env?.LLM_ROUTER_DEBUG_ROUTING,
195
+ toBoolean(env?.LLM_ROUTER_DEBUG, false)
196
+ );
197
+ }
198
+
199
+ function extractToolTypes(body) {
200
+ const tools = Array.isArray(body?.tools) ? body.tools : [];
201
+ return [...new Set(
202
+ tools
203
+ .map((tool) => String(tool?.type || "").trim())
204
+ .filter(Boolean)
205
+ )];
206
+ }
207
+
208
+ function logToolRouting({ env, clientType, candidate, originalBody, providerBody, sourceFormat, targetFormat } = {}) {
209
+ if (!isProviderDebugEnabled(env)) return;
210
+
211
+ const originalToolTypes = extractToolTypes(originalBody);
212
+ const providerToolTypes = extractToolTypes(providerBody);
213
+ if (originalToolTypes.length === 0 && providerToolTypes.length === 0) return;
214
+
215
+ console.warn(
216
+ `[llm-router] provider tool routing client=${clientType || "default"} candidate=${candidate?.providerId || "unknown"}/${candidate?.modelId || "unknown"} source=${sourceFormat} target=${targetFormat} original=${originalToolTypes.join(",") || "none"} upstream=${providerToolTypes.join(",") || "none"}`
217
+ );
218
+ }
219
+
187
220
  export async function makeProviderCall({
188
221
  body,
189
222
  sourceFormat,
190
223
  stream,
191
224
  candidate,
225
+ requestKind,
192
226
  requestHeaders,
193
- env
227
+ env,
228
+ clientType
194
229
  }) {
195
230
  const provider = candidate.provider;
196
231
  const targetFormat = candidate.targetFormat;
@@ -232,6 +267,15 @@ export async function makeProviderCall({
232
267
  targetModel: candidate.backend,
233
268
  requestHeaders
234
269
  });
270
+ logToolRouting({
271
+ env,
272
+ clientType,
273
+ candidate,
274
+ originalBody: body,
275
+ providerBody,
276
+ sourceFormat,
277
+ targetFormat
278
+ });
235
279
 
236
280
  if (isSubscriptionProvider(provider)) {
237
281
  const subscriptionType = String(provider?.subscriptionType || provider?.subscription_type || "").trim().toLowerCase();
@@ -270,10 +314,62 @@ export async function makeProviderCall({
270
314
  translate,
271
315
  sourceFormat,
272
316
  targetFormat,
273
- fallbackModel
317
+ fallbackModel,
318
+ requestKind,
319
+ requestBody: body,
320
+ clientType
274
321
  });
275
322
  }
276
323
 
324
+ if (requestKind === "responses") {
325
+ if (stream) {
326
+ return {
327
+ ok: true,
328
+ status: 200,
329
+ retryable: false,
330
+ response: await maybeRewriteAmpClientResponse(
331
+ passthroughResponseWithCors(subscriptionResult.response, {
332
+ "Content-Type": "text/event-stream",
333
+ "Cache-Control": "no-cache",
334
+ Connection: "keep-alive"
335
+ }),
336
+ {
337
+ clientType,
338
+ requestBody: body,
339
+ stream
340
+ }
341
+ )
342
+ };
343
+ }
344
+
345
+ const parsedSubscriptionResponse = await extractCodexFinalResponse(subscriptionResult.response);
346
+ if (!parsedSubscriptionResponse) {
347
+ return {
348
+ ok: false,
349
+ status: 502,
350
+ retryable: true,
351
+ response: jsonResponse({
352
+ type: "error",
353
+ error: {
354
+ type: "api_error",
355
+ message: "Subscription provider stream did not contain a completed response payload."
356
+ }
357
+ }, 502)
358
+ };
359
+ }
360
+
361
+ return {
362
+ ok: true,
363
+ status: 200,
364
+ retryable: false,
365
+ response: await maybeRewriteAmpClientResponse(jsonResponse(parsedSubscriptionResponse), {
366
+ clientType,
367
+ requestBody: body,
368
+ stream
369
+ })
370
+ };
371
+ }
372
+
277
373
  if (stream) {
278
374
  const openAIStreamResponse = handleCodexStreamToOpenAI(subscriptionResult.response, {
279
375
  fallbackModel
@@ -283,14 +379,22 @@ export async function makeProviderCall({
283
379
  ok: true,
284
380
  status: 200,
285
381
  retryable: false,
286
- response: handleOpenAIStreamToClaude(openAIStreamResponse)
382
+ response: await maybeRewriteAmpClientResponse(handleOpenAIStreamToClaude(openAIStreamResponse), {
383
+ clientType,
384
+ requestBody: body,
385
+ stream
386
+ })
287
387
  };
288
388
  }
289
389
  return {
290
390
  ok: true,
291
391
  status: 200,
292
392
  retryable: false,
293
- response: openAIStreamResponse
393
+ response: await maybeRewriteAmpClientResponse(openAIStreamResponse, {
394
+ clientType,
395
+ requestBody: body,
396
+ stream
397
+ })
294
398
  };
295
399
  }
296
400
 
@@ -318,7 +422,14 @@ export async function makeProviderCall({
318
422
  ok: true,
319
423
  status: 200,
320
424
  retryable: false,
321
- response: jsonResponse(convertOpenAINonStreamToClaude(openAINonStreamResponse, fallbackModel))
425
+ response: await maybeRewriteAmpClientResponse(
426
+ jsonResponse(convertOpenAINonStreamToClaude(openAINonStreamResponse, fallbackModel)),
427
+ {
428
+ clientType,
429
+ requestBody: body,
430
+ stream
431
+ }
432
+ )
322
433
  };
323
434
  }
324
435
 
@@ -326,11 +437,15 @@ export async function makeProviderCall({
326
437
  ok: true,
327
438
  status: 200,
328
439
  retryable: false,
329
- response: jsonResponse(openAINonStreamResponse)
440
+ response: await maybeRewriteAmpClientResponse(jsonResponse(openAINonStreamResponse), {
441
+ clientType,
442
+ requestBody: body,
443
+ stream
444
+ })
330
445
  };
331
446
  }
332
447
 
333
- const providerUrl = resolveProviderUrl(provider, targetFormat);
448
+ const providerUrl = resolveProviderUrl(provider, targetFormat, requestKind);
334
449
  const headers = mergeCachingHeaders(
335
450
  buildProviderHeaders(provider, env, targetFormat),
336
451
  requestHeaders,
@@ -405,6 +520,9 @@ export async function makeProviderCall({
405
520
  translate,
406
521
  sourceFormat,
407
522
  targetFormat,
408
- fallbackModel: candidate.backend
523
+ fallbackModel: candidate.backend,
524
+ requestKind,
525
+ requestBody: body,
526
+ clientType
409
527
  });
410
528
  }