@khanglvm/llm-router 2.0.0-beta.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +163 -426
  3. package/package.json +3 -3
  4. package/src/cli/router-module.js +2773 -2587
  5. package/src/cli-entry.js +32 -103
  6. package/src/node/activity-log.js +119 -0
  7. package/src/node/coding-tool-config.js +85 -11
  8. package/src/node/config-workflows.js +51 -12
  9. package/src/node/instance-state.js +1 -1
  10. package/src/node/litellm-context-catalog.js +184 -0
  11. package/src/node/local-server.js +23 -3
  12. package/src/node/port-reclaim.js +2 -2
  13. package/src/node/start-command.js +22 -22
  14. package/src/node/startup-manager.js +3 -3
  15. package/src/node/web-command.js +1 -1
  16. package/src/node/web-console-assets.js +1 -1
  17. package/src/node/web-console-client.js +34 -29
  18. package/src/node/web-console-server.js +420 -38
  19. package/src/node/web-console-styles.generated.js +1 -1
  20. package/src/node/web-console-ui/buffered-text-input.js +133 -0
  21. package/src/node/web-console-ui/config-editor-utils.js +57 -4
  22. package/src/node/web-console-ui/dropdown-placement.js +153 -0
  23. package/src/node/web-console-ui/select-search-utils.js +6 -0
  24. package/src/node/web-console-ui/transient-integer-input-utils.js +12 -0
  25. package/src/runtime/balancer.js +78 -1
  26. package/src/runtime/codex-request-transformer.js +16 -7
  27. package/src/runtime/config.js +448 -12
  28. package/src/runtime/handler/amp-response.js +5 -3
  29. package/src/runtime/handler/amp-web-search.js +2232 -0
  30. package/src/runtime/handler/fallback.js +30 -2
  31. package/src/runtime/handler/provider-call.js +353 -36
  32. package/src/runtime/handler/provider-translation.js +14 -0
  33. package/src/runtime/handler/request.js +128 -2
  34. package/src/runtime/handler/route-debug.js +36 -0
  35. package/src/runtime/handler.js +210 -20
  36. package/src/runtime/subscription-provider.js +1 -1
  37. package/src/shared/coding-tool-bindings.js +49 -0
  38. package/src/shared/local-router-defaults.js +62 -0
  39. package/src/translator/request/claude-to-openai.js +43 -0
@@ -3,11 +3,28 @@ import { extractAmpGeminiRouteInfo } from "./amp-gemini.js";
3
3
  import { toNonNegativeInteger } from "./utils.js";
4
4
 
5
5
  const DEFAULT_MAX_REQUEST_BODY_BYTES = 1 * 1024 * 1024;
6
+ const DEFAULT_RESPONSES_MAX_REQUEST_BODY_BYTES = 8 * 1024 * 1024;
6
7
  const MIN_MAX_REQUEST_BODY_BYTES = 4 * 1024;
7
8
  const MAX_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024;
8
9
  const DEFAULT_UPSTREAM_TIMEOUT_MS = 60_000;
9
10
  const MIN_UPSTREAM_TIMEOUT_MS = 1_000;
10
11
  const MAX_UPSTREAM_TIMEOUT_MS = 300_000;
12
+ const DEFAULT_OUTPUT_TOKEN_RESERVE = 1_024;
13
+ const AMP_ANTHROPIC_CONTEXT_BASELINE_TOKENS = 200_000;
14
+ const AMP_ANTHROPIC_CONTEXT_1M_TOKENS = 1_000_000;
15
+ const AMP_ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
16
+ const AMP_CONTEXT_HINTS_BY_MODEL = new Map([
17
+ ["free", { minimumContextTokens: 136_000, source: "amp:model:free" }],
18
+ ["rush", { minimumContextTokens: 136_000, source: "amp:model:rush" }],
19
+ ["smart", { minimumContextTokens: 168_000, source: "amp:model:smart" }],
20
+ ["deep", { minimumContextTokens: 272_000, source: "amp:model:deep" }],
21
+ ["large", { minimumContextTokens: 936_000, source: "amp:model:large" }],
22
+ ["claude-haiku-4-5-20251001", { minimumContextTokens: 136_000, source: "amp:model:claude-haiku-4-5-20251001" }],
23
+ ["claude-opus-4-6", { minimumContextTokens: 168_000, source: "amp:model:claude-opus-4-6" }],
24
+ ["openai/gpt-5.3-codex", { minimumContextTokens: 272_000, source: "amp:model:openai/gpt-5.3-codex" }],
25
+ ["claude-sonnet-4-6", { minimumContextTokens: 936_000, source: "amp:model:claude-sonnet-4-6" }],
26
+ ["gpt-5.3-codex", { minimumContextTokens: 968_000, source: "amp:model:gpt-5.3-codex" }]
27
+ ]);
11
28
  const AMP_API_PROVIDER_PREFIX = "/api/provider/";
12
29
  const AMP_MANAGEMENT_ROOT_PREFIXES = [
13
30
  "/auth",
@@ -133,10 +150,14 @@ function resolveAmpProviderRoute(path, method) {
133
150
  return null;
134
151
  }
135
152
 
136
- export function resolveMaxRequestBodyBytes(env = {}) {
153
+ export function resolveMaxRequestBodyBytes(env = {}, options = {}) {
154
+ const requestKind = String(options?.requestKind || "").trim().toLowerCase();
155
+ const fallbackLimit = requestKind === "responses"
156
+ ? DEFAULT_RESPONSES_MAX_REQUEST_BODY_BYTES
157
+ : DEFAULT_MAX_REQUEST_BODY_BYTES;
137
158
  const configured = toNonNegativeInteger(
138
159
  env?.LLM_ROUTER_MAX_REQUEST_BODY_BYTES,
139
- DEFAULT_MAX_REQUEST_BODY_BYTES
160
+ fallbackLimit
140
161
  );
141
162
  return Math.min(
142
163
  MAX_MAX_REQUEST_BODY_BYTES,
@@ -162,6 +183,26 @@ function parseContentLength(value) {
162
183
  return parsed;
163
184
  }
164
185
 
186
/**
 * Coerce an arbitrary value to a non-negative whole number.
 *
 * @param {unknown} value - Anything accepted by `Number(...)`.
 * @returns {number} The value rounded down, or 0 when it is not a finite
 *   number or is negative.
 */
function parseNonNegativeNumber(value) {
  const numeric = Number(value);
  const isUsable = Number.isFinite(numeric) && numeric >= 0;
  return isUsable ? Math.floor(numeric) : 0;
}
191
+
192
/**
 * Split a comma-separated header value into normalized tokens.
 *
 * @param {unknown} value - Raw header value; falsy values yield an empty list.
 * @returns {string[]} Trimmed, lower-cased, non-empty tokens in original order.
 */
function parseHeaderTokenList(value) {
  if (!value) return [];

  const tokens = [];
  for (const piece of String(value).split(",")) {
    const token = piece.trim().toLowerCase();
    if (token) tokens.push(token);
  }
  return tokens;
}
199
+
200
/**
 * Look up the context hint registered for a model name.
 *
 * The name is trimmed and lower-cased before it is used as a key into
 * `AMP_CONTEXT_HINTS_BY_MODEL`.
 *
 * @param {unknown} model - Requested model identifier.
 * @returns {{ minimumContextTokens: number, source: string } | null} The stored
 *   hint object, or null when the name is empty or has no entry.
 */
function lookupAmpContextHint(model) {
  const normalizedModel = String(model || "").trim().toLowerCase();
  return normalizedModel
    ? (AMP_CONTEXT_HINTS_BY_MODEL.get(normalizedModel) || null)
    : null;
}
205
+
165
206
  function createRequestBodyTooLargeError(maxBytes) {
166
207
  const error = new Error(`Request body exceeds ${maxBytes} bytes.`);
167
208
  error.code = "REQUEST_BODY_TOO_LARGE";
@@ -210,6 +251,91 @@ export async function parseJsonBodyWithLimit(request, maxBytes) {
210
251
  return JSON.parse(raw);
211
252
  }
212
253
 
254
/**
 * Produce a rough estimate of the context tokens a request body needs.
 *
 * The input estimate is derived from the JSON-serialized body: the larger of
 * (characters / 4) and (UTF-8 bytes / 3), both rounded up. The output reserve
 * is the largest of `max_output_tokens`, `max_completion_tokens` and
 * `max_tokens`, or `DEFAULT_OUTPUT_TOKEN_RESERVE` when none is positive. A
 * safety padding of 10% of the input estimate (at least 256) is added whenever
 * the input estimate is non-zero.
 *
 * @param {object} [body={}] - Parsed request body.
 * @returns {{
 *   estimatedInputTokens: number,
 *   requestedOutputTokens: number,
 *   safetyPaddingTokens: number,
 *   estimatedRequiredTokens: number
 * }} All zeros when `body` is not an object.
 */
export function estimateRequestContextTokens(body = {}) {
  const isObjectBody = Boolean(body) && typeof body === "object";
  if (!isObjectBody) {
    return {
      estimatedInputTokens: 0,
      requestedOutputTokens: 0,
      safetyPaddingTokens: 0,
      estimatedRequiredTokens: 0
    };
  }

  // Serialization can throw (e.g. cyclic structures); treat that as an empty body.
  let json;
  try {
    json = JSON.stringify(body) || "";
  } catch {
    json = "";
  }

  const utf8Length = json ? new TextEncoder().encode(json).byteLength : 0;
  const tokensByChars = Math.ceil(json.length / 4);
  const tokensByBytes = Math.ceil(utf8Length / 3);
  const estimatedInputTokens = Math.max(tokensByChars, tokensByBytes);

  const outputCaps = [
    body.max_output_tokens,
    body.max_completion_tokens,
    body.max_tokens
  ].map((cap) => parseNonNegativeNumber(cap));
  const largestCap = Math.max(...outputCaps);
  const requestedOutputTokens = largestCap > 0
    ? largestCap
    : DEFAULT_OUTPUT_TOKEN_RESERVE;

  let safetyPaddingTokens = 0;
  if (estimatedInputTokens > 0) {
    safetyPaddingTokens = Math.max(256, Math.ceil(estimatedInputTokens * 0.1));
  }

  return {
    estimatedInputTokens,
    requestedOutputTokens,
    safetyPaddingTokens,
    estimatedRequiredTokens: estimatedInputTokens + requestedOutputTokens + safetyPaddingTokens
  };
}
299
+
300
/**
 * Infer a minimum context-window requirement for requests from the "amp" client.
 *
 * Resolution order, first match wins:
 *   1. the `anthropic-beta` header lists `AMP_ANTHROPIC_CONTEXT_1M_BETA`;
 *   2. `body.model` has an entry in the model hint table;
 *   3. the provider hint is "anthropic" or the request kind is "messages";
 *   4. otherwise no requirement.
 *
 * @param {{ headers?: { get(name: string): (string | null) } }} request - Incoming request.
 * @param {object} [body={}] - Parsed request body; only `model` is read.
 * @param {object} [options={}] - Reads `clientType`, `providerHint`, `requestKind`.
 * @returns {{ minimumContextTokens: number, source: string }} A zero/empty
 *   requirement when the client is not "amp" or nothing matched.
 */
export function inferAmpContextRequirement(request, body = {}, options = {}) {
  const normalize = (value) => String(value || "").trim().toLowerCase();

  if (normalize(options?.clientType) !== "amp") {
    return { minimumContextTokens: 0, source: "" };
  }

  const providerHint = normalize(options?.providerHint);
  const requestKind = normalize(options?.requestKind);
  const modelHint = lookupAmpContextHint(body?.model);

  const betaHeader = request?.headers?.get("anthropic-beta")
    || request?.headers?.get("Anthropic-Beta");
  const betaFlags = parseHeaderTokenList(betaHeader);

  if (betaFlags.includes(AMP_ANTHROPIC_CONTEXT_1M_BETA)) {
    return {
      minimumContextTokens: AMP_ANTHROPIC_CONTEXT_1M_TOKENS,
      source: `amp:anthropic-beta:${AMP_ANTHROPIC_CONTEXT_1M_BETA}`
    };
  }

  // The stored hint object is returned as-is (same reference as in the table).
  if (modelHint) return modelHint;

  const isAnthropicRoute = providerHint === "anthropic" || requestKind === "messages";
  if (isAnthropicRoute) {
    return {
      minimumContextTokens: AMP_ANTHROPIC_CONTEXT_BASELINE_TOKENS,
      source: "amp:anthropic-route:200k-baseline"
    };
  }

  return { minimumContextTokens: 0, source: "" };
}
338
+
213
339
  export function isJsonRequest(request) {
214
340
  const contentType = String(request.headers.get("content-type") || "").toLowerCase();
215
341
  return contentType.includes("application/json") || contentType.includes("+json");
@@ -41,6 +41,9 @@ export function buildRouteDebugState(enabled, resolved) {
41
41
  selectedCandidate: "",
42
42
  skippedCandidates: [],
43
43
  attempts: [],
44
+ contextRequiredTokens: "",
45
+ contextHintSource: "",
46
+ contextRisk: "",
44
47
  toolTypes: "",
45
48
  toolRouting: ""
46
49
  };
@@ -81,6 +84,24 @@ export function setRouteToolDebug(debugState, toolTypes, toolRouting = "") {
81
84
  debugState.toolRouting = String(toolRouting || "").trim();
82
85
  }
83
86
 
87
/**
 * Record context-window diagnostics on the route debug state.
 *
 * Does nothing unless `debugState.enabled` is truthy. Each field is updated
 * independently:
 *   - `contextRequiredTokens` only for a finite, positive `requiredTokens`
 *     (stored as a floored decimal string);
 *   - `contextHintSource` only for a non-blank `hintSource`;
 *   - `contextRisk` whenever `risk` is not `undefined` (an empty value clears it).
 *
 * @param {object} debugState - Mutable debug state object.
 * @param {{ requiredTokens?: unknown, hintSource?: unknown, risk?: unknown }} [fields]
 * @returns {void}
 */
export function setRouteContextDebug(debugState, { requiredTokens, hintSource, risk } = {}) {
  if (!debugState?.enabled) return;

  const tokenCount = Number(requiredTokens);
  const hasTokenCount = Number.isFinite(tokenCount) && tokenCount > 0;
  if (hasTokenCount) {
    debugState.contextRequiredTokens = String(Math.floor(tokenCount));
  }

  const source = String(hintSource || "").trim();
  if (source) {
    debugState.contextHintSource = source;
  }

  if (risk === undefined) return;
  debugState.contextRisk = String(risk || "").trim();
}
104
+
84
105
  export function withRouteDebugHeaders(response, debugState) {
85
106
  if (!debugState?.enabled || !(response instanceof Response)) {
86
107
  return response;
@@ -107,6 +128,21 @@ export function withRouteDebugHeaders(response, debugState) {
107
128
  headers.set("x-llm-router-attempts", attempts);
108
129
  }
109
130
 
131
+ const contextRequiredTokens = toSafeHeaderValue(debugState.contextRequiredTokens);
132
+ if (contextRequiredTokens) {
133
+ headers.set("x-llm-router-context-required", contextRequiredTokens);
134
+ }
135
+
136
+ const contextHintSource = toSafeHeaderValue(debugState.contextHintSource);
137
+ if (contextHintSource) {
138
+ headers.set("x-llm-router-context-hint-source", contextHintSource);
139
+ }
140
+
141
+ const contextRisk = toSafeHeaderValue(debugState.contextRisk);
142
+ if (contextRisk) {
143
+ headers.set("x-llm-router-context-risk", contextRisk);
144
+ }
145
+
110
146
  const toolTypes = toSafeHeaderValue(debugState.toolTypes);
111
147
  if (toolTypes) {
112
148
  headers.set("x-llm-router-tool-types", toolTypes);
@@ -23,6 +23,8 @@ import {
23
23
  import { corsResponse, jsonResponse } from "./handler/http.js";
24
24
  import {
25
25
  detectUserRequestFormat,
26
+ estimateRequestContextTokens,
27
+ inferAmpContextRequirement,
26
28
  isAmpManagementPath,
27
29
  isJsonRequest,
28
30
  isStreamingEnabled,
@@ -43,6 +45,7 @@ import {
43
45
  convertAmpGeminiRequestToOpenAI,
44
46
  hasGeminiWebSearchTool
45
47
  } from "./handler/amp-gemini.js";
48
+ import { shouldInterceptAmpWebSearch } from "./handler/amp-web-search.js";
46
49
  import {
47
50
  isRequestFromAllowedIp,
48
51
  resolveAllowedOrigin,
@@ -55,7 +58,7 @@ import {
55
58
  resolveFallbackCircuitPolicy,
56
59
  resolveRetryPolicy
57
60
  } from "./handler/fallback.js";
58
- import { sleep } from "./handler/utils.js";
61
+ import { parseJsonSafely, sleep } from "./handler/utils.js";
59
62
  import {
60
63
  applyCandidateFailureState,
61
64
  applyRuntimeRetryPolicyGuards,
@@ -68,6 +71,7 @@ import {
68
71
  isRoutingDebugEnabled,
69
72
  recordRouteAttempt,
70
73
  recordRouteSkip,
74
+ setRouteContextDebug,
71
75
  setRouteSelectedCandidate,
72
76
  setRouteToolDebug,
73
77
  withRouteDebugHeaders
@@ -96,13 +100,6 @@ function filterCandidatesByFormat(candidates) {
96
100
  return { eligible, skipped };
97
101
  }
98
102
 
99
- function hasNextEligibleCandidate(entries, startIndex) {
100
- for (let index = startIndex + 1; index < (entries || []).length; index += 1) {
101
- if (entries[index]?.eligible) return true;
102
- }
103
- return false;
104
- }
105
-
106
103
  function extractBuiltInToolTypes(body) {
107
104
  const tools = Array.isArray(body?.tools) ? body.tools : [];
108
105
  const seen = new Set();
@@ -161,11 +158,111 @@ function isChatGPTCodexCandidate(candidate) {
161
158
  return subscriptionType === "chatgpt-codex";
162
159
  }
163
160
 
161
/**
 * Read a candidate's context window size.
 *
 * Uses `candidate.contextWindow` when it is not null/undefined, otherwise
 * `candidate.model.contextWindow`.
 *
 * @param {object | null | undefined} candidate - Route candidate.
 * @returns {number | null} The floored window size, or null when the value is
 *   missing, non-finite, or not positive.
 */
function resolveCandidateContextWindow(candidate) {
  const windowSize = Number(candidate?.contextWindow ?? candidate?.model?.contextWindow);
  const isValid = Number.isFinite(windowSize) && windowSize > 0;
  return isValid ? Math.floor(windowSize) : null;
}
167
+
168
/**
 * Describe the context-window risk of routing to the selected candidate.
 *
 * @param {object | null | undefined} candidate - Selected route candidate.
 * @param {unknown} estimatedRequiredTokens - Tokens the request is estimated to need.
 * @returns {string} "" when there is no candidate, no positive requirement, or
 *   the window is large enough; "selected-context-window-unknown" when the
 *   candidate has no usable window; otherwise
 *   "selected-context-window-below-required:<window><<required>".
 */
function resolveSelectedContextRisk(candidate, estimatedRequiredTokens) {
  const needed = Number(estimatedRequiredTokens);
  const hasRequirement = Number.isFinite(needed) && needed > 0;
  if (!candidate || !hasRequirement) return "";

  const windowSize = resolveCandidateContextWindow(candidate);
  if (!windowSize) return "selected-context-window-unknown";

  return windowSize < needed
    ? `selected-context-window-below-required:${windowSize}<${needed}`
    : "";
}
181
+
164
182
  const WEB_SEARCH_UNAVAILABLE_HINTS = [
165
183
  "web search credits are unavailable in this session",
166
184
  "web access unavailable (out of credits)",
167
185
  "web access unavailable"
168
186
  ];
187
+ const ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS = 240;
188
+
189
/**
 * Deliver an activity-log payload to an optional listener, best effort.
 *
 * Synchronous exceptions from the listener are swallowed, and when the
 * listener returns a thenable a no-op rejection handler is attached, so a
 * failing logger cannot affect the caller.
 *
 * @param {unknown} onActivityLog - Listener; ignored unless it is a function.
 * @param {object} payload - Event passed to the listener unchanged.
 * @returns {void}
 */
function queueActivityEvent(onActivityLog, payload) {
  if (typeof onActivityLog !== "function") return;

  try {
    const outcome = onActivityLog(payload);
    const isThenable = Boolean(outcome) && typeof outcome.then === "function";
    if (isThenable) {
      outcome.catch(() => {});
    }
  } catch {
    // Intentionally ignored: activity logging must never break request handling.
  }
}
199
+
200
/**
 * Find the first eligible entry located after a given position.
 *
 * @param {Array<object> | null | undefined} entries - Ranked candidate entries.
 * @param {number} startIndex - Position of the current entry; the search
 *   begins at `startIndex + 1`.
 * @returns {object | null} The next entry whose `eligible` flag is truthy, or
 *   null when there is none.
 */
function getNextEligibleCandidateEntry(entries, startIndex) {
  const list = entries || [];
  let cursor = startIndex + 1;
  while (cursor < list.length) {
    const entry = list[cursor];
    if (entry?.eligible) return entry;
    cursor += 1;
  }
  return null;
}
206
+
207
/**
 * Build a "<provider>/<model>" label for activity-log messages.
 *
 * The model part comes from `modelId`, falling back to `backend`. A missing or
 * blank part is rendered as "unknown".
 *
 * @param {object | null | undefined} candidate - Route candidate.
 * @returns {string} Label such as "openai/gpt-4o" or "unknown/unknown".
 */
function formatActivityCandidateLabel(candidate) {
  const clean = (value) => String(value || "unknown").trim() || "unknown";
  const provider = clean(candidate?.providerId);
  const model = clean(candidate?.modelId || candidate?.backend);
  return `${provider}/${model}`;
}
212
+
213
/**
 * Build the route label used in activity-log messages.
 *
 * @param {unknown} requestedModel - Model name from the request; blank values
 *   are shown as "smart".
 * @param {{ routeRef?: unknown } | null | undefined} resolved - Resolved route.
 * @returns {string} "<requested> -> <routeRef>" when the resolved route
 *   reference differs from the requested name, otherwise the single name.
 */
function formatActivityRouteLabel(requestedModel, resolved) {
  const asked = String(requestedModel || "").trim() || "smart";
  const target = String(resolved?.routeRef || "").trim();

  if (!target) return asked;
  if (target === asked) return target;
  return `${asked} -> ${target}`;
}
218
+
219
/**
 * Turn a snake_case failure category into a readable phrase.
 *
 * @param {unknown} category - Category identifier; falsy values become "".
 * @returns {string} The trimmed category with every underscore replaced by a space.
 */
function formatFailureCategory(category) {
  const trimmed = String(category || "").trim();
  return trimmed.split("_").join(" ");
}
224
+
225
/**
 * Summarize a failed provider call for the activity log.
 *
 * @param {{ status?: unknown } | null | undefined} result - Provider call result;
 *   `status` is included only when it is a finite number greater than zero.
 * @param {{ category?: unknown } | null | undefined} classification - Failure
 *   classification; its category is included when non-empty.
 * @returns {string} Parts joined with " · ", or "request failed" when neither
 *   part is available.
 */
function buildFailureSummary(result, classification) {
  const statusCode = Number.isFinite(result?.status) ? Number(result.status) : 0;
  const categoryText = formatFailureCategory(classification?.category);

  const summary = [
    statusCode > 0 ? `status ${statusCode}` : "",
    categoryText
  ].filter(Boolean).join(" · ");

  return summary || "request failed";
}
233
+
234
/**
 * Append the provider's own error text to an activity-log detail message.
 *
 * @param {string} baseMessage - Router-generated description.
 * @param {unknown} [providerMessage=""] - Text reported by the provider.
 * @returns {string} `baseMessage` unchanged when the provider text is blank,
 *   otherwise "<baseMessage> Provider said: <text>".
 */
function buildActivityDetail(baseMessage, providerMessage = "") {
  const providerText = String(providerMessage || "").trim();
  return providerText
    ? `${baseMessage} Provider said: ${providerText}`
    : baseMessage;
}
239
+
240
/**
 * Extract a short, human-readable error description from a failed provider
 * response for the activity log.
 *
 * The response (`result.upstreamResponse`, else `result.response`) is cloned
 * before reading so the original body stays available to the caller. When the
 * body parses as JSON, the first displayable value among `error.message`,
 * `error.code`, `error.type`, `error`, `code`, `type` and `message` is used;
 * otherwise the raw body text is used.
 *
 * Only non-blank strings and non-zero finite numbers count as displayable.
 * This prevents an object-valued `error` (one without message/code/type) from
 * being stringified to "[object Object]"; such payloads fall back to the raw
 * body instead.
 *
 * @param {{ upstreamResponse?: unknown, response?: unknown } | null | undefined} result
 * @returns {Promise<string>} Whitespace-collapsed text, truncated to
 *   `ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS` characters (ending in "…" when cut),
 *   or "" when nothing usable could be read.
 */
async function readActivityErrorDetail(result) {
  const response = result?.upstreamResponse instanceof Response
    ? result.upstreamResponse
    : (result?.response instanceof Response ? result.response : null);
  if (!response) return "";

  const isDisplayable = (value) => (
    (typeof value === "string" && value.trim() !== "")
    || (typeof value === "number" && Number.isFinite(value) && value !== 0)
  );

  try {
    const raw = (await response.clone().text()).trim();
    if (!raw) return "";

    const parsed = parseJsonSafely(raw, null);
    const message = [
      parsed?.error?.message,
      parsed?.error?.code,
      parsed?.error?.type,
      parsed?.error,
      parsed?.code,
      parsed?.type,
      parsed?.message
    ].find(isDisplayable) ?? raw;

    const compact = String(message).replace(/\s+/g, " ").trim();
    if (!compact) return "";
    if (compact.length <= ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS) return compact;
    return `${compact.slice(0, ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS - 1)}…`;
  } catch {
    // Best effort: reading can fail (for example if the body was already
    // consumed); the activity entry is then written without provider detail.
    return "";
  }
}
169
266
 
170
267
  function extractAssistantTextFragments(payload) {
171
268
  const fragments = [];
@@ -340,7 +437,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
340
437
  return jsonResponse({ error: "Unsupported Media Type. Use application/json." }, 415);
341
438
  }
342
439
 
343
- const maxRequestBodyBytes = resolveMaxRequestBodyBytes(env);
440
+ const maxRequestBodyBytes = resolveMaxRequestBodyBytes(env, {
441
+ requestKind: options.requestKind
442
+ });
344
443
  let body;
345
444
  try {
346
445
  body = await parseJsonBodyWithLimit(request, maxRequestBodyBytes);
@@ -359,7 +458,14 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
359
458
  const requestedModel = body?.model || "smart";
360
459
  const stream = isStreamingEnabled(sourceFormat, body);
361
460
 
362
- if (shouldProxyAmpWebSearchRequest(options.clientType, builtInToolTypes, config)) {
461
+ const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
462
+ clientType: options.clientType,
463
+ originalBody: body,
464
+ runtimeConfig: config,
465
+ env
466
+ });
467
+
468
+ if (!interceptAmpWebSearch && shouldProxyAmpWebSearchRequest(options.clientType, builtInToolTypes, config)) {
363
469
  const routeDebug = buildAmpWebSearchProxyDebugState(env, requestedModel, builtInToolTypes);
364
470
  if (routeDebug.enabled) {
365
471
  console.warn(
@@ -415,6 +521,30 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
415
521
  ...resolved,
416
522
  sourceFormat
417
523
  };
524
+ const routeLabel = formatActivityRouteLabel(requestedModel, resolved);
525
+ const requestContext = estimateRequestContextTokens(body);
526
+ const ampContextRequirement = inferAmpContextRequirement(request, body, options);
527
+ const effectiveRequiredTokens = Math.max(
528
+ Number(requestContext?.estimatedRequiredTokens) || 0,
529
+ Number(ampContextRequirement?.minimumContextTokens) || 0
530
+ );
531
+ const effectiveRequestContext = {
532
+ ...requestContext,
533
+ ampMinimumContextTokens: Number(ampContextRequirement?.minimumContextTokens) || 0,
534
+ ampContextSource: String(ampContextRequirement?.source || "").trim(),
535
+ estimatedRequiredTokens: effectiveRequiredTokens
536
+ };
537
+ setRouteContextDebug(routeDebug, {
538
+ requiredTokens: effectiveRequiredTokens,
539
+ hintSource: effectiveRequiredTokens > (Number(requestContext?.estimatedRequiredTokens) || 0)
540
+ ? ampContextRequirement?.source || "request-context-hint"
541
+ : (effectiveRequiredTokens > 0 ? "request-body-estimate" : "")
542
+ });
543
+ if (routeDebug.enabled && effectiveRequestContext.ampContextSource) {
544
+ console.warn(
545
+ `[llm-router] context hint request=${requestedModel} source=${effectiveRequestContext.ampContextSource} required=${effectiveRequiredTokens}`
546
+ );
547
+ }
418
548
  const routeCandidates = [resolved.primary, ...resolved.fallbacks];
419
549
  const formatFiltered = filterCandidatesByFormat(routeCandidates);
420
550
  for (const skipped of formatFiltered.skipped) {
@@ -449,6 +579,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
449
579
  candidates: prioritizedCandidates.candidates,
450
580
  stateStore,
451
581
  config,
582
+ requestContext: effectiveRequestContext,
452
583
  now
453
584
  });
454
585
  } catch (error) {
@@ -463,6 +594,12 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
463
594
 
464
595
  routeDebug.strategy = ranking.strategy;
465
596
  setRouteSelectedCandidate(routeDebug, ranking.selectedEntry?.candidate);
597
+ setRouteContextDebug(routeDebug, {
598
+ risk: resolveSelectedContextRisk(
599
+ ranking.selectedEntry?.candidate,
600
+ effectiveRequestContext.estimatedRequiredTokens
601
+ )
602
+ });
466
603
  for (const skippedEntry of (ranking.skippedEntries || [])) {
467
604
  recordRouteSkip(routeDebug, skippedEntry.candidate, skippedEntry.skipReasons);
468
605
  }
@@ -480,6 +617,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
480
617
  let lastErrorResult = null;
481
618
  let lastErrorMessage = "Unknown error";
482
619
  let routeSelectionCommitted = false;
620
+ let pendingFallbackContext = null;
483
621
 
484
622
  for (let index = 0; index < ranking.entries.length; index += 1) {
485
623
  const entry = ranking.entries[index];
@@ -505,14 +643,16 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
505
643
  while (attempt < maxAttempts) {
506
644
  attempt += 1;
507
645
  result = await makeProviderCall({
508
- body,
509
- sourceFormat,
510
- stream,
511
- requestKind: options.requestKind,
512
- candidate,
513
- requestHeaders: request.headers,
514
- env,
515
- clientType: options.clientType
646
+ body,
647
+ sourceFormat,
648
+ stream,
649
+ requestKind: options.requestKind,
650
+ candidate,
651
+ requestHeaders: request.headers,
652
+ env,
653
+ clientType: options.clientType,
654
+ runtimeConfig: config,
655
+ stateStore
516
656
  });
517
657
 
518
658
  if (!quotaConsumed && shouldConsumeQuotaFromResult(result)) {
@@ -538,7 +678,22 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
538
678
  }
539
679
  await clearCandidateRoutingState(stateStore, entry.candidateKey);
540
680
  setRouteSelectedCandidate(routeDebug, candidate, { overwrite: true });
681
+ setRouteContextDebug(routeDebug, {
682
+ risk: resolveSelectedContextRisk(candidate, effectiveRequestContext.estimatedRequiredTokens)
683
+ });
541
684
  recordRouteAttempt(routeDebug, candidate, result.status, null, attempt);
685
+ if (pendingFallbackContext) {
686
+ queueActivityEvent(options.onActivityLog, {
687
+ level: "success",
688
+ message: `Fallback request succeeded for ${routeLabel}.`,
689
+ detail: `${formatActivityCandidateLabel(candidate)} completed the request after ${pendingFallbackContext.failedCandidate} failed (${pendingFallbackContext.failureSummary}).`,
690
+ source: "runtime",
691
+ category: "usage",
692
+ kind: "fallback-succeeded",
693
+ route: routeLabel
694
+ });
695
+ pendingFallbackContext = null;
696
+ }
542
697
  return withRouteDebugHeaders(result.response, routeDebug);
543
698
  }
544
699
 
@@ -571,8 +726,41 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
571
726
  isFallbackAttempt
572
727
  );
573
728
 
574
- const hasNextCandidate = hasNextEligibleCandidate(ranking.entries, index);
729
+ const nextCandidateEntry = getNextEligibleCandidateEntry(ranking.entries, index);
730
+ const hasNextCandidate = Boolean(nextCandidateEntry);
731
+ const failureSummary = buildFailureSummary(result, classification);
732
+ const providerMessage = await readActivityErrorDetail(result);
733
+ if (hasNextCandidate && classification?.allowFallback !== false) {
734
+ queueActivityEvent(options.onActivityLog, {
735
+ level: "warn",
736
+ message: `Request fallback triggered for ${routeLabel}.`,
737
+ detail: buildActivityDetail(
738
+ `${formatActivityCandidateLabel(candidate)} failed (${failureSummary}). Trying ${formatActivityCandidateLabel(nextCandidateEntry?.candidate)} next.`,
739
+ providerMessage
740
+ ),
741
+ source: "runtime",
742
+ category: "usage",
743
+ kind: "fallback-triggered",
744
+ route: routeLabel
745
+ });
746
+ pendingFallbackContext = {
747
+ failedCandidate: formatActivityCandidateLabel(candidate),
748
+ failureSummary
749
+ };
750
+ }
575
751
  if (!hasNextCandidate || classification?.allowFallback === false) {
752
+ queueActivityEvent(options.onActivityLog, {
753
+ level: "error",
754
+ message: `Request failed for ${routeLabel}.`,
755
+ detail: buildActivityDetail(
756
+ `${formatActivityCandidateLabel(candidate)} failed (${failureSummary})${classification?.allowFallback === false ? ". Fallback stopped for this error." : ". No more fallbacks are available."}`,
757
+ providerMessage
758
+ ),
759
+ source: "runtime",
760
+ category: "usage",
761
+ kind: "request-failed",
762
+ route: routeLabel
763
+ });
576
764
  return withRouteDebugHeaders(await buildFailureResponse(result), routeDebug);
577
765
  }
578
766
  }
@@ -779,7 +967,9 @@ export function createFetchHandler(options) {
779
967
 
780
968
  let body;
781
969
  try {
782
- body = await parseJsonBodyWithLimit(request, resolveMaxRequestBodyBytes(env));
970
+ body = await parseJsonBodyWithLimit(request, resolveMaxRequestBodyBytes(env, {
971
+ requestKind: route.requestKind
972
+ }));
783
973
  } catch (error) {
784
974
  if (error && typeof error === "object" && error.code === "REQUEST_BODY_TOO_LARGE") {
785
975
  return respond(jsonResponse({ error: "Request body too large" }, 413));
@@ -142,7 +142,7 @@ export async function makeSubscriptionProviderCall({ provider, body, stream }) {
142
142
  type: 'error',
143
143
  error: {
144
144
  type: 'authentication_error',
145
- message: `Not authenticated for subscription profile '${profileId}'. Run 'llm-router subscription login --profile=${profileId}' first.`
145
+ message: `Not authenticated for subscription profile '${profileId}'. Run 'llr subscription login --profile=${profileId}' first.`
146
146
  }
147
147
  }), {
148
148
  status: 401,
@@ -0,0 +1,49 @@
1
/**
 * Sentinel binding value compared against by `isCodexCliInheritModelBinding`.
 * NOTE(review): presumably means "inherit the Codex CLI's own model" — confirm with callers.
 */
export const CODEX_CLI_INHERIT_MODEL_VALUE = "__codex_cli_inherit__";

/** Accepted Codex CLI reasoning-effort values (frozen). */
export const CODEX_CLI_REASONING_EFFORT_VALUES = Object.freeze(
  ["minimal", "low", "medium", "high", "xhigh"]
);

/** Accepted Claude Code thinking-level names (frozen). */
export const CLAUDE_CODE_THINKING_LEVEL_VALUES = Object.freeze(
  ["low", "medium", "high", "max"]
);

/** Token count associated with each Claude Code thinking level (frozen). */
export const CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL = Object.freeze({
  low: 4_096,
  medium: 12_000,
  high: 24_000,
  max: 31_999
});
21
+
22
/**
 * Check whether a binding value is the Codex CLI "inherit" sentinel.
 *
 * @param {unknown} value - Binding value; surrounding whitespace is ignored.
 * @returns {boolean} True when the trimmed value equals
 *   `CODEX_CLI_INHERIT_MODEL_VALUE`.
 */
export function isCodexCliInheritModelBinding(value) {
  const binding = String(value || "").trim();
  return binding === CODEX_CLI_INHERIT_MODEL_VALUE;
}
25
+
26
/**
 * Normalize a Codex CLI reasoning-effort value.
 *
 * @param {unknown} value - Candidate value; trimmed and lower-cased before matching.
 * @returns {string} The normalized value when it is listed in
 *   `CODEX_CLI_REASONING_EFFORT_VALUES`, otherwise "".
 */
export function normalizeCodexCliReasoningEffort(value) {
  const effort = String(value || "").trim().toLowerCase();
  if (!CODEX_CLI_REASONING_EFFORT_VALUES.includes(effort)) return "";
  return effort;
}
30
+
31
/**
 * Normalize a Claude Code thinking-level name.
 *
 * @param {unknown} value - Candidate value; trimmed and lower-cased before matching.
 * @returns {string} The normalized level when it is listed in
 *   `CLAUDE_CODE_THINKING_LEVEL_VALUES`, otherwise "".
 */
export function normalizeClaudeCodeThinkingLevel(value) {
  const level = String(value || "").trim().toLowerCase();
  if (!CLAUDE_CODE_THINKING_LEVEL_VALUES.includes(level)) return "";
  return level;
}
35
+
36
/**
 * Convert a Claude Code thinking level to its token count, as a string.
 *
 * @param {unknown} level - Level name; normalized with
 *   `normalizeClaudeCodeThinkingLevel` first.
 * @returns {string} The decimal token count from
 *   `CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL`, or "" for an unrecognized level.
 */
export function mapClaudeCodeThinkingLevelToTokens(level) {
  const knownLevel = normalizeClaudeCodeThinkingLevel(level);
  if (!knownLevel) return "";

  const tokens = CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL[knownLevel];
  return String(tokens || "");
}
41
+
42
/**
 * Convert a thinking-token count back to a Claude Code thinking level.
 *
 * Thresholds are checked from highest to lowest: the table's `max` value, the
 * table's `high` value, then a fixed 6000 for "medium"; any smaller positive
 * count is "low".
 * NOTE(review): the "medium" cut-off (6000) is lower than the table's medium
 * value (12000); this looks intentional but is worth confirming.
 *
 * @param {unknown} value - Token count, anything accepted by `Number(...)`.
 * @returns {string} "max", "high", "medium" or "low"; "" when the value is not
 *   a finite positive number.
 */
export function mapClaudeCodeThinkingTokensToLevel(value) {
  const tokens = Number(value);
  if (!(Number.isFinite(tokens) && tokens > 0)) return "";

  const { max, high } = CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL;
  const thresholds = [
    ["max", max],
    ["high", high],
    ["medium", 6000]
  ];
  const match = thresholds.find(([, floor]) => tokens >= floor);
  return match ? match[0] : "low";
}