@khanglvm/llm-router 2.3.1 → 2.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +2 -2
  3. package/package.json +1 -1
  4. package/src/cli/router-module.js +32 -5
  5. package/src/node/coding-tool-config.js +138 -25
  6. package/src/node/large-request-log.js +54 -0
  7. package/src/node/litellm-context-catalog.js +13 -1
  8. package/src/node/local-server.js +10 -0
  9. package/src/node/ollama-client.js +195 -0
  10. package/src/node/ollama-hardware.js +94 -0
  11. package/src/node/ollama-install.js +230 -0
  12. package/src/node/provider-probe.js +69 -5
  13. package/src/node/web-console-client.js +36 -36
  14. package/src/node/web-console-server.js +478 -8
  15. package/src/node/web-console-styles.generated.js +1 -1
  16. package/src/node/web-console-ui/amp-utils.js +272 -0
  17. package/src/node/web-console-ui/api-client.js +128 -0
  18. package/src/node/web-console-ui/capability-utils.js +36 -0
  19. package/src/node/web-console-ui/config-editor-utils.js +20 -5
  20. package/src/node/web-console-ui/constants.js +140 -0
  21. package/src/node/web-console-ui/context-window-utils.js +262 -0
  22. package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
  23. package/src/node/web-console-ui/provider-presets.js +211 -0
  24. package/src/node/web-console-ui/quick-start-utils.js +790 -0
  25. package/src/node/web-console-ui/utils.js +353 -0
  26. package/src/node/web-console-ui/web-search-utils.js +460 -0
  27. package/src/runtime/config.js +96 -9
  28. package/src/runtime/handler/fallback.js +71 -0
  29. package/src/runtime/handler/field-filter.js +39 -0
  30. package/src/runtime/handler/large-request-log.js +211 -0
  31. package/src/runtime/handler/provider-call.js +276 -15
  32. package/src/runtime/handler/reasoning-effort.js +11 -1
  33. package/src/runtime/handler/tool-name-sanitizer.js +258 -0
  34. package/src/runtime/handler.js +16 -3
  35. package/src/shared/coding-tool-bindings.js +3 -0
@@ -21,6 +21,7 @@ import {
21
21
  import { maybeRewriteAmpClientResponse } from "./amp-response.js";
22
22
  import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
23
23
  import { applyReasoningEffortMapping } from "./reasoning-effort.js";
24
+ import { stripUnsupportedFields } from "./field-filter.js";
24
25
  import { resolveUpstreamTimeoutMs } from "./request.js";
25
26
  import { parseJsonSafely } from "./utils.js";
26
27
  import { buildTimeoutSignal } from "../../shared/timeout-signal.js";
@@ -35,11 +36,139 @@ import {
35
36
  rewriteProviderBodyForAmpWebSearch,
36
37
  shouldInterceptAmpWebSearch
37
38
  } from "./amp-web-search.js";
39
+ import {
40
+ buildLargeRequestLogEntry,
41
+ isLargeRequestLoggingEnabled,
42
+ measureSerializedRequestBytes,
43
+ resolveLargeRequestLogThresholdBytes
44
+ } from "./large-request-log.js";
45
+
46
+ const OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS = 30 * 60 * 1000;
47
+ const openAIToolRoutingSuppressionUntil = new Map();
38
48
 
39
49
  function isSubscriptionProvider(provider) {
40
50
  return provider?.type === "subscription";
41
51
  }
42
52
 
53
+ function normalizeFormatList(values) {
54
+ return [...new Set(
55
+ (Array.isArray(values) ? values : [values])
56
+ .map((value) => String(value || "").trim())
57
+ .filter((value) => value === FORMATS.OPENAI || value === FORMATS.CLAUDE)
58
+ )];
59
+ }
60
+
61
+ function resolveCandidateModel(provider, model, modelId) {
62
+ if (model && typeof model === "object" && !Array.isArray(model)) {
63
+ return model;
64
+ }
65
+ const normalizedModelId = String(modelId || "").trim();
66
+ if (!normalizedModelId || !Array.isArray(provider?.models)) return null;
67
+ return provider.models.find((entry) => String(entry?.id || "").trim() === normalizedModelId) || null;
68
+ }
69
+
70
+ function getProviderModelSupportedFormats(provider, model, modelId) {
71
+ const resolvedModel = resolveCandidateModel(provider, model, modelId);
72
+ const configuredFormats = normalizeFormatList(resolvedModel?.formats || resolvedModel?.format);
73
+ const resolvedModelId = String(resolvedModel?.id || modelId || "").trim();
74
+ if (!resolvedModelId) return configuredFormats;
75
+
76
+ const preferredFormat = provider?.lastProbe?.modelPreferredFormat?.[resolvedModelId];
77
+ if (preferredFormat === FORMATS.OPENAI || preferredFormat === FORMATS.CLAUDE) {
78
+ return [preferredFormat];
79
+ }
80
+
81
+ const probedFormats = normalizeFormatList(provider?.lastProbe?.modelSupport?.[resolvedModelId]);
82
+ return probedFormats.length > 0 ? probedFormats : configuredFormats;
83
+ }
84
+
85
+ function getProviderModelPreferredFormat(provider, model, modelId) {
86
+ const resolvedModel = resolveCandidateModel(provider, model, modelId);
87
+ const resolvedModelId = String(resolvedModel?.id || modelId || "").trim();
88
+ if (!resolvedModelId) return "";
89
+ const preferredFormat = String(provider?.lastProbe?.modelPreferredFormat?.[resolvedModelId] || "").trim();
90
+ return preferredFormat === FORMATS.OPENAI || preferredFormat === FORMATS.CLAUDE
91
+ ? preferredFormat
92
+ : "";
93
+ }
94
+
95
+ function buildOpenAIToolRoutingSuppressionKey(candidate) {
96
+ const providerId = String(candidate?.providerId || candidate?.provider?.id || "").trim();
97
+ const modelId = String(candidate?.modelId || candidate?.model?.id || candidate?.backend || "").trim();
98
+ if (!providerId || !modelId) return "";
99
+ return `${providerId}/${modelId}`;
100
+ }
101
+
102
+ function pruneOpenAIToolRoutingSuppressions(now = Date.now()) {
103
+ for (const [key, expiresAt] of openAIToolRoutingSuppressionUntil.entries()) {
104
+ if (!Number.isFinite(expiresAt) || expiresAt <= now) {
105
+ openAIToolRoutingSuppressionUntil.delete(key);
106
+ }
107
+ }
108
+ }
109
+
110
+ function isOpenAIToolRoutingSuppressed(candidate, now = Date.now()) {
111
+ const key = buildOpenAIToolRoutingSuppressionKey(candidate);
112
+ if (!key) return false;
113
+ pruneOpenAIToolRoutingSuppressions(now);
114
+ return Number(openAIToolRoutingSuppressionUntil.get(key)) > now;
115
+ }
116
+
117
+ function suppressOpenAIToolRouting(candidate, now = Date.now()) {
118
+ const key = buildOpenAIToolRoutingSuppressionKey(candidate);
119
+ if (!key) return;
120
+ openAIToolRoutingSuppressionUntil.set(key, now + OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS);
121
+ }
122
+
123
+ export function resetOpenAIToolRoutingLearningState() {
124
+ openAIToolRoutingSuppressionUntil.clear();
125
+ }
126
+
127
+ function queueLargeRequestEvent(onLargeRequestLog, payload) {
128
+ if (typeof onLargeRequestLog !== "function") return;
129
+ try {
130
+ const result = onLargeRequestLog(payload);
131
+ if (result && typeof result.then === "function") {
132
+ result.catch(() => {});
133
+ }
134
+ } catch {
135
+ }
136
+ }
137
+
138
+ function maybeQueueLargeRequestLog({
139
+ env,
140
+ onLargeRequestLog,
141
+ providerBody,
142
+ serializedBody,
143
+ providerUrl,
144
+ candidate,
145
+ sourceFormat,
146
+ targetFormat,
147
+ requestKind,
148
+ clientType,
149
+ stream,
150
+ providerType = "http"
151
+ } = {}) {
152
+ if (!isLargeRequestLoggingEnabled(env) || typeof onLargeRequestLog !== "function") return;
153
+ const requestBytes = measureSerializedRequestBytes(serializedBody);
154
+ const thresholdBytes = resolveLargeRequestLogThresholdBytes(env);
155
+ if (requestBytes < thresholdBytes) return;
156
+
157
+ queueLargeRequestEvent(onLargeRequestLog, buildLargeRequestLogEntry({
158
+ providerBody,
159
+ requestBytes,
160
+ thresholdBytes,
161
+ providerUrl,
162
+ candidate,
163
+ sourceFormat,
164
+ targetFormat,
165
+ requestKind,
166
+ clientType,
167
+ stream,
168
+ providerType
169
+ }));
170
+ }
171
+
43
172
  async function toProviderError(response) {
44
173
  const raw = await response.text();
45
174
  const parsed = parseJsonSafely(raw);
@@ -97,7 +226,8 @@ async function adaptProviderResponse({
97
226
  requestKind,
98
227
  requestBody,
99
228
  clientType,
100
- env
229
+ env,
230
+ responsesDowngraded
101
231
  }) {
102
232
  const buildSuccessResponse = async (resultResponse) => ({
103
233
  ok: true,
@@ -111,6 +241,30 @@ async function adaptProviderResponse({
111
241
  })
112
242
  });
113
243
 
244
+ // Responses API was downgraded to Chat Completions for provider compatibility.
245
+ // Convert response back: Chat Completions → Claude → Responses API.
246
+ if (responsesDowngraded) {
247
+ if (stream) {
248
+ const claudeStream = handleOpenAIStreamToClaude(response);
249
+ return buildSuccessResponse(handleClaudeStreamToOpenAIResponses(claudeStream, requestBody, fallbackModel));
250
+ }
251
+ const raw = await response.text();
252
+ const parsed = parseJsonSafely(raw);
253
+ if (!parsed) {
254
+ return {
255
+ ok: false,
256
+ status: 502,
257
+ retryable: true,
258
+ response: jsonResponse({
259
+ type: "error",
260
+ error: { type: "api_error", message: "Provider returned invalid JSON." }
261
+ }, 502)
262
+ };
263
+ }
264
+ const claudeMessage = convertOpenAINonStreamToClaude(parsed, fallbackModel);
265
+ return buildSuccessResponse(jsonResponse(convertClaudeNonStreamToOpenAIResponses(claudeMessage, requestBody, fallbackModel)));
266
+ }
267
+
114
268
  if (stream) {
115
269
  if (!translate) {
116
270
  return buildSuccessResponse(
@@ -236,6 +390,9 @@ function normalizeProviderRequestKind(targetFormat, requestKind) {
236
390
 
237
391
  function shouldPreferOpenAIForClaudeToolCalls({
238
392
  provider,
393
+ model,
394
+ modelId,
395
+ candidate,
239
396
  sourceFormat,
240
397
  targetFormat,
241
398
  requestKind,
@@ -243,6 +400,11 @@ function shouldPreferOpenAIForClaudeToolCalls({
243
400
  } = {}) {
244
401
  if (sourceFormat !== FORMATS.CLAUDE || targetFormat !== FORMATS.CLAUDE) return false;
245
402
  if (!hasToolDefinitions(body)) return false;
403
+ if (candidate && isOpenAIToolRoutingSuppressed(candidate)) return false;
404
+ const preferredFormat = getProviderModelPreferredFormat(provider, model, modelId);
405
+ if (preferredFormat === FORMATS.CLAUDE) return false;
406
+ const modelFormats = getProviderModelSupportedFormats(provider, model, modelId);
407
+ if (modelFormats.length > 0 && !modelFormats.includes(FORMATS.OPENAI)) return false;
246
408
  if (!getProviderFormats(provider).includes(FORMATS.OPENAI)) return false;
247
409
  return Boolean(resolveProviderUrl(provider, FORMATS.OPENAI, normalizeProviderRequestKind(FORMATS.OPENAI, requestKind)));
248
410
  }
@@ -489,14 +651,22 @@ function buildProviderRequestPlan({
489
651
  requestKind,
490
652
  requestHeaders,
491
653
  interceptAmpWebSearch,
492
- stream
654
+ stream,
655
+ forceResponsesDowngrade = false
493
656
  }) {
494
657
  const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
495
658
  const translate = needsTranslation(sourceFormat, targetFormat);
496
659
 
497
660
  let providerBody = { ...body };
661
+ let responsesDowngraded = false;
498
662
  if (translate) {
499
663
  providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
664
+ } else if (forceResponsesDowngrade) {
665
+ // Provider confirmed to not support Responses API — downgrade to Chat Completions
666
+ // via double-hop: Responses API → Claude → Chat Completions.
667
+ const intermediateBody = translateRequest(FORMATS.OPENAI, FORMATS.CLAUDE, candidate.backend, body, stream);
668
+ providerBody = translateRequest(FORMATS.CLAUDE, FORMATS.OPENAI, candidate.backend, intermediateBody, stream);
669
+ responsesDowngraded = true;
500
670
  }
501
671
 
502
672
  providerBody.model = candidate.backend;
@@ -513,9 +683,19 @@ function buildProviderRequestPlan({
513
683
  sourceFormat,
514
684
  targetFormat,
515
685
  targetModel: candidate.backend,
516
- requestHeaders
686
+ requestHeaders,
687
+ capabilities: candidate.model?.capabilities
517
688
  });
518
689
 
690
+ if (responsesDowngraded) {
691
+ // Strip Responses-API-only fields that Chat Completions providers reject.
692
+ delete providerBody.prompt_cache_key;
693
+ delete providerBody.store;
694
+ delete providerBody.include;
695
+ delete providerBody.text;
696
+ delete providerBody.service_tier;
697
+ }
698
+
519
699
  const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
520
700
  targetFormat,
521
701
  requestKind: normalizedRequestKind
@@ -532,11 +712,14 @@ function buildProviderRequestPlan({
532
712
  providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
533
713
  }
534
714
 
715
+ providerBody = stripUnsupportedFields(providerBody, candidate.model?.capabilities);
716
+
535
717
  return {
536
718
  targetFormat,
537
- requestKind: normalizedRequestKind,
719
+ requestKind: responsesDowngraded ? undefined : normalizedRequestKind,
538
720
  translate,
539
- providerBody
721
+ providerBody,
722
+ responsesDowngraded
540
723
  };
541
724
  }
542
725
 
@@ -552,7 +735,8 @@ export async function makeProviderCall({
552
735
  runtimeConfig,
553
736
  stateStore,
554
737
  ampContext,
555
- runtimeFlags
738
+ runtimeFlags,
739
+ onLargeRequestLog
556
740
  }) {
557
741
  const provider = candidate.provider;
558
742
  const targetFormat = candidate.targetFormat;
@@ -565,6 +749,9 @@ export async function makeProviderCall({
565
749
 
566
750
  const preferOpenAIToolRouting = !isSubscriptionProvider(provider) && shouldPreferOpenAIForClaudeToolCalls({
567
751
  provider,
752
+ model: candidate?.model,
753
+ modelId: candidate?.modelId,
754
+ candidate,
568
755
  sourceFormat,
569
756
  targetFormat,
570
757
  requestKind,
@@ -576,8 +763,17 @@ export async function makeProviderCall({
576
763
  effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
577
764
  }
578
765
 
766
+ // For Responses API requests to OpenAI-format providers, try the native endpoint first.
767
+ // If the provider doesn't support /v1/responses (returns 404/400), fall back to a
768
+ // downgraded Chat Completions plan with double-hop translation.
769
+ const needsResponsesDowngradeFallback = !isSubscriptionProvider(provider)
770
+ && sourceFormat === FORMATS.OPENAI
771
+ && targetFormat === FORMATS.OPENAI
772
+ && requestKind === "responses";
773
+
579
774
  let activePlan;
580
775
  let fallbackPlan = null;
776
+ let responsesDowngradedPlan = null;
581
777
  try {
582
778
  activePlan = buildProviderRequestPlan({
583
779
  body: effectiveBody,
@@ -601,6 +797,19 @@ export async function makeProviderCall({
601
797
  stream
602
798
  });
603
799
  }
800
+ if (needsResponsesDowngradeFallback) {
801
+ responsesDowngradedPlan = buildProviderRequestPlan({
802
+ body: effectiveBody,
803
+ sourceFormat,
804
+ targetFormat,
805
+ candidate,
806
+ requestKind,
807
+ requestHeaders,
808
+ interceptAmpWebSearch,
809
+ stream,
810
+ forceResponsesDowngrade: true
811
+ });
812
+ }
604
813
  } catch (error) {
605
814
  return {
606
815
  ok: false,
@@ -651,13 +860,33 @@ export async function makeProviderCall({
651
860
  prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
652
861
  };
653
862
  }
654
- const executeSubscriptionRequest = async (requestBody) => makeSubscriptionProviderCall({
655
- provider,
656
- body: requestBody,
657
- // ChatGPT Codex backend expects stream=true; non-stream responses are reconstructed from SSE.
658
- stream: subscriptionType === "chatgpt-codex" ? true : Boolean(stream),
659
- env
660
- });
863
+ const executeSubscriptionRequest = async (requestBody) => {
864
+ const requestStream = subscriptionType === "chatgpt-codex" ? true : Boolean(stream);
865
+ const providerUrl = subscriptionType === "chatgpt-codex"
866
+ ? "https://chatgpt.com/backend-api/codex/responses"
867
+ : "https://console.anthropic.com/v1/messages?beta=true";
868
+ maybeQueueLargeRequestLog({
869
+ env,
870
+ onLargeRequestLog,
871
+ providerBody: requestBody,
872
+ serializedBody: JSON.stringify(requestBody),
873
+ providerUrl,
874
+ candidate,
875
+ sourceFormat,
876
+ targetFormat: activePlan.targetFormat,
877
+ requestKind: activePlan.requestKind,
878
+ clientType,
879
+ stream: requestStream,
880
+ providerType: subscriptionType
881
+ });
882
+ return makeSubscriptionProviderCall({
883
+ provider,
884
+ body: requestBody,
885
+ // ChatGPT Codex backend expects stream=true; non-stream responses are reconstructed from SSE.
886
+ stream: requestStream,
887
+ env
888
+ });
889
+ };
661
890
  const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
662
891
 
663
892
  if (!subscriptionResult?.ok) {
@@ -854,11 +1083,26 @@ export async function makeProviderCall({
854
1083
  const timeoutMs = resolveUpstreamTimeoutMs(env);
855
1084
  const timeoutControl = buildTimeoutSignal(timeoutMs);
856
1085
  try {
1086
+ const serializedBody = JSON.stringify(plan.providerBody);
857
1087
  const init = {
858
1088
  method: "POST",
859
1089
  headers,
860
- body: JSON.stringify(plan.providerBody)
1090
+ body: serializedBody
861
1091
  };
1092
+ maybeQueueLargeRequestLog({
1093
+ env,
1094
+ onLargeRequestLog,
1095
+ providerBody: plan.providerBody,
1096
+ serializedBody,
1097
+ providerUrl,
1098
+ candidate,
1099
+ sourceFormat,
1100
+ targetFormat: plan.targetFormat,
1101
+ requestKind: plan.requestKind,
1102
+ clientType,
1103
+ stream,
1104
+ providerType: "http"
1105
+ });
862
1106
  if (timeoutControl.signal) {
863
1107
  init.signal = timeoutControl.signal;
864
1108
  }
@@ -908,6 +1152,9 @@ export async function makeProviderCall({
908
1152
  try {
909
1153
  const fallbackResponse = await executeHttpProviderRequest(fallbackPlan);
910
1154
  if (fallbackResponse instanceof Response && fallbackResponse.ok) {
1155
+ if (preferOpenAIToolRouting) {
1156
+ suppressOpenAIToolRouting(candidate);
1157
+ }
911
1158
  response = fallbackResponse;
912
1159
  activePlan = fallbackPlan;
913
1160
  }
@@ -934,6 +1181,19 @@ export async function makeProviderCall({
934
1181
  };
935
1182
  }
936
1183
 
1184
+ // Provider doesn't support native /v1/responses — retry with Chat Completions downgrade.
1185
+ if ((!response || !response.ok) && responsesDowngradedPlan) {
1186
+ try {
1187
+ const downgradedResponse = await executeHttpProviderRequest(responsesDowngradedPlan);
1188
+ if (downgradedResponse instanceof Response && downgradedResponse.ok) {
1189
+ response = downgradedResponse;
1190
+ activePlan = responsesDowngradedPlan;
1191
+ }
1192
+ } catch {
1193
+ // Keep the original failure if the downgraded request also fails.
1194
+ }
1195
+ }
1196
+
937
1197
  if (!response.ok) {
938
1198
  const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
939
1199
  targetFormat: activePlan.targetFormat,
@@ -983,6 +1243,7 @@ export async function makeProviderCall({
983
1243
  requestKind: activePlan.requestKind,
984
1244
  requestBody: body,
985
1245
  clientType,
986
- env
1246
+ env,
1247
+ responsesDowngraded: activePlan.responsesDowngraded
987
1248
  });
988
1249
  }
@@ -295,8 +295,18 @@ export function applyReasoningEffortMapping({
295
295
  sourceFormat,
296
296
  targetFormat,
297
297
  targetModel,
298
- requestHeaders
298
+ requestHeaders,
299
+ capabilities
299
300
  }) {
301
+ if (capabilities) {
302
+ if (targetFormat === FORMATS.OPENAI && capabilities.supportsReasoning === false) {
303
+ return providerBody;
304
+ }
305
+ if (targetFormat === FORMATS.CLAUDE && capabilities.supportsThinking === false) {
306
+ return providerBody;
307
+ }
308
+ }
309
+
300
310
  const effort = resolveRequestedEffort(originalBody, requestHeaders);
301
311
  if (!effort) return providerBody;
302
312