@khanglvm/llm-router 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/CHANGELOG.md +5 -0
  2. package/README.md +2 -2
  3. package/package.json +1 -1
  4. package/src/cli/router-module.js +32 -5
  5. package/src/node/coding-tool-config.js +138 -25
  6. package/src/node/large-request-log.js +54 -0
  7. package/src/node/litellm-context-catalog.js +13 -1
  8. package/src/node/local-server.js +10 -0
  9. package/src/node/ollama-client.js +195 -0
  10. package/src/node/ollama-hardware.js +94 -0
  11. package/src/node/ollama-install.js +230 -0
  12. package/src/node/provider-probe.js +69 -5
  13. package/src/node/web-console-client.js +36 -36
  14. package/src/node/web-console-server.js +478 -8
  15. package/src/node/web-console-styles.generated.js +1 -1
  16. package/src/node/web-console-ui/amp-utils.js +272 -0
  17. package/src/node/web-console-ui/api-client.js +128 -0
  18. package/src/node/web-console-ui/capability-utils.js +36 -0
  19. package/src/node/web-console-ui/config-editor-utils.js +20 -5
  20. package/src/node/web-console-ui/constants.js +140 -0
  21. package/src/node/web-console-ui/context-window-utils.js +262 -0
  22. package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
  23. package/src/node/web-console-ui/provider-presets.js +211 -0
  24. package/src/node/web-console-ui/quick-start-utils.js +790 -0
  25. package/src/node/web-console-ui/utils.js +353 -0
  26. package/src/node/web-console-ui/web-search-utils.js +460 -0
  27. package/src/runtime/config.js +96 -9
  28. package/src/runtime/handler/fallback.js +71 -0
  29. package/src/runtime/handler/field-filter.js +39 -0
  30. package/src/runtime/handler/large-request-log.js +211 -0
  31. package/src/runtime/handler/provider-call.js +185 -15
  32. package/src/runtime/handler/reasoning-effort.js +11 -1
  33. package/src/runtime/handler/tool-name-sanitizer.js +258 -0
  34. package/src/runtime/handler.js +16 -3
  35. package/src/shared/coding-tool-bindings.js +3 -0
@@ -44,6 +44,18 @@ const POLICY_HINTS = [
44
44
  "unsafe",
45
45
  "flagged"
46
46
  ];
47
// Error-text fragments that classify a provider failure as "model_not_found".
const MODEL_NOT_FOUND_HINTS = ["model not found", "model does not exist", "model_not_found"];

// Error-text fragments that classify a provider failure as "vram_exhaustion".
const VRAM_EXHAUSTION_HINTS = [
  "insufficient vram",
  "out of memory",
  "failed to load model",
  "insufficient memory"
];

// Origin cooldown applied after a "model_not_found" classification: one hour, in milliseconds.
const DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS = 60 * 60 * 1000;
47
59
  const CONTEXT_WINDOW_HINTS = [
48
60
  "context window",
49
61
  "maximum context length",
@@ -56,6 +68,17 @@ const CONTEXT_WINDOW_HINTS = [
56
68
  "too many tokens",
57
69
  "ran out of room in the model's context window"
58
70
  ];
71
// Error-text fragments that, combined with an HTTP 413 status, classify a
// provider failure as "rate_limited" instead of an oversized-request error.
const RATE_LIMIT_HINTS = [
  "tokens per minute", "requests per minute",
  "rate limit", "rate_limit",
  "tpm", "rpm",
  "quota exceeded", "quota_exceeded",
  "limit exceeded"
];
59
82
  const fallbackCircuitState = new Map();
60
83
 
61
84
  export function shouldRetryStatus(status) {
@@ -392,6 +415,16 @@ export async function classifyFailureResult(result, retryPolicy) {
392
415
  }
393
416
 
394
417
  if (status === 404 || status === 410) {
418
+ const hintText404 = await readProviderErrorHint(result);
419
+ if (hasAnyHint(hintText404, MODEL_NOT_FOUND_HINTS)) {
420
+ return {
421
+ category: "model_not_found",
422
+ retryable: false,
423
+ retryOrigin: false,
424
+ allowFallback: true,
425
+ originCooldownMs: DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS
426
+ };
427
+ }
395
428
  return {
396
429
  category: "not_found",
397
430
  retryable: false,
@@ -412,9 +445,47 @@ export async function classifyFailureResult(result, retryPolicy) {
412
445
  originCooldownMs: 0
413
446
  };
414
447
  }
448
+ if (hasAnyHint(hintText, VRAM_EXHAUSTION_HINTS)) {
449
+ return {
450
+ category: "vram_exhaustion",
451
+ retryable: false,
452
+ retryOrigin: false,
453
+ allowFallback: true,
454
+ originCooldownMs: retryPolicy.originFallbackCooldownMs
455
+ };
456
+ }
457
+ if (status === 413 && hasAnyHint(hintText, RATE_LIMIT_HINTS)) {
458
+ const rateLimitCooldown = retryAfterMs > 0 ? retryAfterMs : retryPolicy.originRateLimitCooldownMs;
459
+ return {
460
+ category: "rate_limited",
461
+ retryable: true,
462
+ retryOrigin: false,
463
+ allowFallback: true,
464
+ originCooldownMs: rateLimitCooldown
465
+ };
466
+ }
415
467
  }
416
468
 
417
469
  if (status === 408 || status === 409 || status >= 500) {
470
+ const hintText5xx = await readProviderErrorHint(result);
471
+ if (hasAnyHint(hintText5xx, VRAM_EXHAUSTION_HINTS)) {
472
+ return {
473
+ category: "vram_exhaustion",
474
+ retryable: false,
475
+ retryOrigin: false,
476
+ allowFallback: true,
477
+ originCooldownMs: retryPolicy.originFallbackCooldownMs
478
+ };
479
+ }
480
+ if (hasAnyHint(hintText5xx, MODEL_NOT_FOUND_HINTS)) {
481
+ return {
482
+ category: "model_not_found",
483
+ retryable: false,
484
+ retryOrigin: false,
485
+ allowFallback: true,
486
+ originCooldownMs: DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS
487
+ };
488
+ }
418
489
  return {
419
490
  category: "temporary_error",
420
491
  retryable: true,
@@ -0,0 +1,39 @@
1
/**
 * Removes top-level request fields that the target model is declared not to support.
 *
 * A group of fields is removed only when its capability flag is strictly `false`;
 * a missing or `undefined` flag leaves the fields untouched. The body is edited
 * in place and the same reference is returned.
 *
 * @param {object} providerBody - Outgoing request body; mutated in place.
 * @param {object} [capabilities] - Capability flags of the target model.
 * @returns {object} The same providerBody reference, minus unsupported fields.
 */
export function stripUnsupportedFields(providerBody, capabilities) {
  const hasCapabilityFlags = Boolean(capabilities) && typeof capabilities === "object";
  if (!hasCapabilityFlags) return providerBody;

  // Capability flag -> request fields that only make sense when the flag is not `false`.
  const fieldsByCapability = [
    ["supportsReasoning", ["reasoning_effort", "reasoning"]],
    ["supportsThinking", ["thinking"]],
    ["supportsResponseFormat", ["response_format"]],
    ["supportsLogprobs", ["logprobs", "top_logprobs"]],
    ["supportsServiceTier", ["service_tier"]],
    ["supportsPrediction", ["prediction", "predicted_output"]],
    ["supportsStreamOptions", ["stream_options"]]
  ];

  for (const [flagName, fieldNames] of fieldsByCapability) {
    if (capabilities[flagName] !== false) continue;
    for (const fieldName of fieldNames) {
      delete providerBody[fieldName];
    }
  }

  return providerBody;
}
39
+
@@ -0,0 +1,211 @@
1
// Shared encoder used to measure UTF-8 byte lengths of strings.
const DEFAULT_TEXT_ENCODER = new TextEncoder();

// Names of the environment variables that configure large-request logging.
export const LARGE_REQUEST_LOG_ENABLED_ENV = "LLM_ROUTER_LOG_LARGE_REQUESTS";
export const LARGE_REQUEST_LOG_THRESHOLD_ENV = "LLM_ROUTER_LARGE_REQUEST_LOG_THRESHOLD_BYTES";
export const LARGE_REQUEST_LOG_PATH_ENV = "LLM_ROUTER_LARGE_REQUEST_LOG_PATH";

// Serialized request size (20 MiB) used when no threshold is configured.
export const DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES = 20 * 1024 ** 2;

// A single string of at least this many bytes (256 KiB) gets its own hint entry.
const LARGE_STRING_HINT_THRESHOLD_BYTES = 256 * 1024;
// Upper bound on the number of hint entries kept per summary.
const MAX_LARGE_STRING_HINTS = 8;
// Upper bound on the number of nodes visited while summarizing a body.
const MAX_SUMMARY_NODES = 50_000;
10
+
11
/**
 * Interprets a loosely-typed flag value as a boolean.
 *
 * Booleans pass through unchanged. Other values are stringified, trimmed and
 * lower-cased, then matched against the accepted true/false spellings.
 *
 * @param {*} value - Raw flag value (for example an environment variable).
 * @param {boolean} [fallback=false] - Result for missing, empty or unrecognized input.
 * @returns {boolean} The parsed flag, or `fallback`.
 */
function toBoolean(value, fallback = false) {
  if (typeof value === "boolean") return value;
  if (value === undefined || value === null || value === "") return fallback;

  switch (String(value).trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "y":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "n":
    case "off":
      return false;
    default:
      return fallback;
  }
}
19
+
20
/**
 * Parses a value as a base-10 integer and accepts it only when it is finite and > 0.
 *
 * @param {*} value - Raw value; stringified before parsing.
 * @param {*} fallback - Returned for missing, empty, unparsable or non-positive input.
 * @returns {*} The parsed positive integer, or `fallback`.
 */
function toPositiveInteger(value, fallback) {
  const isBlank = value === undefined || value === null || value === "";
  if (isBlank) return fallback;

  const candidate = Number.parseInt(String(value), 10);
  return Number.isFinite(candidate) && candidate > 0 ? candidate : fallback;
}
26
+
27
/**
 * Adds a trimmed tool type to `target` unless it is empty or already listed.
 *
 * @param {string[]} target - Accumulator array; mutated in place.
 * @param {*} value - Candidate tool type; falsy values are treated as empty.
 * @returns {void}
 */
function appendToolType(target, value) {
  const toolType = String(value || "").trim();
  const isNewEntry = toolType !== "" && !target.includes(toolType);
  if (isNewEntry) {
    target.push(toolType);
  }
}
32
+
33
/**
 * Maps a content part's `type` value onto a coarse attachment category.
 *
 * The comparison is done on the trimmed, lower-cased type. Any type containing
 * "attachment" that is not an exact match for another category maps to "attachment".
 *
 * @param {*} type - Raw `type` value of a content part.
 * @returns {string} "image", "document", "audio", "file", "attachment", or "" when unrecognized.
 */
function classifyContentType(type) {
  const normalized = String(type || "").trim().toLowerCase();

  switch (normalized) {
    case "image":
    case "image_url":
    case "input_image":
      return "image";
    case "document":
    case "input_document":
      return "document";
    case "audio":
    case "input_audio":
      return "audio";
    case "file":
    case "input_file":
      return "file";
    default:
      return normalized.includes("attachment") ? "attachment" : "";
  }
}
43
+
44
/**
 * Folds one string into the running large-string statistics of `summary`.
 *
 * Always updates `largestStringBytes` when the string is the biggest seen so far.
 * When the UTF-8 size reaches LARGE_STRING_HINT_THRESHOLD_BYTES, it also bumps
 * `largeStringCount` and stores a `{ path, bytes, type }` hint. Hints are kept
 * sorted by size (largest first) and capped at MAX_LARGE_STRING_HINTS entries.
 *
 * @param {object} summary - Summary accumulator; mutated in place.
 * @param {*} value - Candidate value; ignored unless it is a non-empty string.
 * @param {string} path - Location of the value inside the request body.
 * @param {string} [hintType="string"] - Label stored as the hint's `type`.
 * @returns {void}
 */
function maybeRecordLargeString(summary, value, path, hintType = "string") {
  const isNonEmptyString = typeof value === "string" && value.length > 0;
  if (!isNonEmptyString) return;

  const byteLength = DEFAULT_TEXT_ENCODER.encode(value).byteLength;
  if (summary.largestStringBytes < byteLength) {
    summary.largestStringBytes = byteLength;
  }

  if (byteLength >= LARGE_STRING_HINT_THRESHOLD_BYTES) {
    summary.largeStringCount += 1;

    const hints = summary.largeStringHints;
    hints.push({ path, bytes: byteLength, type: hintType });
    hints.sort((first, second) => second.bytes - first.bytes);
    // Drop everything past the cap so only the largest entries survive.
    if (hints.length > MAX_LARGE_STRING_HINTS) {
      hints.splice(MAX_LARGE_STRING_HINTS);
    }
  }
}
63
+
64
/**
 * Builds a structural summary of a provider request body without copying its content.
 *
 * The body is walked iteratively (depth-first, explicit stack). The walk counts
 * messages, inputs, tools, content parts and attachment-like parts, and tracks
 * large strings through `maybeRecordLargeString`. It stops after
 * MAX_SUMMARY_NODES visited nodes and sets `traversalTruncated` in that case.
 * Objects are visited at most once, so cyclic or shared references are safe.
 *
 * Each string is recorded at most once. A base64 `source.data` payload is
 * reported from its parent part with hint type "base64" and is not reported a
 * second time (as type "data") when the `source` object itself is traversed.
 *
 * @param {*} body - Provider request body (normally a plain object).
 * @returns {object} Summary with counters, `toolTypes`, `largeStringHints` and `traversalTruncated`.
 */
function summarizeProviderBody(body) {
  const toolTypes = [];
  for (const tool of Array.isArray(body?.tools) ? body.tools : []) {
    appendToolType(toolTypes, tool?.type);
  }

  const summary = {
    topLevelKeys: body && typeof body === "object" && !Array.isArray(body) ? Object.keys(body).sort() : [],
    messageCount: Array.isArray(body?.messages) ? body.messages.length : 0,
    inputCount: Array.isArray(body?.input) ? body.input.length : 0,
    toolCount: Array.isArray(body?.tools) ? body.tools.length : 0,
    toolTypes,
    contentPartCount: 0,
    attachmentLikeParts: 0,
    imageParts: 0,
    documentParts: 0,
    audioParts: 0,
    fileParts: 0,
    dataUrlStrings: 0,
    base64SourceParts: 0,
    largeStringCount: 0,
    largestStringBytes: 0,
    largeStringHints: [],
    traversalTruncated: false
  };

  const stack = [{ value: body, path: "body" }];
  const seen = new WeakSet();
  // Objects whose own `data` string has already been handed to maybeRecordLargeString.
  // Prevents counting a base64 payload once via its parent and again via its source object.
  const recordedDataOwners = new WeakSet();
  let visited = 0;

  while (stack.length > 0) {
    const current = stack.pop();
    visited += 1;
    if (visited > MAX_SUMMARY_NODES) {
      summary.traversalTruncated = true;
      break;
    }

    const value = current?.value;
    if (typeof value === "string") {
      // Only array elements reach this branch; object-valued strings are handled inline below.
      const isDataUrl = value.startsWith("data:");
      if (isDataUrl) {
        summary.dataUrlStrings += 1;
      }
      maybeRecordLargeString(summary, value, current.path, isDataUrl ? "data-url" : "string");
      continue;
    }
    if (!value || typeof value !== "object") continue;
    if (seen.has(value)) continue;
    seen.add(value);

    if (Array.isArray(value)) {
      // Push in reverse so elements are popped in their original order.
      for (let index = value.length - 1; index >= 0; index -= 1) {
        stack.push({
          value: value[index],
          path: `${current.path}[${index}]`
        });
      }
      continue;
    }

    const contentType = classifyContentType(value.type);
    if (contentType) {
      summary.attachmentLikeParts += 1;
      if (contentType === "image") summary.imageParts += 1;
      if (contentType === "document") summary.documentParts += 1;
      if (contentType === "audio") summary.audioParts += 1;
      if (contentType === "file" || contentType === "attachment") summary.fileParts += 1;
    }
    if (value.source && typeof value.source === "object") {
      const sourceType = String(value.source.type || "").trim().toLowerCase();
      if (sourceType === "base64") {
        summary.base64SourceParts += 1;
        if (!recordedDataOwners.has(value.source)) {
          recordedDataOwners.add(value.source);
          maybeRecordLargeString(summary, value.source.data, `${current.path}.source.data`, "base64");
        }
      }
    }

    for (const [key, child] of Object.entries(value)) {
      const childPath = `${current.path}.${key}`;
      if (typeof child === "string") {
        const isDataUrl = child.startsWith("data:");
        if (isDataUrl) {
          summary.dataUrlStrings += 1;
        }
        if (key === "data") {
          // Already reported from the parent part as a base64 payload.
          if (recordedDataOwners.has(value)) continue;
          recordedDataOwners.add(value);
        }
        const hintType = key === "data"
          ? "data"
          : (key === "text" ? "text" : "string");
        maybeRecordLargeString(summary, child, childPath, isDataUrl ? "data-url" : hintType);
        continue;
      }
      if (key === "content" && Array.isArray(child)) {
        summary.contentPartCount += child.length;
      }
      stack.push({
        value: child,
        path: childPath
      });
    }
  }

  return summary;
}
166
+
167
/**
 * Reports whether large-request logging is switched on in the given environment.
 *
 * @param {object} [env={}] - Environment-like object keyed by variable name.
 * @returns {boolean} Parsed value of LARGE_REQUEST_LOG_ENABLED_ENV; `false` when unset or unrecognized.
 */
export function isLargeRequestLoggingEnabled(env = {}) {
  const rawFlag = env?.[LARGE_REQUEST_LOG_ENABLED_ENV];
  return toBoolean(rawFlag, false);
}
170
+
171
/**
 * Resolves the serialized-size threshold at which a request counts as "large".
 *
 * @param {object} [env={}] - Environment-like object keyed by variable name.
 * @returns {number} Positive integer from LARGE_REQUEST_LOG_THRESHOLD_ENV, or the default threshold.
 */
export function resolveLargeRequestLogThresholdBytes(env = {}) {
  const configured = env?.[LARGE_REQUEST_LOG_THRESHOLD_ENV];
  return toPositiveInteger(configured, DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES);
}
177
+
178
/**
 * Measures the UTF-8 byte length of an already-serialized request body.
 *
 * @param {*} [serializedBody=""] - Serialized body; falsy values count as empty.
 * @returns {number} Encoded size in bytes.
 */
export function measureSerializedRequestBytes(serializedBody = "") {
  const text = String(serializedBody || "");
  const { byteLength } = DEFAULT_TEXT_ENCODER.encode(text);
  return byteLength;
}
181
+
182
/**
 * Assembles the log record describing one oversized provider request.
 *
 * Text fields are stringified and trimmed (missing values become ""), numeric
 * fields fall back when they are not finite, and the request body is reduced to
 * a structural summary via `summarizeProviderBody` rather than being embedded.
 *
 * @param {object} [options]
 * @param {object} [options.providerBody] - Body sent to the provider.
 * @param {number} [options.requestBytes] - Serialized body size; 0 when not finite.
 * @param {number} [options.thresholdBytes] - Active threshold; the default threshold when not finite.
 * @param {string} [options.providerUrl] - Target URL of the provider request.
 * @param {object} [options.candidate] - Route candidate supplying model/provider identifiers.
 * @param {string} [options.sourceFormat] - Format of the incoming client request.
 * @param {string} [options.targetFormat] - Format sent to the provider.
 * @param {string} [options.requestKind] - Request kind label.
 * @param {string} [options.clientType] - Client type label.
 * @param {*} [options.stream] - Coerced to a boolean.
 * @param {string} [options.providerType="http"] - Provider transport/type label.
 * @returns {object} Log entry with `kind: "large-provider-request"`.
 */
export function buildLargeRequestLogEntry({
  providerBody,
  requestBytes,
  thresholdBytes,
  providerUrl,
  candidate,
  sourceFormat,
  targetFormat,
  requestKind,
  clientType,
  stream,
  providerType = "http"
} = {}) {
  const asTrimmedText = (input) => String(input || "").trim();
  const asFiniteNumber = (input, fallback) => {
    const numeric = Number(input);
    return Number.isFinite(numeric) ? numeric : fallback;
  };

  return {
    kind: "large-provider-request",
    providerType: asTrimmedText(providerType) || "http",
    requestBytes: asFiniteNumber(requestBytes, 0),
    thresholdBytes: asFiniteNumber(thresholdBytes, DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES),
    providerUrl: asTrimmedText(providerUrl),
    clientType: asTrimmedText(clientType),
    stream: Boolean(stream),
    sourceFormat: asTrimmedText(sourceFormat),
    targetFormat: asTrimmedText(targetFormat),
    requestKind: asTrimmedText(requestKind),
    requestedModel: asTrimmedText(candidate?.requestModelId),
    providerId: asTrimmedText(candidate?.providerId || candidate?.provider?.id),
    backendModel: asTrimmedText(candidate?.backend || candidate?.modelId || providerBody?.model),
    bodySummary: summarizeProviderBody(providerBody)
  };
}
@@ -21,6 +21,7 @@ import {
21
21
  import { maybeRewriteAmpClientResponse } from "./amp-response.js";
22
22
  import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
23
23
  import { applyReasoningEffortMapping } from "./reasoning-effort.js";
24
+ import { stripUnsupportedFields } from "./field-filter.js";
24
25
  import { resolveUpstreamTimeoutMs } from "./request.js";
25
26
  import { parseJsonSafely } from "./utils.js";
26
27
  import { buildTimeoutSignal } from "../../shared/timeout-signal.js";
@@ -35,11 +36,62 @@ import {
35
36
  rewriteProviderBodyForAmpWebSearch,
36
37
  shouldInterceptAmpWebSearch
37
38
  } from "./amp-web-search.js";
39
+ import {
40
+ buildLargeRequestLogEntry,
41
+ isLargeRequestLoggingEnabled,
42
+ measureSerializedRequestBytes,
43
+ resolveLargeRequestLogThresholdBytes
44
+ } from "./large-request-log.js";
38
45
 
39
46
  function isSubscriptionProvider(provider) {
40
47
  return provider?.type === "subscription";
41
48
  }
42
49
 
50
/**
 * Delivers a large-request log payload to the callback on a best-effort basis.
 *
 * Does nothing when the callback is not a function. Synchronous exceptions are
 * swallowed, and when the callback returns a thenable a no-op rejection handler
 * is attached, so a failing logger never affects the provider call.
 *
 * @param {Function} [onLargeRequestLog] - Log sink; may return a promise.
 * @param {object} payload - Log entry passed to the sink.
 * @returns {void}
 */
function queueLargeRequestEvent(onLargeRequestLog, payload) {
  if (typeof onLargeRequestLog !== "function") return;

  try {
    const pending = onLargeRequestLog(payload);
    const isThenable = Boolean(pending) && typeof pending.then === "function";
    if (isThenable) {
      pending.catch(() => {});
    }
  } catch {
    // Logging is best-effort: a throwing sink must not break request handling.
  }
}
60
+
61
/**
 * Emits a large-request log entry when logging is enabled and the serialized
 * body reaches the configured size threshold.
 *
 * Returns without doing anything when logging is disabled in `env`, when no
 * callback is supplied, or when the body is smaller than the threshold.
 *
 * @param {object} [options]
 * @param {object} [options.env] - Environment-like object holding the logging settings.
 * @param {Function} [options.onLargeRequestLog] - Sink that receives the log entry.
 * @param {object} [options.providerBody] - Body sent to the provider (summarized in the entry).
 * @param {string} [options.serializedBody] - Serialized form of the body, used for the size check.
 * @param {string} [options.providerUrl] - Target URL of the provider request.
 * @param {object} [options.candidate] - Route candidate for the request.
 * @param {string} [options.sourceFormat] - Format of the incoming client request.
 * @param {string} [options.targetFormat] - Format sent to the provider.
 * @param {string} [options.requestKind] - Request kind label.
 * @param {string} [options.clientType] - Client type label.
 * @param {*} [options.stream] - Whether the request is streamed.
 * @param {string} [options.providerType="http"] - Provider transport/type label.
 * @returns {void}
 */
function maybeQueueLargeRequestLog({
  env,
  onLargeRequestLog,
  providerBody,
  serializedBody,
  providerUrl,
  candidate,
  sourceFormat,
  targetFormat,
  requestKind,
  clientType,
  stream,
  providerType = "http"
} = {}) {
  const loggingEnabled = isLargeRequestLoggingEnabled(env);
  if (!loggingEnabled || typeof onLargeRequestLog !== "function") return;

  const requestBytes = measureSerializedRequestBytes(serializedBody);
  const thresholdBytes = resolveLargeRequestLogThresholdBytes(env);
  if (requestBytes < thresholdBytes) return;

  const logEntry = buildLargeRequestLogEntry({
    providerBody,
    requestBytes,
    thresholdBytes,
    providerUrl,
    candidate,
    sourceFormat,
    targetFormat,
    requestKind,
    clientType,
    stream,
    providerType
  });
  queueLargeRequestEvent(onLargeRequestLog, logEntry);
}
94
+
43
95
  async function toProviderError(response) {
44
96
  const raw = await response.text();
45
97
  const parsed = parseJsonSafely(raw);
@@ -97,7 +149,8 @@ async function adaptProviderResponse({
97
149
  requestKind,
98
150
  requestBody,
99
151
  clientType,
100
- env
152
+ env,
153
+ responsesDowngraded
101
154
  }) {
102
155
  const buildSuccessResponse = async (resultResponse) => ({
103
156
  ok: true,
@@ -111,6 +164,30 @@ async function adaptProviderResponse({
111
164
  })
112
165
  });
113
166
 
167
+ // Responses API was downgraded to Chat Completions for provider compatibility.
168
+ // Convert response back: Chat Completions → Claude → Responses API.
169
+ if (responsesDowngraded) {
170
+ if (stream) {
171
+ const claudeStream = handleOpenAIStreamToClaude(response);
172
+ return buildSuccessResponse(handleClaudeStreamToOpenAIResponses(claudeStream, requestBody, fallbackModel));
173
+ }
174
+ const raw = await response.text();
175
+ const parsed = parseJsonSafely(raw);
176
+ if (!parsed) {
177
+ return {
178
+ ok: false,
179
+ status: 502,
180
+ retryable: true,
181
+ response: jsonResponse({
182
+ type: "error",
183
+ error: { type: "api_error", message: "Provider returned invalid JSON." }
184
+ }, 502)
185
+ };
186
+ }
187
+ const claudeMessage = convertOpenAINonStreamToClaude(parsed, fallbackModel);
188
+ return buildSuccessResponse(jsonResponse(convertClaudeNonStreamToOpenAIResponses(claudeMessage, requestBody, fallbackModel)));
189
+ }
190
+
114
191
  if (stream) {
115
192
  if (!translate) {
116
193
  return buildSuccessResponse(
@@ -489,14 +566,22 @@ function buildProviderRequestPlan({
489
566
  requestKind,
490
567
  requestHeaders,
491
568
  interceptAmpWebSearch,
492
- stream
569
+ stream,
570
+ forceResponsesDowngrade = false
493
571
  }) {
494
572
  const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
495
573
  const translate = needsTranslation(sourceFormat, targetFormat);
496
574
 
497
575
  let providerBody = { ...body };
576
+ let responsesDowngraded = false;
498
577
  if (translate) {
499
578
  providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
579
+ } else if (forceResponsesDowngrade) {
580
+ // Provider confirmed to not support Responses API — downgrade to Chat Completions
581
+ // via double-hop: Responses API → Claude → Chat Completions.
582
+ const intermediateBody = translateRequest(FORMATS.OPENAI, FORMATS.CLAUDE, candidate.backend, body, stream);
583
+ providerBody = translateRequest(FORMATS.CLAUDE, FORMATS.OPENAI, candidate.backend, intermediateBody, stream);
584
+ responsesDowngraded = true;
500
585
  }
501
586
 
502
587
  providerBody.model = candidate.backend;
@@ -513,9 +598,19 @@ function buildProviderRequestPlan({
513
598
  sourceFormat,
514
599
  targetFormat,
515
600
  targetModel: candidate.backend,
516
- requestHeaders
601
+ requestHeaders,
602
+ capabilities: candidate.model?.capabilities
517
603
  });
518
604
 
605
+ if (responsesDowngraded) {
606
+ // Strip Responses-API-only fields that Chat Completions providers reject.
607
+ delete providerBody.prompt_cache_key;
608
+ delete providerBody.store;
609
+ delete providerBody.include;
610
+ delete providerBody.text;
611
+ delete providerBody.service_tier;
612
+ }
613
+
519
614
  const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
520
615
  targetFormat,
521
616
  requestKind: normalizedRequestKind
@@ -532,11 +627,14 @@ function buildProviderRequestPlan({
532
627
  providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
533
628
  }
534
629
 
630
+ providerBody = stripUnsupportedFields(providerBody, candidate.model?.capabilities);
631
+
535
632
  return {
536
633
  targetFormat,
537
- requestKind: normalizedRequestKind,
634
+ requestKind: responsesDowngraded ? undefined : normalizedRequestKind,
538
635
  translate,
539
- providerBody
636
+ providerBody,
637
+ responsesDowngraded
540
638
  };
541
639
  }
542
640
 
@@ -552,7 +650,8 @@ export async function makeProviderCall({
552
650
  runtimeConfig,
553
651
  stateStore,
554
652
  ampContext,
555
- runtimeFlags
653
+ runtimeFlags,
654
+ onLargeRequestLog
556
655
  }) {
557
656
  const provider = candidate.provider;
558
657
  const targetFormat = candidate.targetFormat;
@@ -576,8 +675,17 @@ export async function makeProviderCall({
576
675
  effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
577
676
  }
578
677
 
678
+ // For Responses API requests to OpenAI-format providers, try the native endpoint first.
679
+ // If the provider doesn't support /v1/responses (returns 404/400), fall back to a
680
+ // downgraded Chat Completions plan with double-hop translation.
681
+ const needsResponsesDowngradeFallback = !isSubscriptionProvider(provider)
682
+ && sourceFormat === FORMATS.OPENAI
683
+ && targetFormat === FORMATS.OPENAI
684
+ && requestKind === "responses";
685
+
579
686
  let activePlan;
580
687
  let fallbackPlan = null;
688
+ let responsesDowngradedPlan = null;
581
689
  try {
582
690
  activePlan = buildProviderRequestPlan({
583
691
  body: effectiveBody,
@@ -601,6 +709,19 @@ export async function makeProviderCall({
601
709
  stream
602
710
  });
603
711
  }
712
+ if (needsResponsesDowngradeFallback) {
713
+ responsesDowngradedPlan = buildProviderRequestPlan({
714
+ body: effectiveBody,
715
+ sourceFormat,
716
+ targetFormat,
717
+ candidate,
718
+ requestKind,
719
+ requestHeaders,
720
+ interceptAmpWebSearch,
721
+ stream,
722
+ forceResponsesDowngrade: true
723
+ });
724
+ }
604
725
  } catch (error) {
605
726
  return {
606
727
  ok: false,
@@ -651,13 +772,33 @@ export async function makeProviderCall({
651
772
  prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
652
773
  };
653
774
  }
654
- const executeSubscriptionRequest = async (requestBody) => makeSubscriptionProviderCall({
655
- provider,
656
- body: requestBody,
657
- // ChatGPT Codex backend expects stream=true; non-stream responses are reconstructed from SSE.
658
- stream: subscriptionType === "chatgpt-codex" ? true : Boolean(stream),
659
- env
660
- });
775
+ const executeSubscriptionRequest = async (requestBody) => {
776
+ const requestStream = subscriptionType === "chatgpt-codex" ? true : Boolean(stream);
777
+ const providerUrl = subscriptionType === "chatgpt-codex"
778
+ ? "https://chatgpt.com/backend-api/codex/responses"
779
+ : "https://console.anthropic.com/v1/messages?beta=true";
780
+ maybeQueueLargeRequestLog({
781
+ env,
782
+ onLargeRequestLog,
783
+ providerBody: requestBody,
784
+ serializedBody: JSON.stringify(requestBody),
785
+ providerUrl,
786
+ candidate,
787
+ sourceFormat,
788
+ targetFormat: activePlan.targetFormat,
789
+ requestKind: activePlan.requestKind,
790
+ clientType,
791
+ stream: requestStream,
792
+ providerType: subscriptionType
793
+ });
794
+ return makeSubscriptionProviderCall({
795
+ provider,
796
+ body: requestBody,
797
+ // ChatGPT Codex backend expects stream=true; non-stream responses are reconstructed from SSE.
798
+ stream: requestStream,
799
+ env
800
+ });
801
+ };
661
802
  const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
662
803
 
663
804
  if (!subscriptionResult?.ok) {
@@ -854,11 +995,26 @@ export async function makeProviderCall({
854
995
  const timeoutMs = resolveUpstreamTimeoutMs(env);
855
996
  const timeoutControl = buildTimeoutSignal(timeoutMs);
856
997
  try {
998
+ const serializedBody = JSON.stringify(plan.providerBody);
857
999
  const init = {
858
1000
  method: "POST",
859
1001
  headers,
860
- body: JSON.stringify(plan.providerBody)
1002
+ body: serializedBody
861
1003
  };
1004
+ maybeQueueLargeRequestLog({
1005
+ env,
1006
+ onLargeRequestLog,
1007
+ providerBody: plan.providerBody,
1008
+ serializedBody,
1009
+ providerUrl,
1010
+ candidate,
1011
+ sourceFormat,
1012
+ targetFormat: plan.targetFormat,
1013
+ requestKind: plan.requestKind,
1014
+ clientType,
1015
+ stream,
1016
+ providerType: "http"
1017
+ });
862
1018
  if (timeoutControl.signal) {
863
1019
  init.signal = timeoutControl.signal;
864
1020
  }
@@ -934,6 +1090,19 @@ export async function makeProviderCall({
934
1090
  };
935
1091
  }
936
1092
 
1093
+ // Provider doesn't support native /v1/responses — retry with Chat Completions downgrade.
1094
+ if ((!response || !response.ok) && responsesDowngradedPlan) {
1095
+ try {
1096
+ const downgradedResponse = await executeHttpProviderRequest(responsesDowngradedPlan);
1097
+ if (downgradedResponse instanceof Response && downgradedResponse.ok) {
1098
+ response = downgradedResponse;
1099
+ activePlan = responsesDowngradedPlan;
1100
+ }
1101
+ } catch {
1102
+ // Keep the original failure if the downgraded request also fails.
1103
+ }
1104
+ }
1105
+
937
1106
  if (!response.ok) {
938
1107
  const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
939
1108
  targetFormat: activePlan.targetFormat,
@@ -983,6 +1152,7 @@ export async function makeProviderCall({
983
1152
  requestKind: activePlan.requestKind,
984
1153
  requestBody: body,
985
1154
  clientType,
986
- env
1155
+ env,
1156
+ responsesDowngraded: activePlan.responsesDowngraded
987
1157
  });
988
1158
  }
@@ -295,8 +295,18 @@ export function applyReasoningEffortMapping({
295
295
  sourceFormat,
296
296
  targetFormat,
297
297
  targetModel,
298
- requestHeaders
298
+ requestHeaders,
299
+ capabilities
299
300
  }) {
301
+ if (capabilities) {
302
+ if (targetFormat === FORMATS.OPENAI && capabilities.supportsReasoning === false) {
303
+ return providerBody;
304
+ }
305
+ if (targetFormat === FORMATS.CLAUDE && capabilities.supportsThinking === false) {
306
+ return providerBody;
307
+ }
308
+ }
309
+
300
310
  const effort = resolveRequestedEffort(originalBody, requestHeaders);
301
311
  if (!effort) return providerBody;
302
312