@khanglvm/llm-router 2.3.1 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/cli/router-module.js +32 -5
- package/src/node/coding-tool-config.js +138 -25
- package/src/node/large-request-log.js +54 -0
- package/src/node/litellm-context-catalog.js +13 -1
- package/src/node/local-server.js +10 -0
- package/src/node/ollama-client.js +195 -0
- package/src/node/ollama-hardware.js +94 -0
- package/src/node/ollama-install.js +230 -0
- package/src/node/provider-probe.js +69 -5
- package/src/node/web-console-client.js +36 -36
- package/src/node/web-console-server.js +478 -8
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/amp-utils.js +272 -0
- package/src/node/web-console-ui/api-client.js +128 -0
- package/src/node/web-console-ui/capability-utils.js +36 -0
- package/src/node/web-console-ui/config-editor-utils.js +20 -5
- package/src/node/web-console-ui/constants.js +140 -0
- package/src/node/web-console-ui/context-window-utils.js +262 -0
- package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
- package/src/node/web-console-ui/provider-presets.js +211 -0
- package/src/node/web-console-ui/quick-start-utils.js +790 -0
- package/src/node/web-console-ui/utils.js +353 -0
- package/src/node/web-console-ui/web-search-utils.js +460 -0
- package/src/runtime/config.js +96 -9
- package/src/runtime/handler/fallback.js +71 -0
- package/src/runtime/handler/field-filter.js +39 -0
- package/src/runtime/handler/large-request-log.js +211 -0
- package/src/runtime/handler/provider-call.js +185 -15
- package/src/runtime/handler/reasoning-effort.js +11 -1
- package/src/runtime/handler/tool-name-sanitizer.js +258 -0
- package/src/runtime/handler.js +16 -3
- package/src/shared/coding-tool-bindings.js +3 -0
|
@@ -44,6 +44,18 @@ const POLICY_HINTS = [
|
|
|
44
44
|
"unsafe",
|
|
45
45
|
"flagged"
|
|
46
46
|
];
|
|
47
|
+
// Phrases looked for (via hasAnyHint) in a provider error hint to classify a
// failure as "model_not_found". Checked for 404/410 and for 408/409/5xx results.
const MODEL_NOT_FOUND_HINTS = [
  "model not found",
  "model does not exist",
  "model_not_found"
];

// Phrases looked for (via hasAnyHint) in a provider error hint to classify a
// failure as "vram_exhaustion".
const VRAM_EXHAUSTION_HINTS = [
  "insufficient vram",
  "out of memory",
  "failed to load model",
  "insufficient memory"
];

// Origin cooldown reported with a "model_not_found" classification: one hour.
const DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS = 60 * 60 * 1000;
|
|
47
59
|
const CONTEXT_WINDOW_HINTS = [
|
|
48
60
|
"context window",
|
|
49
61
|
"maximum context length",
|
|
@@ -56,6 +68,17 @@ const CONTEXT_WINDOW_HINTS = [
|
|
|
56
68
|
"too many tokens",
|
|
57
69
|
"ran out of room in the model's context window"
|
|
58
70
|
];
|
|
71
|
+
// Phrases looked for (via hasAnyHint) in a provider error hint to treat an
// HTTP 413 result as "rate_limited".
// NOTE(review): "tpm" and "rpm" are very short; if hasAnyHint does plain
// substring matching they could match unrelated text — confirm against hasAnyHint.
const RATE_LIMIT_HINTS = [
  "tokens per minute",
  "requests per minute",
  "rate limit",
  "rate_limit",
  "tpm",
  "rpm",
  "quota exceeded",
  "quota_exceeded",
  "limit exceeded"
];
|
|
59
82
|
const fallbackCircuitState = new Map();
|
|
60
83
|
|
|
61
84
|
export function shouldRetryStatus(status) {
|
|
@@ -392,6 +415,16 @@ export async function classifyFailureResult(result, retryPolicy) {
|
|
|
392
415
|
}
|
|
393
416
|
|
|
394
417
|
if (status === 404 || status === 410) {
|
|
418
|
+
const hintText404 = await readProviderErrorHint(result);
|
|
419
|
+
if (hasAnyHint(hintText404, MODEL_NOT_FOUND_HINTS)) {
|
|
420
|
+
return {
|
|
421
|
+
category: "model_not_found",
|
|
422
|
+
retryable: false,
|
|
423
|
+
retryOrigin: false,
|
|
424
|
+
allowFallback: true,
|
|
425
|
+
originCooldownMs: DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS
|
|
426
|
+
};
|
|
427
|
+
}
|
|
395
428
|
return {
|
|
396
429
|
category: "not_found",
|
|
397
430
|
retryable: false,
|
|
@@ -412,9 +445,47 @@ export async function classifyFailureResult(result, retryPolicy) {
|
|
|
412
445
|
originCooldownMs: 0
|
|
413
446
|
};
|
|
414
447
|
}
|
|
448
|
+
if (hasAnyHint(hintText, VRAM_EXHAUSTION_HINTS)) {
|
|
449
|
+
return {
|
|
450
|
+
category: "vram_exhaustion",
|
|
451
|
+
retryable: false,
|
|
452
|
+
retryOrigin: false,
|
|
453
|
+
allowFallback: true,
|
|
454
|
+
originCooldownMs: retryPolicy.originFallbackCooldownMs
|
|
455
|
+
};
|
|
456
|
+
}
|
|
457
|
+
if (status === 413 && hasAnyHint(hintText, RATE_LIMIT_HINTS)) {
|
|
458
|
+
const rateLimitCooldown = retryAfterMs > 0 ? retryAfterMs : retryPolicy.originRateLimitCooldownMs;
|
|
459
|
+
return {
|
|
460
|
+
category: "rate_limited",
|
|
461
|
+
retryable: true,
|
|
462
|
+
retryOrigin: false,
|
|
463
|
+
allowFallback: true,
|
|
464
|
+
originCooldownMs: rateLimitCooldown
|
|
465
|
+
};
|
|
466
|
+
}
|
|
415
467
|
}
|
|
416
468
|
|
|
417
469
|
if (status === 408 || status === 409 || status >= 500) {
|
|
470
|
+
const hintText5xx = await readProviderErrorHint(result);
|
|
471
|
+
if (hasAnyHint(hintText5xx, VRAM_EXHAUSTION_HINTS)) {
|
|
472
|
+
return {
|
|
473
|
+
category: "vram_exhaustion",
|
|
474
|
+
retryable: false,
|
|
475
|
+
retryOrigin: false,
|
|
476
|
+
allowFallback: true,
|
|
477
|
+
originCooldownMs: retryPolicy.originFallbackCooldownMs
|
|
478
|
+
};
|
|
479
|
+
}
|
|
480
|
+
if (hasAnyHint(hintText5xx, MODEL_NOT_FOUND_HINTS)) {
|
|
481
|
+
return {
|
|
482
|
+
category: "model_not_found",
|
|
483
|
+
retryable: false,
|
|
484
|
+
retryOrigin: false,
|
|
485
|
+
allowFallback: true,
|
|
486
|
+
originCooldownMs: DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS
|
|
487
|
+
};
|
|
488
|
+
}
|
|
418
489
|
return {
|
|
419
490
|
category: "temporary_error",
|
|
420
491
|
retryable: true,
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Capability flag -> request body fields removed when that flag is explicitly `false`.
 */
const UNSUPPORTED_FIELDS_BY_CAPABILITY = [
  ["supportsReasoning", ["reasoning_effort", "reasoning"]],
  ["supportsThinking", ["thinking"]],
  ["supportsResponseFormat", ["response_format"]],
  ["supportsLogprobs", ["logprobs", "top_logprobs"]],
  ["supportsServiceTier", ["service_tier"]],
  ["supportsPrediction", ["prediction", "predicted_output"]],
  ["supportsStreamOptions", ["stream_options"]]
];

/**
 * Strips request body fields the target model doesn't support.
 * Only acts when a capability is explicitly `false` — undefined means "pass through".
 * The body is modified in place (top-level keys are deleted) and returned.
 *
 * @param {object} providerBody - Request body (already cloned upstream)
 * @param {object} [capabilities] - Model capabilities from config
 * @returns {object} The providerBody with unsupported fields deleted; a
 *   non-object providerBody (e.g. null/undefined) is returned untouched.
 */
export function stripUnsupportedFields(providerBody, capabilities) {
  // `delete` on null/undefined throws a TypeError, so bail out before touching the body.
  if (!providerBody || typeof providerBody !== "object") return providerBody;
  if (!capabilities || typeof capabilities !== "object") return providerBody;

  for (const [capabilityFlag, fieldNames] of UNSUPPORTED_FIELDS_BY_CAPABILITY) {
    // Strict comparison: a missing/undefined flag must not strip anything.
    if (capabilities[capabilityFlag] !== false) continue;
    for (const fieldName of fieldNames) {
      delete providerBody[fieldName];
    }
  }

  return providerBody;
}
|
|
39
|
+
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// Shared encoder used to measure the UTF-8 byte length of strings.
const DEFAULT_TEXT_ENCODER = new TextEncoder();

// Names of the environment variables read by the large-request log helpers.
export const LARGE_REQUEST_LOG_ENABLED_ENV = "LLM_ROUTER_LOG_LARGE_REQUESTS";
export const LARGE_REQUEST_LOG_THRESHOLD_ENV = "LLM_ROUTER_LARGE_REQUEST_LOG_THRESHOLD_BYTES";
export const LARGE_REQUEST_LOG_PATH_ENV = "LLM_ROUTER_LARGE_REQUEST_LOG_PATH";

// Default size threshold when the threshold variable is unset or invalid: 20 MiB.
export const DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES = 20 * (1024 * 1024);

// A single string of at least this many bytes (256 KiB) is listed as a "large string" hint.
const LARGE_STRING_HINT_THRESHOLD_BYTES = 256 * 1024;

// Maximum number of large-string hints kept in a body summary.
const MAX_LARGE_STRING_HINTS = 8;

// Upper bound on the number of nodes visited while summarizing a body.
const MAX_SUMMARY_NODES = 50_000;
|
|
10
|
+
|
|
11
|
+
/**
 * Interprets a loosely typed flag (typically an environment variable value).
 *
 * @param {*} value - Raw value; booleans are returned as-is.
 * @param {boolean} [fallback=false] - Returned for undefined, null, "" and unrecognized text.
 * @returns {boolean} true for 1/true/yes/y/on, false for 0/false/no/n/off
 *   (case-insensitive, surrounding whitespace ignored), otherwise `fallback`.
 */
function toBoolean(value, fallback = false) {
  if (typeof value === "boolean") return value;
  if (value == null || value === "") return fallback;

  switch (String(value).trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "y":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "n":
    case "off":
      return false;
    default:
      return fallback;
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Parses a positive integer from a loosely typed value (typically an env var).
 *
 * The whole token must be numeric. `parseInt` was avoided on purpose: it stops
 * at the first non-digit, so "1e6" became 1 and "10abc" became 10, which for a
 * byte threshold silently produces a wildly wrong limit.
 *
 * @param {*} value - Raw value.
 * @param {number} fallback - Returned when the value is missing, blank,
 *   non-numeric, non-finite, or rounds down to less than 1.
 * @returns {number} The value rounded down to an integer, or `fallback`.
 */
function toPositiveInteger(value, fallback) {
  if (value === undefined || value === null) return fallback;

  const text = String(value).trim();
  // Number("") is 0, which falls below the minimum anyway; be explicit about blanks.
  if (text === "") return fallback;

  const parsed = Number(text);
  if (!Number.isFinite(parsed) || parsed < 1) return fallback;
  return Math.floor(parsed);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Adds a tool type label to `target` unless it is blank or already listed.
 *
 * @param {string[]} target - List mutated in place.
 * @param {*} value - Raw tool type; falsy values are treated as blank.
 */
function appendToolType(target, value) {
  const label = String(value || "").trim();
  const isNewLabel = label !== "" && target.indexOf(label) === -1;
  if (isNewLabel) {
    target.push(label);
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Maps a content part `type` to a coarse attachment category.
 *
 * @param {*} type - Raw type value; compared case-insensitively after trimming.
 * @returns {string} "image", "document", "audio", "file", "attachment"
 *   (any type containing "attachment"), or "" when the type is not attachment-like.
 */
function classifyContentType(type) {
  const kind = String(type || "").trim().toLowerCase();

  switch (kind) {
    case "":
      return "";
    case "image":
    case "image_url":
    case "input_image":
      return "image";
    case "document":
    case "input_document":
      return "document";
    case "audio":
    case "input_audio":
      return "audio";
    case "file":
    case "input_file":
      return "file";
    default:
      return kind.includes("attachment") ? "attachment" : "";
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Folds one string into the summary's size statistics.
 *
 * Always updates `largestStringBytes`; strings of at least
 * LARGE_STRING_HINT_THRESHOLD_BYTES are additionally counted and added to
 * `largeStringHints`, which is kept sorted by size (largest first) and capped
 * at MAX_LARGE_STRING_HINTS entries.
 *
 * @param {object} summary - Summary object mutated in place.
 * @param {*} value - Candidate value; non-strings and empty strings are ignored.
 * @param {string} path - Location of the value inside the body, stored in the hint.
 * @param {string} [hintType="string"] - Label stored in the hint's `type`.
 */
function maybeRecordLargeString(summary, value, path, hintType = "string") {
  if (typeof value !== "string" || value === "") return;

  // Size is measured in UTF-8 bytes, not in UTF-16 code units.
  const byteLength = DEFAULT_TEXT_ENCODER.encode(value).byteLength;
  if (summary.largestStringBytes < byteLength) {
    summary.largestStringBytes = byteLength;
  }
  if (byteLength < LARGE_STRING_HINT_THRESHOLD_BYTES) return;

  summary.largeStringCount += 1;

  const hints = summary.largeStringHints;
  hints.push({ path, bytes: byteLength, type: hintType });
  hints.sort((a, b) => b.bytes - a.bytes);
  // Drop everything past the cap (no-op while the list is still short).
  hints.splice(MAX_LARGE_STRING_HINTS);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Builds a size/shape summary of a provider request body without copying its content.
 *
 * Walks the body iteratively (depth-first, explicit stack), counting content
 * parts, attachment-like parts, data URLs and base64 sources, and collecting
 * hints about unusually large strings. The walk stops after MAX_SUMMARY_NODES
 * visited nodes and sets `traversalTruncated` in that case.
 *
 * Each base64 `source.data` string is recorded exactly once, labelled
 * "base64". Previously it was recorded from the owning part and then again
 * (labelled "data") when the `source` object itself was traversed, which
 * double-counted `largeStringCount` and duplicated the hint.
 *
 * @param {*} body - Provider request body (any value; non-objects yield an empty summary).
 * @returns {object} Summary with counters, `toolTypes`, and `largeStringHints`.
 */
function summarizeProviderBody(body) {
  const toolTypes = [];
  for (const tool of Array.isArray(body?.tools) ? body.tools : []) {
    appendToolType(toolTypes, tool?.type);
  }

  const summary = {
    topLevelKeys: body && typeof body === "object" && !Array.isArray(body) ? Object.keys(body).sort() : [],
    messageCount: Array.isArray(body?.messages) ? body.messages.length : 0,
    inputCount: Array.isArray(body?.input) ? body.input.length : 0,
    toolCount: Array.isArray(body?.tools) ? body.tools.length : 0,
    toolTypes,
    contentPartCount: 0,
    attachmentLikeParts: 0,
    imageParts: 0,
    documentParts: 0,
    audioParts: 0,
    fileParts: 0,
    dataUrlStrings: 0,
    base64SourceParts: 0,
    largeStringCount: 0,
    largestStringBytes: 0,
    largeStringHints: [],
    traversalTruncated: false
  };

  const stack = [{ value: body, path: "body" }];
  // Guards against cycles and shared references.
  const seen = new WeakSet();
  // base64 `source` objects whose `data` string was already recorded from the owning part.
  const recordedBase64Sources = new WeakSet();
  let visited = 0;

  while (stack.length > 0) {
    const current = stack.pop();
    visited += 1;
    if (visited > MAX_SUMMARY_NODES) {
      summary.traversalTruncated = true;
      break;
    }

    const value = current?.value;
    if (typeof value === "string") {
      // Only strings that are array elements (or the body itself) arrive here;
      // string-valued object properties are handled inline below.
      const isDataUrl = value.startsWith("data:");
      if (isDataUrl) {
        summary.dataUrlStrings += 1;
      }
      maybeRecordLargeString(summary, value, current.path, isDataUrl ? "data-url" : "string");
      continue;
    }
    if (!value || typeof value !== "object") continue;
    if (seen.has(value)) continue;
    seen.add(value);

    if (Array.isArray(value)) {
      // Push in reverse so elements are popped in their original order.
      for (let index = value.length - 1; index >= 0; index -= 1) {
        stack.push({
          value: value[index],
          path: `${current.path}[${index}]`
        });
      }
      continue;
    }

    const contentType = classifyContentType(value.type);
    if (contentType) {
      summary.attachmentLikeParts += 1;
      if (contentType === "image") summary.imageParts += 1;
      if (contentType === "document") summary.documentParts += 1;
      if (contentType === "audio") summary.audioParts += 1;
      if (contentType === "file" || contentType === "attachment") summary.fileParts += 1;
    }

    const source = value.source;
    if (source && typeof source === "object") {
      const sourceType = String(source.type || "").trim().toLowerCase();
      if (sourceType === "base64") {
        summary.base64SourceParts += 1;
        // Record the payload once: skip it if this source object was already
        // walked (its data was recorded then) or already recorded from another part.
        if (!seen.has(source) && !recordedBase64Sources.has(source)) {
          maybeRecordLargeString(summary, source.data, `${current.path}.source.data`, "base64");
          recordedBase64Sources.add(source);
        }
      }
    }

    for (const [key, child] of Object.entries(value)) {
      const childPath = `${current.path}.${key}`;
      if (typeof child === "string") {
        const hintType = key === "data"
          ? "data"
          : (key === "text" ? "text" : "string");
        const isDataUrl = child.startsWith("data:");
        if (isDataUrl) {
          summary.dataUrlStrings += 1;
        }
        // The owning part already recorded this base64 payload; do not count it twice.
        if (key === "data" && recordedBase64Sources.has(value)) continue;
        maybeRecordLargeString(summary, child, childPath, isDataUrl ? "data-url" : hintType);
        continue;
      }
      if (key === "content" && Array.isArray(child)) {
        summary.contentPartCount += child.length;
      }
      stack.push({
        value: child,
        path: childPath
      });
    }
  }

  return summary;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Reports whether large-request logging is switched on.
 *
 * @param {object} [env={}] - Environment-like map; the flag is read from
 *   LARGE_REQUEST_LOG_ENABLED_ENV.
 * @returns {boolean} The parsed flag; false when it is unset or unrecognized.
 */
export function isLargeRequestLoggingEnabled(env = {}) {
  const rawFlag = env?.[LARGE_REQUEST_LOG_ENABLED_ENV];
  return toBoolean(rawFlag, false);
}
|
|
170
|
+
|
|
171
|
+
/**
 * Resolves the request size (in bytes) from which a request counts as "large".
 *
 * @param {object} [env={}] - Environment-like map; the threshold is read from
 *   LARGE_REQUEST_LOG_THRESHOLD_ENV.
 * @returns {number} The configured positive integer, or
 *   DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES when unset or invalid.
 */
export function resolveLargeRequestLogThresholdBytes(env = {}) {
  const rawThreshold = env?.[LARGE_REQUEST_LOG_THRESHOLD_ENV];
  return toPositiveInteger(rawThreshold, DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Measures the UTF-8 byte length of an already serialized request body.
 *
 * @param {*} [serializedBody=""] - Serialized body; falsy values count as "".
 * @returns {number} Number of bytes the text occupies when UTF-8 encoded.
 */
export function measureSerializedRequestBytes(serializedBody = "") {
  const text = String(serializedBody || "");
  const encoded = DEFAULT_TEXT_ENCODER.encode(text);
  return encoded.byteLength;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Assembles the log record describing one large provider request.
 *
 * Text fields are coerced to trimmed strings (falsy values become ""), numeric
 * fields to finite numbers, and the body itself is reduced to a summary via
 * summarizeProviderBody rather than being embedded.
 *
 * @param {object} [details={}] - Request details.
 * @param {*} details.providerBody - Body sent to the provider; summarized, also a
 *   last-resort source for `backendModel`.
 * @param {*} details.requestBytes - Measured size; non-finite values become 0.
 * @param {*} details.thresholdBytes - Threshold in effect; non-finite values become
 *   DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES.
 * @param {*} details.providerUrl - Target URL.
 * @param {object} [details.candidate] - Routing candidate; read for requestModelId,
 *   providerId / provider.id, and backend / modelId.
 * @param {*} details.sourceFormat - Source request format label.
 * @param {*} details.targetFormat - Target request format label.
 * @param {*} details.requestKind - Request kind label.
 * @param {*} details.clientType - Client type label.
 * @param {*} details.stream - Coerced to a boolean.
 * @param {*} [details.providerType="http"] - Provider type label; blank falls back to "http".
 * @returns {object} Log entry with `kind: "large-provider-request"`.
 */
export function buildLargeRequestLogEntry({
  providerBody,
  requestBytes,
  thresholdBytes,
  providerUrl,
  candidate,
  sourceFormat,
  targetFormat,
  requestKind,
  clientType,
  stream,
  providerType = "http"
} = {}) {
  const asTrimmedText = (input) => String(input || "").trim();
  const asFiniteNumber = (input, fallback) => {
    const numeric = Number(input);
    return Number.isFinite(numeric) ? numeric : fallback;
  };

  return {
    kind: "large-provider-request",
    providerType: asTrimmedText(providerType || "http") || "http",
    requestBytes: asFiniteNumber(requestBytes, 0),
    thresholdBytes: asFiniteNumber(thresholdBytes, DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES),
    providerUrl: asTrimmedText(providerUrl),
    clientType: asTrimmedText(clientType),
    stream: Boolean(stream),
    sourceFormat: asTrimmedText(sourceFormat),
    targetFormat: asTrimmedText(targetFormat),
    requestKind: asTrimmedText(requestKind),
    requestedModel: asTrimmedText(candidate?.requestModelId),
    providerId: asTrimmedText(candidate?.providerId || candidate?.provider?.id),
    backendModel: asTrimmedText(candidate?.backend || candidate?.modelId || providerBody?.model),
    bodySummary: summarizeProviderBody(providerBody)
  };
}
|
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
import { maybeRewriteAmpClientResponse } from "./amp-response.js";
|
|
22
22
|
import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
|
|
23
23
|
import { applyReasoningEffortMapping } from "./reasoning-effort.js";
|
|
24
|
+
import { stripUnsupportedFields } from "./field-filter.js";
|
|
24
25
|
import { resolveUpstreamTimeoutMs } from "./request.js";
|
|
25
26
|
import { parseJsonSafely } from "./utils.js";
|
|
26
27
|
import { buildTimeoutSignal } from "../../shared/timeout-signal.js";
|
|
@@ -35,11 +36,62 @@ import {
|
|
|
35
36
|
rewriteProviderBodyForAmpWebSearch,
|
|
36
37
|
shouldInterceptAmpWebSearch
|
|
37
38
|
} from "./amp-web-search.js";
|
|
39
|
+
import {
|
|
40
|
+
buildLargeRequestLogEntry,
|
|
41
|
+
isLargeRequestLoggingEnabled,
|
|
42
|
+
measureSerializedRequestBytes,
|
|
43
|
+
resolveLargeRequestLogThresholdBytes
|
|
44
|
+
} from "./large-request-log.js";
|
|
38
45
|
|
|
39
46
|
/**
 * Tells whether a provider entry is a subscription-type provider.
 *
 * @param {object} [provider] - Provider entry; may be null or undefined.
 * @returns {boolean} true only when `provider.type` is exactly "subscription".
 */
function isSubscriptionProvider(provider) {
  const providerType = provider?.type;
  return providerType === "subscription";
}
|
|
42
49
|
|
|
50
|
+
/**
 * Hands a log payload to the optional large-request callback, best effort.
 *
 * Logging must never affect the request being served: a synchronous throw from
 * the callback is discarded, and if the callback returns a thenable its
 * rejection is silenced instead of surfacing as an unhandled rejection.
 *
 * @param {Function} [onLargeRequestLog] - Callback; anything that is not a function is ignored.
 * @param {object} payload - Entry passed to the callback unchanged.
 */
function queueLargeRequestEvent(onLargeRequestLog, payload) {
  if (typeof onLargeRequestLog !== "function") return;

  try {
    const pending = onLargeRequestLog(payload);
    if (!pending || typeof pending.then !== "function") return;
    pending.catch(() => {});
  } catch {
    // Deliberately ignored: a failing logger must not break the provider call.
  }
}
|
|
60
|
+
|
|
61
|
+
/**
 * Emits a large-request log entry when logging is enabled and the serialized
 * body reaches the configured size threshold.
 *
 * Does nothing when logging is disabled, when no callback function is
 * supplied, or when the body is smaller than the threshold.
 *
 * @param {object} [options={}] - Call details.
 * @param {object} options.env - Environment-like map holding the logging settings.
 * @param {Function} options.onLargeRequestLog - Receives the built log entry.
 * @param {object} options.providerBody - Body sent to the provider (summarized in the entry).
 * @param {string} options.serializedBody - Serialized form of the body, used for the size check.
 * @param {string} options.providerUrl - Target URL recorded in the entry.
 * @param {object} options.candidate - Routing candidate recorded in the entry.
 * @param {string} options.sourceFormat - Source format label.
 * @param {string} options.targetFormat - Target format label.
 * @param {string} options.requestKind - Request kind label.
 * @param {string} options.clientType - Client type label.
 * @param {boolean} options.stream - Whether the request streams.
 * @param {string} [options.providerType="http"] - Provider type label.
 */
function maybeQueueLargeRequestLog({
  env,
  onLargeRequestLog,
  providerBody,
  serializedBody,
  providerUrl,
  candidate,
  sourceFormat,
  targetFormat,
  requestKind,
  clientType,
  stream,
  providerType = "http"
} = {}) {
  if (!isLargeRequestLoggingEnabled(env)) return;
  if (typeof onLargeRequestLog !== "function") return;

  const thresholdBytes = resolveLargeRequestLogThresholdBytes(env);
  const requestBytes = measureSerializedRequestBytes(serializedBody);
  if (requestBytes < thresholdBytes) return;

  const entry = buildLargeRequestLogEntry({
    providerBody,
    requestBytes,
    thresholdBytes,
    providerUrl,
    candidate,
    sourceFormat,
    targetFormat,
    requestKind,
    clientType,
    stream,
    providerType
  });
  queueLargeRequestEvent(onLargeRequestLog, entry);
}
|
|
94
|
+
|
|
43
95
|
async function toProviderError(response) {
|
|
44
96
|
const raw = await response.text();
|
|
45
97
|
const parsed = parseJsonSafely(raw);
|
|
@@ -97,7 +149,8 @@ async function adaptProviderResponse({
|
|
|
97
149
|
requestKind,
|
|
98
150
|
requestBody,
|
|
99
151
|
clientType,
|
|
100
|
-
env
|
|
152
|
+
env,
|
|
153
|
+
responsesDowngraded
|
|
101
154
|
}) {
|
|
102
155
|
const buildSuccessResponse = async (resultResponse) => ({
|
|
103
156
|
ok: true,
|
|
@@ -111,6 +164,30 @@ async function adaptProviderResponse({
|
|
|
111
164
|
})
|
|
112
165
|
});
|
|
113
166
|
|
|
167
|
+
// Responses API was downgraded to Chat Completions for provider compatibility.
|
|
168
|
+
// Convert response back: Chat Completions → Claude → Responses API.
|
|
169
|
+
if (responsesDowngraded) {
|
|
170
|
+
if (stream) {
|
|
171
|
+
const claudeStream = handleOpenAIStreamToClaude(response);
|
|
172
|
+
return buildSuccessResponse(handleClaudeStreamToOpenAIResponses(claudeStream, requestBody, fallbackModel));
|
|
173
|
+
}
|
|
174
|
+
const raw = await response.text();
|
|
175
|
+
const parsed = parseJsonSafely(raw);
|
|
176
|
+
if (!parsed) {
|
|
177
|
+
return {
|
|
178
|
+
ok: false,
|
|
179
|
+
status: 502,
|
|
180
|
+
retryable: true,
|
|
181
|
+
response: jsonResponse({
|
|
182
|
+
type: "error",
|
|
183
|
+
error: { type: "api_error", message: "Provider returned invalid JSON." }
|
|
184
|
+
}, 502)
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
const claudeMessage = convertOpenAINonStreamToClaude(parsed, fallbackModel);
|
|
188
|
+
return buildSuccessResponse(jsonResponse(convertClaudeNonStreamToOpenAIResponses(claudeMessage, requestBody, fallbackModel)));
|
|
189
|
+
}
|
|
190
|
+
|
|
114
191
|
if (stream) {
|
|
115
192
|
if (!translate) {
|
|
116
193
|
return buildSuccessResponse(
|
|
@@ -489,14 +566,22 @@ function buildProviderRequestPlan({
|
|
|
489
566
|
requestKind,
|
|
490
567
|
requestHeaders,
|
|
491
568
|
interceptAmpWebSearch,
|
|
492
|
-
stream
|
|
569
|
+
stream,
|
|
570
|
+
forceResponsesDowngrade = false
|
|
493
571
|
}) {
|
|
494
572
|
const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
|
|
495
573
|
const translate = needsTranslation(sourceFormat, targetFormat);
|
|
496
574
|
|
|
497
575
|
let providerBody = { ...body };
|
|
576
|
+
let responsesDowngraded = false;
|
|
498
577
|
if (translate) {
|
|
499
578
|
providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
|
|
579
|
+
} else if (forceResponsesDowngrade) {
|
|
580
|
+
// Provider confirmed to not support Responses API — downgrade to Chat Completions
|
|
581
|
+
// via double-hop: Responses API → Claude → Chat Completions.
|
|
582
|
+
const intermediateBody = translateRequest(FORMATS.OPENAI, FORMATS.CLAUDE, candidate.backend, body, stream);
|
|
583
|
+
providerBody = translateRequest(FORMATS.CLAUDE, FORMATS.OPENAI, candidate.backend, intermediateBody, stream);
|
|
584
|
+
responsesDowngraded = true;
|
|
500
585
|
}
|
|
501
586
|
|
|
502
587
|
providerBody.model = candidate.backend;
|
|
@@ -513,9 +598,19 @@ function buildProviderRequestPlan({
|
|
|
513
598
|
sourceFormat,
|
|
514
599
|
targetFormat,
|
|
515
600
|
targetModel: candidate.backend,
|
|
516
|
-
requestHeaders
|
|
601
|
+
requestHeaders,
|
|
602
|
+
capabilities: candidate.model?.capabilities
|
|
517
603
|
});
|
|
518
604
|
|
|
605
|
+
if (responsesDowngraded) {
|
|
606
|
+
// Strip Responses-API-only fields that Chat Completions providers reject.
|
|
607
|
+
delete providerBody.prompt_cache_key;
|
|
608
|
+
delete providerBody.store;
|
|
609
|
+
delete providerBody.include;
|
|
610
|
+
delete providerBody.text;
|
|
611
|
+
delete providerBody.service_tier;
|
|
612
|
+
}
|
|
613
|
+
|
|
519
614
|
const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
|
|
520
615
|
targetFormat,
|
|
521
616
|
requestKind: normalizedRequestKind
|
|
@@ -532,11 +627,14 @@ function buildProviderRequestPlan({
|
|
|
532
627
|
providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
|
|
533
628
|
}
|
|
534
629
|
|
|
630
|
+
providerBody = stripUnsupportedFields(providerBody, candidate.model?.capabilities);
|
|
631
|
+
|
|
535
632
|
return {
|
|
536
633
|
targetFormat,
|
|
537
|
-
requestKind: normalizedRequestKind,
|
|
634
|
+
requestKind: responsesDowngraded ? undefined : normalizedRequestKind,
|
|
538
635
|
translate,
|
|
539
|
-
providerBody
|
|
636
|
+
providerBody,
|
|
637
|
+
responsesDowngraded
|
|
540
638
|
};
|
|
541
639
|
}
|
|
542
640
|
|
|
@@ -552,7 +650,8 @@ export async function makeProviderCall({
|
|
|
552
650
|
runtimeConfig,
|
|
553
651
|
stateStore,
|
|
554
652
|
ampContext,
|
|
555
|
-
runtimeFlags
|
|
653
|
+
runtimeFlags,
|
|
654
|
+
onLargeRequestLog
|
|
556
655
|
}) {
|
|
557
656
|
const provider = candidate.provider;
|
|
558
657
|
const targetFormat = candidate.targetFormat;
|
|
@@ -576,8 +675,17 @@ export async function makeProviderCall({
|
|
|
576
675
|
effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
|
|
577
676
|
}
|
|
578
677
|
|
|
678
|
+
// For Responses API requests to OpenAI-format providers, try the native endpoint first.
|
|
679
|
+
// If the native /v1/responses request does not succeed (the retry below fires on any non-OK result, not only 404/400), fall back to a
|
|
680
|
+
// downgraded Chat Completions plan with double-hop translation.
|
|
681
|
+
const needsResponsesDowngradeFallback = !isSubscriptionProvider(provider)
|
|
682
|
+
&& sourceFormat === FORMATS.OPENAI
|
|
683
|
+
&& targetFormat === FORMATS.OPENAI
|
|
684
|
+
&& requestKind === "responses";
|
|
685
|
+
|
|
579
686
|
let activePlan;
|
|
580
687
|
let fallbackPlan = null;
|
|
688
|
+
let responsesDowngradedPlan = null;
|
|
581
689
|
try {
|
|
582
690
|
activePlan = buildProviderRequestPlan({
|
|
583
691
|
body: effectiveBody,
|
|
@@ -601,6 +709,19 @@ export async function makeProviderCall({
|
|
|
601
709
|
stream
|
|
602
710
|
});
|
|
603
711
|
}
|
|
712
|
+
if (needsResponsesDowngradeFallback) {
|
|
713
|
+
responsesDowngradedPlan = buildProviderRequestPlan({
|
|
714
|
+
body: effectiveBody,
|
|
715
|
+
sourceFormat,
|
|
716
|
+
targetFormat,
|
|
717
|
+
candidate,
|
|
718
|
+
requestKind,
|
|
719
|
+
requestHeaders,
|
|
720
|
+
interceptAmpWebSearch,
|
|
721
|
+
stream,
|
|
722
|
+
forceResponsesDowngrade: true
|
|
723
|
+
});
|
|
724
|
+
}
|
|
604
725
|
} catch (error) {
|
|
605
726
|
return {
|
|
606
727
|
ok: false,
|
|
@@ -651,13 +772,33 @@ export async function makeProviderCall({
|
|
|
651
772
|
prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
|
|
652
773
|
};
|
|
653
774
|
}
|
|
654
|
-
const executeSubscriptionRequest = async (requestBody) =>
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
775
|
+
const executeSubscriptionRequest = async (requestBody) => {
|
|
776
|
+
const requestStream = subscriptionType === "chatgpt-codex" ? true : Boolean(stream);
|
|
777
|
+
const providerUrl = subscriptionType === "chatgpt-codex"
|
|
778
|
+
? "https://chatgpt.com/backend-api/codex/responses"
|
|
779
|
+
: "https://console.anthropic.com/v1/messages?beta=true";
|
|
780
|
+
maybeQueueLargeRequestLog({
|
|
781
|
+
env,
|
|
782
|
+
onLargeRequestLog,
|
|
783
|
+
providerBody: requestBody,
|
|
784
|
+
serializedBody: JSON.stringify(requestBody),
|
|
785
|
+
providerUrl,
|
|
786
|
+
candidate,
|
|
787
|
+
sourceFormat,
|
|
788
|
+
targetFormat: activePlan.targetFormat,
|
|
789
|
+
requestKind: activePlan.requestKind,
|
|
790
|
+
clientType,
|
|
791
|
+
stream: requestStream,
|
|
792
|
+
providerType: subscriptionType
|
|
793
|
+
});
|
|
794
|
+
return makeSubscriptionProviderCall({
|
|
795
|
+
provider,
|
|
796
|
+
body: requestBody,
|
|
797
|
+
// ChatGPT Codex backend expects stream=true; non-stream responses are reconstructed from SSE.
|
|
798
|
+
stream: requestStream,
|
|
799
|
+
env
|
|
800
|
+
});
|
|
801
|
+
};
|
|
661
802
|
const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
|
|
662
803
|
|
|
663
804
|
if (!subscriptionResult?.ok) {
|
|
@@ -854,11 +995,26 @@ export async function makeProviderCall({
|
|
|
854
995
|
const timeoutMs = resolveUpstreamTimeoutMs(env);
|
|
855
996
|
const timeoutControl = buildTimeoutSignal(timeoutMs);
|
|
856
997
|
try {
|
|
998
|
+
const serializedBody = JSON.stringify(plan.providerBody);
|
|
857
999
|
const init = {
|
|
858
1000
|
method: "POST",
|
|
859
1001
|
headers,
|
|
860
|
-
body:
|
|
1002
|
+
body: serializedBody
|
|
861
1003
|
};
|
|
1004
|
+
maybeQueueLargeRequestLog({
|
|
1005
|
+
env,
|
|
1006
|
+
onLargeRequestLog,
|
|
1007
|
+
providerBody: plan.providerBody,
|
|
1008
|
+
serializedBody,
|
|
1009
|
+
providerUrl,
|
|
1010
|
+
candidate,
|
|
1011
|
+
sourceFormat,
|
|
1012
|
+
targetFormat: plan.targetFormat,
|
|
1013
|
+
requestKind: plan.requestKind,
|
|
1014
|
+
clientType,
|
|
1015
|
+
stream,
|
|
1016
|
+
providerType: "http"
|
|
1017
|
+
});
|
|
862
1018
|
if (timeoutControl.signal) {
|
|
863
1019
|
init.signal = timeoutControl.signal;
|
|
864
1020
|
}
|
|
@@ -934,6 +1090,19 @@ export async function makeProviderCall({
|
|
|
934
1090
|
};
|
|
935
1091
|
}
|
|
936
1092
|
|
|
1093
|
+
// Provider doesn't support native /v1/responses — retry with Chat Completions downgrade.
|
|
1094
|
+
if ((!response || !response.ok) && responsesDowngradedPlan) {
|
|
1095
|
+
try {
|
|
1096
|
+
const downgradedResponse = await executeHttpProviderRequest(responsesDowngradedPlan);
|
|
1097
|
+
if (downgradedResponse instanceof Response && downgradedResponse.ok) {
|
|
1098
|
+
response = downgradedResponse;
|
|
1099
|
+
activePlan = responsesDowngradedPlan;
|
|
1100
|
+
}
|
|
1101
|
+
} catch {
|
|
1102
|
+
// Keep the original failure if the downgraded request also fails.
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
|
|
937
1106
|
if (!response.ok) {
|
|
938
1107
|
const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
|
|
939
1108
|
targetFormat: activePlan.targetFormat,
|
|
@@ -983,6 +1152,7 @@ export async function makeProviderCall({
|
|
|
983
1152
|
requestKind: activePlan.requestKind,
|
|
984
1153
|
requestBody: body,
|
|
985
1154
|
clientType,
|
|
986
|
-
env
|
|
1155
|
+
env,
|
|
1156
|
+
responsesDowngraded: activePlan.responsesDowngraded
|
|
987
1157
|
});
|
|
988
1158
|
}
|
|
@@ -295,8 +295,18 @@ export function applyReasoningEffortMapping({
|
|
|
295
295
|
sourceFormat,
|
|
296
296
|
targetFormat,
|
|
297
297
|
targetModel,
|
|
298
|
-
requestHeaders
|
|
298
|
+
requestHeaders,
|
|
299
|
+
capabilities
|
|
299
300
|
}) {
|
|
301
|
+
if (capabilities) {
|
|
302
|
+
if (targetFormat === FORMATS.OPENAI && capabilities.supportsReasoning === false) {
|
|
303
|
+
return providerBody;
|
|
304
|
+
}
|
|
305
|
+
if (targetFormat === FORMATS.CLAUDE && capabilities.supportsThinking === false) {
|
|
306
|
+
return providerBody;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
300
310
|
const effort = resolveRequestedEffort(originalBody, requestHeaders);
|
|
301
311
|
if (!effort) return providerBody;
|
|
302
312
|
|