@khanglvm/llm-router 2.3.1 → 2.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/cli/router-module.js +32 -5
- package/src/node/coding-tool-config.js +138 -25
- package/src/node/large-request-log.js +54 -0
- package/src/node/litellm-context-catalog.js +13 -1
- package/src/node/local-server.js +10 -0
- package/src/node/ollama-client.js +195 -0
- package/src/node/ollama-hardware.js +94 -0
- package/src/node/ollama-install.js +230 -0
- package/src/node/provider-probe.js +69 -5
- package/src/node/web-console-client.js +36 -36
- package/src/node/web-console-server.js +478 -8
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/amp-utils.js +272 -0
- package/src/node/web-console-ui/api-client.js +128 -0
- package/src/node/web-console-ui/capability-utils.js +36 -0
- package/src/node/web-console-ui/config-editor-utils.js +20 -5
- package/src/node/web-console-ui/constants.js +140 -0
- package/src/node/web-console-ui/context-window-utils.js +262 -0
- package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
- package/src/node/web-console-ui/provider-presets.js +211 -0
- package/src/node/web-console-ui/quick-start-utils.js +790 -0
- package/src/node/web-console-ui/utils.js +353 -0
- package/src/node/web-console-ui/web-search-utils.js +460 -0
- package/src/runtime/config.js +96 -9
- package/src/runtime/handler/fallback.js +71 -0
- package/src/runtime/handler/field-filter.js +39 -0
- package/src/runtime/handler/large-request-log.js +211 -0
- package/src/runtime/handler/provider-call.js +276 -15
- package/src/runtime/handler/reasoning-effort.js +11 -1
- package/src/runtime/handler/tool-name-sanitizer.js +258 -0
- package/src/runtime/handler.js +16 -3
- package/src/shared/coding-tool-bindings.js +3 -0
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
import { maybeRewriteAmpClientResponse } from "./amp-response.js";
|
|
22
22
|
import { applyCachingMapping, mergeCachingHeaders } from "./cache-mapping.js";
|
|
23
23
|
import { applyReasoningEffortMapping } from "./reasoning-effort.js";
|
|
24
|
+
import { stripUnsupportedFields } from "./field-filter.js";
|
|
24
25
|
import { resolveUpstreamTimeoutMs } from "./request.js";
|
|
25
26
|
import { parseJsonSafely } from "./utils.js";
|
|
26
27
|
import { buildTimeoutSignal } from "../../shared/timeout-signal.js";
|
|
@@ -35,11 +36,139 @@ import {
|
|
|
35
36
|
rewriteProviderBodyForAmpWebSearch,
|
|
36
37
|
shouldInterceptAmpWebSearch
|
|
37
38
|
} from "./amp-web-search.js";
|
|
39
|
+
import {
|
|
40
|
+
buildLargeRequestLogEntry,
|
|
41
|
+
isLargeRequestLoggingEnabled,
|
|
42
|
+
measureSerializedRequestBytes,
|
|
43
|
+
resolveLargeRequestLogThresholdBytes
|
|
44
|
+
} from "./large-request-log.js";
|
|
45
|
+
|
|
46
|
+
// Lifetime of a single suppression entry: 30 minutes, expressed in milliseconds.
const OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS = 1000 * 60 * 30;

// Module-level registry mapping a suppression key to the epoch-millisecond
// timestamp at which that suppression expires.
const openAIToolRoutingSuppressionUntil = new Map();
|
|
38
48
|
|
|
39
49
|
/**
 * Tells whether a provider entry is a subscription-type provider.
 *
 * @param {object|null|undefined} provider - Provider config; may be missing.
 * @returns {boolean} True only when `provider.type` is exactly "subscription".
 */
function isSubscriptionProvider(provider) {
  const providerType = provider?.type;
  return providerType === "subscription";
}
|
|
42
52
|
|
|
53
|
+
/**
 * Coerces a single format value, or a list of them, into a de-duplicated array
 * that contains only FORMATS.OPENAI and/or FORMATS.CLAUDE.
 *
 * Each entry is stringified and trimmed before comparison; anything else
 * (including empty or missing values) is dropped. First-seen order is kept.
 *
 * @param {*} values - One format value or an array of format values.
 * @returns {string[]} Unique recognised formats.
 */
function normalizeFormatList(values) {
  const candidates = Array.isArray(values) ? values : [values];
  const accepted = new Set();
  for (const candidate of candidates) {
    const format = String(candidate || "").trim();
    if (format === FORMATS.OPENAI || format === FORMATS.CLAUDE) {
      accepted.add(format);
    }
  }
  return [...accepted];
}
|
|
60
|
+
|
|
61
|
+
/**
 * Picks the model entry to use for a candidate.
 *
 * A non-array object passed as `model` is returned as-is. Otherwise the
 * provider's `models` array is searched for an entry whose trimmed `id`
 * equals the trimmed `modelId`.
 *
 * @param {object|null|undefined} provider - Provider config that may carry a `models` array.
 * @param {*} model - Already-resolved model object, if the caller has one.
 * @param {*} modelId - Identifier used for the lookup when `model` is not an object.
 * @returns {object|null} The model entry, or null when nothing matches.
 */
function resolveCandidateModel(provider, model, modelId) {
  const isModelObject = Boolean(model) && typeof model === "object" && !Array.isArray(model);
  if (isModelObject) return model;

  const wantedId = String(modelId || "").trim();
  const knownModels = provider?.models;
  if (!wantedId || !Array.isArray(knownModels)) return null;

  for (const entry of knownModels) {
    if (String(entry?.id || "").trim() === wantedId) {
      return entry || null;
    }
  }
  return null;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Determines which API formats a provider model can be called with.
 *
 * Resolution order:
 *   1. With no usable model id, the formats configured on the model
 *      (`formats`, else `format`).
 *   2. The preferred format recorded under
 *      `provider.lastProbe.modelPreferredFormat[id]`, as a one-element list.
 *   3. The formats recorded under `provider.lastProbe.modelSupport[id]`.
 *   4. The configured formats again, when the probe recorded nothing usable.
 *
 * @param {object|null|undefined} provider - Provider config, optionally with `lastProbe` data.
 * @param {*} model - Resolved model object, if available.
 * @param {*} modelId - Model identifier used when `model` is not an object.
 * @returns {string[]} Recognised formats; possibly empty.
 */
function getProviderModelSupportedFormats(provider, model, modelId) {
  const candidateModel = resolveCandidateModel(provider, model, modelId);
  const declaredFormats = normalizeFormatList(candidateModel?.formats || candidateModel?.format);

  const lookupId = String(candidateModel?.id || modelId || "").trim();
  if (!lookupId) {
    return declaredFormats;
  }

  const preferred = provider?.lastProbe?.modelPreferredFormat?.[lookupId];
  const preferredIsKnown = preferred === FORMATS.OPENAI || preferred === FORMATS.CLAUDE;
  if (preferredIsKnown) {
    return [preferred];
  }

  const probedFormats = normalizeFormatList(provider?.lastProbe?.modelSupport?.[lookupId]);
  if (probedFormats.length > 0) {
    return probedFormats;
  }
  return declaredFormats;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Reads the preferred format recorded for a model in
 * `provider.lastProbe.modelPreferredFormat`.
 *
 * @param {object|null|undefined} provider - Provider config, optionally with `lastProbe` data.
 * @param {*} model - Resolved model object, if available.
 * @param {*} modelId - Model identifier used when `model` is not an object.
 * @returns {string} FORMATS.OPENAI or FORMATS.CLAUDE when one of those is
 *   recorded (after trimming); otherwise an empty string.
 */
function getProviderModelPreferredFormat(provider, model, modelId) {
  const candidateModel = resolveCandidateModel(provider, model, modelId);
  const lookupId = String(candidateModel?.id || modelId || "").trim();
  if (!lookupId) {
    return "";
  }

  const recorded = String(provider?.lastProbe?.modelPreferredFormat?.[lookupId] || "").trim();
  if (recorded === FORMATS.OPENAI || recorded === FORMATS.CLAUDE) {
    return recorded;
  }
  return "";
}
|
|
94
|
+
|
|
95
|
+
/**
 * Builds the registry key ("<providerId>/<modelId>") for a routing candidate.
 *
 * The provider part comes from `candidate.providerId`, else `candidate.provider.id`.
 * The model part comes from `candidate.modelId`, else `candidate.model.id`,
 * else `candidate.backend`. Both parts are stringified and trimmed.
 *
 * @param {object|null|undefined} candidate - Routing candidate.
 * @returns {string} The key, or an empty string when either part is missing.
 */
function buildOpenAIToolRoutingSuppressionKey(candidate) {
  const clean = (value) => String(value || "").trim();
  const providerPart = clean(candidate?.providerId || candidate?.provider?.id);
  const modelPart = clean(candidate?.modelId || candidate?.model?.id || candidate?.backend);
  return providerPart && modelPart ? `${providerPart}/${modelPart}` : "";
}
|
|
101
|
+
|
|
102
|
+
/**
 * Removes every registry entry whose expiry is not a finite number or is at
 * or before `now`.
 *
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {void}
 */
function pruneOpenAIToolRoutingSuppressions(now = Date.now()) {
  // Deleting entries from a Map while iterating it with forEach is safe.
  openAIToolRoutingSuppressionUntil.forEach((expiresAt, key) => {
    const expired = !Number.isFinite(expiresAt) || expiresAt <= now;
    if (expired) {
      openAIToolRoutingSuppressionUntil.delete(key);
    }
  });
}
|
|
109
|
+
|
|
110
|
+
/**
 * Reports whether a candidate currently has an unexpired suppression entry.
 * Expired entries are pruned from the registry before the lookup.
 *
 * @param {object|null|undefined} candidate - Routing candidate.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {boolean} True when an entry exists and expires after `now`;
 *   false when no key can be built for the candidate.
 */
function isOpenAIToolRoutingSuppressed(candidate, now = Date.now()) {
  const suppressionKey = buildOpenAIToolRoutingSuppressionKey(candidate);
  if (!suppressionKey) {
    return false;
  }
  pruneOpenAIToolRoutingSuppressions(now);
  // A missing entry becomes NaN, which never compares greater than `now`.
  const expiresAt = Number(openAIToolRoutingSuppressionUntil.get(suppressionKey));
  return expiresAt > now;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Records (or refreshes) a suppression entry for a candidate, expiring
 * OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS after `now`.
 * Does nothing when no key can be built for the candidate.
 *
 * @param {object|null|undefined} candidate - Routing candidate.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {void}
 */
function suppressOpenAIToolRouting(candidate, now = Date.now()) {
  const suppressionKey = buildOpenAIToolRoutingSuppressionKey(candidate);
  if (suppressionKey) {
    const expiresAt = now + OPENAI_TOOL_ROUTING_SUPPRESSION_TTL_MS;
    openAIToolRoutingSuppressionUntil.set(suppressionKey, expiresAt);
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Empties the module-level suppression registry, discarding every recorded
 * suppression entry.
 *
 * @returns {void}
 */
export function resetOpenAIToolRoutingLearningState() {
  openAIToolRoutingSuppressionUntil.clear();
}
|
|
126
|
+
|
|
127
|
+
/**
 * Invokes the large-request log callback on a best-effort basis.
 *
 * Neither a synchronous throw nor an asynchronous rejection from the callback
 * is allowed to propagate to the caller.
 *
 * @param {Function|*} onLargeRequestLog - Callback to invoke; ignored unless it is a function.
 * @param {*} payload - Value handed to the callback unchanged.
 * @returns {void}
 */
function queueLargeRequestEvent(onLargeRequestLog, payload) {
  if (typeof onLargeRequestLog !== "function") return;
  try {
    const result = onLargeRequestLog(payload);
    if (typeof result?.then === "function") {
      // Wrap with Promise.resolve so thenables that lack `.catch` are still
      // subscribed to and their rejections are absorbed.
      Promise.resolve(result).catch(() => {});
    }
  } catch {
    // Intentionally ignored: logging failures must not affect the request.
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Emits a large-request log entry when logging is enabled and the serialized
 * request body is at least as large as the configured threshold.
 *
 * The whole routine is best-effort: any error raised while checking the
 * configuration, measuring the body, or building the entry is ignored so that
 * diagnostics can never abort the provider call being observed.
 *
 * @param {object} [options]
 * @param {*} options.env - Environment passed to the enable/threshold helpers.
 * @param {Function} options.onLargeRequestLog - Callback that receives the log entry.
 * @param {*} options.providerBody - Request body forwarded to the entry builder.
 * @param {*} options.serializedBody - Serialized body whose size is measured.
 * @param {*} options.providerUrl - Forwarded to the entry builder.
 * @param {*} options.candidate - Forwarded to the entry builder.
 * @param {*} options.sourceFormat - Forwarded to the entry builder.
 * @param {*} options.targetFormat - Forwarded to the entry builder.
 * @param {*} options.requestKind - Forwarded to the entry builder.
 * @param {*} options.clientType - Forwarded to the entry builder.
 * @param {*} options.stream - Forwarded to the entry builder.
 * @param {string} [options.providerType="http"] - Forwarded to the entry builder.
 * @returns {void}
 */
function maybeQueueLargeRequestLog({
  env,
  onLargeRequestLog,
  providerBody,
  serializedBody,
  providerUrl,
  candidate,
  sourceFormat,
  targetFormat,
  requestKind,
  clientType,
  stream,
  providerType = "http"
} = {}) {
  try {
    if (!isLargeRequestLoggingEnabled(env) || typeof onLargeRequestLog !== "function") return;
    const requestBytes = measureSerializedRequestBytes(serializedBody);
    const thresholdBytes = resolveLargeRequestLogThresholdBytes(env);
    if (requestBytes < thresholdBytes) return;

    queueLargeRequestEvent(onLargeRequestLog, buildLargeRequestLogEntry({
      providerBody,
      requestBytes,
      thresholdBytes,
      providerUrl,
      candidate,
      sourceFormat,
      targetFormat,
      requestKind,
      clientType,
      stream,
      providerType
    }));
  } catch {
    // Intentionally ignored: a failure while preparing a diagnostic log entry
    // must not break the upstream request.
  }
}
|
|
171
|
+
|
|
43
172
|
async function toProviderError(response) {
|
|
44
173
|
const raw = await response.text();
|
|
45
174
|
const parsed = parseJsonSafely(raw);
|
|
@@ -97,7 +226,8 @@ async function adaptProviderResponse({
|
|
|
97
226
|
requestKind,
|
|
98
227
|
requestBody,
|
|
99
228
|
clientType,
|
|
100
|
-
env
|
|
229
|
+
env,
|
|
230
|
+
responsesDowngraded
|
|
101
231
|
}) {
|
|
102
232
|
const buildSuccessResponse = async (resultResponse) => ({
|
|
103
233
|
ok: true,
|
|
@@ -111,6 +241,30 @@ async function adaptProviderResponse({
|
|
|
111
241
|
})
|
|
112
242
|
});
|
|
113
243
|
|
|
244
|
+
// Responses API was downgraded to Chat Completions for provider compatibility.
|
|
245
|
+
// Convert response back: Chat Completions → Claude → Responses API.
|
|
246
|
+
if (responsesDowngraded) {
|
|
247
|
+
if (stream) {
|
|
248
|
+
const claudeStream = handleOpenAIStreamToClaude(response);
|
|
249
|
+
return buildSuccessResponse(handleClaudeStreamToOpenAIResponses(claudeStream, requestBody, fallbackModel));
|
|
250
|
+
}
|
|
251
|
+
const raw = await response.text();
|
|
252
|
+
const parsed = parseJsonSafely(raw);
|
|
253
|
+
if (!parsed) {
|
|
254
|
+
return {
|
|
255
|
+
ok: false,
|
|
256
|
+
status: 502,
|
|
257
|
+
retryable: true,
|
|
258
|
+
response: jsonResponse({
|
|
259
|
+
type: "error",
|
|
260
|
+
error: { type: "api_error", message: "Provider returned invalid JSON." }
|
|
261
|
+
}, 502)
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
const claudeMessage = convertOpenAINonStreamToClaude(parsed, fallbackModel);
|
|
265
|
+
return buildSuccessResponse(jsonResponse(convertClaudeNonStreamToOpenAIResponses(claudeMessage, requestBody, fallbackModel)));
|
|
266
|
+
}
|
|
267
|
+
|
|
114
268
|
if (stream) {
|
|
115
269
|
if (!translate) {
|
|
116
270
|
return buildSuccessResponse(
|
|
@@ -236,6 +390,9 @@ function normalizeProviderRequestKind(targetFormat, requestKind) {
|
|
|
236
390
|
|
|
237
391
|
function shouldPreferOpenAIForClaudeToolCalls({
|
|
238
392
|
provider,
|
|
393
|
+
model,
|
|
394
|
+
modelId,
|
|
395
|
+
candidate,
|
|
239
396
|
sourceFormat,
|
|
240
397
|
targetFormat,
|
|
241
398
|
requestKind,
|
|
@@ -243,6 +400,11 @@ function shouldPreferOpenAIForClaudeToolCalls({
|
|
|
243
400
|
} = {}) {
|
|
244
401
|
if (sourceFormat !== FORMATS.CLAUDE || targetFormat !== FORMATS.CLAUDE) return false;
|
|
245
402
|
if (!hasToolDefinitions(body)) return false;
|
|
403
|
+
if (candidate && isOpenAIToolRoutingSuppressed(candidate)) return false;
|
|
404
|
+
const preferredFormat = getProviderModelPreferredFormat(provider, model, modelId);
|
|
405
|
+
if (preferredFormat === FORMATS.CLAUDE) return false;
|
|
406
|
+
const modelFormats = getProviderModelSupportedFormats(provider, model, modelId);
|
|
407
|
+
if (modelFormats.length > 0 && !modelFormats.includes(FORMATS.OPENAI)) return false;
|
|
246
408
|
if (!getProviderFormats(provider).includes(FORMATS.OPENAI)) return false;
|
|
247
409
|
return Boolean(resolveProviderUrl(provider, FORMATS.OPENAI, normalizeProviderRequestKind(FORMATS.OPENAI, requestKind)));
|
|
248
410
|
}
|
|
@@ -489,14 +651,22 @@ function buildProviderRequestPlan({
|
|
|
489
651
|
requestKind,
|
|
490
652
|
requestHeaders,
|
|
491
653
|
interceptAmpWebSearch,
|
|
492
|
-
stream
|
|
654
|
+
stream,
|
|
655
|
+
forceResponsesDowngrade = false
|
|
493
656
|
}) {
|
|
494
657
|
const normalizedRequestKind = normalizeProviderRequestKind(targetFormat, requestKind);
|
|
495
658
|
const translate = needsTranslation(sourceFormat, targetFormat);
|
|
496
659
|
|
|
497
660
|
let providerBody = { ...body };
|
|
661
|
+
let responsesDowngraded = false;
|
|
498
662
|
if (translate) {
|
|
499
663
|
providerBody = translateRequest(sourceFormat, targetFormat, candidate.backend, body, stream);
|
|
664
|
+
} else if (forceResponsesDowngrade) {
|
|
665
|
+
// Provider confirmed to not support Responses API — downgrade to Chat Completions
|
|
666
|
+
// via double-hop: Responses API → Claude → Chat Completions.
|
|
667
|
+
const intermediateBody = translateRequest(FORMATS.OPENAI, FORMATS.CLAUDE, candidate.backend, body, stream);
|
|
668
|
+
providerBody = translateRequest(FORMATS.CLAUDE, FORMATS.OPENAI, candidate.backend, intermediateBody, stream);
|
|
669
|
+
responsesDowngraded = true;
|
|
500
670
|
}
|
|
501
671
|
|
|
502
672
|
providerBody.model = candidate.backend;
|
|
@@ -513,9 +683,19 @@ function buildProviderRequestPlan({
|
|
|
513
683
|
sourceFormat,
|
|
514
684
|
targetFormat,
|
|
515
685
|
targetModel: candidate.backend,
|
|
516
|
-
requestHeaders
|
|
686
|
+
requestHeaders,
|
|
687
|
+
capabilities: candidate.model?.capabilities
|
|
517
688
|
});
|
|
518
689
|
|
|
690
|
+
if (responsesDowngraded) {
|
|
691
|
+
// Strip Responses-API-only fields that Chat Completions providers reject.
|
|
692
|
+
delete providerBody.prompt_cache_key;
|
|
693
|
+
delete providerBody.store;
|
|
694
|
+
delete providerBody.include;
|
|
695
|
+
delete providerBody.text;
|
|
696
|
+
delete providerBody.service_tier;
|
|
697
|
+
}
|
|
698
|
+
|
|
519
699
|
const declaredOpenAIHostedWebSearchToolType = getProviderOpenAIHostedWebSearchToolType(candidate.provider, {
|
|
520
700
|
targetFormat,
|
|
521
701
|
requestKind: normalizedRequestKind
|
|
@@ -532,11 +712,14 @@ function buildProviderRequestPlan({
|
|
|
532
712
|
providerBody = rewriteProviderBodyForAmpWebSearch(providerBody, targetFormat, requestKind).providerBody;
|
|
533
713
|
}
|
|
534
714
|
|
|
715
|
+
providerBody = stripUnsupportedFields(providerBody, candidate.model?.capabilities);
|
|
716
|
+
|
|
535
717
|
return {
|
|
536
718
|
targetFormat,
|
|
537
|
-
requestKind: normalizedRequestKind,
|
|
719
|
+
requestKind: responsesDowngraded ? undefined : normalizedRequestKind,
|
|
538
720
|
translate,
|
|
539
|
-
providerBody
|
|
721
|
+
providerBody,
|
|
722
|
+
responsesDowngraded
|
|
540
723
|
};
|
|
541
724
|
}
|
|
542
725
|
|
|
@@ -552,7 +735,8 @@ export async function makeProviderCall({
|
|
|
552
735
|
runtimeConfig,
|
|
553
736
|
stateStore,
|
|
554
737
|
ampContext,
|
|
555
|
-
runtimeFlags
|
|
738
|
+
runtimeFlags,
|
|
739
|
+
onLargeRequestLog
|
|
556
740
|
}) {
|
|
557
741
|
const provider = candidate.provider;
|
|
558
742
|
const targetFormat = candidate.targetFormat;
|
|
@@ -565,6 +749,9 @@ export async function makeProviderCall({
|
|
|
565
749
|
|
|
566
750
|
const preferOpenAIToolRouting = !isSubscriptionProvider(provider) && shouldPreferOpenAIForClaudeToolCalls({
|
|
567
751
|
provider,
|
|
752
|
+
model: candidate?.model,
|
|
753
|
+
modelId: candidate?.modelId,
|
|
754
|
+
candidate,
|
|
568
755
|
sourceFormat,
|
|
569
756
|
targetFormat,
|
|
570
757
|
requestKind,
|
|
@@ -576,8 +763,17 @@ export async function makeProviderCall({
|
|
|
576
763
|
effectiveBody = { ...body, reasoning_effort: ampContext.presets.reasoningEffort };
|
|
577
764
|
}
|
|
578
765
|
|
|
766
|
+
// For Responses API requests to OpenAI-format providers, try the native endpoint first.
|
|
767
|
+
// If the provider doesn't support /v1/responses (returns 404/400), fall back to a
|
|
768
|
+
// downgraded Chat Completions plan with double-hop translation.
|
|
769
|
+
const needsResponsesDowngradeFallback = !isSubscriptionProvider(provider)
|
|
770
|
+
&& sourceFormat === FORMATS.OPENAI
|
|
771
|
+
&& targetFormat === FORMATS.OPENAI
|
|
772
|
+
&& requestKind === "responses";
|
|
773
|
+
|
|
579
774
|
let activePlan;
|
|
580
775
|
let fallbackPlan = null;
|
|
776
|
+
let responsesDowngradedPlan = null;
|
|
581
777
|
try {
|
|
582
778
|
activePlan = buildProviderRequestPlan({
|
|
583
779
|
body: effectiveBody,
|
|
@@ -601,6 +797,19 @@ export async function makeProviderCall({
|
|
|
601
797
|
stream
|
|
602
798
|
});
|
|
603
799
|
}
|
|
800
|
+
if (needsResponsesDowngradeFallback) {
|
|
801
|
+
responsesDowngradedPlan = buildProviderRequestPlan({
|
|
802
|
+
body: effectiveBody,
|
|
803
|
+
sourceFormat,
|
|
804
|
+
targetFormat,
|
|
805
|
+
candidate,
|
|
806
|
+
requestKind,
|
|
807
|
+
requestHeaders,
|
|
808
|
+
interceptAmpWebSearch,
|
|
809
|
+
stream,
|
|
810
|
+
forceResponsesDowngrade: true
|
|
811
|
+
});
|
|
812
|
+
}
|
|
604
813
|
} catch (error) {
|
|
605
814
|
return {
|
|
606
815
|
ok: false,
|
|
@@ -651,13 +860,33 @@ export async function makeProviderCall({
|
|
|
651
860
|
prompt_cache_key: activePlan.providerBody.prompt_cache_key || ampContext.threadId
|
|
652
861
|
};
|
|
653
862
|
}
|
|
654
|
-
const executeSubscriptionRequest = async (requestBody) =>
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
863
|
+
const executeSubscriptionRequest = async (requestBody) => {
|
|
864
|
+
const requestStream = subscriptionType === "chatgpt-codex" ? true : Boolean(stream);
|
|
865
|
+
const providerUrl = subscriptionType === "chatgpt-codex"
|
|
866
|
+
? "https://chatgpt.com/backend-api/codex/responses"
|
|
867
|
+
: "https://console.anthropic.com/v1/messages?beta=true";
|
|
868
|
+
maybeQueueLargeRequestLog({
|
|
869
|
+
env,
|
|
870
|
+
onLargeRequestLog,
|
|
871
|
+
providerBody: requestBody,
|
|
872
|
+
serializedBody: JSON.stringify(requestBody),
|
|
873
|
+
providerUrl,
|
|
874
|
+
candidate,
|
|
875
|
+
sourceFormat,
|
|
876
|
+
targetFormat: activePlan.targetFormat,
|
|
877
|
+
requestKind: activePlan.requestKind,
|
|
878
|
+
clientType,
|
|
879
|
+
stream: requestStream,
|
|
880
|
+
providerType: subscriptionType
|
|
881
|
+
});
|
|
882
|
+
return makeSubscriptionProviderCall({
|
|
883
|
+
provider,
|
|
884
|
+
body: requestBody,
|
|
885
|
+
// ChatGPT Codex backend expects stream=true; non-stream responses are reconstructed from SSE.
|
|
886
|
+
stream: requestStream,
|
|
887
|
+
env
|
|
888
|
+
});
|
|
889
|
+
};
|
|
661
890
|
const subscriptionResult = await executeSubscriptionRequest(activePlan.providerBody);
|
|
662
891
|
|
|
663
892
|
if (!subscriptionResult?.ok) {
|
|
@@ -854,11 +1083,26 @@ export async function makeProviderCall({
|
|
|
854
1083
|
const timeoutMs = resolveUpstreamTimeoutMs(env);
|
|
855
1084
|
const timeoutControl = buildTimeoutSignal(timeoutMs);
|
|
856
1085
|
try {
|
|
1086
|
+
const serializedBody = JSON.stringify(plan.providerBody);
|
|
857
1087
|
const init = {
|
|
858
1088
|
method: "POST",
|
|
859
1089
|
headers,
|
|
860
|
-
body:
|
|
1090
|
+
body: serializedBody
|
|
861
1091
|
};
|
|
1092
|
+
maybeQueueLargeRequestLog({
|
|
1093
|
+
env,
|
|
1094
|
+
onLargeRequestLog,
|
|
1095
|
+
providerBody: plan.providerBody,
|
|
1096
|
+
serializedBody,
|
|
1097
|
+
providerUrl,
|
|
1098
|
+
candidate,
|
|
1099
|
+
sourceFormat,
|
|
1100
|
+
targetFormat: plan.targetFormat,
|
|
1101
|
+
requestKind: plan.requestKind,
|
|
1102
|
+
clientType,
|
|
1103
|
+
stream,
|
|
1104
|
+
providerType: "http"
|
|
1105
|
+
});
|
|
862
1106
|
if (timeoutControl.signal) {
|
|
863
1107
|
init.signal = timeoutControl.signal;
|
|
864
1108
|
}
|
|
@@ -908,6 +1152,9 @@ export async function makeProviderCall({
|
|
|
908
1152
|
try {
|
|
909
1153
|
const fallbackResponse = await executeHttpProviderRequest(fallbackPlan);
|
|
910
1154
|
if (fallbackResponse instanceof Response && fallbackResponse.ok) {
|
|
1155
|
+
if (preferOpenAIToolRouting) {
|
|
1156
|
+
suppressOpenAIToolRouting(candidate);
|
|
1157
|
+
}
|
|
911
1158
|
response = fallbackResponse;
|
|
912
1159
|
activePlan = fallbackPlan;
|
|
913
1160
|
}
|
|
@@ -934,6 +1181,19 @@ export async function makeProviderCall({
|
|
|
934
1181
|
};
|
|
935
1182
|
}
|
|
936
1183
|
|
|
1184
|
+
// Provider doesn't support native /v1/responses — retry with Chat Completions downgrade.
|
|
1185
|
+
if ((!response || !response.ok) && responsesDowngradedPlan) {
|
|
1186
|
+
try {
|
|
1187
|
+
const downgradedResponse = await executeHttpProviderRequest(responsesDowngradedPlan);
|
|
1188
|
+
if (downgradedResponse instanceof Response && downgradedResponse.ok) {
|
|
1189
|
+
response = downgradedResponse;
|
|
1190
|
+
activePlan = responsesDowngradedPlan;
|
|
1191
|
+
}
|
|
1192
|
+
} catch {
|
|
1193
|
+
// Keep the original failure if the downgraded request also fails.
|
|
1194
|
+
}
|
|
1195
|
+
}
|
|
1196
|
+
|
|
937
1197
|
if (!response.ok) {
|
|
938
1198
|
const hostedWebSearchErrorKind = await resolveHostedWebSearchErrorKind(response, activePlan.providerBody, {
|
|
939
1199
|
targetFormat: activePlan.targetFormat,
|
|
@@ -983,6 +1243,7 @@ export async function makeProviderCall({
|
|
|
983
1243
|
requestKind: activePlan.requestKind,
|
|
984
1244
|
requestBody: body,
|
|
985
1245
|
clientType,
|
|
986
|
-
env
|
|
1246
|
+
env,
|
|
1247
|
+
responsesDowngraded: activePlan.responsesDowngraded
|
|
987
1248
|
});
|
|
988
1249
|
}
|
|
@@ -295,8 +295,18 @@ export function applyReasoningEffortMapping({
|
|
|
295
295
|
sourceFormat,
|
|
296
296
|
targetFormat,
|
|
297
297
|
targetModel,
|
|
298
|
-
requestHeaders
|
|
298
|
+
requestHeaders,
|
|
299
|
+
capabilities
|
|
299
300
|
}) {
|
|
301
|
+
if (capabilities) {
|
|
302
|
+
if (targetFormat === FORMATS.OPENAI && capabilities.supportsReasoning === false) {
|
|
303
|
+
return providerBody;
|
|
304
|
+
}
|
|
305
|
+
if (targetFormat === FORMATS.CLAUDE && capabilities.supportsThinking === false) {
|
|
306
|
+
return providerBody;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
300
310
|
const effort = resolveRequestedEffort(originalBody, requestHeaders);
|
|
301
311
|
if (!effort) return providerBody;
|
|
302
312
|
|