@khanglvm/llm-router 2.0.0-beta.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +163 -426
- package/package.json +3 -3
- package/src/cli/router-module.js +2773 -2587
- package/src/cli-entry.js +32 -103
- package/src/node/activity-log.js +119 -0
- package/src/node/coding-tool-config.js +85 -11
- package/src/node/config-workflows.js +51 -12
- package/src/node/instance-state.js +1 -1
- package/src/node/litellm-context-catalog.js +184 -0
- package/src/node/local-server.js +23 -3
- package/src/node/port-reclaim.js +2 -2
- package/src/node/start-command.js +22 -22
- package/src/node/startup-manager.js +3 -3
- package/src/node/web-command.js +1 -1
- package/src/node/web-console-assets.js +1 -1
- package/src/node/web-console-client.js +34 -29
- package/src/node/web-console-server.js +420 -38
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/buffered-text-input.js +133 -0
- package/src/node/web-console-ui/config-editor-utils.js +57 -4
- package/src/node/web-console-ui/dropdown-placement.js +153 -0
- package/src/node/web-console-ui/select-search-utils.js +6 -0
- package/src/node/web-console-ui/transient-integer-input-utils.js +12 -0
- package/src/runtime/balancer.js +78 -1
- package/src/runtime/codex-request-transformer.js +16 -7
- package/src/runtime/config.js +448 -12
- package/src/runtime/handler/amp-response.js +5 -3
- package/src/runtime/handler/amp-web-search.js +2232 -0
- package/src/runtime/handler/fallback.js +30 -2
- package/src/runtime/handler/provider-call.js +353 -36
- package/src/runtime/handler/provider-translation.js +14 -0
- package/src/runtime/handler/request.js +128 -2
- package/src/runtime/handler/route-debug.js +36 -0
- package/src/runtime/handler.js +210 -20
- package/src/runtime/subscription-provider.js +1 -1
- package/src/shared/coding-tool-bindings.js +49 -0
- package/src/shared/local-router-defaults.js +62 -0
- package/src/translator/request/claude-to-openai.js +43 -0
|
@@ -3,11 +3,28 @@ import { extractAmpGeminiRouteInfo } from "./amp-gemini.js";
|
|
|
3
3
|
import { toNonNegativeInteger } from "./utils.js";
|
|
4
4
|
|
|
5
5
|
const DEFAULT_MAX_REQUEST_BODY_BYTES = 1 * 1024 * 1024;
|
|
6
|
+
const DEFAULT_RESPONSES_MAX_REQUEST_BODY_BYTES = 8 * 1024 * 1024;
|
|
6
7
|
const MIN_MAX_REQUEST_BODY_BYTES = 4 * 1024;
|
|
7
8
|
const MAX_MAX_REQUEST_BODY_BYTES = 20 * 1024 * 1024;
|
|
8
9
|
const DEFAULT_UPSTREAM_TIMEOUT_MS = 60_000;
|
|
9
10
|
const MIN_UPSTREAM_TIMEOUT_MS = 1_000;
|
|
10
11
|
const MAX_UPSTREAM_TIMEOUT_MS = 300_000;
|
|
12
|
+
const DEFAULT_OUTPUT_TOKEN_RESERVE = 1_024;
|
|
13
|
+
const AMP_ANTHROPIC_CONTEXT_BASELINE_TOKENS = 200_000;
|
|
14
|
+
const AMP_ANTHROPIC_CONTEXT_1M_TOKENS = 1_000_000;
|
|
15
|
+
const AMP_ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
|
|
16
|
+
const AMP_CONTEXT_HINTS_BY_MODEL = new Map([
|
|
17
|
+
["free", { minimumContextTokens: 136_000, source: "amp:model:free" }],
|
|
18
|
+
["rush", { minimumContextTokens: 136_000, source: "amp:model:rush" }],
|
|
19
|
+
["smart", { minimumContextTokens: 168_000, source: "amp:model:smart" }],
|
|
20
|
+
["deep", { minimumContextTokens: 272_000, source: "amp:model:deep" }],
|
|
21
|
+
["large", { minimumContextTokens: 936_000, source: "amp:model:large" }],
|
|
22
|
+
["claude-haiku-4-5-20251001", { minimumContextTokens: 136_000, source: "amp:model:claude-haiku-4-5-20251001" }],
|
|
23
|
+
["claude-opus-4-6", { minimumContextTokens: 168_000, source: "amp:model:claude-opus-4-6" }],
|
|
24
|
+
["openai/gpt-5.3-codex", { minimumContextTokens: 272_000, source: "amp:model:openai/gpt-5.3-codex" }],
|
|
25
|
+
["claude-sonnet-4-6", { minimumContextTokens: 936_000, source: "amp:model:claude-sonnet-4-6" }],
|
|
26
|
+
["gpt-5.3-codex", { minimumContextTokens: 968_000, source: "amp:model:gpt-5.3-codex" }]
|
|
27
|
+
]);
|
|
11
28
|
const AMP_API_PROVIDER_PREFIX = "/api/provider/";
|
|
12
29
|
const AMP_MANAGEMENT_ROOT_PREFIXES = [
|
|
13
30
|
"/auth",
|
|
@@ -133,10 +150,14 @@ function resolveAmpProviderRoute(path, method) {
|
|
|
133
150
|
return null;
|
|
134
151
|
}
|
|
135
152
|
|
|
136
|
-
export function resolveMaxRequestBodyBytes(env = {}) {
|
|
153
|
+
export function resolveMaxRequestBodyBytes(env = {}, options = {}) {
|
|
154
|
+
const requestKind = String(options?.requestKind || "").trim().toLowerCase();
|
|
155
|
+
const fallbackLimit = requestKind === "responses"
|
|
156
|
+
? DEFAULT_RESPONSES_MAX_REQUEST_BODY_BYTES
|
|
157
|
+
: DEFAULT_MAX_REQUEST_BODY_BYTES;
|
|
137
158
|
const configured = toNonNegativeInteger(
|
|
138
159
|
env?.LLM_ROUTER_MAX_REQUEST_BODY_BYTES,
|
|
139
|
-
|
|
160
|
+
fallbackLimit
|
|
140
161
|
);
|
|
141
162
|
return Math.min(
|
|
142
163
|
MAX_MAX_REQUEST_BODY_BYTES,
|
|
@@ -162,6 +183,26 @@ function parseContentLength(value) {
|
|
|
162
183
|
return parsed;
|
|
163
184
|
}
|
|
164
185
|
|
|
186
|
+
/**
 * Coerce an arbitrary value to a non-negative integer.
 *
 * @param {*} value - Anything accepted by `Number()`.
 * @returns {number} The value rounded down, or 0 when it is not a finite,
 *   non-negative number.
 */
function parseNonNegativeNumber(value) {
  const numeric = Number(value);
  const isUsable = Number.isFinite(numeric) && numeric >= 0;
  return isUsable ? Math.floor(numeric) : 0;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Split a comma-separated header value into lowercase, trimmed tokens.
 *
 * @param {*} value - Raw header value; falsy values yield an empty list.
 * @returns {string[]} Non-empty tokens in their original order.
 */
function parseHeaderTokenList(value) {
  if (!value) return [];

  const tokens = [];
  for (const piece of String(value).split(",")) {
    const token = piece.trim().toLowerCase();
    if (token) tokens.push(token);
  }
  return tokens;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Look up the context hint registered for a model name.
 *
 * The name is trimmed and lowercased before it is used as a key into
 * AMP_CONTEXT_HINTS_BY_MODEL.
 *
 * @param {*} model - Requested model identifier.
 * @returns {object|null} The stored hint entry, or null when the name is
 *   empty or has no entry.
 */
function lookupAmpContextHint(model) {
  const normalizedModel = String(model || "").trim().toLowerCase();
  if (normalizedModel === "") return null;

  const hint = AMP_CONTEXT_HINTS_BY_MODEL.get(normalizedModel);
  return hint || null;
}
|
|
205
|
+
|
|
165
206
|
function createRequestBodyTooLargeError(maxBytes) {
|
|
166
207
|
const error = new Error(`Request body exceeds ${maxBytes} bytes.`);
|
|
167
208
|
error.code = "REQUEST_BODY_TOO_LARGE";
|
|
@@ -210,6 +251,91 @@ export async function parseJsonBodyWithLimit(request, maxBytes) {
|
|
|
210
251
|
return JSON.parse(raw);
|
|
211
252
|
}
|
|
212
253
|
|
|
254
|
+
/**
 * Produce a rough token budget for a request body.
 *
 * The body is serialized to JSON and the input size is estimated as the
 * larger of (characters / 4) and (UTF-8 bytes / 3), both rounded up. The
 * output reservation is the largest of `max_output_tokens`,
 * `max_completion_tokens` and `max_tokens`, or DEFAULT_OUTPUT_TOKEN_RESERVE
 * when none is positive. A safety padding of 10% of the input estimate
 * (at least 256 tokens) is added whenever the input estimate is non-zero.
 *
 * @param {object} [body={}] - Parsed request body.
 * @returns {{estimatedInputTokens: number, requestedOutputTokens: number,
 *   safetyPaddingTokens: number, estimatedRequiredTokens: number}}
 *   All zeros when `body` is not an object.
 */
export function estimateRequestContextTokens(body = {}) {
  if (!body || typeof body !== "object") {
    return {
      estimatedInputTokens: 0,
      requestedOutputTokens: 0,
      safetyPaddingTokens: 0,
      estimatedRequiredTokens: 0
    };
  }

  // Serialization is best-effort: a body that cannot be stringified is
  // treated as contributing no input tokens.
  let json;
  try {
    json = JSON.stringify(body) || "";
  } catch {
    json = "";
  }

  const utf8Bytes = json === "" ? 0 : new TextEncoder().encode(json).byteLength;
  const tokensByChars = Math.ceil(json.length / 4);
  const tokensByBytes = Math.ceil(utf8Bytes / 3);
  const estimatedInputTokens = Math.max(tokensByChars, tokensByBytes);

  const callerOutputLimit = Math.max(
    ...["max_output_tokens", "max_completion_tokens", "max_tokens"]
      .map((field) => parseNonNegativeNumber(body?.[field]))
  );
  const requestedOutputTokens = callerOutputLimit > 0
    ? callerOutputLimit
    : DEFAULT_OUTPUT_TOKEN_RESERVE;

  let safetyPaddingTokens = 0;
  if (estimatedInputTokens > 0) {
    safetyPaddingTokens = Math.max(256, Math.ceil(estimatedInputTokens * 0.1));
  }

  const estimatedRequiredTokens = estimatedInputTokens + requestedOutputTokens + safetyPaddingTokens;
  return {
    estimatedInputTokens,
    requestedOutputTokens,
    safetyPaddingTokens,
    estimatedRequiredTokens
  };
}
|
|
299
|
+
|
|
300
|
+
/**
 * Infer the minimum context window an AMP client request is expected to need.
 *
 * Resolution order, applied only when `options.clientType` is "amp":
 *   1. an `anthropic-beta` header containing AMP_ANTHROPIC_CONTEXT_1M_BETA
 *      yields AMP_ANTHROPIC_CONTEXT_1M_TOKENS;
 *   2. a hint registered for `body.model` in the model hint table;
 *   3. an Anthropic provider hint or a "messages" request kind yields
 *      AMP_ANTHROPIC_CONTEXT_BASELINE_TOKENS;
 *   4. otherwise no requirement.
 *
 * @param {object} request - Incoming request; `request.headers.get` is used
 *   when available.
 * @param {object} [body={}] - Parsed request body.
 * @param {object} [options={}] - Reads `clientType`, `providerHint`, `requestKind`.
 * @returns {{minimumContextTokens: number, source: string}} A fresh object on
 *   every call; `minimumContextTokens` is 0 and `source` is "" when nothing applies.
 */
export function inferAmpContextRequirement(request, body = {}, options = {}) {
  const normalize = (value) => String(value || "").trim().toLowerCase();
  const noRequirement = () => ({ minimumContextTokens: 0, source: "" });

  if (normalize(options?.clientType) !== "amp") {
    return noRequirement();
  }

  // Tolerate header containers without a `get` method instead of throwing.
  const headers = request?.headers;
  const readHeader = (name) => (
    typeof headers?.get === "function" ? headers.get(name) : undefined
  );
  const anthropicBetaFlags = parseHeaderTokenList(
    readHeader("anthropic-beta") || readHeader("Anthropic-Beta")
  );
  if (anthropicBetaFlags.includes(AMP_ANTHROPIC_CONTEXT_1M_BETA)) {
    return {
      minimumContextTokens: AMP_ANTHROPIC_CONTEXT_1M_TOKENS,
      source: `amp:anthropic-beta:${AMP_ANTHROPIC_CONTEXT_1M_BETA}`
    };
  }

  const requestedModelHint = lookupAmpContextHint(body?.model);
  if (requestedModelHint) {
    // Return a copy so callers cannot mutate the shared hint table entry.
    return { ...requestedModelHint };
  }

  const providerHint = normalize(options?.providerHint);
  const requestKind = normalize(options?.requestKind);
  if (providerHint === "anthropic" || requestKind === "messages") {
    return {
      minimumContextTokens: AMP_ANTHROPIC_CONTEXT_BASELINE_TOKENS,
      source: "amp:anthropic-route:200k-baseline"
    };
  }

  return noRequirement();
}
|
|
338
|
+
|
|
213
339
|
export function isJsonRequest(request) {
|
|
214
340
|
const contentType = String(request.headers.get("content-type") || "").toLowerCase();
|
|
215
341
|
return contentType.includes("application/json") || contentType.includes("+json");
|
|
@@ -41,6 +41,9 @@ export function buildRouteDebugState(enabled, resolved) {
|
|
|
41
41
|
selectedCandidate: "",
|
|
42
42
|
skippedCandidates: [],
|
|
43
43
|
attempts: [],
|
|
44
|
+
contextRequiredTokens: "",
|
|
45
|
+
contextHintSource: "",
|
|
46
|
+
contextRisk: "",
|
|
44
47
|
toolTypes: "",
|
|
45
48
|
toolRouting: ""
|
|
46
49
|
};
|
|
@@ -81,6 +84,24 @@ export function setRouteToolDebug(debugState, toolTypes, toolRouting = "") {
|
|
|
81
84
|
debugState.toolRouting = String(toolRouting || "").trim();
|
|
82
85
|
}
|
|
83
86
|
|
|
87
|
+
/**
 * Record context-window diagnostics on a route debug state object.
 *
 * Does nothing unless `debugState.enabled` is truthy. Each field is only
 * written when the corresponding input is usable:
 *   - `requiredTokens`: finite and > 0, stored as a floored decimal string;
 *   - `hintSource`: non-empty after trimming;
 *   - `risk`: any value other than `undefined` (so "" clears a previous risk).
 *
 * @param {object} debugState - Mutable debug state.
 * @param {{requiredTokens?: *, hintSource?: *, risk?: *}} [details]
 * @returns {void}
 */
export function setRouteContextDebug(debugState, { requiredTokens, hintSource, risk } = {}) {
  const isEnabled = Boolean(debugState?.enabled);
  if (!isEnabled) return;

  const tokenCount = Number(requiredTokens);
  const hasTokenCount = Number.isFinite(tokenCount) && tokenCount > 0;
  if (hasTokenCount) {
    debugState.contextRequiredTokens = String(Math.floor(tokenCount));
  }

  const source = String(hintSource || "").trim();
  if (source !== "") {
    debugState.contextHintSource = source;
  }

  if (risk === undefined) return;
  debugState.contextRisk = String(risk || "").trim();
}
|
|
104
|
+
|
|
84
105
|
export function withRouteDebugHeaders(response, debugState) {
|
|
85
106
|
if (!debugState?.enabled || !(response instanceof Response)) {
|
|
86
107
|
return response;
|
|
@@ -107,6 +128,21 @@ export function withRouteDebugHeaders(response, debugState) {
|
|
|
107
128
|
headers.set("x-llm-router-attempts", attempts);
|
|
108
129
|
}
|
|
109
130
|
|
|
131
|
+
const contextRequiredTokens = toSafeHeaderValue(debugState.contextRequiredTokens);
|
|
132
|
+
if (contextRequiredTokens) {
|
|
133
|
+
headers.set("x-llm-router-context-required", contextRequiredTokens);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
const contextHintSource = toSafeHeaderValue(debugState.contextHintSource);
|
|
137
|
+
if (contextHintSource) {
|
|
138
|
+
headers.set("x-llm-router-context-hint-source", contextHintSource);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
const contextRisk = toSafeHeaderValue(debugState.contextRisk);
|
|
142
|
+
if (contextRisk) {
|
|
143
|
+
headers.set("x-llm-router-context-risk", contextRisk);
|
|
144
|
+
}
|
|
145
|
+
|
|
110
146
|
const toolTypes = toSafeHeaderValue(debugState.toolTypes);
|
|
111
147
|
if (toolTypes) {
|
|
112
148
|
headers.set("x-llm-router-tool-types", toolTypes);
|
package/src/runtime/handler.js
CHANGED
|
@@ -23,6 +23,8 @@ import {
|
|
|
23
23
|
import { corsResponse, jsonResponse } from "./handler/http.js";
|
|
24
24
|
import {
|
|
25
25
|
detectUserRequestFormat,
|
|
26
|
+
estimateRequestContextTokens,
|
|
27
|
+
inferAmpContextRequirement,
|
|
26
28
|
isAmpManagementPath,
|
|
27
29
|
isJsonRequest,
|
|
28
30
|
isStreamingEnabled,
|
|
@@ -43,6 +45,7 @@ import {
|
|
|
43
45
|
convertAmpGeminiRequestToOpenAI,
|
|
44
46
|
hasGeminiWebSearchTool
|
|
45
47
|
} from "./handler/amp-gemini.js";
|
|
48
|
+
import { shouldInterceptAmpWebSearch } from "./handler/amp-web-search.js";
|
|
46
49
|
import {
|
|
47
50
|
isRequestFromAllowedIp,
|
|
48
51
|
resolveAllowedOrigin,
|
|
@@ -55,7 +58,7 @@ import {
|
|
|
55
58
|
resolveFallbackCircuitPolicy,
|
|
56
59
|
resolveRetryPolicy
|
|
57
60
|
} from "./handler/fallback.js";
|
|
58
|
-
import { sleep } from "./handler/utils.js";
|
|
61
|
+
import { parseJsonSafely, sleep } from "./handler/utils.js";
|
|
59
62
|
import {
|
|
60
63
|
applyCandidateFailureState,
|
|
61
64
|
applyRuntimeRetryPolicyGuards,
|
|
@@ -68,6 +71,7 @@ import {
|
|
|
68
71
|
isRoutingDebugEnabled,
|
|
69
72
|
recordRouteAttempt,
|
|
70
73
|
recordRouteSkip,
|
|
74
|
+
setRouteContextDebug,
|
|
71
75
|
setRouteSelectedCandidate,
|
|
72
76
|
setRouteToolDebug,
|
|
73
77
|
withRouteDebugHeaders
|
|
@@ -96,13 +100,6 @@ function filterCandidatesByFormat(candidates) {
|
|
|
96
100
|
return { eligible, skipped };
|
|
97
101
|
}
|
|
98
102
|
|
|
99
|
-
function hasNextEligibleCandidate(entries, startIndex) {
|
|
100
|
-
for (let index = startIndex + 1; index < (entries || []).length; index += 1) {
|
|
101
|
-
if (entries[index]?.eligible) return true;
|
|
102
|
-
}
|
|
103
|
-
return false;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
103
|
function extractBuiltInToolTypes(body) {
|
|
107
104
|
const tools = Array.isArray(body?.tools) ? body.tools : [];
|
|
108
105
|
const seen = new Set();
|
|
@@ -161,11 +158,111 @@ function isChatGPTCodexCandidate(candidate) {
|
|
|
161
158
|
return subscriptionType === "chatgpt-codex";
|
|
162
159
|
}
|
|
163
160
|
|
|
161
|
+
/**
 * Resolve the context window (in tokens) advertised for a route candidate.
 *
 * `candidate.contextWindow` is consulted first, then
 * `candidate.model.contextWindow`. A source is accepted only when it is a
 * number or numeric string that is finite and greater than zero, so an
 * unusable candidate-level value no longer hides a valid model-level one.
 *
 * @param {object} candidate - Route candidate.
 * @returns {number|null} The floored context window, or null when unknown.
 */
function resolveCandidateContextWindow(candidate) {
  const sources = [candidate?.contextWindow, candidate?.model?.contextWindow];

  for (const raw of sources) {
    // Skip booleans, objects, etc. so `Number(true) === 1` is never treated as a window.
    if (typeof raw !== "number" && typeof raw !== "string") continue;

    const parsed = Number(raw);
    if (Number.isFinite(parsed) && parsed > 0) {
      return Math.floor(parsed);
    }
  }
  return null;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Describe the context-window risk of routing a request to a candidate.
 *
 * @param {object} candidate - Selected route candidate.
 * @param {*} estimatedRequiredTokens - Tokens the request is expected to need.
 * @returns {string} "" when there is no candidate, no positive requirement,
 *   or the window is large enough; "selected-context-window-unknown" when the
 *   candidate's window cannot be resolved; otherwise
 *   "selected-context-window-below-required:<window><<required>".
 */
function resolveSelectedContextRisk(candidate, estimatedRequiredTokens) {
  const needed = Number(estimatedRequiredTokens);
  const hasRequirement = Number.isFinite(needed) && needed > 0;
  if (!candidate || !hasRequirement) return "";

  const available = resolveCandidateContextWindow(candidate);
  if (!available) return "selected-context-window-unknown";

  return available < needed
    ? `selected-context-window-below-required:${available}<${needed}`
    : "";
}
|
|
181
|
+
|
|
164
182
|
const WEB_SEARCH_UNAVAILABLE_HINTS = [
|
|
165
183
|
"web search credits are unavailable in this session",
|
|
166
184
|
"web access unavailable (out of credits)",
|
|
167
185
|
"web access unavailable"
|
|
168
186
|
];
|
|
187
|
+
const ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS = 240;
|
|
188
|
+
|
|
189
|
+
/**
 * Deliver an activity payload to an optional logging callback without ever
 * letting the callback affect the caller.
 *
 * Synchronous throws are swallowed, and when the callback returns a thenable
 * a no-op rejection handler is attached via `.catch`.
 *
 * @param {Function|*} onActivityLog - Callback; ignored when not a function.
 * @param {object} payload - Event passed to the callback unchanged.
 * @returns {void}
 */
function queueActivityEvent(onActivityLog, payload) {
  if (typeof onActivityLog !== "function") return;

  try {
    const outcome = onActivityLog(payload);
    const isThenable = Boolean(outcome) && typeof outcome.then === "function";
    if (isThenable) {
      outcome.catch(() => {});
    }
  } catch {
    // Activity logging is best-effort; failures must not break request handling.
  }
}
|
|
199
|
+
|
|
200
|
+
/**
 * Find the first entry after `startIndex` whose `eligible` flag is truthy.
 *
 * @param {Array<object>} entries - Ranked candidate entries (may be nullish).
 * @param {number} startIndex - Index of the entry that was just tried.
 * @returns {object|null} The next eligible entry, or null when none remains.
 */
function getNextEligibleCandidateEntry(entries, startIndex) {
  const total = (entries || []).length;
  let cursor = startIndex + 1;

  while (cursor < total) {
    const entry = entries[cursor];
    if (entry?.eligible) return entry;
    cursor += 1;
  }
  return null;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Build a "provider/model" label for a candidate in activity-log text.
 *
 * The model part uses `modelId`, falling back to `backend`. Any part that is
 * missing or blank after trimming becomes "unknown".
 *
 * @param {object} candidate - Route candidate (may be nullish).
 * @returns {string} Label of the form "<providerId>/<modelId>".
 */
function formatActivityCandidateLabel(candidate) {
  const toLabelPart = (raw) => String(raw || "unknown").trim() || "unknown";

  const providerPart = toLabelPart(candidate?.providerId);
  const modelPart = toLabelPart(candidate?.modelId || candidate?.backend);
  return [providerPart, modelPart].join("/");
}
|
|
212
|
+
|
|
213
|
+
/**
 * Build the route label shown in activity-log entries.
 *
 * @param {*} requestedModel - Model name from the request; blank values are
 *   shown as "smart".
 * @param {object} resolved - Resolved route; `routeRef` is read when present.
 * @returns {string} "<requested> -> <routeRef>" when the route reference
 *   differs from the requested name, otherwise whichever single name exists.
 */
function formatActivityRouteLabel(requestedModel, resolved) {
  const requestedName = String(requestedModel || "").trim() || "smart";
  const resolvedRef = String(resolved?.routeRef || "").trim();

  if (!resolvedRef) return requestedName;
  if (resolvedRef === requestedName) return resolvedRef;
  return `${requestedName} -> ${resolvedRef}`;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Turn a failure category identifier into display text by trimming it and
 * replacing every underscore with a space.
 *
 * @param {*} category - Category identifier; falsy values become "".
 * @returns {string} Display form of the category.
 */
function formatFailureCategory(category) {
  const trimmed = String(category || "").trim();
  return trimmed.split("_").join(" ");
}
|
|
224
|
+
|
|
225
|
+
/**
 * Summarize a failed provider call as "status <code> · <category>".
 *
 * The status part is included only when `result.status` is a finite number
 * greater than zero; the category part only when the formatted category is
 * non-empty.
 *
 * @param {object} result - Provider call result.
 * @param {object} classification - Failure classification; `category` is read.
 * @returns {string} The joined summary, or "request failed" when both parts
 *   are absent.
 */
function buildFailureSummary(result, classification) {
  const statusCode = Number.isFinite(result?.status) ? Number(result.status) : 0;
  const statusText = statusCode > 0 ? `status ${statusCode}` : "";
  const categoryText = formatFailureCategory(classification?.category);

  const summary = [statusText, categoryText].filter(Boolean).join(" · ");
  return summary || "request failed";
}
|
|
233
|
+
|
|
234
|
+
/**
 * Append the provider's own error text to an activity-log detail message.
 *
 * @param {string} baseMessage - Router-generated description.
 * @param {*} [providerMessage=""] - Provider error text; ignored when blank.
 * @returns {string} `baseMessage` unchanged, or
 *   "<baseMessage> Provider said: <providerMessage>".
 */
function buildActivityDetail(baseMessage, providerMessage = "") {
  const providerText = String(providerMessage || "").trim();
  return providerText
    ? `${baseMessage} Provider said: ${providerText}`
    : baseMessage;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Extract a short, single-line error description from a failed provider
 * result for use in activity-log entries.
 *
 * `result.upstreamResponse` is preferred over `result.response`. The response
 * is cloned before its body is read. When the body parses as JSON, the first
 * printable value among `error.message`, `error.code`, `error.type`, `error`,
 * `code`, `type` and `message` is used; otherwise the raw body text is used.
 * Only non-blank strings and finite numbers count as printable, so a
 * structured `error` object never renders as "[object Object]".
 *
 * @param {object} result - Provider call result.
 * @returns {Promise<string>} Whitespace-collapsed text of at most
 *   ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS characters (truncated text ends with
 *   "…"), or "" when no response or text is available or reading fails.
 */
async function readActivityErrorDetail(result) {
  const response = [result?.upstreamResponse, result?.response]
    .find((entry) => entry instanceof Response);
  if (!response) return "";

  const isPrintable = (value) => (
    (typeof value === "string" && value.trim() !== "")
    || (typeof value === "number" && Number.isFinite(value))
  );

  try {
    const raw = (await response.clone().text()).trim();
    if (!raw) return "";

    const parsed = parseJsonSafely(raw, null);
    const errorField = parsed?.error;
    const candidates = [
      errorField?.message,
      errorField?.code,
      errorField?.type,
      errorField,
      parsed?.code,
      parsed?.type,
      parsed?.message
    ];
    // Fall back to the raw body when no field holds displayable text.
    const message = candidates.find(isPrintable) ?? raw;

    const compact = String(message).replace(/\s+/g, " ").trim();
    if (!compact) return "";
    if (compact.length <= ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS) return compact;
    return `${compact.slice(0, ACTIVITY_LOG_ERROR_DETAIL_MAX_CHARS - 1)}…`;
  } catch {
    // Reading the body is best-effort (e.g. the clone may fail); log nothing extra.
    return "";
  }
}
|
|
169
266
|
|
|
170
267
|
function extractAssistantTextFragments(payload) {
|
|
171
268
|
const fragments = [];
|
|
@@ -340,7 +437,9 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
340
437
|
return jsonResponse({ error: "Unsupported Media Type. Use application/json." }, 415);
|
|
341
438
|
}
|
|
342
439
|
|
|
343
|
-
const maxRequestBodyBytes = resolveMaxRequestBodyBytes(env
|
|
440
|
+
const maxRequestBodyBytes = resolveMaxRequestBodyBytes(env, {
|
|
441
|
+
requestKind: options.requestKind
|
|
442
|
+
});
|
|
344
443
|
let body;
|
|
345
444
|
try {
|
|
346
445
|
body = await parseJsonBodyWithLimit(request, maxRequestBodyBytes);
|
|
@@ -359,7 +458,14 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
359
458
|
const requestedModel = body?.model || "smart";
|
|
360
459
|
const stream = isStreamingEnabled(sourceFormat, body);
|
|
361
460
|
|
|
362
|
-
|
|
461
|
+
const interceptAmpWebSearch = shouldInterceptAmpWebSearch({
|
|
462
|
+
clientType: options.clientType,
|
|
463
|
+
originalBody: body,
|
|
464
|
+
runtimeConfig: config,
|
|
465
|
+
env
|
|
466
|
+
});
|
|
467
|
+
|
|
468
|
+
if (!interceptAmpWebSearch && shouldProxyAmpWebSearchRequest(options.clientType, builtInToolTypes, config)) {
|
|
363
469
|
const routeDebug = buildAmpWebSearchProxyDebugState(env, requestedModel, builtInToolTypes);
|
|
364
470
|
if (routeDebug.enabled) {
|
|
365
471
|
console.warn(
|
|
@@ -415,6 +521,30 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
415
521
|
...resolved,
|
|
416
522
|
sourceFormat
|
|
417
523
|
};
|
|
524
|
+
const routeLabel = formatActivityRouteLabel(requestedModel, resolved);
|
|
525
|
+
const requestContext = estimateRequestContextTokens(body);
|
|
526
|
+
const ampContextRequirement = inferAmpContextRequirement(request, body, options);
|
|
527
|
+
const effectiveRequiredTokens = Math.max(
|
|
528
|
+
Number(requestContext?.estimatedRequiredTokens) || 0,
|
|
529
|
+
Number(ampContextRequirement?.minimumContextTokens) || 0
|
|
530
|
+
);
|
|
531
|
+
const effectiveRequestContext = {
|
|
532
|
+
...requestContext,
|
|
533
|
+
ampMinimumContextTokens: Number(ampContextRequirement?.minimumContextTokens) || 0,
|
|
534
|
+
ampContextSource: String(ampContextRequirement?.source || "").trim(),
|
|
535
|
+
estimatedRequiredTokens: effectiveRequiredTokens
|
|
536
|
+
};
|
|
537
|
+
setRouteContextDebug(routeDebug, {
|
|
538
|
+
requiredTokens: effectiveRequiredTokens,
|
|
539
|
+
hintSource: effectiveRequiredTokens > (Number(requestContext?.estimatedRequiredTokens) || 0)
|
|
540
|
+
? ampContextRequirement?.source || "request-context-hint"
|
|
541
|
+
: (effectiveRequiredTokens > 0 ? "request-body-estimate" : "")
|
|
542
|
+
});
|
|
543
|
+
if (routeDebug.enabled && effectiveRequestContext.ampContextSource) {
|
|
544
|
+
console.warn(
|
|
545
|
+
`[llm-router] context hint request=${requestedModel} source=${effectiveRequestContext.ampContextSource} required=${effectiveRequiredTokens}`
|
|
546
|
+
);
|
|
547
|
+
}
|
|
418
548
|
const routeCandidates = [resolved.primary, ...resolved.fallbacks];
|
|
419
549
|
const formatFiltered = filterCandidatesByFormat(routeCandidates);
|
|
420
550
|
for (const skipped of formatFiltered.skipped) {
|
|
@@ -449,6 +579,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
449
579
|
candidates: prioritizedCandidates.candidates,
|
|
450
580
|
stateStore,
|
|
451
581
|
config,
|
|
582
|
+
requestContext: effectiveRequestContext,
|
|
452
583
|
now
|
|
453
584
|
});
|
|
454
585
|
} catch (error) {
|
|
@@ -463,6 +594,12 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
463
594
|
|
|
464
595
|
routeDebug.strategy = ranking.strategy;
|
|
465
596
|
setRouteSelectedCandidate(routeDebug, ranking.selectedEntry?.candidate);
|
|
597
|
+
setRouteContextDebug(routeDebug, {
|
|
598
|
+
risk: resolveSelectedContextRisk(
|
|
599
|
+
ranking.selectedEntry?.candidate,
|
|
600
|
+
effectiveRequestContext.estimatedRequiredTokens
|
|
601
|
+
)
|
|
602
|
+
});
|
|
466
603
|
for (const skippedEntry of (ranking.skippedEntries || [])) {
|
|
467
604
|
recordRouteSkip(routeDebug, skippedEntry.candidate, skippedEntry.skipReasons);
|
|
468
605
|
}
|
|
@@ -480,6 +617,7 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
480
617
|
let lastErrorResult = null;
|
|
481
618
|
let lastErrorMessage = "Unknown error";
|
|
482
619
|
let routeSelectionCommitted = false;
|
|
620
|
+
let pendingFallbackContext = null;
|
|
483
621
|
|
|
484
622
|
for (let index = 0; index < ranking.entries.length; index += 1) {
|
|
485
623
|
const entry = ranking.entries[index];
|
|
@@ -505,14 +643,16 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
505
643
|
while (attempt < maxAttempts) {
|
|
506
644
|
attempt += 1;
|
|
507
645
|
result = await makeProviderCall({
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
646
|
+
body,
|
|
647
|
+
sourceFormat,
|
|
648
|
+
stream,
|
|
649
|
+
requestKind: options.requestKind,
|
|
650
|
+
candidate,
|
|
651
|
+
requestHeaders: request.headers,
|
|
652
|
+
env,
|
|
653
|
+
clientType: options.clientType,
|
|
654
|
+
runtimeConfig: config,
|
|
655
|
+
stateStore
|
|
516
656
|
});
|
|
517
657
|
|
|
518
658
|
if (!quotaConsumed && shouldConsumeQuotaFromResult(result)) {
|
|
@@ -538,7 +678,22 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
538
678
|
}
|
|
539
679
|
await clearCandidateRoutingState(stateStore, entry.candidateKey);
|
|
540
680
|
setRouteSelectedCandidate(routeDebug, candidate, { overwrite: true });
|
|
681
|
+
setRouteContextDebug(routeDebug, {
|
|
682
|
+
risk: resolveSelectedContextRisk(candidate, effectiveRequestContext.estimatedRequiredTokens)
|
|
683
|
+
});
|
|
541
684
|
recordRouteAttempt(routeDebug, candidate, result.status, null, attempt);
|
|
685
|
+
if (pendingFallbackContext) {
|
|
686
|
+
queueActivityEvent(options.onActivityLog, {
|
|
687
|
+
level: "success",
|
|
688
|
+
message: `Fallback request succeeded for ${routeLabel}.`,
|
|
689
|
+
detail: `${formatActivityCandidateLabel(candidate)} completed the request after ${pendingFallbackContext.failedCandidate} failed (${pendingFallbackContext.failureSummary}).`,
|
|
690
|
+
source: "runtime",
|
|
691
|
+
category: "usage",
|
|
692
|
+
kind: "fallback-succeeded",
|
|
693
|
+
route: routeLabel
|
|
694
|
+
});
|
|
695
|
+
pendingFallbackContext = null;
|
|
696
|
+
}
|
|
542
697
|
return withRouteDebugHeaders(result.response, routeDebug);
|
|
543
698
|
}
|
|
544
699
|
|
|
@@ -571,8 +726,41 @@ async function handleRouteRequest(request, env, getConfig, sourceFormatHint, opt
|
|
|
571
726
|
isFallbackAttempt
|
|
572
727
|
);
|
|
573
728
|
|
|
574
|
-
const
|
|
729
|
+
const nextCandidateEntry = getNextEligibleCandidateEntry(ranking.entries, index);
|
|
730
|
+
const hasNextCandidate = Boolean(nextCandidateEntry);
|
|
731
|
+
const failureSummary = buildFailureSummary(result, classification);
|
|
732
|
+
const providerMessage = await readActivityErrorDetail(result);
|
|
733
|
+
if (hasNextCandidate && classification?.allowFallback !== false) {
|
|
734
|
+
queueActivityEvent(options.onActivityLog, {
|
|
735
|
+
level: "warn",
|
|
736
|
+
message: `Request fallback triggered for ${routeLabel}.`,
|
|
737
|
+
detail: buildActivityDetail(
|
|
738
|
+
`${formatActivityCandidateLabel(candidate)} failed (${failureSummary}). Trying ${formatActivityCandidateLabel(nextCandidateEntry?.candidate)} next.`,
|
|
739
|
+
providerMessage
|
|
740
|
+
),
|
|
741
|
+
source: "runtime",
|
|
742
|
+
category: "usage",
|
|
743
|
+
kind: "fallback-triggered",
|
|
744
|
+
route: routeLabel
|
|
745
|
+
});
|
|
746
|
+
pendingFallbackContext = {
|
|
747
|
+
failedCandidate: formatActivityCandidateLabel(candidate),
|
|
748
|
+
failureSummary
|
|
749
|
+
};
|
|
750
|
+
}
|
|
575
751
|
if (!hasNextCandidate || classification?.allowFallback === false) {
|
|
752
|
+
queueActivityEvent(options.onActivityLog, {
|
|
753
|
+
level: "error",
|
|
754
|
+
message: `Request failed for ${routeLabel}.`,
|
|
755
|
+
detail: buildActivityDetail(
|
|
756
|
+
`${formatActivityCandidateLabel(candidate)} failed (${failureSummary})${classification?.allowFallback === false ? ". Fallback stopped for this error." : ". No more fallbacks are available."}`,
|
|
757
|
+
providerMessage
|
|
758
|
+
),
|
|
759
|
+
source: "runtime",
|
|
760
|
+
category: "usage",
|
|
761
|
+
kind: "request-failed",
|
|
762
|
+
route: routeLabel
|
|
763
|
+
});
|
|
576
764
|
return withRouteDebugHeaders(await buildFailureResponse(result), routeDebug);
|
|
577
765
|
}
|
|
578
766
|
}
|
|
@@ -779,7 +967,9 @@ export function createFetchHandler(options) {
|
|
|
779
967
|
|
|
780
968
|
let body;
|
|
781
969
|
try {
|
|
782
|
-
body = await parseJsonBodyWithLimit(request, resolveMaxRequestBodyBytes(env
|
|
970
|
+
body = await parseJsonBodyWithLimit(request, resolveMaxRequestBodyBytes(env, {
|
|
971
|
+
requestKind: route.requestKind
|
|
972
|
+
}));
|
|
783
973
|
} catch (error) {
|
|
784
974
|
if (error && typeof error === "object" && error.code === "REQUEST_BODY_TOO_LARGE") {
|
|
785
975
|
return respond(jsonResponse({ error: "Request body too large" }, 413));
|
|
@@ -142,7 +142,7 @@ export async function makeSubscriptionProviderCall({ provider, body, stream }) {
|
|
|
142
142
|
type: 'error',
|
|
143
143
|
error: {
|
|
144
144
|
type: 'authentication_error',
|
|
145
|
-
message: `Not authenticated for subscription profile '${profileId}'. Run '
|
|
145
|
+
message: `Not authenticated for subscription profile '${profileId}'. Run 'llr subscription login --profile=${profileId}' first.`
|
|
146
146
|
}
|
|
147
147
|
}), {
|
|
148
148
|
status: 401,
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// Sentinel string recognised by isCodexCliInheritModelBinding().
export const CODEX_CLI_INHERIT_MODEL_VALUE = "__codex_cli_inherit__";

// Accepted Codex CLI reasoning-effort values.
export const CODEX_CLI_REASONING_EFFORT_VALUES = Object.freeze(
  ["minimal", "low", "medium", "high", "xhigh"]
);

// Accepted Claude Code thinking levels.
export const CLAUDE_CODE_THINKING_LEVEL_VALUES = Object.freeze(
  ["low", "medium", "high", "max"]
);

// Token count associated with each Claude Code thinking level.
export const CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL = Object.freeze({
  low: 4096,
  medium: 12000,
  high: 24000,
  max: 31999
});

/**
 * Trim and lowercase `value`, returning it only when it is in `allowed`.
 *
 * @param {readonly string[]} allowed - Accepted values.
 * @param {*} value - Raw input.
 * @returns {string} The canonical value, or "" when not accepted.
 */
function pickKnownValue(allowed, value) {
  const canonical = String(value || "").trim().toLowerCase();
  return allowed.includes(canonical) ? canonical : "";
}

/**
 * Report whether a model binding equals the inherit sentinel after trimming.
 *
 * @param {*} value - Model binding value.
 * @returns {boolean}
 */
export function isCodexCliInheritModelBinding(value) {
  const binding = String(value || "").trim();
  return binding === CODEX_CLI_INHERIT_MODEL_VALUE;
}

/**
 * Normalize a Codex CLI reasoning-effort value.
 *
 * @param {*} value - Raw input (case and surrounding whitespace ignored).
 * @returns {string} One of CODEX_CLI_REASONING_EFFORT_VALUES, or "".
 */
export function normalizeCodexCliReasoningEffort(value) {
  return pickKnownValue(CODEX_CLI_REASONING_EFFORT_VALUES, value);
}

/**
 * Normalize a Claude Code thinking level.
 *
 * @param {*} value - Raw input (case and surrounding whitespace ignored).
 * @returns {string} One of CLAUDE_CODE_THINKING_LEVEL_VALUES, or "".
 */
export function normalizeClaudeCodeThinkingLevel(value) {
  return pickKnownValue(CLAUDE_CODE_THINKING_LEVEL_VALUES, value);
}

/**
 * Convert a thinking level to its token count, as a string.
 *
 * @param {*} level - Thinking level.
 * @returns {string} Decimal token count, or "" for an unknown level.
 */
export function mapClaudeCodeThinkingLevelToTokens(level) {
  const knownLevel = normalizeClaudeCodeThinkingLevel(level);
  if (knownLevel === "") return "";
  return String(CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL[knownLevel] || "");
}

/**
 * Convert a token count to a thinking level.
 *
 * Levels are chosen by descending threshold: the `max` token count, the
 * `high` token count, then 6000 for "medium"; any smaller positive count is
 * "low".
 *
 * @param {*} value - Token count.
 * @returns {string} "max", "high", "medium", "low", or "" when the value is
 *   not a finite number greater than zero.
 */
export function mapClaudeCodeThinkingTokensToLevel(value) {
  const tokens = Number(value);
  if (!(Number.isFinite(tokens) && tokens > 0)) return "";

  const { max, high } = CLAUDE_CODE_THINKING_TOKENS_BY_LEVEL;
  const thresholds = [
    ["max", max],
    ["high", high],
    ["medium", 6000]
  ];
  const match = thresholds.find(([, minimum]) => tokens >= minimum);
  return match ? match[0] : "low";
}
|