@khanglvm/llm-router 2.3.1 → 2.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/cli/router-module.js +32 -5
- package/src/node/coding-tool-config.js +138 -25
- package/src/node/large-request-log.js +54 -0
- package/src/node/litellm-context-catalog.js +13 -1
- package/src/node/local-server.js +10 -0
- package/src/node/ollama-client.js +195 -0
- package/src/node/ollama-hardware.js +94 -0
- package/src/node/ollama-install.js +230 -0
- package/src/node/provider-probe.js +69 -5
- package/src/node/web-console-client.js +36 -36
- package/src/node/web-console-server.js +478 -8
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/amp-utils.js +272 -0
- package/src/node/web-console-ui/api-client.js +128 -0
- package/src/node/web-console-ui/capability-utils.js +36 -0
- package/src/node/web-console-ui/config-editor-utils.js +20 -5
- package/src/node/web-console-ui/constants.js +140 -0
- package/src/node/web-console-ui/context-window-utils.js +262 -0
- package/src/node/web-console-ui/hooks/use-reorder-layout-animation.js +65 -0
- package/src/node/web-console-ui/provider-presets.js +211 -0
- package/src/node/web-console-ui/quick-start-utils.js +790 -0
- package/src/node/web-console-ui/utils.js +353 -0
- package/src/node/web-console-ui/web-search-utils.js +460 -0
- package/src/runtime/config.js +96 -9
- package/src/runtime/handler/fallback.js +71 -0
- package/src/runtime/handler/field-filter.js +39 -0
- package/src/runtime/handler/large-request-log.js +211 -0
- package/src/runtime/handler/provider-call.js +276 -15
- package/src/runtime/handler/reasoning-effort.js +11 -1
- package/src/runtime/handler/tool-name-sanitizer.js +258 -0
- package/src/runtime/handler.js +16 -3
- package/src/shared/coding-tool-bindings.js +3 -0
|
@@ -44,6 +44,18 @@ const POLICY_HINTS = [
|
|
|
44
44
|
"unsafe",
|
|
45
45
|
"flagged"
|
|
46
46
|
];
|
|
47
|
+
// Phrases looked for in a provider error hint to classify a failure as
// "model_not_found" (the requested model is unknown to the provider).
const MODEL_NOT_FOUND_HINTS = ["model not found", "model does not exist", "model_not_found"];

// Phrases looked for in a provider error hint to classify a failure as
// "vram_exhaustion" (the backend could not load or run the model in memory).
const VRAM_EXHAUSTION_HINTS = [
  "insufficient vram",
  "out of memory",
  "failed to load model",
  "insufficient memory"
];

// Origin cooldown used for "model_not_found" failures: one hour, in milliseconds.
const DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS = 60 * 60 * 1000;
|
|
47
59
|
const CONTEXT_WINDOW_HINTS = [
|
|
48
60
|
"context window",
|
|
49
61
|
"maximum context length",
|
|
@@ -56,6 +68,17 @@ const CONTEXT_WINDOW_HINTS = [
|
|
|
56
68
|
"too many tokens",
|
|
57
69
|
"ran out of room in the model's context window"
|
|
58
70
|
];
|
|
71
|
+
// Phrases looked for in a provider error hint to recognise a rate-limit or
// quota failure (used for HTTP 413 responses that are really throttling).
// NOTE(review): "tpm" and "rpm" are very short; if matching is substring-based
// they may hit unrelated words — confirm against hasAnyHint.
const RATE_LIMIT_HINTS = [
  // Spelled-out throughput limits
  "tokens per minute", "requests per minute",
  // Generic rate-limit wording
  "rate limit", "rate_limit",
  // Abbreviated throughput limits
  "tpm", "rpm",
  // Quota wording
  "quota exceeded", "quota_exceeded", "limit exceeded"
];
|
|
59
82
|
const fallbackCircuitState = new Map();
|
|
60
83
|
|
|
61
84
|
export function shouldRetryStatus(status) {
|
|
@@ -392,6 +415,16 @@ export async function classifyFailureResult(result, retryPolicy) {
|
|
|
392
415
|
}
|
|
393
416
|
|
|
394
417
|
if (status === 404 || status === 410) {
|
|
418
|
+
const hintText404 = await readProviderErrorHint(result);
|
|
419
|
+
if (hasAnyHint(hintText404, MODEL_NOT_FOUND_HINTS)) {
|
|
420
|
+
return {
|
|
421
|
+
category: "model_not_found",
|
|
422
|
+
retryable: false,
|
|
423
|
+
retryOrigin: false,
|
|
424
|
+
allowFallback: true,
|
|
425
|
+
originCooldownMs: DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS
|
|
426
|
+
};
|
|
427
|
+
}
|
|
395
428
|
return {
|
|
396
429
|
category: "not_found",
|
|
397
430
|
retryable: false,
|
|
@@ -412,9 +445,47 @@ export async function classifyFailureResult(result, retryPolicy) {
|
|
|
412
445
|
originCooldownMs: 0
|
|
413
446
|
};
|
|
414
447
|
}
|
|
448
|
+
if (hasAnyHint(hintText, VRAM_EXHAUSTION_HINTS)) {
|
|
449
|
+
return {
|
|
450
|
+
category: "vram_exhaustion",
|
|
451
|
+
retryable: false,
|
|
452
|
+
retryOrigin: false,
|
|
453
|
+
allowFallback: true,
|
|
454
|
+
originCooldownMs: retryPolicy.originFallbackCooldownMs
|
|
455
|
+
};
|
|
456
|
+
}
|
|
457
|
+
if (status === 413 && hasAnyHint(hintText, RATE_LIMIT_HINTS)) {
|
|
458
|
+
const rateLimitCooldown = retryAfterMs > 0 ? retryAfterMs : retryPolicy.originRateLimitCooldownMs;
|
|
459
|
+
return {
|
|
460
|
+
category: "rate_limited",
|
|
461
|
+
retryable: true,
|
|
462
|
+
retryOrigin: false,
|
|
463
|
+
allowFallback: true,
|
|
464
|
+
originCooldownMs: rateLimitCooldown
|
|
465
|
+
};
|
|
466
|
+
}
|
|
415
467
|
}
|
|
416
468
|
|
|
417
469
|
if (status === 408 || status === 409 || status >= 500) {
|
|
470
|
+
const hintText5xx = await readProviderErrorHint(result);
|
|
471
|
+
if (hasAnyHint(hintText5xx, VRAM_EXHAUSTION_HINTS)) {
|
|
472
|
+
return {
|
|
473
|
+
category: "vram_exhaustion",
|
|
474
|
+
retryable: false,
|
|
475
|
+
retryOrigin: false,
|
|
476
|
+
allowFallback: true,
|
|
477
|
+
originCooldownMs: retryPolicy.originFallbackCooldownMs
|
|
478
|
+
};
|
|
479
|
+
}
|
|
480
|
+
if (hasAnyHint(hintText5xx, MODEL_NOT_FOUND_HINTS)) {
|
|
481
|
+
return {
|
|
482
|
+
category: "model_not_found",
|
|
483
|
+
retryable: false,
|
|
484
|
+
retryOrigin: false,
|
|
485
|
+
allowFallback: true,
|
|
486
|
+
originCooldownMs: DEFAULT_ORIGIN_MODEL_NOT_FOUND_COOLDOWN_MS
|
|
487
|
+
};
|
|
488
|
+
}
|
|
418
489
|
return {
|
|
419
490
|
category: "temporary_error",
|
|
420
491
|
retryable: true,
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// Capability flag -> request body fields removed when that flag is explicitly `false`.
const UNSUPPORTED_FIELD_RULES = [
  ["supportsReasoning", ["reasoning_effort", "reasoning"]],
  ["supportsThinking", ["thinking"]],
  ["supportsResponseFormat", ["response_format"]],
  ["supportsLogprobs", ["logprobs", "top_logprobs"]],
  ["supportsServiceTier", ["service_tier"]],
  ["supportsPrediction", ["prediction", "predicted_output"]],
  ["supportsStreamOptions", ["stream_options"]]
];

/**
 * Strips request body fields the target model doesn't support.
 * Only acts when a capability is explicitly `false` — undefined means "pass through".
 *
 * The body is mutated in place and the same reference is returned. A missing or
 * non-object body (or capabilities value) is returned untouched instead of throwing.
 *
 * @param {object} providerBody - Request body (already cloned upstream)
 * @param {object} [capabilities] - Model capabilities from config
 * @returns {object} The providerBody with unsupported fields deleted
 */
export function stripUnsupportedFields(providerBody, capabilities) {
  // Guard the body too: `delete` on null/undefined would throw a TypeError.
  if (!providerBody || typeof providerBody !== "object") return providerBody;
  if (!capabilities || typeof capabilities !== "object") return providerBody;

  for (const [capabilityFlag, fields] of UNSUPPORTED_FIELD_RULES) {
    // Strict comparison: only an explicit `false` opts a field group out.
    if (capabilities[capabilityFlag] !== false) continue;
    for (const field of fields) {
      delete providerBody[field];
    }
  }

  return providerBody;
}
|
|
39
|
+
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// Environment variable names for the large-request logging feature.
// NOTE(review): the PATH variable is not read anywhere in this module — presumably consumed by the caller.
export const LARGE_REQUEST_LOG_ENABLED_ENV = "LLM_ROUTER_LOG_LARGE_REQUESTS";
export const LARGE_REQUEST_LOG_THRESHOLD_ENV = "LLM_ROUTER_LARGE_REQUEST_LOG_THRESHOLD_BYTES";
export const LARGE_REQUEST_LOG_PATH_ENV = "LLM_ROUTER_LARGE_REQUEST_LOG_PATH";

// Threshold used when the threshold variable is unset or invalid: 20 MiB.
export const DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES = 20 * 1024 ** 2;

// One shared encoder, used to measure the UTF-8 byte length of strings.
const DEFAULT_TEXT_ENCODER = new TextEncoder();

// A string must be at least this large (256 KiB) to be listed as a "large string" hint.
const LARGE_STRING_HINT_THRESHOLD_BYTES = 256 * 1024;

// At most this many of the biggest strings are kept in a summary.
const MAX_LARGE_STRING_HINTS = 8;

// Upper bound on nodes visited while summarizing a body; beyond it the walk stops.
const MAX_SUMMARY_NODES = 50000;
|
|
10
|
+
|
|
11
|
+
/**
 * Interprets a loosely-typed flag (typically an environment variable) as a boolean.
 *
 * Real booleans pass through. Otherwise the value is stringified, trimmed and
 * lower-cased, then matched against the accepted on/off spellings.
 *
 * @param {*} value - Raw flag value
 * @param {boolean} [fallback=false] - Returned for missing, empty or unrecognised values
 * @returns {boolean} The interpreted flag, or `fallback`
 */
function toBoolean(value, fallback = false) {
  if (typeof value === "boolean") return value;
  if (value == null || value === "") return fallback;

  switch (String(value).trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "y":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "n":
    case "off":
      return false;
    default:
      return fallback;
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Parses a positive integer from a loosely-typed value (typically an environment variable).
 *
 * The whole value must be numeric: strings are converted with `Number()`, so
 * `"1e7"` yields 10000000 and malformed input such as `"20MB"` yields `fallback`.
 * (`parseInt` would stop at the first non-digit and silently return 1 and 20.)
 * Fractions are truncated toward zero.
 *
 * @param {*} value - Raw value
 * @param {*} fallback - Returned when the value is missing, non-numeric, or not >= 1
 * @returns {*} The parsed positive integer, or `fallback`
 */
function toPositiveInteger(value, fallback) {
  if (value === undefined || value === null || value === "") return fallback;

  // Numbers are used directly; stringifying a large number ("1e+21") would lose it.
  const numeric = typeof value === "number" ? value : Number(String(value).trim());
  if (!Number.isFinite(numeric)) return fallback;

  const whole = Math.trunc(numeric);
  return whole > 0 ? whole : fallback;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Adds a tool type label to `target` unless it is blank or already listed.
 *
 * @param {string[]} target - List of labels, mutated in place
 * @param {*} value - Raw tool type; falsy values count as blank
 * @returns {void}
 */
function appendToolType(target, value) {
  const label = String(value || "").trim();
  if (label && !target.includes(label)) {
    target.push(label);
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Maps a content part `type` to a coarse attachment category.
 *
 * @param {*} type - Raw `type` value of a content part
 * @returns {string} "image", "document", "audio", "file", "attachment",
 *   or "" when the type is blank or not attachment-like
 */
function classifyContentType(type) {
  const kind = String(type || "").trim().toLowerCase();

  switch (kind) {
    case "":
      return "";
    case "image":
    case "image_url":
    case "input_image":
      return "image";
    case "document":
    case "input_document":
      return "document";
    case "audio":
    case "input_audio":
      return "audio";
    case "file":
    case "input_file":
      return "file";
    default:
      // Anything else only counts when its name mentions "attachment".
      return kind.includes("attachment") ? "attachment" : "";
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Folds one string's UTF-8 size into the running body summary.
 *
 * Always updates `summary.largestStringBytes`. When the string reaches
 * LARGE_STRING_HINT_THRESHOLD_BYTES it is also counted in `largeStringCount`
 * and added to `largeStringHints`, which is kept sorted by size (largest first)
 * and capped at MAX_LARGE_STRING_HINTS entries. Only the size is stored, never the text.
 *
 * @param {object} summary - Summary accumulator, mutated in place
 * @param {*} value - Candidate value; non-strings and empty strings are ignored
 * @param {string} path - Location of the value inside the body
 * @param {string} [hintType="string"] - Label stored as the hint's `type`
 * @returns {void}
 */
function maybeRecordLargeString(summary, value, path, hintType = "string") {
  if (typeof value !== "string" || value === "") return;

  const byteLength = DEFAULT_TEXT_ENCODER.encode(value).byteLength;
  if (summary.largestStringBytes < byteLength) {
    summary.largestStringBytes = byteLength;
  }

  if (byteLength >= LARGE_STRING_HINT_THRESHOLD_BYTES) {
    summary.largeStringCount += 1;

    const hints = summary.largeStringHints;
    hints.push({ path, bytes: byteLength, type: hintType });
    hints.sort((a, b) => b.bytes - a.bytes);
    // Drop the smallest entries once the cap is exceeded.
    if (hints.length > MAX_LARGE_STRING_HINTS) {
      hints.length = MAX_LARGE_STRING_HINTS;
    }
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Builds a size-and-shape summary of a provider request body without copying payloads.
 *
 * Walks the body iteratively (explicit stack, cycle-safe via a WeakSet) and counts
 * messages, inputs, tools, content parts, attachment-like parts, data URLs,
 * base64 sources and large strings. The walk stops after MAX_SUMMARY_NODES
 * visited nodes, in which case `traversalTruncated` is set.
 *
 * A base64 `source.data` string is recorded once, with hint type "base64";
 * it is not recorded again when the `source` object itself is traversed.
 *
 * @param {*} body - Provider request body; non-objects yield an all-zero summary
 * @returns {object} Summary with counts, `toolTypes`, `topLevelKeys` and `largeStringHints`
 */
function summarizeProviderBody(body) {
  const tools = Array.isArray(body?.tools) ? body.tools : [];
  const toolTypes = [];
  for (const tool of tools) {
    appendToolType(toolTypes, tool?.type);
  }

  const summary = {
    topLevelKeys: body && typeof body === "object" && !Array.isArray(body) ? Object.keys(body).sort() : [],
    messageCount: Array.isArray(body?.messages) ? body.messages.length : 0,
    inputCount: Array.isArray(body?.input) ? body.input.length : 0,
    toolCount: tools.length,
    toolTypes,
    contentPartCount: 0,
    attachmentLikeParts: 0,
    imageParts: 0,
    documentParts: 0,
    audioParts: 0,
    fileParts: 0,
    dataUrlStrings: 0,
    base64SourceParts: 0,
    largeStringCount: 0,
    largestStringBytes: 0,
    largeStringHints: [],
    traversalTruncated: false
  };

  const stack = [{ value: body, path: "body" }];
  const seen = new WeakSet();
  // `source` objects whose `data` string was already recorded from the parent part.
  const recordedBase64Sources = new WeakSet();
  let visited = 0;

  while (stack.length > 0) {
    const current = stack.pop();
    visited += 1;
    if (visited > MAX_SUMMARY_NODES) {
      summary.traversalTruncated = true;
      break;
    }

    const value = current?.value;

    // Strings reach the stack only as array elements; object-valued strings are handled inline below.
    if (typeof value === "string") {
      const isDataUrl = value.startsWith("data:");
      if (isDataUrl) {
        summary.dataUrlStrings += 1;
      }
      maybeRecordLargeString(summary, value, current.path, isDataUrl ? "data-url" : "string");
      continue;
    }
    if (!value || typeof value !== "object") continue;
    if (seen.has(value)) continue;
    seen.add(value);

    if (Array.isArray(value)) {
      // Push in reverse so elements are popped in their original order.
      for (let index = value.length - 1; index >= 0; index -= 1) {
        stack.push({
          value: value[index],
          path: `${current.path}[${index}]`
        });
      }
      continue;
    }

    const contentType = classifyContentType(value.type);
    if (contentType) {
      summary.attachmentLikeParts += 1;
      if (contentType === "image") summary.imageParts += 1;
      if (contentType === "document") summary.documentParts += 1;
      if (contentType === "audio") summary.audioParts += 1;
      if (contentType === "file" || contentType === "attachment") summary.fileParts += 1;
    }

    if (value.source && typeof value.source === "object") {
      const sourceType = String(value.source.type || "").trim().toLowerCase();
      if (sourceType === "base64") {
        summary.base64SourceParts += 1;
        maybeRecordLargeString(summary, value.source.data, `${current.path}.source.data`, "base64");
        // Remember it so the same string is not counted again under hint type "data".
        recordedBase64Sources.add(value.source);
      }
    }

    const dataAlreadyRecorded = recordedBase64Sources.has(value);
    for (const [key, child] of Object.entries(value)) {
      const childPath = `${current.path}.${key}`;
      if (typeof child === "string") {
        const isDataUrl = child.startsWith("data:");
        if (isDataUrl) {
          summary.dataUrlStrings += 1;
        }
        if (key === "data" && dataAlreadyRecorded) continue;

        const hintType = key === "data"
          ? "data"
          : (key === "text" ? "text" : "string");
        maybeRecordLargeString(summary, child, childPath, isDataUrl ? "data-url" : hintType);
        continue;
      }
      if (key === "content" && Array.isArray(child)) {
        summary.contentPartCount += child.length;
      }
      stack.push({
        value: child,
        path: childPath
      });
    }
  }

  return summary;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Reports whether large-request logging is switched on.
 *
 * @param {object} [env={}] - Environment variables, keyed by name
 * @returns {boolean} Value of the LARGE_REQUEST_LOG_ENABLED_ENV flag; `false` when unset or unrecognised
 */
export function isLargeRequestLoggingEnabled(env = {}) {
  const rawFlag = env?.[LARGE_REQUEST_LOG_ENABLED_ENV];
  return toBoolean(rawFlag, false);
}
|
|
170
|
+
|
|
171
|
+
/**
 * Resolves the request size, in bytes, used as the large-request logging threshold.
 *
 * @param {object} [env={}] - Environment variables, keyed by name
 * @returns {number} Value of LARGE_REQUEST_LOG_THRESHOLD_ENV when it is a positive
 *   integer, otherwise DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES
 */
export function resolveLargeRequestLogThresholdBytes(env = {}) {
  const rawThreshold = env?.[LARGE_REQUEST_LOG_THRESHOLD_ENV];
  return toPositiveInteger(rawThreshold, DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Measures the UTF-8 byte length of an already-serialized request body.
 *
 * @param {*} [serializedBody=""] - Serialized body; falsy values are treated as an empty string
 * @returns {number} Byte length of the stringified body
 */
export function measureSerializedRequestBytes(serializedBody = "") {
  const text = String(serializedBody || "");
  const encoded = DEFAULT_TEXT_ENCODER.encode(text);
  return encoded.byteLength;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Assembles the structured log record for an oversized provider request.
 *
 * Text fields are stringified and trimmed, with falsy values becoming "".
 * The body itself is not included — only the summary from `summarizeProviderBody`.
 *
 * @param {object} [options]
 * @param {*} [options.providerBody] - Request body; summarized, and used as a last-resort model name source
 * @param {*} [options.requestBytes] - Serialized request size; non-finite values become 0
 * @param {*} [options.thresholdBytes] - Threshold in effect; non-finite values become the default threshold
 * @param {*} [options.providerUrl] - Target URL
 * @param {object} [options.candidate] - Routing candidate; supplies requested model, provider id and backend model
 * @param {*} [options.sourceFormat] - Incoming request format
 * @param {*} [options.targetFormat] - Outgoing request format
 * @param {*} [options.requestKind] - Request kind label
 * @param {*} [options.clientType] - Client type label
 * @param {*} [options.stream] - Coerced to a boolean
 * @param {*} [options.providerType="http"] - Provider transport label; blank values become "http"
 * @returns {object} Log entry with `kind: "large-provider-request"`
 */
export function buildLargeRequestLogEntry({
  providerBody,
  requestBytes,
  thresholdBytes,
  providerUrl,
  candidate,
  sourceFormat,
  targetFormat,
  requestKind,
  clientType,
  stream,
  providerType = "http"
} = {}) {
  const asText = (raw) => String(raw || "").trim();
  const asFiniteNumber = (raw, fallback) => {
    const numeric = Number(raw);
    return Number.isFinite(numeric) ? numeric : fallback;
  };

  const entry = { kind: "large-provider-request" };
  entry.providerType = asText(providerType || "http") || "http";
  entry.requestBytes = asFiniteNumber(requestBytes, 0);
  entry.thresholdBytes = asFiniteNumber(thresholdBytes, DEFAULT_LARGE_REQUEST_LOG_THRESHOLD_BYTES);
  entry.providerUrl = asText(providerUrl);
  entry.clientType = asText(clientType);
  entry.stream = Boolean(stream);
  entry.sourceFormat = asText(sourceFormat);
  entry.targetFormat = asText(targetFormat);
  entry.requestKind = asText(requestKind);
  entry.requestedModel = asText(candidate?.requestModelId);
  entry.providerId = asText(candidate?.providerId || candidate?.provider?.id);
  entry.backendModel = asText(candidate?.backend || candidate?.modelId || providerBody?.model);
  entry.bodySummary = summarizeProviderBody(providerBody);
  return entry;
}
|