@khanglvm/llm-router 2.5.2 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/node/coding-tool-config.js +15 -1
- package/src/node/provider-probe.js +18 -0
- package/src/node/quota-probe-mapping.js +215 -0
- package/src/node/quota-probe-runner.js +234 -0
- package/src/node/web-console-client.js +33 -27
- package/src/node/web-console-server.js +99 -0
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/api-client.js +27 -0
- package/src/runtime/balancer.js +47 -4
- package/src/runtime/config.js +9 -4
- package/src/runtime/handler/fallback.js +7 -0
- package/src/runtime/quota-probe.js +179 -0
package/src/runtime/config.js
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
materializeLocalVariantProvider,
|
|
16
16
|
normalizeLocalModelsMetadata
|
|
17
17
|
} from "./local-models.js";
|
|
18
|
+
import { normalizeQuotaProbeConfig } from "./quota-probe.js";
|
|
18
19
|
|
|
19
20
|
export const CONFIG_VERSION = 2;
|
|
20
21
|
export const MIN_SUPPORTED_CONFIG_VERSION = 1;
|
|
@@ -1471,6 +1472,8 @@ function normalizeProvider(provider, index = 0) {
|
|
|
1471
1472
|
}))
|
|
1472
1473
|
.filter(Boolean);
|
|
1473
1474
|
|
|
1475
|
+
const quotaProbe = normalizeQuotaProbeConfig(provider.quotaProbe);
|
|
1476
|
+
|
|
1474
1477
|
const auth = normalizeAuthConfig(provider.auth) || null;
|
|
1475
1478
|
const authByFormat = provider.authByFormat && typeof provider.authByFormat === "object"
|
|
1476
1479
|
? Object.fromEntries(
|
|
@@ -1500,7 +1503,8 @@ function normalizeProvider(provider, index = 0) {
|
|
|
1500
1503
|
models: normalizedModels,
|
|
1501
1504
|
rateLimits: normalizedRateLimits,
|
|
1502
1505
|
metadata: normalizeMetadataObject(provider.metadata),
|
|
1503
|
-
lastProbe: provider.lastProbe && typeof provider.lastProbe === "object" ? provider.lastProbe : undefined
|
|
1506
|
+
lastProbe: provider.lastProbe && typeof provider.lastProbe === "object" ? provider.lastProbe : undefined,
|
|
1507
|
+
quotaProbe
|
|
1504
1508
|
};
|
|
1505
1509
|
|
|
1506
1510
|
// Add subscription-specific fields
|
|
@@ -2229,20 +2233,21 @@ export function resolveProviderUrl(provider, targetFormat, requestKind = undefin
|
|
|
2229
2233
|
const baseUrl = sanitizeEndpointUrl(provider?.baseUrlByFormat?.[targetFormat] || provider?.baseUrl || "").replace(/\/+$/, "");
|
|
2230
2234
|
if (!baseUrl) return "";
|
|
2231
2235
|
const isVersionedApiRoot = /\/v\d+(?:\.\d+)?$/i.test(baseUrl);
|
|
2236
|
+
const hasVersionedApiPath = /\/v\d+[a-z]*(?:\.\d+)?(?:\/|$)/i.test(baseUrl);
|
|
2232
2237
|
|
|
2233
2238
|
if (targetFormat === FORMATS.OPENAI) {
|
|
2234
2239
|
if (requestKind === "responses") {
|
|
2235
2240
|
if (baseUrl.endsWith("/responses")) return baseUrl;
|
|
2236
|
-
if (baseUrl.endsWith("/v1") || isVersionedApiRoot) return `${baseUrl}/responses`;
|
|
2241
|
+
if (baseUrl.endsWith("/v1") || isVersionedApiRoot || hasVersionedApiPath) return `${baseUrl}/responses`;
|
|
2237
2242
|
return `${baseUrl}/v1/responses`;
|
|
2238
2243
|
}
|
|
2239
2244
|
if (requestKind === "completions") {
|
|
2240
2245
|
if (baseUrl.endsWith("/completions")) return baseUrl;
|
|
2241
|
-
if (baseUrl.endsWith("/v1") || isVersionedApiRoot) return `${baseUrl}/completions`;
|
|
2246
|
+
if (baseUrl.endsWith("/v1") || isVersionedApiRoot || hasVersionedApiPath) return `${baseUrl}/completions`;
|
|
2242
2247
|
return `${baseUrl}/v1/completions`;
|
|
2243
2248
|
}
|
|
2244
2249
|
if (baseUrl.endsWith("/chat/completions")) return baseUrl;
|
|
2245
|
-
if (baseUrl.endsWith("/v1") || isVersionedApiRoot) return `${baseUrl}/chat/completions`;
|
|
2250
|
+
if (baseUrl.endsWith("/v1") || isVersionedApiRoot || hasVersionedApiPath) return `${baseUrl}/chat/completions`;
|
|
2246
2251
|
return `${baseUrl}/v1/chat/completions`;
|
|
2247
2252
|
}
|
|
2248
2253
|
|
|
@@ -524,6 +524,13 @@ export async function classifyFailureResult(result, retryPolicy) {
|
|
|
524
524
|
};
|
|
525
525
|
}
|
|
526
526
|
|
|
527
|
+
/**
 * Decide whether a failed upstream call should trigger a quota-probe refresh.
 *
 * Only the configured status codes are examined; the `bodyRegex` field of the
 * trigger config is not consulted by this function.
 *
 * @param {number|string} status - Status of the failed upstream call; coerced with Number().
 * @param {object|null|undefined} probeConfig - Quota-probe config. It must be enabled and
 *   carry `refreshTriggers.onUpstreamError` for a trigger to fire.
 * @returns {boolean} True only when `onUpstreamError.statusCodes` is an array containing the status.
 */
export function shouldEmitCapErrorTrigger(status, probeConfig) {
  const errorTrigger = probeConfig?.enabled
    ? probeConfig?.refreshTriggers?.onUpstreamError
    : null;
  if (!errorTrigger) return false;

  const { statusCodes } = errorTrigger;
  return Array.isArray(statusCodes) && statusCodes.includes(Number(status));
}
|
|
533
|
+
|
|
527
534
|
export function enrichErrorMessage(error, candidate, isFallback) {
|
|
528
535
|
const prefix = `${candidate.providerId}/${candidate.modelId}`;
|
|
529
536
|
if (isFallback) {
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure logic for provider quota-probe snapshots, verdicts, and config normalization.
|
|
3
|
+
* Zero IO — all functions are deterministic and side-effect free.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Accepted values for the enumerated quota-probe config fields.
const VALID_CAP_KINDS = new Set(["dollars", "tokens", "requests"]);
const VALID_COMBINATORS = new Set(["AND", "OR", "REPLACE"]);
const VALID_ENFORCE_MODES = new Set(["gate", "observe"]);
const VALID_PROBE_MODES = new Set(["http", "custom"]);
const VALID_HTTP_METHODS = new Set(["GET", "POST"]);

// Timeout default and upper bound for the HTTP probe block (passed to clampTimeout).
const HTTP_TIMEOUT_DEFAULT = 5000;
const HTTP_TIMEOUT_CAP = 15000;

// Timeout default and upper bound for the custom probe block (passed to clampTimeout).
const CUSTOM_TIMEOUT_DEFAULT = 2000;
const CUSTOM_TIMEOUT_CAP = 10000;
|
|
16
|
+
|
|
17
|
+
/**
 * Check that a value is a real, finite number that is zero or greater.
 *
 * @param {*} v - Candidate value; no coercion is applied.
 * @returns {boolean} False for non-numbers, NaN, ±Infinity and negative numbers.
 */
function isFiniteNonNeg(v) {
  if (typeof v !== "number" || !Number.isFinite(v)) return false;
  return v >= 0;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Validate the shape of a raw quota snapshot.
 *
 * Rules, checked in order:
 *  1. `raw` must be a non-null object.
 *  2. `raw.capKind` must be one of VALID_CAP_KINDS.
 *  3. Each of `used`, `limit`, `remaining` that is present (not null/undefined)
 *     must be a non-negative finite number.
 *  4. Unless `raw.isUnlimited` is truthy, at least two of those three counters
 *     must be present, so the third can be derived.
 *
 * @param {*} raw - Candidate snapshot.
 * @returns {{valid: boolean, error: (string|null)}} `error` is null when valid.
 */
export function validateSnapshot(raw) {
  const fail = (error) => ({ valid: false, error });

  if (!raw || typeof raw !== "object") return fail("snapshot must be an object");
  if (!VALID_CAP_KINDS.has(raw.capKind)) return fail(`invalid capKind: ${raw.capKind}`);

  const counters = ["used", "limit", "remaining"];

  // `!= null` skips both null and undefined, i.e. counters that were not supplied.
  const malformed = counters.find((name) => raw[name] != null && !isFiniteNonNeg(raw[name]));
  if (malformed !== undefined) return fail(`${malformed} must be a non-negative finite number`);

  if (!raw.isUnlimited) {
    const usable = counters.reduce(
      (count, name) => (isFiniteNonNeg(raw[name]) ? count + 1 : count),
      0
    );
    if (usable < 2) return fail("at least two of {used, limit, remaining} required");
  }

  return { valid: true, error: null };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Return a shallow copy of a snapshot with the missing counter filled in.
 *
 * When exactly two of `used`, `limit`, `remaining` are non-negative finite
 * numbers, the third is computed from them. With fewer than two, or with all
 * three already present, the copy is returned unchanged. The input object is
 * never mutated.
 *
 * Derived `remaining` and `used` are clamped at 0. Without the clamp,
 * `limit - used` goes negative when usage exceeds the limit, and a negative
 * counter fails `isFiniteNonNeg`, so it would be indistinguishable from a
 * missing value.
 *
 * @param {object} raw - Snapshot; may carry any additional fields.
 * @returns {object} Copy of `raw`, possibly with one extra derived counter.
 */
export function deriveSnapshot(raw) {
  const out = { ...raw };
  const hasUsed = isFiniteNonNeg(out.used);
  const hasLimit = isFiniteNonNeg(out.limit);
  const hasRemaining = isFiniteNonNeg(out.remaining);

  if (hasUsed && hasLimit && !hasRemaining) {
    // Over-quota usage (used > limit) means nothing is left, not a negative amount.
    out.remaining = Math.max(0, out.limit - out.used);
  } else if (hasLimit && hasRemaining && !hasUsed) {
    // remaining > limit would otherwise yield negative usage.
    out.used = Math.max(0, out.limit - out.remaining);
  } else if (hasUsed && hasRemaining && !hasLimit) {
    out.limit = out.used + out.remaining;
  }
  return out;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Tell whether a snapshot's remaining quota is at or below the safety margin.
 *
 * The effective margin is the larger of `safetyMargin.dollars` (an absolute
 * amount) and `safetyMargin.percent` percent of `snapshot.limit`. The percent
 * part is only used when the limit is a non-negative finite number.
 *
 * A negative `remaining` (usage beyond the limit) counts as exhausted.
 * Previously any negative value was treated like a missing counter and
 * reported as "not exhausted".
 *
 * @param {object} snapshot - Snapshot with optional `isUnlimited`, `remaining`, `limit`.
 * @param {{dollars?: number, percent?: number}|null|undefined} safetyMargin - Margins; missing values default to 0.
 * @returns {boolean} False for unlimited snapshots and when `remaining` is not a finite number.
 */
export function isExhausted(snapshot, safetyMargin) {
  if (snapshot.isUnlimited) return false;

  const { remaining, limit } = snapshot;
  // Number.isFinite does not coerce, so strings, null, NaN and ±Infinity all
  // mean "no usable remaining value" and the snapshot is not called exhausted.
  if (!Number.isFinite(remaining)) return false;

  const dollarMargin = safetyMargin?.dollars ?? 0;
  const percentMargin = safetyMargin?.percent ?? 0;
  const limitBased = Number.isFinite(limit) && limit >= 0
    ? (limit * percentMargin) / 100
    : 0;

  return remaining <= Math.max(dollarMargin, limitBased);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Turn a quota snapshot into a routing verdict for one provider.
 *
 * Returns null (no opinion) when there is no snapshot, the probe is not
 * enabled, `enforce` is anything other than "gate", or the snapshot state is
 * "unknown" or "errored". Otherwise the snapshot is completed with
 * deriveSnapshot and checked with isExhausted against the configured margin.
 *
 * @param {object|null|undefined} snapshot - Latest probe snapshot.
 * @param {object|null|undefined} probeConfig - Quota-probe config.
 * @param {*} _now - Unused by this function.
 * @returns {{available: boolean, reason: string}|null} Verdict, or null for no opinion.
 */
export function resolveProbeVerdict(snapshot, probeConfig, _now) {
  const isGating = Boolean(snapshot)
    && Boolean(probeConfig?.enabled)
    && probeConfig.enforce === "gate";
  if (!isGating) return null;

  const { state } = snapshot;
  if (state === "unknown" || state === "errored") return null;

  if (snapshot.isUnlimited) return { available: true, reason: "unlimited" };

  const margin = probeConfig.safetyMargin ?? { dollars: 0, percent: 0 };
  const exhausted = isExhausted(deriveSnapshot(snapshot), margin);
  return exhausted
    ? { available: false, reason: "quota exhausted" }
    : { available: true, reason: "within budget" };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Combine the quota-probe verdict with rate-limit eligibility.
 *
 * When the probe has no verdict (`probeAvailable` is null or undefined), the
 * rate limit alone decides, whatever the combinator. Previously a missing
 * verdict was coerced to "available", so "OR" and "REPLACE" marked the
 * candidate eligible even when its rate limit was exhausted. "AND" is unchanged.
 *
 * With a verdict present:
 *  - "OR":      eligible when either the probe or the rate limit allows it.
 *  - "REPLACE": only the probe verdict counts.
 *  - "AND" or any other value: both must allow it.
 *
 * @param {object} args
 * @param {string} [args.combinator] - "AND" | "OR" | "REPLACE"; anything else behaves like "AND".
 * @param {boolean|null|undefined} args.probeAvailable - Probe verdict; null/undefined means no verdict.
 * @param {*} args.rateLimitEligible - Truthy when the rate limiter allows the candidate.
 * @returns {{eligible: boolean, skipReason: (string|null)}} `skipReason` is null when eligible.
 */
export function applyQuotaProbeGate({ combinator, probeAvailable, rateLimitEligible }) {
  const allow = () => ({ eligible: true, skipReason: null });
  const deny = (skipReason) => ({ eligible: false, skipReason });
  const rlOk = Boolean(rateLimitEligible);

  // No verdict means the probe has nothing to say; it must not override rate limits.
  if (probeAvailable === null || probeAvailable === undefined) {
    return rlOk ? allow() : deny("rate-limit exceeded");
  }

  const probeOk = Boolean(probeAvailable);

  switch (combinator) {
    case "OR":
      return probeOk || rlOk ? allow() : deny("probe and rate-limit both unavailable");
    case "REPLACE":
      return probeOk ? allow() : deny("probe unavailable");
    case "AND":
    default:
      if (!probeOk && !rlOk) return deny("probe and rate-limit both unavailable");
      if (!probeOk) return deny("probe unavailable");
      if (!rlOk) return deny("rate-limit exceeded");
      return allow();
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Resolve a configured timeout to a usable number.
 *
 * @param {*} value - Configured timeout; coerced with Number().
 * @param {number} defaultVal - Returned when the coerced value is not a finite number greater than 0.
 * @param {number} cap - Upper bound applied to accepted values.
 * @returns {number} `defaultVal`, or the coerced value limited to `cap`.
 */
function clampTimeout(value, defaultVal, cap) {
  const requested = Number(value);
  const isUsable = Number.isFinite(requested) && requested > 0;
  return isUsable ? Math.min(requested, cap) : defaultVal;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Normalize the `http` section of a quota-probe config.
 *
 * The method is matched case-insensitively: "post" or " Post " becomes "POST".
 * Previously any spelling other than exact upper case was silently replaced
 * with "GET", changing the request verb. Unsupported or non-string methods
 * still fall back to "GET".
 *
 * @param {*} raw - Raw `http` config value.
 * @returns {{method: string, url: string, headers: Array, body: *, timeoutMs: number, mapping: object}|null}
 *   Null when `raw` is not an object. `headers` and `mapping` are passed
 *   through without validating their entries; `body` is passed through as is.
 */
function normalizeHttpBlock(raw) {
  if (!raw || typeof raw !== "object") return null;

  const requestedMethod = typeof raw.method === "string" ? raw.method.trim().toUpperCase() : "";
  const method = VALID_HTTP_METHODS.has(requestedMethod) ? requestedMethod : "GET";
  const url = typeof raw.url === "string" ? raw.url : "";
  const headers = Array.isArray(raw.headers) ? raw.headers : [];
  const { body } = raw;
  const timeoutMs = clampTimeout(raw.timeoutMs, HTTP_TIMEOUT_DEFAULT, HTTP_TIMEOUT_CAP);
  const mapping = raw.mapping && typeof raw.mapping === "object" ? raw.mapping : {};

  return { method, url, headers, body, timeoutMs, mapping };
}
|
|
127
|
+
|
|
128
|
+
/**
 * Normalize the `custom` section of a quota-probe config.
 *
 * @param {*} raw - Raw `custom` config value.
 * @returns {{source: string, timeoutMs: number}|null} Null when `raw` is not an
 *   object. `source` falls back to "" when it is not a string; `timeoutMs` is
 *   resolved by clampTimeout with the custom default and cap.
 */
function normalizeCustomBlock(raw) {
  if (raw === null || typeof raw !== "object") return null;

  return {
    source: typeof raw.source === "string" ? raw.source : "",
    timeoutMs: clampTimeout(raw.timeoutMs, CUSTOM_TIMEOUT_DEFAULT, CUSTOM_TIMEOUT_CAP)
  };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Normalize a safety-margin config into `{ dollars, percent }`.
 *
 * @param {*} raw - Raw `safetyMargin` config value.
 * @returns {{dollars: number, percent: number}} Each field keeps its configured
 *   value only when that is a non-negative finite number; otherwise it is 0.
 */
function normalizeMargin(raw) {
  const margin = { dollars: 0, percent: 0 };
  if (!raw || typeof raw !== "object") return margin;

  for (const key of ["dollars", "percent"]) {
    const candidate = raw[key];
    if (isFiniteNonNeg(candidate)) margin[key] = candidate;
  }
  return margin;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Normalize the `refreshTriggers` section of a quota-probe config.
 *
 * `onManual` is always true regardless of input. `onUiOpen` and `onResetAt`
 * are coerced to booleans. `onUpstreamError` stays null unless the input
 * supplies an object, in which case `statusCodes` is reduced to its finite
 * numeric entries (an empty list when it is not an array) and `bodyRegex` is
 * kept only when it is a string.
 *
 * @param {*} raw - Raw `refreshTriggers` config value.
 * @returns {{onUiOpen: boolean, onManual: boolean, onResetAt: boolean,
 *   onUpstreamError: ({statusCodes: number[], bodyRegex: (string|null)}|null)}}
 */
function normalizeRefreshTriggers(raw) {
  const isObject = (value) => Boolean(value) && typeof value === "object";

  const triggers = { onUiOpen: false, onManual: true, onResetAt: false, onUpstreamError: null };
  if (!isObject(raw)) return triggers;

  triggers.onUiOpen = Boolean(raw.onUiOpen);
  triggers.onResetAt = Boolean(raw.onResetAt);

  const upstream = raw.onUpstreamError;
  if (isObject(upstream)) {
    const { statusCodes, bodyRegex } = upstream;
    triggers.onUpstreamError = {
      statusCodes: Array.isArray(statusCodes)
        ? statusCodes.filter((code) => Number.isFinite(code))
        : [],
      bodyRegex: typeof bodyRegex === "string" ? bodyRegex : null
    };
  }
  return triggers;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Normalize a provider's `quotaProbe` config.
 *
 * Returns null (probe disabled) unless `raw` is an object with
 * `enabled === true` and a `capKind` from VALID_CAP_KINDS. Unrecognized enum
 * values fall back to defaults: combinator "AND", enforce "gate", mode "http".
 * The nested sections are delegated to their own normalizers; `http` and
 * `custom` may each be null when the corresponding section is absent.
 *
 * @param {*} raw - Raw `quotaProbe` value from the provider config.
 * @returns {object|null} Normalized config, or null when the probe is not usable.
 */
export function normalizeQuotaProbeConfig(raw) {
  const isEnabledObject = Boolean(raw) && typeof raw === "object" && raw.enabled === true;
  if (!isEnabledObject || !VALID_CAP_KINDS.has(raw.capKind)) return null;

  // Keep a value only when it belongs to the allowed set.
  const pick = (allowed, value, fallback) => (allowed.has(value) ? value : fallback);

  return {
    enabled: true,
    capKind: raw.capKind,
    combinator: pick(VALID_COMBINATORS, raw.combinator, "AND"),
    enforce: pick(VALID_ENFORCE_MODES, raw.enforce, "gate"),
    mode: pick(VALID_PROBE_MODES, raw.mode, "http"),
    safetyMargin: normalizeMargin(raw.safetyMargin),
    http: normalizeHttpBlock(raw.http),
    custom: normalizeCustomBlock(raw.custom),
    refreshTriggers: normalizeRefreshTriggers(raw.refreshTriggers)
  };
}
|