@khanglvm/llm-router 2.5.2 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ import {
15
15
  materializeLocalVariantProvider,
16
16
  normalizeLocalModelsMetadata
17
17
  } from "./local-models.js";
18
+ import { normalizeQuotaProbeConfig } from "./quota-probe.js";
18
19
 
19
20
  export const CONFIG_VERSION = 2;
20
21
  export const MIN_SUPPORTED_CONFIG_VERSION = 1;
@@ -1471,6 +1472,8 @@ function normalizeProvider(provider, index = 0) {
1471
1472
  }))
1472
1473
  .filter(Boolean);
1473
1474
 
1475
+ const quotaProbe = normalizeQuotaProbeConfig(provider.quotaProbe);
1476
+
1474
1477
  const auth = normalizeAuthConfig(provider.auth) || null;
1475
1478
  const authByFormat = provider.authByFormat && typeof provider.authByFormat === "object"
1476
1479
  ? Object.fromEntries(
@@ -1500,7 +1503,8 @@ function normalizeProvider(provider, index = 0) {
1500
1503
  models: normalizedModels,
1501
1504
  rateLimits: normalizedRateLimits,
1502
1505
  metadata: normalizeMetadataObject(provider.metadata),
1503
- lastProbe: provider.lastProbe && typeof provider.lastProbe === "object" ? provider.lastProbe : undefined
1506
+ lastProbe: provider.lastProbe && typeof provider.lastProbe === "object" ? provider.lastProbe : undefined,
1507
+ quotaProbe
1504
1508
  };
1505
1509
 
1506
1510
  // Add subscription-specific fields
@@ -2229,20 +2233,21 @@ export function resolveProviderUrl(provider, targetFormat, requestKind = undefin
2229
2233
  const baseUrl = sanitizeEndpointUrl(provider?.baseUrlByFormat?.[targetFormat] || provider?.baseUrl || "").replace(/\/+$/, "");
2230
2234
  if (!baseUrl) return "";
2231
2235
  const isVersionedApiRoot = /\/v\d+(?:\.\d+)?$/i.test(baseUrl);
2236
+ const hasVersionedApiPath = /\/v\d+[a-z]*(?:\.\d+)?(?:\/|$)/i.test(baseUrl);
2232
2237
 
2233
2238
  if (targetFormat === FORMATS.OPENAI) {
2234
2239
  if (requestKind === "responses") {
2235
2240
  if (baseUrl.endsWith("/responses")) return baseUrl;
2236
- if (baseUrl.endsWith("/v1") || isVersionedApiRoot) return `${baseUrl}/responses`;
2241
+ if (baseUrl.endsWith("/v1") || isVersionedApiRoot || hasVersionedApiPath) return `${baseUrl}/responses`;
2237
2242
  return `${baseUrl}/v1/responses`;
2238
2243
  }
2239
2244
  if (requestKind === "completions") {
2240
2245
  if (baseUrl.endsWith("/completions")) return baseUrl;
2241
- if (baseUrl.endsWith("/v1") || isVersionedApiRoot) return `${baseUrl}/completions`;
2246
+ if (baseUrl.endsWith("/v1") || isVersionedApiRoot || hasVersionedApiPath) return `${baseUrl}/completions`;
2242
2247
  return `${baseUrl}/v1/completions`;
2243
2248
  }
2244
2249
  if (baseUrl.endsWith("/chat/completions")) return baseUrl;
2245
- if (baseUrl.endsWith("/v1") || isVersionedApiRoot) return `${baseUrl}/chat/completions`;
2250
+ if (baseUrl.endsWith("/v1") || isVersionedApiRoot || hasVersionedApiPath) return `${baseUrl}/chat/completions`;
2246
2251
  return `${baseUrl}/v1/chat/completions`;
2247
2252
  }
2248
2253
 
@@ -524,6 +524,13 @@ export async function classifyFailureResult(result, retryPolicy) {
524
524
  };
525
525
  }
526
526
 
527
/**
 * Decide whether an upstream HTTP status should trigger a quota-probe refresh.
 *
 * Only the `statusCodes` list of `refreshTriggers.onUpstreamError` is consulted;
 * the status is coerced with `Number()` before the membership check.
 *
 * @param {number|string} status - HTTP status returned by the upstream.
 * @param {object|null|undefined} probeConfig - Quota-probe config of the provider.
 * @returns {boolean} True when the probe is enabled and the status is listed.
 */
export function shouldEmitCapErrorTrigger(status, probeConfig) {
  const upstreamErrorTrigger = probeConfig?.enabled
    ? probeConfig.refreshTriggers?.onUpstreamError
    : null;
  if (!upstreamErrorTrigger) return false;

  const { statusCodes } = upstreamErrorTrigger;
  return Array.isArray(statusCodes) && statusCodes.includes(Number(status));
}
533
+
527
534
  export function enrichErrorMessage(error, candidate, isFallback) {
528
535
  const prefix = `${candidate.providerId}/${candidate.modelId}`;
529
536
  if (isFallback) {
@@ -0,0 +1,179 @@
1
+ /**
2
+ * Pure logic for provider quota-probe snapshots, verdicts, and config normalization.
3
+ * Zero IO — all functions are deterministic and side-effect free.
4
+ */
5
+
6
// Accepted values for the enumerated quota-probe config fields.
const VALID_CAP_KINDS = new Set(["dollars", "tokens", "requests"]);
const VALID_COMBINATORS = new Set(["AND", "OR", "REPLACE"]);
const VALID_ENFORCE_MODES = new Set(["gate", "observe"]);
const VALID_PROBE_MODES = new Set(["http", "custom"]);
const VALID_HTTP_METHODS = new Set(["GET", "POST"]);

// Timeouts for the `timeoutMs` fields: the default is used when the configured
// value is missing or invalid, the cap is the largest value allowed through.
const HTTP_TIMEOUT_DEFAULT = 5000;
const HTTP_TIMEOUT_CAP = 15000;
const CUSTOM_TIMEOUT_DEFAULT = 2000;
const CUSTOM_TIMEOUT_CAP = 10000;
16
+
17
/**
 * Tell whether a value is a real, finite number that is zero or greater.
 *
 * @param {*} v - Candidate value of any type.
 * @returns {boolean} False for non-numbers, NaN, infinities and negatives.
 */
function isFiniteNonNeg(v) {
  // Number.isFinite never coerces, so strings, null, etc. are rejected here.
  if (!Number.isFinite(v)) return false;
  return v >= 0;
}
20
+
21
/**
 * Check that a raw quota snapshot is well-formed.
 *
 * Rules, applied in order:
 *  1. the snapshot is a non-null object;
 *  2. `capKind` is one of the accepted cap kinds;
 *  3. every supplied `used` / `limit` / `remaining` (not null/undefined) is a
 *     non-negative finite number;
 *  4. unless `isUnlimited` is truthy, at least two of those three are supplied.
 *
 * @param {*} raw - Candidate snapshot.
 * @returns {{valid: boolean, error: string|null}} Validation outcome; `error`
 *   is null when the snapshot is valid.
 */
export function validateSnapshot(raw) {
  const reject = (error) => ({ valid: false, error });

  if (!raw || typeof raw !== "object") {
    return reject("snapshot must be an object");
  }
  if (!VALID_CAP_KINDS.has(raw.capKind)) {
    return reject(`invalid capKind: ${raw.capKind}`);
  }

  // One pass: reject malformed figures and count the usable ones.
  let suppliedCount = 0;
  for (const field of ["used", "limit", "remaining"]) {
    const value = raw[field];
    if (value === undefined || value === null) continue;
    if (!isFiniteNonNeg(value)) {
      return reject(`${field} must be a non-negative finite number`);
    }
    suppliedCount += 1;
  }

  if (!raw.isUnlimited && suppliedCount < 2) {
    return reject("at least two of {used, limit, remaining} required");
  }
  return { valid: true, error: null };
}
45
+
46
/**
 * Fill in whichever of `used` / `limit` / `remaining` is missing when the
 * other two are known. The input is not mutated; a shallow copy is returned.
 *
 * Derived `remaining` and `used` are clamped at 0. Without the clamp an
 * over-spent quota (`used > limit`) yields a negative `remaining`, which the
 * rest of this module treats as "not a valid figure" rather than "nothing
 * left".
 *
 * @param {object} raw - Snapshot carrying any subset of the three figures.
 * @returns {object} Copy of `raw` with the missing figure derived when exactly
 *   two valid (non-negative finite) figures are present; otherwise an
 *   unchanged copy.
 */
export function deriveSnapshot(raw) {
  const out = { ...raw };
  const hasUsed = isFiniteNonNeg(out.used);
  const hasLimit = isFiniteNonNeg(out.limit);
  const hasRemaining = isFiniteNonNeg(out.remaining);

  if (hasUsed && hasLimit && !hasRemaining) {
    // Over-spend means nothing is left, not a negative balance.
    out.remaining = Math.max(0, out.limit - out.used);
  } else if (hasLimit && hasRemaining && !hasUsed) {
    // remaining > limit (e.g. bonus credit) must not produce negative usage.
    out.used = Math.max(0, out.limit - out.remaining);
  } else if (hasUsed && hasRemaining && !hasLimit) {
    out.limit = out.used + out.remaining;
  }
  return out;
}
61
+
62
/**
 * Decide whether a snapshot has run out of quota once the safety margin is
 * taken into account.
 *
 * The effective margin is the larger of `safetyMargin.dollars` (compared
 * directly against `remaining`, whatever the cap kind) and
 * `safetyMargin.percent` percent of `limit` (only when `limit` is a valid
 * figure). Margin values that are not non-negative finite numbers count as 0,
 * so a malformed margin can never disable the check.
 *
 * @param {object} snapshot - Snapshot with `remaining` and optionally `limit`
 *   and `isUnlimited`.
 * @param {{dollars?: number, percent?: number}|null|undefined} safetyMargin
 * @returns {boolean} True when `remaining` is at or below the effective
 *   margin, including a negative (overdrawn) `remaining`. False when the
 *   snapshot is unlimited or `remaining` is not a finite number.
 */
export function isExhausted(snapshot, safetyMargin) {
  if (snapshot.isUnlimited) return false;

  const { remaining, limit } = snapshot;
  // Unknown remaining: nothing to judge on, so do not report exhaustion.
  if (!Number.isFinite(remaining)) return false;

  const absoluteMargin = isFiniteNonNeg(safetyMargin?.dollars) ? safetyMargin.dollars : 0;
  const percent = isFiniteNonNeg(safetyMargin?.percent) ? safetyMargin.percent : 0;
  const percentMargin = isFiniteNonNeg(limit) ? (limit * percent) / 100 : 0;

  // The margin is never negative, so an overdrawn (negative) remaining is
  // always reported as exhausted.
  return remaining <= Math.max(absoluteMargin, percentMargin);
}
74
+
75
/**
 * Turn a quota snapshot into an availability verdict for routing.
 *
 * No verdict (null) is produced when there is no snapshot, the probe is not
 * enabled, `enforce` is anything other than "gate", or the snapshot state is
 * "unknown" / "errored".
 *
 * @param {object|null|undefined} snapshot - Latest probe snapshot.
 * @param {object|null|undefined} probeConfig - Quota-probe config.
 * @param {*} _now - Accepted but not read by this function.
 * @returns {{available: boolean, reason: string}|null} Verdict, or null when
 *   the probe should not influence the decision.
 */
export function resolveProbeVerdict(snapshot, probeConfig, _now) {
  const isGating = Boolean(snapshot)
    && Boolean(probeConfig?.enabled)
    && probeConfig.enforce === "gate";
  if (!isGating) return null;

  if (["unknown", "errored"].includes(snapshot.state)) return null;
  if (snapshot.isUnlimited) return { available: true, reason: "unlimited" };

  const safetyMargin = probeConfig.safetyMargin ?? { dollars: 0, percent: 0 };
  const exhausted = isExhausted(deriveSnapshot(snapshot), safetyMargin);
  return exhausted
    ? { available: false, reason: "quota exhausted" }
    : { available: true, reason: "within budget" };
}
88
+
89
/**
 * Combine the quota-probe verdict with rate-limit eligibility.
 *
 * A null/undefined `probeAvailable` counts as "probe passes".
 *  - "OR": eligible when either signal passes.
 *  - "REPLACE": only the probe signal is considered.
 *  - "AND" and any other value: both signals must pass.
 *
 * @param {object} args
 * @param {string} args.combinator - "AND", "OR" or "REPLACE".
 * @param {boolean|null|undefined} args.probeAvailable - Probe verdict, if any.
 * @param {*} args.rateLimitEligible - Truthy when rate limits allow the request.
 * @returns {{eligible: boolean, skipReason: string|null}}
 */
export function applyQuotaProbeGate({ combinator, probeAvailable, rateLimitEligible }) {
  const probePasses = Boolean(probeAvailable ?? true);
  const rateLimitPasses = Boolean(rateLimitEligible);

  const allow = () => ({ eligible: true, skipReason: null });
  const deny = (skipReason) => ({ eligible: false, skipReason });

  if (combinator === "OR") {
    return probePasses || rateLimitPasses
      ? allow()
      : deny("probe and rate-limit both unavailable");
  }

  if (combinator === "REPLACE") {
    return probePasses ? allow() : deny("probe unavailable");
  }

  // "AND" and every unrecognised combinator.
  if (!probePasses) {
    return deny(rateLimitPasses ? "probe unavailable" : "probe and rate-limit both unavailable");
  }
  if (!rateLimitPasses) return deny("rate-limit exceeded");
  return allow();
}
110
+
111
/**
 * Coerce a configured timeout to a usable number.
 *
 * @param {*} value - Configured timeout; coerced with `Number()`.
 * @param {number} defaultVal - Returned when `value` is not a positive finite number.
 * @param {number} cap - Upper bound applied to valid values.
 * @returns {number} `defaultVal`, or the coerced value limited to `cap`.
 */
function clampTimeout(value, defaultVal, cap) {
  const requested = Number(value);
  const isUsable = Number.isFinite(requested) && requested > 0;
  return isUsable ? Math.min(requested, cap) : defaultVal;
}
116
+
117
/**
 * Normalize the `http` section of a quota-probe config.
 *
 * The method is matched case-insensitively (surrounding whitespace ignored)
 * and returned in upper case, so a config written as "post" is not silently
 * downgraded to GET. Methods outside the accepted set still fall back to
 * "GET".
 *
 * @param {*} raw - Raw `http` block.
 * @returns {{method: string, url: string, headers: Array, body: *,
 *   timeoutMs: number, mapping: object}|null} Normalized block, or null when
 *   `raw` is not an object.
 */
function normalizeHttpBlock(raw) {
  if (!raw || typeof raw !== "object") return null;

  const requestedMethod = typeof raw.method === "string" ? raw.method.trim().toUpperCase() : "";
  const method = VALID_HTTP_METHODS.has(requestedMethod) ? requestedMethod : "GET";
  const url = typeof raw.url === "string" ? raw.url : "";
  const headers = Array.isArray(raw.headers) ? raw.headers : [];
  const timeoutMs = clampTimeout(raw.timeoutMs, HTTP_TIMEOUT_DEFAULT, HTTP_TIMEOUT_CAP);
  const mapping = raw.mapping && typeof raw.mapping === "object" ? raw.mapping : {};

  // `body` is passed through untouched (undefined when absent).
  return { method, url, headers, body: raw.body, timeoutMs, mapping };
}
127
+
128
/**
 * Normalize the `custom` section of a quota-probe config.
 *
 * @param {*} raw - Raw `custom` block.
 * @returns {{source: string, timeoutMs: number}|null} Normalized block with a
 *   string `source` ("" when missing or not a string) and a clamped timeout,
 *   or null when `raw` is not an object.
 */
function normalizeCustomBlock(raw) {
  const isObject = Boolean(raw) && typeof raw === "object";
  if (!isObject) return null;

  const { source, timeoutMs } = raw;
  return {
    source: typeof source === "string" ? source : "",
    timeoutMs: clampTimeout(timeoutMs, CUSTOM_TIMEOUT_DEFAULT, CUSTOM_TIMEOUT_CAP)
  };
}
134
+
135
/**
 * Normalize a safety-margin config.
 *
 * @param {*} raw - Raw margin object.
 * @returns {{dollars: number, percent: number}} Each field is kept when it is
 *   a non-negative finite number and replaced by 0 otherwise; a non-object
 *   `raw` yields both fields as 0.
 */
function normalizeMargin(raw) {
  const source = raw && typeof raw === "object" ? raw : {};
  const readMargin = (key) => (isFiniteNonNeg(source[key]) ? source[key] : 0);
  return { dollars: readMargin("dollars"), percent: readMargin("percent") };
}
141
+
142
/**
 * Normalize the `refreshTriggers` section of a quota-probe config.
 *
 * `onManual` is always true regardless of input. `onUpstreamError` is null
 * unless the input supplies an object for it, in which case `statusCodes` is
 * reduced to its finite-number entries and `bodyRegex` is kept only when it is
 * a string.
 *
 * @param {*} raw - Raw `refreshTriggers` block.
 * @returns {{onUiOpen: boolean, onManual: boolean, onResetAt: boolean,
 *   onUpstreamError: ({statusCodes: number[], bodyRegex: string|null}|null)}}
 */
function normalizeRefreshTriggers(raw) {
  // A non-object input behaves like an empty one, which yields the defaults.
  const source = raw && typeof raw === "object" ? raw : {};

  let onUpstreamError = null;
  const upstream = source.onUpstreamError;
  if (upstream && typeof upstream === "object") {
    const { statusCodes, bodyRegex } = upstream;
    onUpstreamError = {
      statusCodes: Array.isArray(statusCodes)
        ? statusCodes.filter((code) => Number.isFinite(code))
        : [],
      bodyRegex: typeof bodyRegex === "string" ? bodyRegex : null
    };
  }

  return {
    onUiOpen: Boolean(source.onUiOpen),
    onManual: true,
    onResetAt: Boolean(source.onResetAt),
    onUpstreamError
  };
}
163
+
164
/**
 * Normalize a provider's `quotaProbe` config.
 *
 * Returns null unless `raw` is an object with `enabled === true` and an
 * accepted `capKind`. Unrecognised `combinator`, `enforce` and `mode` values
 * fall back to "AND", "gate" and "http"; the nested sections are passed
 * through their own normalizers.
 *
 * @param {*} raw - Raw `quotaProbe` value from the provider config.
 * @returns {object|null} Normalized config, or null when the probe is
 *   disabled or misconfigured.
 */
export function normalizeQuotaProbeConfig(raw) {
  const isEnabledObject = Boolean(raw) && typeof raw === "object" && raw.enabled === true;
  if (!isEnabledObject) return null;
  if (!VALID_CAP_KINDS.has(raw.capKind)) return null;

  // Keep `value` when it is in the allowed set, otherwise use the fallback.
  const pick = (allowed, value, fallback) => (allowed.has(value) ? value : fallback);

  return {
    enabled: true,
    capKind: raw.capKind,
    combinator: pick(VALID_COMBINATORS, raw.combinator, "AND"),
    enforce: pick(VALID_ENFORCE_MODES, raw.enforce, "gate"),
    mode: pick(VALID_PROBE_MODES, raw.mode, "http"),
    safetyMargin: normalizeMargin(raw.safetyMargin),
    http: normalizeHttpBlock(raw.http),
    custom: normalizeCustomBlock(raw.custom),
    refreshTriggers: normalizeRefreshTriggers(raw.refreshTriggers)
  };
}