@khanglvm/llm-router 2.5.2 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@khanglvm/llm-router",
3
- "version": "2.5.2",
3
+ "version": "2.6.1",
4
4
  "description": "LLM Router: single gateway endpoint for multi-provider LLMs with unified OpenAI+Anthropic format and seamless fallback",
5
5
  "keywords": [
6
6
  "llm-router",
@@ -17,6 +17,7 @@ import {
17
17
  normalizeFactoryDroidReasoningEffort,
18
18
  resolveFactoryDroidRouterModelRef
19
19
  } from "../shared/coding-tool-bindings.js";
20
+ import { LOCAL_RUNTIME_PROVIDER_TYPE } from "../runtime/local-models.js";
20
21
 
21
22
  const BACKUP_SUFFIX = ".llm_router_backup";
22
23
  const CODEX_PROVIDER_ID = "llm-router";
@@ -972,9 +973,11 @@ export async function patchClaudeCodeEffortLevel({
972
973
  const FACTORY_DROID_ROUTER_MARKER = "_llmRouterManaged";
973
974
  const FACTORY_DROID_OPENAI_PROVIDER = "openai";
974
975
  const FACTORY_DROID_ANTHROPIC_PROVIDER = "anthropic";
976
+ const FACTORY_DROID_GENERIC_CHAT_COMPLETIONS_PROVIDER = "generic-chat-completion-api";
975
977
  const FACTORY_DROID_ROUTER_PROVIDERS = Object.freeze([
976
978
  FACTORY_DROID_OPENAI_PROVIDER,
977
- FACTORY_DROID_ANTHROPIC_PROVIDER
979
+ FACTORY_DROID_ANTHROPIC_PROVIDER,
980
+ FACTORY_DROID_GENERIC_CHAT_COMPLETIONS_PROVIDER
978
981
  ]);
979
982
 
980
983
  function dedupeStrings(values = []) {
@@ -1116,6 +1119,17 @@ function resolveFactoryDroidRouteFormat(modelRef, config = {}, seen = new Set())
1116
1119
  }
1117
1120
 
1118
1121
  function resolveFactoryDroidCustomModelProvider(modelRef, config = {}) {
1122
+ const normalizedModelRef = String(modelRef || "").trim();
1123
+ if (normalizedModelRef.includes("/")) {
1124
+ const separatorIndex = normalizedModelRef.indexOf("/");
1125
+ const providerId = normalizedModelRef.slice(0, separatorIndex).trim();
1126
+ const provider = (Array.isArray(config?.providers) ? config.providers : [])
1127
+ .find((entry) => String(entry?.id || "").trim() === providerId);
1128
+ if (String(provider?.type || "").trim().toLowerCase() === LOCAL_RUNTIME_PROVIDER_TYPE) {
1129
+ return FACTORY_DROID_GENERIC_CHAT_COMPLETIONS_PROVIDER;
1130
+ }
1131
+ }
1132
+
1119
1133
  return mapFactoryDroidFormatToProvider(resolveFactoryDroidRouteFormat(modelRef, config))
1120
1134
  || FACTORY_DROID_OPENAI_PROVIDER;
1121
1135
  }
@@ -310,6 +310,15 @@ function isTransientModelRuntimeError(result, message) {
310
310
  return patterns.some((pattern) => pattern.test(text));
311
311
  }
312
312
 
313
/**
 * Reports whether an error message indicates that the output-token budget was hit.
 *
 * The check is case-insensitive: the text must mention `max_tokens` together
 * with at least one of "output limit", "token limit" or "finish".
 *
 * @param {any} message - error text; falsy values are treated as empty
 * @returns {boolean}
 */
function isOutputLimitReachedMessage(message) {
  const haystack = String(message || "").toLowerCase();
  if (!haystack.includes("max_tokens")) {
    return false;
  }
  const hints = ["output limit", "token limit", "finish"];
  return hints.some((hint) => haystack.includes(hint));
}
321
+
313
322
  function isRateLimitResult(result, message) {
314
323
  const status = Number(result?.status || 0);
315
324
  if (status === 429) return true;
@@ -377,6 +386,15 @@ function classifyModelProbeResult(format, result) {
377
386
  };
378
387
  }
379
388
 
389
+ if (isOutputLimitReachedMessage(message)) {
390
+ return {
391
+ supported: true,
392
+ confirmed: true,
393
+ outcome: "output-limit",
394
+ message: message || "Request reached model but the probe token budget was too small."
395
+ };
396
+ }
397
+
380
398
  if (isUnsupportedModelMessage(message)) {
381
399
  return {
382
400
  supported: false,
@@ -0,0 +1,215 @@
1
+ /**
2
+ * Pure helper functions for resolving values from arbitrary JSON responses
3
+ * and coercing them to expected types. No IO, no side effects.
4
+ */
5
+
6
/**
 * Resolves a dot-path from a JSON object.
 * Supports `$.foo.bar` syntax (leading `$` or `$.` is stripped).
 * Supports array indices: `$.data[0].amount` (or the dotted form `data.0.amount`).
 * Returns undefined for missing paths or null/undefined intermediates.
 *
 * Only an object's own properties are followed, so paths such as
 * `$.constructor` or `$.toString` resolve to undefined instead of a built-in
 * inherited from the prototype chain. Array segments must be plain
 * non-negative decimal integers.
 *
 * @param {any} obj - root value to read from
 * @param {string} pathStr - path expression; non-strings yield undefined
 * @returns {any} the resolved value, `obj` itself for an empty path, or undefined
 */
export function resolvePath(obj, pathStr) {
  if (obj == null || typeof pathStr !== "string") return undefined;

  // Strip leading $ or $.
  let cleaned = pathStr;
  if (cleaned.startsWith("$.")) cleaned = cleaned.slice(2);
  else if (cleaned.startsWith("$")) cleaned = cleaned.slice(1);

  if (!cleaned) return obj;

  // Tokenize: split on dots, then expand array indices.
  // "data[0].amount" → ["data", "0", "amount"]
  const segments = [];
  for (const part of cleaned.split(".")) {
    if (!part) continue;
    // A fresh /g regex per part keeps lastIndex state from leaking between parts.
    const bracketRe = /([^\[]*)\[(\d+)\]/g;
    let match;
    let lastIndex = 0;
    let hasMatch = false;
    while ((match = bracketRe.exec(part)) !== null) {
      hasMatch = true;
      if (match[1]) segments.push(match[1]);
      segments.push(match[2]);
      lastIndex = bracketRe.lastIndex;
    }
    if (!hasMatch) {
      segments.push(part);
    } else if (lastIndex < part.length) {
      // Text after the last "[n]" is kept as its own segment.
      segments.push(part.slice(lastIndex));
    }
  }

  let current = obj;
  for (const seg of segments) {
    if (current == null) return undefined;
    if (Array.isArray(current)) {
      // Number() would also accept "0x1", "1e2" or whitespace; require digits only.
      if (!/^\d+$/.test(seg)) return undefined;
      current = current[Number(seg)];
    } else if (typeof current === "object") {
      // Ignore inherited members so prototype built-ins never look like data.
      if (!Object.prototype.hasOwnProperty.call(current, seg)) return undefined;
      current = current[seg];
    } else {
      // Primitives have no addressable children.
      return undefined;
    }
  }
  return current;
}
63
+
64
// Values the "boolean" coercion treats as false; everything else is true.
// String inputs are trimmed and lower-cased before the lookup.
const FALSE_SET = new Set([0, "0", "false", "no", null, undefined, false, ""]);

// ISO-8601-style time durations ("PT2H30M"). The "T" is optional here, so
// "P30M" is read as 30 minutes.
const DURATION_RE = /^PT?(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?$/i;
// Short forms: "2h", "30m", "45 s".
const SHORT_DURATION_RE = /^(\d+(?:\.\d+)?)\s*(h|m|s)$/i;

const SHORT_UNIT_MS = Object.freeze({ h: 3600_000, m: 60_000, s: 1000 });

/**
 * Parses a short ("2h") or ISO-style ("PT2H30M") duration.
 * @param {string} str
 * @returns {number|undefined} duration in milliseconds, or undefined when not a duration
 */
function parseDuration(str) {
  const short = SHORT_DURATION_RE.exec(str);
  if (short) {
    return parseFloat(short[1]) * SHORT_UNIT_MS[short[2].toLowerCase()];
  }

  const iso = DURATION_RE.exec(str);
  // Every group is optional in the regex; require at least one so "P"/"PT" alone is rejected.
  if (iso && (iso[1] || iso[2] || iso[3])) {
    const hours = parseFloat(iso[1] || "0");
    const minutes = parseFloat(iso[2] || "0");
    const seconds = parseFloat(iso[3] || "0");
    return (hours * 3600 + minutes * 60 + seconds) * 1000;
  }
  return undefined;
}

/**
 * Converts a value to a number, or undefined when it carries no numeric value.
 * Blank strings are rejected explicitly because `Number("")` is 0.
 * @param {any} value
 * @returns {number|undefined}
 */
function toNumberOrUndefined(value) {
  if (value == null) return undefined;
  if (typeof value === "string" && value.trim() === "") return undefined;
  const n = Number(value);
  return Number.isNaN(n) ? undefined : n;
}

/**
 * Type coercion for mapped values.
 *
 * - "number": numeric value, or undefined for null/undefined, blank strings and NaN.
 * - "dollars-from-cents": same as "number", divided by 100.
 * - "boolean": false for 0, false, null, undefined, "" and the strings
 *   "0"/"false"/"no" (case-insensitive, surrounding whitespace ignored); true otherwise.
 * - "datetime": epoch milliseconds. Strings are tried as a duration relative to
 *   `now` first, then via `new Date(value)`. Numbers below 1e12 are treated as
 *   epoch seconds, otherwise as epoch milliseconds.
 * - "raw" or anything else: the value unchanged.
 *
 * @param {any} value
 * @param {string} as - "number" | "dollars-from-cents" | "boolean" | "datetime" | "raw"
 * @param {{ now?: number }} [opts] - `now` is the base for durations; defaults to Date.now()
 * @returns {any}
 */
export function coerceValue(value, as, { now } = {}) {
  switch (as) {
    case "number":
      return toNumberOrUndefined(value);
    case "dollars-from-cents": {
      const cents = toNumberOrUndefined(value);
      return cents === undefined ? undefined : cents / 100;
    }
    case "boolean": {
      // Normalize strings so "False", "NO" or " false " are not read as true.
      const probe = typeof value === "string" ? value.trim().toLowerCase() : value;
      return !FALSE_SET.has(probe);
    }
    case "datetime": {
      if (value == null) return undefined;
      if (typeof value === "string") {
        // Try duration first
        const dur = parseDuration(value.trim());
        if (dur !== undefined) {
          return (now ?? Date.now()) + dur;
        }
        // Then an absolute timestamp string.
        const d = new Date(value);
        if (!Number.isNaN(d.getTime())) return d.getTime();
        return undefined;
      }
      if (typeof value === "number") {
        if (!Number.isFinite(value)) return undefined;
        // Epoch seconds vs ms heuristic
        return value < 1e12 ? value * 1000 : value;
      }
      return undefined;
    }
    case "raw":
    default:
      return value;
  }
}
141
+
142
// Matches `{{ key }}` placeholders; the captured key is trimmed before lookup.
const SHORTCODE_RE = /\{\{([^}]+)\}\}/g;

// The only context keys a template may reference directly.
const KNOWN_CTX_KEYS = new Set([
  "providerApiKey",
  "providerBaseUrl",
  "providerId"
]);

/**
 * Replace `{{shortcode}}` placeholders in a template string.
 *
 * Supported shortcodes are the keys in KNOWN_CTX_KEYS (read from `ctx`) and
 * `env.NAME` (read from `env`). Unknown shortcodes and missing values render
 * as an empty string. A missing `ctx` or `env` is treated as empty, and `env`
 * lookups only see the object's own properties, so `{{env.constructor}}` or
 * `{{env.toString}}` cannot leak inherited members into the output.
 *
 * @param {any} template - returned unchanged when it is not a string
 * @param {Record<string, string>} ctx
 * @param {Record<string, string>} [env]
 * @returns {any}
 */
export function interpolateShortcodes(template, ctx, env = {}) {
  if (typeof template !== "string") return template;
  return template.replace(SHORTCODE_RE, (_, key) => {
    const trimmed = key.trim();
    // Keys are whitelisted, so a plain null-safe read is enough here.
    if (KNOWN_CTX_KEYS.has(trimmed)) return ctx?.[trimmed] ?? "";
    const envMatch = trimmed.match(/^env\.(.+)$/);
    if (envMatch) {
      const name = envMatch[1];
      // The name comes from the template, so restrict the lookup to own properties.
      if (env == null || !Object.prototype.hasOwnProperty.call(env, name)) return "";
      return env[name] ?? "";
    }
    return "";
  });
}
167
+
168
// Snapshot fields that a mapping config may populate, in output order.
const MAPPED_FIELDS = ["used", "limit", "remaining", "resetAt", "isUnlimited"];

/**
 * Extract normalized fields from a raw API response using a mapping config.
 *
 * For each field in MAPPED_FIELDS the value is looked up in this order:
 *   1. `mapping[field].path`, coerced with `mapping[field].as` (default "raw");
 *   2. for "limit" only, each path in `mapping.limitFallbacks`, coerced with
 *      `mapping.limit.as` (default "number"), stopping at the first non-null result;
 *   3. `mapping.constants[field]`.
 * Fields that end up undefined are omitted from the result.
 *
 * A null/undefined `mapping` is treated as an empty mapping and yields `{}`.
 *
 * @param {any} rawResponse
 * @param {Record<string, any>} mapping
 * @param {{ now?: number }} [opts] - clock used for relative "datetime" values;
 *   defaults to Date.now() so existing two-argument callers are unaffected
 * @returns {Record<string, any>}
 */
export function extractMappedSnapshot(rawResponse, mapping, { now = Date.now() } = {}) {
  const rules = mapping ?? {};
  const result = {};

  for (const field of MAPPED_FIELDS) {
    let value;

    // Try primary path
    const fieldMapping = rules[field];
    if (fieldMapping && fieldMapping.path) {
      const raw = resolvePath(rawResponse, fieldMapping.path);
      if (raw != null) {
        value = coerceValue(raw, fieldMapping.as || "raw", { now });
      }
    }

    // For "limit" field: try limitFallbacks chain if still null/undefined
    if (value == null && field === "limit" && Array.isArray(rules.limitFallbacks)) {
      const as = fieldMapping?.as || "number";
      for (const fallbackPath of rules.limitFallbacks) {
        const raw = resolvePath(rawResponse, fallbackPath);
        if (raw == null) continue;
        value = coerceValue(raw, as, { now });
        if (value != null) break;
      }
    }

    // Try constants as final fallback
    if (value == null && rules.constants && rules.constants[field] != null) {
      value = rules.constants[field];
    }

    if (value !== undefined) {
      result[field] = value;
    }
  }

  return result;
}
@@ -0,0 +1,234 @@
1
+ /**
2
+ * IO layer: executes quota probes (HTTP or custom JS), caches snapshots,
3
+ * and manages a per-provider circuit breaker.
4
+ */
5
+ import { createContext, Script } from "node:vm";
6
+ import { validateSnapshot, deriveSnapshot } from "../runtime/quota-probe.js";
7
+ import { extractMappedSnapshot, interpolateShortcodes } from "./quota-probe-mapping.js";
8
+
9
+ const CIRCUIT_THRESHOLD = 3;
10
+ const CIRCUIT_PAUSE_MS = 5 * 60 * 1000;
11
+
12
/**
 * Builds the snapshot stored when a probe fails.
 *
 * @param {any} capKind - copied onto the snapshot unchanged
 * @param {number} now - recorded as `fetchedAt`
 * @param {any} error - stringified into `error.message`
 * @param {any} lastKnownGood - previous good snapshot; any falsy value becomes null
 * @returns {{ capKind: any, state: "errored", error: { message: string }, fetchedAt: number, raw: null, lastKnownGood: any }}
 */
function makeErroredSnapshot(capKind, now, error, lastKnownGood) {
  const message = String(error);
  const fallback = lastKnownGood ? lastKnownGood : null;
  return {
    capKind,
    state: "errored",
    error: { message },
    fetchedAt: now,
    raw: null,
    lastKnownGood: fallback,
  };
}
22
+
23
/**
 * Runs the HTTP form of a quota probe and returns the parsed JSON body.
 *
 * The URL, header names/values and a string body are passed through
 * `interpolateShortcodes`; a non-string body is JSON-stringified. The request
 * is aborted after `http.timeoutMs` (default 5000 ms), and that deadline covers
 * reading the response body as well as waiting for the response headers.
 *
 * @param {{ http: { url: string, method?: string, headers?: Array<{ key?: string, name?: string, value?: any }>, body?: any, timeoutMs?: number } }} probeConfig
 * @param {Record<string, string>} shortcodeCtx
 * @param {Record<string, string>} env
 * @param {Function} fetchFn - fetch-compatible function
 * @returns {Promise<any>} parsed JSON body, or null when the body is not valid JSON
 * @throws {Error} "HTTP <status>" with a `responseBody` property for non-2xx
 *   responses; "timeout" when the deadline fires; otherwise the underlying
 *   failure's message (the original error is attached as `cause`)
 */
async function executeHttp(probeConfig, shortcodeCtx, env, fetchFn) {
  const http = probeConfig.http;
  const url = interpolateShortcodes(http.url, shortcodeCtx, env);
  const headers = {};
  for (const h of http.headers || []) {
    const headerKey = String(h.key || h.name || "").trim();
    if (!headerKey) continue;
    headers[interpolateShortcodes(headerKey, shortcodeCtx, env)] =
      interpolateShortcodes(String(h.value || ""), shortcodeCtx, env);
  }

  const method = http.method || "GET";
  const opts = { method, headers };
  // fetch rejects GET/HEAD requests that carry a body; compare case-insensitively
  // so a configured "get" is treated the same as "GET".
  const isBodyless = ["GET", "HEAD"].includes(String(method).toUpperCase());
  if (http.body !== undefined && !isBodyless) {
    opts.body = typeof http.body === "string"
      ? interpolateShortcodes(http.body, shortcodeCtx, env)
      : JSON.stringify(http.body);
  }

  const ac = new AbortController();
  opts.signal = ac.signal;
  const timer = setTimeout(() => ac.abort(), http.timeoutMs || 5000);

  try {
    const res = await fetchFn(url, opts);

    let body = null;
    try {
      body = await res.json();
    } catch {
      // Non-JSON (or unreadable) bodies are reported as null.
    }
    // An abort while the body was being read must surface as a timeout,
    // not as a successful response with a null body.
    if (ac.signal.aborted) throw new Error("timeout");

    if (!res.ok) {
      const err = new Error(`HTTP ${res.status}`);
      err.responseBody = body;
      throw err;
    }
    return body;
  } catch (err) {
    // HTTP-status errors already carry the message and body callers expect.
    if (err?.responseBody !== undefined) throw err;
    const timedOut = err?.name === "AbortError" || ac.signal.aborted;
    throw new Error(timedOut ? "timeout" : err?.message, { cause: err });
  } finally {
    // Keep the deadline armed until the body has been consumed.
    clearTimeout(timer);
  }
}
65
+
66
/**
 * Runs the custom-JS form of a quota probe.
 *
 * `probeConfig.custom.source` is expected to define a `fetchUsage(ctx)`
 * function. The source is wrapped in an async IIFE, evaluated in a fresh
 * `node:vm` context whose only global is a frozen `ctx` object, and the value
 * `fetchUsage(ctx)` resolves to is returned.
 *
 * `timeoutMs` bounds both phases of the run: it is passed to `runInContext`
 * for the synchronous part of the script, and the returned promise is raced
 * against a timer so an async `fetchUsage` that never settles cannot hang the
 * caller. When `timeoutMs` is not a positive finite number, no async deadline
 * is applied.
 *
 * NOTE(review): `node:vm` is documented as not being a security mechanism, and
 * `ctx.fetch` is a host-realm function handed to the script — treat `source`
 * as trusted configuration, not as untrusted input.
 *
 * @param {{ custom: { source: string, timeoutMs?: number } }} probeConfig
 * @param {{ providerApiKey?: string, providerBaseUrl?: string, providerId?: string }} shortcodeCtx
 * @param {Function} fetchFn - exposed to the script as `ctx.fetch`
 * @param {number} now - exposed to the script as `ctx.now`
 * @returns {Promise<any>} whatever `fetchUsage(ctx)` resolves to
 * @throws {Error} "timeout" when the async deadline fires; otherwise whatever
 *   the script throws or rejects with
 */
async function executeCustom(probeConfig, shortcodeCtx, fetchFn, now) {
  const { source, timeoutMs } = probeConfig.custom;
  const sandbox = Object.freeze({
    ctx: Object.freeze({
      fetch: fetchFn,
      providerApiKey: shortcodeCtx.providerApiKey,
      providerBaseUrl: shortcodeCtx.providerBaseUrl,
      providerId: shortcodeCtx.providerId,
      log: () => {},
      now,
      timeoutMs,
    }),
  });
  const vmCtx = createContext(sandbox);
  const wrapped = `(async () => { ${source}\n return fetchUsage(ctx); })()`;
  // `timeout` is a run option, not a Script constructor option.
  const script = new Script(wrapped);
  // This timeout only covers synchronous execution up to the first await.
  const pending = script.runInContext(vmCtx, { timeout: timeoutMs });

  if (!(Number.isFinite(timeoutMs) && timeoutMs > 0)) {
    return await pending;
  }

  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error("timeout")), timeoutMs);
  });
  try {
    // Promise.race subscribes to `pending`, so a late rejection is not unhandled.
    return await Promise.race([pending, deadline]);
  } finally {
    clearTimeout(timer);
  }
}
84
+
85
/**
 * Creates a quota probe runner that executes probes, caches the latest
 * snapshot per provider, and tracks a per-provider circuit breaker.
 *
 * The returned object exposes:
 *   - `executeProbe(args)`   run one probe now and cache the result; failures
 *                            are returned as an "errored" snapshot
 *   - `enqueueRefresh(args)` de-duplicated, concurrency-limited `executeProbe`
 *                            that also honours the circuit breaker
 *   - `getSnapshot(id)` / `getAllSnapshots()`  read the cache
 *   - `isCircuitOpen(id, now)` / `resetCircuit(id)`
 *   - `dispose()`            cancel scheduled reset-time refreshes
 *   - `onTriggerRefresh`     settable callback invoked when a reset-time
 *                            refresh fires
 *
 * @param {{ fetchImpl?: Function }} [options] - `fetchImpl` overrides `globalThis.fetch`
 * @returns {object}
 */
export function createQuotaProbeRunner({ fetchImpl } = {}) {
  const fetchFn = fetchImpl || globalThis.fetch;
  const cache = new Map();
  const circuits = new Map();

  // Returns the circuit record for a provider, creating it on first use.
  function getCircuit(providerId) {
    if (!circuits.has(providerId)) {
      circuits.set(providerId, { failures: 0, openUntil: 0 });
    }
    return circuits.get(providerId);
  }

  // Open = at least CIRCUIT_THRESHOLD recorded failures and the pause has not elapsed.
  function isCircuitOpen(providerId, now) {
    const c = circuits.get(providerId);
    return !!c && c.failures >= CIRCUIT_THRESHOLD && now < c.openUntil;
  }

  function resetCircuit(providerId) {
    circuits.delete(providerId);
  }

  /**
   * Runs one probe (custom or HTTP), validates and derives the snapshot, and
   * caches it. Any failure is recorded against the provider's circuit and
   * returned as an "errored" snapshot carrying the last fresh snapshot.
   */
  async function executeProbe({ providerId, probeConfig, shortcodeCtx, env, now }) {
    const capKind = probeConfig.capKind;
    const prev = cache.get(providerId);
    // Carry the most recent fresh snapshot forward across consecutive failures.
    const lastKnownGood = prev?.state === "fresh" ? prev : prev?.lastKnownGood || null;

    try {
      let result;
      if (probeConfig.mode === "custom") {
        result = await executeCustom(probeConfig, shortcodeCtx, fetchFn, now);
      } else {
        const rawJson = await executeHttp(probeConfig, shortcodeCtx, env, fetchFn);
        const mapped = extractMappedSnapshot(rawJson, probeConfig.http.mapping);
        result = { ...mapped, capKind, raw: rawJson };
      }

      const toValidate = { ...result, capKind: result.capKind || capKind };
      const validation = validateSnapshot(toValidate);
      if (!validation.valid) {
        const err = new Error(validation.error);
        err.responseBody = result.raw;
        throw err;
      }

      const derived = deriveSnapshot(toValidate);
      const snapshot = {
        capKind: derived.capKind,
        used: derived.used,
        limit: derived.limit,
        remaining: derived.remaining,
        resetAt: derived.resetAt,
        isUnlimited: derived.isUnlimited,
        state: "fresh",
        fetchedAt: now,
        error: null,
        raw: result.raw ?? null,
        lastKnownGood: null,
      };

      cache.set(providerId, snapshot);
      resetCircuit(providerId);
      return snapshot;
    } catch (err) {
      const circuit = getCircuit(providerId);
      circuit.failures++;
      if (circuit.failures >= CIRCUIT_THRESHOLD) {
        circuit.openUntil = now + CIRCUIT_PAUSE_MS;
      }
      // Custom probe source may throw non-Error values (strings, null); read
      // the fields defensively so this handler itself can never throw.
      const message = err?.message ?? String(err);
      const snapshot = makeErroredSnapshot(capKind, now, message, lastKnownGood);
      snapshot.raw = err?.responseBody ?? null;
      cache.set(providerId, snapshot);
      return snapshot;
    }
  }

  function getSnapshot(providerId) {
    return cache.get(providerId) || null;
  }

  // Returns a copy so callers cannot mutate the runner's cache map.
  function getAllSnapshots() {
    return new Map(cache);
  }

  // ── Refresh trigger management ──────────────────────────────────────
  const pendingRefreshes = new Map();
  const resetAtTimers = new Map();
  const MAX_CONCURRENT_PROBES = 4;
  const MAX_RESET_AT_DELAY_MS = 24 * 60 * 60 * 1000;
  let activeConcurrent = 0;
  let _onTriggerRefresh = null;

  /**
   * Replaces any scheduled reset-time refresh for the provider. A new timer is
   * armed only when `refreshTriggers.onResetAt` is enabled and `resetAt` lies
   * in the future, at most MAX_RESET_AT_DELAY_MS away.
   */
  function scheduleResetAtRefresh(providerId, snapshot, probeConfig, shortcodeCtx, env) {
    if (resetAtTimers.has(providerId)) {
      clearTimeout(resetAtTimers.get(providerId));
      resetAtTimers.delete(providerId);
    }
    if (!probeConfig.refreshTriggers?.onResetAt) return;
    if (!snapshot.resetAt) return;
    const delay = snapshot.resetAt - Date.now();
    if (delay <= 0 || delay > MAX_RESET_AT_DELAY_MS) return;

    const timerId = setTimeout(() => {
      resetAtTimers.delete(providerId);
      if (_onTriggerRefresh) _onTriggerRefresh({ providerId, trigger: "scheduler.resetAt" });
      // Nobody awaits a timer-driven refresh. Probe failures are already
      // recorded in the cached errored snapshot, so only guard against an
      // unhandled rejection here.
      enqueueRefresh({ providerId, probeConfig, shortcodeCtx, env }).catch(() => {});
    }, delay);
    resetAtTimers.set(providerId, timerId);
  }

  /**
   * Runs a probe unless the circuit is open (returns the cached snapshot
   * instead, unless `bypassCircuit` is set) or a refresh for the same provider
   * is already in flight (returns that promise). At most
   * MAX_CONCURRENT_PROBES probes run at once; extra callers poll every 50 ms.
   */
  async function enqueueRefresh({ providerId, probeConfig, shortcodeCtx, env, bypassCircuit }) {
    if (isCircuitOpen(providerId, Date.now()) && !bypassCircuit) {
      return getSnapshot(providerId);
    }
    if (pendingRefreshes.has(providerId)) {
      return pendingRefreshes.get(providerId);
    }

    const run = async () => {
      while (activeConcurrent >= MAX_CONCURRENT_PROBES) {
        await new Promise((resolve) => setTimeout(resolve, 50));
      }
      activeConcurrent++;
      try {
        const snap = await executeProbe({ providerId, probeConfig, shortcodeCtx, env, now: Date.now() });
        scheduleResetAtRefresh(providerId, snap, probeConfig, shortcodeCtx, env);
        return snap;
      } finally {
        activeConcurrent--;
        pendingRefreshes.delete(providerId);
      }
    };

    const promise = run();
    pendingRefreshes.set(providerId, promise);
    return promise;
  }

  // Cancels every scheduled reset-time refresh. In-flight probes are not cancelled.
  function dispose() {
    for (const id of resetAtTimers.values()) clearTimeout(id);
    resetAtTimers.clear();
  }

  const runner = { executeProbe, getSnapshot, getAllSnapshots, isCircuitOpen, resetCircuit, enqueueRefresh, dispose };
  Object.defineProperty(runner, "onTriggerRefresh", {
    get() { return _onTriggerRefresh; },
    set(fn) { _onTriggerRefresh = fn; },
    enumerable: true,
  });
  return runner;
}