@khanglvm/llm-router 2.6.0 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,10 +13,6 @@ import { readActivityLogSettings } from "../shared/local-router-defaults.js";
13
13
  import { appendActivityLogEntry, resolveActivityLogPath } from "./activity-log.js";
14
14
  import { appendLargeRequestLogEntry, resolveLargeRequestLogPath } from "./large-request-log.js";
15
15
  import { isLargeRequestLoggingEnabled } from "../runtime/handler/large-request-log.js";
16
- import {
17
- startConfiguredLlamacppRuntime,
18
- stopManagedLlamacppRuntime
19
- } from "./llamacpp-runtime.js";
20
16
 
21
17
  const DEFAULT_CONFIG_RELOAD_DEBOUNCE_MS = 300;
22
18
  const MAX_CONFIG_RELOAD_DEBOUNCE_MS = 5000;
@@ -38,10 +34,6 @@ function formatError(error) {
38
34
  return error instanceof Error ? error.message : String(error);
39
35
  }
40
36
 
41
- function normalizeString(value) {
42
- return typeof value === "string" ? value.trim() : "";
43
- }
44
-
45
37
  function createLiveConfigStore({
46
38
  configPath,
47
39
  watchConfig = true,
@@ -245,39 +237,6 @@ async function writeFetchResponseToNode(res, response) {
245
237
  readable.pipe(res);
246
238
  }
247
239
 
248
- function buildVariantLlamacppRuntimeConfig(config, variantKey) {
249
- const normalizedVariantKey = normalizeString(variantKey);
250
- const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
251
- const variants = config?.metadata?.localModels?.variants;
252
- const library = config?.metadata?.localModels?.library;
253
- const variant = variants?.[normalizedVariantKey];
254
- if (!runtime || !variant || variant.runtime !== "llamacpp") return null;
255
-
256
- const baseModelId = normalizeString(variant?.baseModelId);
257
- const baseModel = library?.[baseModelId];
258
- if (!baseModel) return null;
259
-
260
- return {
261
- metadata: {
262
- localModels: {
263
- runtime: {
264
- llamacpp: { ...runtime }
265
- },
266
- library: {
267
- [baseModelId]: { ...baseModel }
268
- },
269
- variants: {
270
- [normalizedVariantKey]: {
271
- ...variant,
272
- enabled: true,
273
- preload: true
274
- }
275
- }
276
- }
277
- }
278
- };
279
- }
280
-
281
240
  export async function startLocalRouteServer({
282
241
  port = FIXED_LOCAL_ROUTER_PORT,
283
242
  host = FIXED_LOCAL_ROUTER_HOST,
@@ -289,10 +248,7 @@ export async function startLocalRouteServer({
289
248
  validateConfig,
290
249
  onConfigReload,
291
250
  onConfigReloadError,
292
- requireAuth = false,
293
- createFetchHandlerImpl = createFetchHandler,
294
- startConfiguredLlamacppRuntimeImpl = startConfiguredLlamacppRuntime,
295
- stopManagedLlamacppRuntimeImpl = stopManagedLlamacppRuntime
251
+ requireAuth = false
296
252
  } = {}) {
297
253
  const reloadDebounceMs = resolveReloadDebounceMs(configReloadDebounceMs);
298
254
  const resolvedActivityLogPath = resolveActivityLogPath(configPath, activityLogPath);
@@ -314,22 +270,9 @@ export async function startLocalRouteServer({
314
270
  const initialConfig = await configStore.getConfig();
315
271
  activityLogEnabled = readActivityLogSettings(initialConfig).enabled;
316
272
 
317
- const fetchHandler = createFetchHandlerImpl({
273
+ const fetchHandler = createFetchHandler({
318
274
  ignoreAuth: !requireAuth,
319
- runtime: "node",
320
275
  getConfig: () => configStore.getConfig(),
321
- resolveLocalRuntimeBaseUrl: async ({ candidate }) => {
322
- const variantKey = candidate?.model?.metadata?.localVariantKey;
323
- const config = await configStore.getConfig();
324
- const targetedConfig = buildVariantLlamacppRuntimeConfig(config, variantKey);
325
- if (!targetedConfig) return "";
326
-
327
- const started = await startConfiguredLlamacppRuntimeImpl(targetedConfig);
328
- if (!started?.ok) {
329
- throw new Error(started?.errorMessage || `Failed starting local runtime for ${normalizeString(variantKey) || "unknown variant"}.`);
330
- }
331
- return normalizeString(started?.runtime?.baseUrl);
332
- },
333
276
  defaultStateStoreBackend: "file",
334
277
  onActivityLog: (entry) => {
335
278
  if (!activityLogEnabled) return;
@@ -412,7 +355,6 @@ export async function startLocalRouteServer({
412
355
  server.close = (callback) => {
413
356
  shuttingDown = true;
414
357
  Promise.resolve()
415
- .then(() => stopManagedLlamacppRuntimeImpl().catch(() => {}))
416
358
  .then(() => configStore.close())
417
359
  .then(() => (typeof fetchHandler.close === "function" ? fetchHandler.close() : undefined))
418
360
  .finally(() => {
@@ -310,6 +310,15 @@ function isTransientModelRuntimeError(result, message) {
310
310
  return patterns.some((pattern) => pattern.test(text));
311
311
  }
312
312
 
313
/**
 * Detects provider error text saying the response was cut off because the
 * output token budget (`max_tokens`) was exhausted.
 *
 * The text must mention `max_tokens` and at least one of "output limit",
 * "token limit" or "finish" (case-insensitive).
 *
 * @param {any} message - error text (anything falsy is treated as empty)
 * @returns {boolean}
 */
function isOutputLimitReachedMessage(message) {
  const haystack = String(message || "").toLowerCase();
  if (haystack.length === 0) return false;
  if (!haystack.includes("max_tokens")) return false;

  const limitHints = ["output limit", "token limit", "finish"];
  return limitHints.some((hint) => haystack.includes(hint));
}
321
+
313
322
  function isRateLimitResult(result, message) {
314
323
  const status = Number(result?.status || 0);
315
324
  if (status === 429) return true;
@@ -377,6 +386,15 @@ function classifyModelProbeResult(format, result) {
377
386
  };
378
387
  }
379
388
 
389
+ if (isOutputLimitReachedMessage(message)) {
390
+ return {
391
+ supported: true,
392
+ confirmed: true,
393
+ outcome: "output-limit",
394
+ message: message || "Request reached model but the probe token budget was too small."
395
+ };
396
+ }
397
+
380
398
  if (isUnsupportedModelMessage(message)) {
381
399
  return {
382
400
  supported: false,
@@ -0,0 +1,215 @@
1
+ /**
2
+ * Pure helper functions for resolving values from arbitrary JSON responses
3
+ * and coercing them to expected types. No IO, no side effects.
4
+ */
5
+
6
// Matches one "name[123]" or bare "[123]" chunk inside a dot-separated part.
const PATH_INDEX_RE = /([^\[]*)\[(\d+)\]/g;
// Array segments must be plain decimal digits ("1e0", "0x1", " " are rejected).
const ARRAY_INDEX_RE = /^\d+$/;

/**
 * Splits a cleaned path (no leading `$`) into lookup segments.
 * "data[0].amount" → ["data", "0", "amount"]
 * @param {string} cleaned
 * @returns {string[]}
 */
function tokenizePath(cleaned) {
  const segments = [];
  for (const part of cleaned.split(".")) {
    if (!part) continue;

    let consumed = 0;
    let matched = false;
    // matchAll works on a private copy of the regex, so the shared /g
    // pattern never carries lastIndex state between calls.
    for (const match of part.matchAll(PATH_INDEX_RE)) {
      matched = true;
      if (match[1]) segments.push(match[1]);
      segments.push(match[2]);
      consumed = match.index + match[0].length;
    }

    if (!matched) {
      segments.push(part);
    } else if (consumed < part.length) {
      // Keep whatever trails the last "[n]" as its own segment.
      segments.push(part.slice(consumed));
    }
  }
  return segments;
}

/**
 * Resolves a dot-path from a JSON object.
 * Supports `$.foo.bar` syntax (leading `$` or `$.` is stripped).
 * Supports array indices: `$.data[0].amount` (or `$.data.0.amount`).
 * Returns undefined for missing paths or null/undefined intermediates.
 *
 * Only own properties are followed, so paths such as `$.toString` or
 * `$.constructor` never resolve to inherited prototype members.
 *
 * @param {any} obj
 * @param {string} pathStr
 * @returns {any}
 */
export function resolvePath(obj, pathStr) {
  if (obj == null || typeof pathStr !== "string") return undefined;

  // Strip leading $ or $.
  let cleaned = pathStr;
  if (cleaned.startsWith("$.")) cleaned = cleaned.slice(2);
  else if (cleaned.startsWith("$")) cleaned = cleaned.slice(1);

  if (!cleaned) return obj;

  let current = obj;
  for (const seg of tokenizePath(cleaned)) {
    if (current == null) return undefined;

    if (Array.isArray(current)) {
      if (!ARRAY_INDEX_RE.test(seg)) return undefined;
      current = current[Number(seg)];
    } else if (typeof current === "object") {
      if (!Object.prototype.hasOwnProperty.call(current, seg)) return undefined;
      current = current[seg];
    } else {
      // Primitives have no addressable children.
      return undefined;
    }
  }
  return current;
}
63
+
64
// Values treated as "false" by the "boolean" coercion. String inputs are
// trimmed and lower-cased before the lookup, so "False" / " NO " also match.
const FALSE_SET = new Set([0, "0", "false", "no", null, undefined, false, ""]);

const DURATION_RE = /^PT?(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?$/i;
const SHORT_DURATION_RE = /^(\d+(?:\.\d+)?)\s*(h|m|s)$/i;

// Milliseconds per short-form unit.
const SHORT_UNIT_MS = { h: 3600_000, m: 60_000, s: 1000 };

/**
 * Parses a relative duration.
 * Accepts the short form ("2h", "30m", "45s") and the hour/minute/second
 * subset of ISO 8601 durations ("PT2H", "PT30M", "PT2H30M").
 * @param {string} str
 * @returns {number|undefined} duration in milliseconds
 */
function parseDuration(str) {
  const short = SHORT_DURATION_RE.exec(str);
  if (short) {
    return parseFloat(short[1]) * SHORT_UNIT_MS[short[2].toLowerCase()];
  }

  const iso = DURATION_RE.exec(str);
  // Require at least one component so a bare "P"/"PT" is not a zero duration.
  if (iso && (iso[1] || iso[2] || iso[3])) {
    const hours = parseFloat(iso[1] || "0");
    const minutes = parseFloat(iso[2] || "0");
    const seconds = parseFloat(iso[3] || "0");
    return (hours * 3600 + minutes * 60 + seconds) * 1000;
  }
  return undefined;
}

/**
 * Converts a JSON scalar to a number.
 * Blank strings and objects/arrays yield undefined instead of the `0` that
 * `Number("")` / `Number([])` would produce, so a missing value is never
 * reported as a real zero.
 * @param {any} value
 * @returns {number|undefined}
 */
function toNumberOrUndefined(value) {
  if (value == null) return undefined;
  if (typeof value === "object") return undefined;
  if (typeof value === "string" && value.trim() === "") return undefined;
  const n = Number(value);
  return Number.isNaN(n) ? undefined : n;
}

/**
 * Type coercion for mapped values.
 *
 * - "number": numeric value, or undefined when not numeric/blank.
 * - "dollars-from-cents": numeric value divided by 100.
 * - "boolean": false for 0, "0", "false", "no", "", null, undefined
 *   (strings compared case-insensitively after trimming); true otherwise.
 * - "datetime": epoch milliseconds. Strings are tried as a relative duration
 *   (added to `now`) and then as a date string; numbers below 1e12 are
 *   treated as epoch seconds.
 * - "raw" / anything else: value returned unchanged.
 *
 * @param {any} value
 * @param {string} as - "number" | "dollars-from-cents" | "boolean" | "datetime" | "raw"
 * @param {{ now?: number }} [opts] - `now` is the base for relative durations (defaults to Date.now())
 * @returns {any}
 */
export function coerceValue(value, as, { now } = {}) {
  switch (as) {
    case "number":
      return toNumberOrUndefined(value);

    case "dollars-from-cents": {
      const cents = toNumberOrUndefined(value);
      return cents === undefined ? undefined : cents / 100;
    }

    case "boolean": {
      const probe = typeof value === "string" ? value.trim().toLowerCase() : value;
      return !FALSE_SET.has(probe);
    }

    case "datetime": {
      if (value == null) return undefined;

      if (typeof value === "string") {
        const trimmed = value.trim();
        // Try duration first
        const dur = parseDuration(trimmed);
        if (dur !== undefined) {
          return (now ?? Date.now()) + dur;
        }
        // Then an absolute date string (ISO-8601)
        const parsed = new Date(trimmed).getTime();
        return Number.isNaN(parsed) ? undefined : parsed;
      }

      if (typeof value === "number") {
        if (!Number.isFinite(value)) return undefined;
        // Epoch seconds vs ms heuristic
        return value < 1e12 ? value * 1000 : value;
      }
      return undefined;
    }

    case "raw":
    default:
      return value;
  }
}
141
+
142
const SHORTCODE_RE = /\{\{([^}]+)\}\}/g;

// The only context keys a template may reference directly.
const KNOWN_CTX_KEYS = new Set([
  "providerApiKey",
  "providerBaseUrl",
  "providerId"
]);

const ENV_SHORTCODE_RE = /^env\.(.+)$/;

/**
 * Reads an own property from a possibly-nullish bag, ignoring anything
 * inherited from the prototype chain (e.g. `constructor`, `toString`).
 * @param {Record<string, any>|null|undefined} bag
 * @param {string} key
 * @returns {any} the value, or "" when absent/nullish
 */
function readOwn(bag, key) {
  if (bag == null || !Object.prototype.hasOwnProperty.call(bag, key)) return "";
  return bag[key] ?? "";
}

/**
 * Replace `{{shortcode}}` placeholders in a template string.
 *
 * - `{{providerApiKey}}`, `{{providerBaseUrl}}`, `{{providerId}}` read from `ctx`.
 * - `{{env.NAME}}` reads `NAME` from `env`.
 * - Any other or missing shortcode is replaced with an empty string.
 *
 * Non-string templates are returned unchanged. A nullish `ctx` or `env` is
 * treated as empty rather than throwing.
 *
 * @param {any} template
 * @param {Record<string, string>} ctx
 * @param {Record<string, string>} [env]
 * @returns {any}
 */
export function interpolateShortcodes(template, ctx, env = {}) {
  if (typeof template !== "string") return template;
  return template.replace(SHORTCODE_RE, (_, key) => {
    const trimmed = key.trim();
    if (KNOWN_CTX_KEYS.has(trimmed)) return readOwn(ctx, trimmed);
    const envMatch = trimmed.match(ENV_SHORTCODE_RE);
    if (envMatch) return readOwn(env, envMatch[1]);
    return "";
  });
}
167
+
168
const MAPPED_FIELDS = ["used", "limit", "remaining", "resetAt", "isUnlimited"];

/**
 * Extract normalized fields from a raw API response using a mapping config.
 *
 * For each of `used`, `limit`, `remaining`, `resetAt`, `isUnlimited` the value
 * is taken from, in order:
 *   1. `mapping[field].path`, coerced with `mapping[field].as` (default "raw");
 *   2. for `limit` only, the first `mapping.limitFallbacks` path that yields a
 *      non-null coerced value (coerced with `mapping.limit.as`, default "number");
 *   3. `mapping.constants[field]`.
 * Fields that stay undefined are omitted from the result.
 *
 * A missing (null/undefined) mapping is treated as an empty mapping and
 * produces an empty result instead of throwing.
 *
 * @param {any} rawResponse
 * @param {Record<string, any>} mapping
 * @returns {Record<string, any>}
 */
export function extractMappedSnapshot(rawResponse, mapping) {
  const rules = mapping ?? {};
  const result = {};
  // One timestamp for the whole snapshot so relative durations share a base.
  const now = Date.now();

  for (const field of MAPPED_FIELDS) {
    let value;

    // Try primary path
    const fieldMapping = rules[field];
    if (fieldMapping && fieldMapping.path) {
      const raw = resolvePath(rawResponse, fieldMapping.path);
      if (raw != null) {
        value = coerceValue(raw, fieldMapping.as || "raw", { now });
      }
    }

    // For "limit" field: try limitFallbacks chain if still null/undefined
    if (value == null && field === "limit" && Array.isArray(rules.limitFallbacks)) {
      const as = fieldMapping?.as || "number";
      for (const fallbackPath of rules.limitFallbacks) {
        const raw = resolvePath(rawResponse, fallbackPath);
        if (raw == null) continue;
        value = coerceValue(raw, as, { now });
        if (value != null) break;
      }
    }

    // Try constants as final fallback
    if (value == null && rules.constants && rules.constants[field] != null) {
      value = rules.constants[field];
    }

    if (value !== undefined) {
      result[field] = value;
    }
  }

  return result;
}
@@ -0,0 +1,234 @@
1
+ /**
2
+ * IO layer: executes quota probes (HTTP or custom JS), caches snapshots,
3
+ * and manages a per-provider circuit breaker.
4
+ */
5
+ import { createContext, Script } from "node:vm";
6
+ import { validateSnapshot, deriveSnapshot } from "../runtime/quota-probe.js";
7
+ import { extractMappedSnapshot, interpolateShortcodes } from "./quota-probe-mapping.js";
8
+
9
+ const CIRCUIT_THRESHOLD = 3;
10
+ const CIRCUIT_PAUSE_MS = 5 * 60 * 1000;
11
+
12
/**
 * Builds the snapshot stored when a probe fails.
 *
 * @param {string} capKind - cap kind copied from the probe config
 * @param {number} now - timestamp recorded as `fetchedAt`
 * @param {any} error - failure reason: a message string, or an error-like
 *   object whose `message` is used (so an Error does not become "Error: …")
 * @param {object|null} [lastKnownGood] - last successful snapshot, if any
 * @returns {object} snapshot with `state: "errored"`
 */
function makeErroredSnapshot(capKind, now, error, lastKnownGood) {
  let message;
  if (error == null) {
    message = "unknown error";
  } else if (typeof error === "object" && typeof error.message === "string") {
    // Duck-typed so errors created in another realm (e.g. node:vm) also match.
    message = error.message;
  } else {
    message = String(error);
  }

  return {
    capKind,
    state: "errored",
    error: { message },
    fetchedAt: now,
    raw: null,
    lastKnownGood: lastKnownGood || null,
  };
}
22
+
23
/**
 * Runs an HTTP quota probe and returns the parsed JSON body.
 *
 * URL, header names/values and string bodies go through
 * `interpolateShortcodes`; non-string bodies are JSON-serialized. The method is
 * upper-cased, and GET/HEAD requests never carry a body. The timeout
 * (`http.timeoutMs`, default 5000 ms) covers both the request and reading the
 * response body.
 *
 * @param {object} probeConfig - probe config; `probeConfig.http` holds url, method, headers, body, timeoutMs
 * @param {Record<string, string>} shortcodeCtx - values for provider shortcodes
 * @param {Record<string, string>} env - values for `{{env.*}}` shortcodes
 * @param {Function} fetchFn - fetch-compatible function
 * @returns {Promise<any>} parsed JSON body, or null when the body is not valid JSON
 * @throws {Error} "HTTP <status>" with `responseBody` on a non-2xx response,
 *   "timeout" when the deadline passes, otherwise the underlying error message
 */
async function executeHttp(probeConfig, shortcodeCtx, env, fetchFn) {
  const http = probeConfig.http;
  const url = interpolateShortcodes(http.url, shortcodeCtx, env);

  const headers = {};
  for (const h of http.headers || []) {
    const headerKey = String(h?.key || h?.name || "").trim();
    if (!headerKey) continue;
    // `??` keeps legitimate falsy values such as 0.
    headers[interpolateShortcodes(headerKey, shortcodeCtx, env)] =
      interpolateShortcodes(String(h.value ?? ""), shortcodeCtx, env);
  }

  const method = String(http.method || "GET").toUpperCase();
  const opts = { method, headers };
  // fetch rejects GET/HEAD requests that carry a body.
  if (http.body !== undefined && method !== "GET" && method !== "HEAD") {
    opts.body = typeof http.body === "string"
      ? interpolateShortcodes(http.body, shortcodeCtx, env)
      : JSON.stringify(http.body);
  }

  const ac = new AbortController();
  const timer = setTimeout(() => ac.abort(), http.timeoutMs || 5000);
  opts.signal = ac.signal;

  try {
    const res = await fetchFn(url, opts);

    let body = null;
    try {
      body = await res.json();
    } catch (parseErr) {
      // A non-JSON body is tolerated (body stays null); an abort while the
      // body is still streaming is a timeout and must surface.
      if (ac.signal.aborted) throw parseErr;
    }

    if (!res.ok) {
      const httpErr = new Error(`HTTP ${res.status}`);
      httpErr.responseBody = body;
      throw httpErr;
    }
    return body;
  } catch (err) {
    // HTTP-status errors already carry the response body; pass them through.
    if (err != null && err.responseBody !== undefined) throw err;

    const timedOut = ac.signal.aborted || err?.name === "AbortError";
    const msg = timedOut ? "timeout" : (err?.message ?? String(err));
    throw new Error(msg, { cause: err });
  } finally {
    // Cleared only after the body has been read (or the call failed).
    clearTimeout(timer);
  }
}
65
+
66
/**
 * Runs a user-supplied custom probe script and returns what its
 * `fetchUsage(ctx)` function resolves to.
 *
 * The source is wrapped in an async IIFE and run in a `node:vm` context whose
 * only global is a frozen `ctx` (fetch, provider values, `now`, `timeoutMs`).
 *
 * The vm `timeout` option only bounds synchronous execution, so the returned
 * promise is additionally raced against a timer; a probe that never settles
 * rejects with "timeout" instead of hanging the caller. When
 * `custom.timeoutMs` is missing or invalid a 5000 ms default is used.
 *
 * NOTE(security): `node:vm` is not a security boundary. The script receives
 * host functions (`fetch`, `log`) and can reach host objects through them;
 * only run custom probe sources that are trusted.
 *
 * @param {object} probeConfig - probe config; `probeConfig.custom` holds `source` and `timeoutMs`
 * @param {Record<string, string>} shortcodeCtx - providerApiKey / providerBaseUrl / providerId
 * @param {Function} fetchFn - fetch-compatible function exposed as `ctx.fetch`
 * @param {number} now - timestamp exposed as `ctx.now`
 * @returns {Promise<any>} whatever `fetchUsage(ctx)` returns
 * @throws {Error} "timeout" when the probe does not settle in time; compile or
 *   runtime errors from the script propagate
 */
async function executeCustom(probeConfig, shortcodeCtx, fetchFn, now) {
  const DEFAULT_TIMEOUT_MS = 5000;
  const { source, timeoutMs } = probeConfig.custom;
  // vm requires a strictly positive integer timeout.
  const effectiveTimeoutMs =
    Number.isFinite(timeoutMs) && timeoutMs > 0 ? Math.ceil(timeoutMs) : DEFAULT_TIMEOUT_MS;

  const sandbox = Object.freeze({
    ctx: Object.freeze({
      fetch: fetchFn,
      providerApiKey: shortcodeCtx.providerApiKey,
      providerBaseUrl: shortcodeCtx.providerBaseUrl,
      providerId: shortcodeCtx.providerId,
      log: () => {},
      now,
      timeoutMs: effectiveTimeoutMs,
    }),
  });
  const vmCtx = createContext(sandbox);
  const wrapped = `(async () => { ${source}\n return fetchUsage(ctx); })()`;
  const script = new Script(wrapped);

  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error("timeout")), effectiveTimeoutMs);
  });

  try {
    // Synchronous part is bounded by the vm timeout; the async part by the race.
    const pending = script.runInContext(vmCtx, { timeout: effectiveTimeoutMs });
    return await Promise.race([pending, deadline]);
  } finally {
    clearTimeout(timer);
  }
}
84
+
85
/**
 * Creates a quota probe runner.
 *
 * The runner executes probes (HTTP or custom), caches the latest snapshot per
 * provider, tracks a per-provider circuit breaker, de-duplicates concurrent
 * refreshes and can schedule a refresh for when a snapshot's `resetAt` passes.
 *
 * @param {{ fetchImpl?: Function }} [options] - `fetchImpl` overrides `globalThis.fetch`
 * @returns {{
 *   executeProbe: Function,
 *   getSnapshot: Function,
 *   getAllSnapshots: Function,
 *   isCircuitOpen: Function,
 *   resetCircuit: Function,
 *   enqueueRefresh: Function,
 *   dispose: Function,
 *   onTriggerRefresh: Function|null
 * }}
 */
export function createQuotaProbeRunner({ fetchImpl } = {}) {
  const fetchFn = fetchImpl || globalThis.fetch;
  const cache = new Map();
  const circuits = new Map();

  function getCircuit(providerId) {
    if (!circuits.has(providerId)) {
      circuits.set(providerId, { failures: 0, openUntil: 0 });
    }
    return circuits.get(providerId);
  }

  /** True while the provider has hit the failure threshold and its pause has not elapsed. */
  function isCircuitOpen(providerId, now) {
    const c = circuits.get(providerId);
    return !!c && c.failures >= CIRCUIT_THRESHOLD && now < c.openUntil;
  }

  function resetCircuit(providerId) {
    circuits.delete(providerId);
  }

  /**
   * Runs one probe and caches the outcome. Never rejects for probe failures:
   * any failure is recorded as an "errored" snapshot that carries the last
   * known good snapshot, and counts toward the circuit breaker.
   */
  async function executeProbe({ providerId, probeConfig, shortcodeCtx, env, now }) {
    const capKind = probeConfig.capKind;
    const prev = cache.get(providerId);
    const lastKnownGood = prev?.state === "fresh" ? prev : prev?.lastKnownGood || null;

    try {
      let result;
      if (probeConfig.mode === "custom") {
        result = await executeCustom(probeConfig, shortcodeCtx, fetchFn, now);
        if (result == null || typeof result !== "object") {
          throw new Error("Custom probe did not return a snapshot object.");
        }
      } else {
        const rawJson = await executeHttp(probeConfig, shortcodeCtx, env, fetchFn);
        // A probe without a mapping still produces a (constant-less) snapshot attempt.
        const mapped = extractMappedSnapshot(rawJson, probeConfig.http.mapping ?? {});
        result = { ...mapped, capKind, raw: rawJson };
      }

      const toValidate = { ...result, capKind: result.capKind || capKind };
      const validation = validateSnapshot(toValidate);
      if (!validation.valid) {
        const err = new Error(validation.error);
        err.responseBody = result.raw;
        throw err;
      }

      const derived = deriveSnapshot(toValidate);
      const snapshot = {
        capKind: derived.capKind,
        used: derived.used,
        limit: derived.limit,
        remaining: derived.remaining,
        resetAt: derived.resetAt,
        isUnlimited: derived.isUnlimited,
        state: "fresh",
        fetchedAt: now,
        error: null,
        raw: result.raw ?? null,
        lastKnownGood: null,
      };

      cache.set(providerId, snapshot);
      resetCircuit(providerId);
      return snapshot;
    } catch (err) {
      const circuit = getCircuit(providerId);
      circuit.failures += 1;
      if (circuit.failures >= CIRCUIT_THRESHOLD) {
        circuit.openUntil = now + CIRCUIT_PAUSE_MS;
      }
      // Custom scripts may throw anything (string, null, cross-realm Error),
      // so read the failure defensively instead of assuming an Error.
      const message = typeof err?.message === "string" ? err.message : String(err);
      const snapshot = makeErroredSnapshot(capKind, now, message, lastKnownGood);
      snapshot.raw = err?.responseBody ?? null;
      cache.set(providerId, snapshot);
      return snapshot;
    }
  }

  function getSnapshot(providerId) {
    return cache.get(providerId) || null;
  }

  /** Returns a shallow copy so callers cannot mutate the cache map itself. */
  function getAllSnapshots() {
    return new Map(cache);
  }

  // ── Refresh trigger management ──────────────────────────────────────
  const pendingRefreshes = new Map();
  const resetAtTimers = new Map();
  const MAX_CONCURRENT_PROBES = 4;
  const MAX_RESET_AT_DELAY_MS = 24 * 60 * 60 * 1000;
  let activeConcurrent = 0;
  let triggerRefreshListener = null;

  /**
   * (Re)schedules a refresh for the moment `snapshot.resetAt` passes, when the
   * probe opts in via `refreshTriggers.onResetAt` and the reset lies within
   * the next 24 hours.
   */
  function scheduleResetAtRefresh(providerId, snapshot, probeConfig, shortcodeCtx, env) {
    if (resetAtTimers.has(providerId)) {
      clearTimeout(resetAtTimers.get(providerId));
      resetAtTimers.delete(providerId);
    }
    if (!probeConfig.refreshTriggers?.onResetAt) return;
    if (!snapshot.resetAt) return;
    const delay = snapshot.resetAt - Date.now();
    // A non-numeric resetAt yields NaN, which passes both range checks and
    // would make setTimeout fire almost immediately, re-probing in a loop.
    if (!Number.isFinite(delay) || delay <= 0 || delay > MAX_RESET_AT_DELAY_MS) return;

    const timerId = setTimeout(() => {
      resetAtTimers.delete(providerId);
      if (triggerRefreshListener) triggerRefreshListener({ providerId, trigger: "scheduler.resetAt" });
      // Probe failures are captured as errored snapshots; the catch only
      // prevents an unexpected rejection from becoming an unhandled one.
      enqueueRefresh({ providerId, probeConfig, shortcodeCtx, env }).catch(() => {});
    }, delay);
    // A pending refresh (up to 24h away) must not keep the process alive.
    timerId.unref?.();
    resetAtTimers.set(providerId, timerId);
  }

  /**
   * Refreshes a provider's snapshot. Returns the cached snapshot while the
   * circuit is open (unless `bypassCircuit`), shares one in-flight probe per
   * provider, and runs at most MAX_CONCURRENT_PROBES probes at a time.
   */
  async function enqueueRefresh({ providerId, probeConfig, shortcodeCtx, env, bypassCircuit }) {
    if (isCircuitOpen(providerId, Date.now()) && !bypassCircuit) {
      return getSnapshot(providerId);
    }
    if (pendingRefreshes.has(providerId)) {
      return pendingRefreshes.get(providerId);
    }

    const run = async () => {
      // Wait for a free slot before starting the probe.
      while (activeConcurrent >= MAX_CONCURRENT_PROBES) {
        await new Promise((resolve) => setTimeout(resolve, 50));
      }
      activeConcurrent++;
      try {
        const snap = await executeProbe({ providerId, probeConfig, shortcodeCtx, env, now: Date.now() });
        scheduleResetAtRefresh(providerId, snap, probeConfig, shortcodeCtx, env);
        return snap;
      } finally {
        activeConcurrent--;
        pendingRefreshes.delete(providerId);
      }
    };

    const promise = run();
    pendingRefreshes.set(providerId, promise);
    return promise;
  }

  /** Cancels every scheduled resetAt refresh. */
  function dispose() {
    for (const id of resetAtTimers.values()) clearTimeout(id);
    resetAtTimers.clear();
  }

  const runner = { executeProbe, getSnapshot, getAllSnapshots, isCircuitOpen, resetCircuit, enqueueRefresh, dispose };
  Object.defineProperty(runner, "onTriggerRefresh", {
    get() { return triggerRefreshListener; },
    set(fn) { triggerRefreshListener = fn; },
    enumerable: true,
  });
  return runner;
}