@khanglvm/llm-router 2.6.0 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,7 +65,6 @@ import { detectOllamaInstallation, installOllama, startOllamaServer, stopOllamaS
65
65
  import { browseForLocalModelPath, scanLocalModelPath } from "./local-model-browser.js";
66
66
  import {
67
67
  detectLlamacppCandidates,
68
- getManagedLlamacppRuntimeSnapshot,
69
68
  startConfiguredLlamacppRuntime,
70
69
  stopManagedLlamacppRuntime,
71
70
  validateLlamacppCommand
@@ -84,6 +83,7 @@ import {
84
83
  downloadManagedHuggingFaceGguf,
85
84
  searchHuggingFaceGgufCandidates
86
85
  } from "./huggingface-gguf.js";
86
+ import { createQuotaProbeRunner } from "./quota-probe-runner.js";
87
87
  import {
88
88
  CONFIG_VERSION,
89
89
  DEFAULT_MODEL_ALIAS_ID,
@@ -869,10 +869,6 @@ function routeSnapshotDocument(configState) {
869
869
  return configState.parseError ? null : (configState.normalizedConfig || buildDefaultConfigObject());
870
870
  }
871
871
 
872
- function isObjectRecord(value) {
873
- return Boolean(value) && typeof value === "object" && !Array.isArray(value);
874
- }
875
-
876
872
  function readConfiguredLlamacppRuntime(config = {}) {
877
873
  const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
878
874
  if (!runtime || typeof runtime !== "object") {
@@ -900,46 +896,17 @@ function readConfiguredLlamacppRuntime(config = {}) {
900
896
  };
901
897
  }
902
898
 
903
- function buildLlamacppRuntimePayload(runtime = {}, validation = {}, candidates = [], registrySnapshot = []) {
899
+ function buildLlamacppRuntimePayload(runtime = {}, validation = {}, candidates = []) {
904
900
  const selectedCommand = String(runtime.selectedCommand || runtime.manualCommand || runtime.command || "").trim();
905
- const runtimeInstances = Array.isArray(registrySnapshot) ? registrySnapshot : [];
906
- const healthyInstances = runtimeInstances.filter((entry) => entry?.healthy === true);
907
901
  return {
908
902
  ...runtime,
909
903
  selectedCommand,
910
904
  selectedDirectory: selectedCommand ? path.dirname(selectedCommand) : "",
911
- managedInstanceCount: runtimeInstances.length,
912
- healthyInstanceCount: healthyInstances.length,
913
- staleRuntimeCount: runtimeInstances.filter((entry) => entry?.healthy !== true).length,
914
- instances: runtimeInstances,
915
905
  ...(validation && typeof validation === "object" ? validation : {}),
916
906
  candidates
917
907
  };
918
908
  }
919
909
 
920
- function buildRouteSnapshotDocument(configState, registrySnapshot = []) {
921
- const document = routeSnapshotDocument(configState);
922
- if (!isObjectRecord(document)) return document;
923
-
924
- const runtimeInstances = Array.isArray(registrySnapshot) ? registrySnapshot : [];
925
- if (!isObjectRecord(document.metadata?.localModels) && runtimeInstances.length === 0) {
926
- return document;
927
- }
928
-
929
- const nextDocument = JSON.parse(JSON.stringify(document));
930
- if (!isObjectRecord(nextDocument.metadata)) nextDocument.metadata = {};
931
- if (!isObjectRecord(nextDocument.metadata.localModels)) nextDocument.metadata.localModels = {};
932
- if (!isObjectRecord(nextDocument.metadata.localModels.runtime)) nextDocument.metadata.localModels.runtime = {};
933
-
934
- nextDocument.metadata.localModels.runtime.llamacpp = buildLlamacppRuntimePayload(
935
- readConfiguredLlamacppRuntime(nextDocument),
936
- {},
937
- [],
938
- runtimeInstances
939
- );
940
- return nextDocument;
941
- }
942
-
943
910
  function updateLlamacppRuntimeConfig(config = {}, runtimePatch = {}) {
944
911
  const nextConfig = JSON.parse(JSON.stringify(config || {}));
945
912
  if (!nextConfig.metadata || typeof nextConfig.metadata !== "object" || Array.isArray(nextConfig.metadata)) {
@@ -1045,9 +1012,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1045
1012
  const stopManagedLlamacppRuntimeFn = typeof deps.stopManagedLlamacppRuntime === "function"
1046
1013
  ? deps.stopManagedLlamacppRuntime
1047
1014
  : (callbacks = {}) => stopManagedLlamacppRuntime(callbacks);
1048
- const getManagedLlamacppRuntimeSnapshotFn = typeof deps.getManagedLlamacppRuntimeSnapshot === "function"
1049
- ? deps.getManagedLlamacppRuntimeSnapshot
1050
- : getManagedLlamacppRuntimeSnapshot;
1051
1015
  const validateLlamacppCommandFn = typeof deps.validateLlamacppCommand === "function"
1052
1016
  ? deps.validateLlamacppCommand
1053
1017
  : validateLlamacppCommand;
@@ -1072,6 +1036,8 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
1072
1036
  requireAuth: routerRequireAuth === true
1073
1037
  };
1074
1038
 
1039
+ const quotaProbeRunner = createQuotaProbeRunner({ fetchImpl: globalThis.fetch });
1040
+
1075
1041
  async function readWebSearchState(config = null) {
1076
1042
  if (!config || typeof config !== "object") return null;
1077
1043
  const runtimeFlags = resolveRuntimeFlags({ runtime: "node" }, runtimeEnv);
@@ -2497,24 +2463,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2497
2463
 
2498
2464
  let routerRestartPromise = null;
2499
2465
 
2500
- async function readManagedLlamacppRuntimeSnapshot() {
2501
- try {
2502
- const snapshot = await Promise.resolve(getManagedLlamacppRuntimeSnapshotFn());
2503
- return Array.isArray(snapshot) ? snapshot : [];
2504
- } catch {
2505
- return [];
2506
- }
2507
- }
2508
-
2509
- async function buildManagedLlamacppRuntimePayload(runtime = {}, validation = {}, candidates = []) {
2510
- return buildLlamacppRuntimePayload(
2511
- runtime,
2512
- validation,
2513
- candidates,
2514
- await readManagedLlamacppRuntimeSnapshot()
2515
- );
2516
- }
2517
-
2518
2466
  async function restartManagedRouterWithSettings(settings, {
2519
2467
  reason = "Restarting managed router.",
2520
2468
  configStateOverride = null
@@ -2570,7 +2518,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2570
2518
  const claudeCodeGlobal = await readClaudeCodeGlobalRoutingState(configLocalServer, configState.normalizedConfig);
2571
2519
  const factoryDroidGlobal = await readFactoryDroidGlobalRoutingState(configLocalServer, configState.normalizedConfig);
2572
2520
  const webSearch = await readWebSearchState(configState.normalizedConfig).catch(() => null);
2573
- const managedLlamacppRegistrySnapshot = await readManagedLlamacppRuntimeSnapshot();
2574
2521
  const ollamaConfig = configState.normalizedConfig?.ollama;
2575
2522
  const ollamaBaseUrl = ollamaConfig?.baseUrl || "http://localhost:11434";
2576
2523
  const ollamaInstallation = detectOllamaInstallation();
@@ -2593,7 +2540,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
2593
2540
  },
2594
2541
  config: {
2595
2542
  ...configState.summary,
2596
- document: buildRouteSnapshotDocument(configState, managedLlamacppRegistrySnapshot),
2543
+ document: routeSnapshotDocument(configState),
2597
2544
  localServer: configLocalServer
2598
2545
  },
2599
2546
  router: routerSnapshot,
@@ -3089,6 +3036,102 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3089
3036
  return;
3090
3037
  }
3091
3038
 
3039
+ // ── Quota Probe routes ──────────────────────────────────────────
3040
+ const quotaProbeTestMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/test$/);
3041
+ if (method === "POST" && quotaProbeTestMatch) {
3042
+ const providerId = decodeURIComponent(quotaProbeTestMatch[1]);
3043
+ const body = await readJsonBody(req);
3044
+ const configState = await readConfigState(configPath);
3045
+ const provider = (configState.normalizedConfig?.providers || []).find((entry) => entry.id === providerId);
3046
+ if (!provider) {
3047
+ sendJson(res, 404, { error: "Provider not found." });
3048
+ return;
3049
+ }
3050
+ const shortcodeCtx = {
3051
+ providerApiKey: resolveProviderApiKey(provider, process.env) || "",
3052
+ providerBaseUrl: provider.baseUrl || "",
3053
+ providerId: provider.id
3054
+ };
3055
+ const probeConfig = {
3056
+ ...(provider.quotaProbe || {}),
3057
+ enabled: true,
3058
+ capKind: body.capKind || provider.quotaProbe?.capKind || "dollars",
3059
+ mode: body.mode || provider.quotaProbe?.mode || "http",
3060
+ http: body.http || provider.quotaProbe?.http,
3061
+ custom: body.custom || provider.quotaProbe?.custom
3062
+ };
3063
+ const tempRunner = createQuotaProbeRunner({ fetchImpl: globalThis.fetch });
3064
+ const now = Date.now();
3065
+ const startMs = now;
3066
+ try {
3067
+ const snapshot = await tempRunner.executeProbe({ providerId, probeConfig, shortcodeCtx, env: process.env, now });
3068
+ const latencyMs = Date.now() - startMs;
3069
+ sendJson(res, 200, { snapshot, raw: snapshot.raw, latencyMs, error: snapshot.error });
3070
+ } finally {
3071
+ tempRunner.dispose();
3072
+ }
3073
+ return;
3074
+ }
3075
+
3076
+ const quotaProbeRefreshMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/refresh$/);
3077
+ if (method === "POST" && quotaProbeRefreshMatch) {
3078
+ const providerId = decodeURIComponent(quotaProbeRefreshMatch[1]);
3079
+ const configState = await readConfigState(configPath);
3080
+ const provider = (configState.normalizedConfig?.providers || []).find((entry) => entry.id === providerId);
3081
+ if (!provider) {
3082
+ sendJson(res, 404, { error: "Provider not found." });
3083
+ return;
3084
+ }
3085
+ if (!provider.quotaProbe?.enabled) {
3086
+ sendJson(res, 400, { error: "Quota probe not enabled for this provider." });
3087
+ return;
3088
+ }
3089
+ const shortcodeCtx = {
3090
+ providerApiKey: resolveProviderApiKey(provider, process.env) || "",
3091
+ providerBaseUrl: provider.baseUrl || "",
3092
+ providerId: provider.id
3093
+ };
3094
+ const snapshot = await quotaProbeRunner.enqueueRefresh({
3095
+ providerId,
3096
+ probeConfig: provider.quotaProbe,
3097
+ shortcodeCtx,
3098
+ env: process.env,
3099
+ bypassCircuit: true
3100
+ });
3101
+ sendJson(res, 200, { snapshot });
3102
+ return;
3103
+ }
3104
+
3105
+ const quotaProbeSnapshotMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/snapshot$/);
3106
+ if (method === "GET" && quotaProbeSnapshotMatch) {
3107
+ const providerId = decodeURIComponent(quotaProbeSnapshotMatch[1]);
3108
+ sendJson(res, 200, { snapshot: quotaProbeRunner.getSnapshot(providerId) });
3109
+ return;
3110
+ }
3111
+
3112
+ const quotaProbeSaveMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/save$/);
3113
+ if (method === "POST" && quotaProbeSaveMatch) {
3114
+ const providerId = decodeURIComponent(quotaProbeSaveMatch[1]);
3115
+ const body = await readJsonBody(req);
3116
+ const configState = await readConfigState(configPath);
3117
+ if (configState.parseError) {
3118
+ sendJson(res, 400, { error: `Config parse error: ${configState.parseError}` });
3119
+ return;
3120
+ }
3121
+ const rawConfig = configState.rawConfig || {};
3122
+ const providerList = Array.isArray(rawConfig.providers) ? rawConfig.providers : [];
3123
+ const providerIndex = providerList.findIndex((entry) => entry?.id === providerId);
3124
+ if (providerIndex === -1) {
3125
+ sendJson(res, 404, { error: "Provider not found." });
3126
+ return;
3127
+ }
3128
+ providerList[providerIndex] = { ...providerList[providerIndex], quotaProbe: body.quotaProbe || null };
3129
+ rawConfig.providers = providerList;
3130
+ const { snapshot } = await writeAndBroadcastConfig(rawConfig, { source: "quota-probe-save" });
3131
+ sendJson(res, 200, { ok: true, snapshot });
3132
+ return;
3133
+ }
3134
+
3092
3135
  if (method === "POST" && requestUrl.pathname === "/api/config/test-provider") {
3093
3136
  const body = await readJsonBody(req);
3094
3137
  const endpoints = Array.isArray(body?.endpoints) ? body.endpoints.map((entry) => String(entry || "").trim()).filter(Boolean) : [];
@@ -3464,7 +3507,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3464
3507
  });
3465
3508
 
3466
3509
  sendJson(res, 200, {
3467
- runtime: await buildManagedLlamacppRuntimePayload(configuredRuntime, {}, hydratedCandidates)
3510
+ runtime: buildLlamacppRuntimePayload(configuredRuntime, {}, hydratedCandidates)
3468
3511
  });
3469
3512
  return;
3470
3513
  }
@@ -3491,7 +3534,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3491
3534
  if (!validation?.ok) {
3492
3535
  sendJson(res, 400, {
3493
3536
  error: validation?.errorMessage || `Failed validating llama.cpp runtime '${command}'.`,
3494
- runtime: await buildManagedLlamacppRuntimePayload(readConfiguredLlamacppRuntime(configState.rawConfig || {}), validation)
3537
+ runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(configState.rawConfig || {}), validation)
3495
3538
  });
3496
3539
  return;
3497
3540
  }
@@ -3507,7 +3550,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3507
3550
  const configuredRuntime = readConfiguredLlamacppRuntime(savedConfig || {});
3508
3551
  sendJson(res, 200, {
3509
3552
  ok: true,
3510
- runtime: await buildManagedLlamacppRuntimePayload(configuredRuntime, {
3553
+ runtime: buildLlamacppRuntimePayload(configuredRuntime, {
3511
3554
  ...validation,
3512
3555
  status: configuredRuntime.status || "stopped"
3513
3556
  })
@@ -3533,7 +3576,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3533
3576
  });
3534
3577
  sendJson(res, 200, {
3535
3578
  ok: true,
3536
- runtime: await buildManagedLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}))
3579
+ runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}))
3537
3580
  });
3538
3581
  return;
3539
3582
  }
@@ -3579,7 +3622,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3579
3622
  });
3580
3623
  sendJson(res, 200, {
3581
3624
  ok: true,
3582
- runtime: await buildManagedLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}), {
3625
+ runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}), {
3583
3626
  ...(validation && typeof validation === "object" ? validation : {}),
3584
3627
  status: "running"
3585
3628
  })
@@ -3615,7 +3658,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
3615
3658
  });
3616
3659
  sendJson(res, 200, {
3617
3660
  ok: true,
3618
- runtime: await buildManagedLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}), {
3661
+ runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}), {
3619
3662
  status: "stopped"
3620
3663
  })
3621
3664
  });