@khanglvm/llm-router 2.6.0 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -13
- package/README.md +0 -3
- package/package.json +1 -1
- package/src/node/coding-tool-config.js +15 -1
- package/src/node/huggingface-gguf.js +0 -12
- package/src/node/llamacpp-runtime.js +78 -256
- package/src/node/local-models-service.js +2 -25
- package/src/node/local-server.js +2 -60
- package/src/node/provider-probe.js +18 -0
- package/src/node/quota-probe-mapping.js +215 -0
- package/src/node/quota-probe-runner.js +234 -0
- package/src/node/web-console-client.js +33 -27
- package/src/node/web-console-server.js +107 -64
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/api-client.js +27 -0
- package/src/node/web-console-ui/local-models-utils.js +0 -33
- package/src/runtime/balancer.js +47 -4
- package/src/runtime/config.js +9 -4
- package/src/runtime/handler/fallback.js +7 -0
- package/src/runtime/handler/provider-call.js +18 -36
- package/src/runtime/handler/runtime-policy.js +1 -4
- package/src/runtime/local-models.js +0 -36
- package/src/runtime/quota-probe.js +179 -0
- package/src/node/llamacpp-managed-runtime.js +0 -202
- package/src/node/llamacpp-runtime-profile.js +0 -133
|
@@ -65,7 +65,6 @@ import { detectOllamaInstallation, installOllama, startOllamaServer, stopOllamaS
|
|
|
65
65
|
import { browseForLocalModelPath, scanLocalModelPath } from "./local-model-browser.js";
|
|
66
66
|
import {
|
|
67
67
|
detectLlamacppCandidates,
|
|
68
|
-
getManagedLlamacppRuntimeSnapshot,
|
|
69
68
|
startConfiguredLlamacppRuntime,
|
|
70
69
|
stopManagedLlamacppRuntime,
|
|
71
70
|
validateLlamacppCommand
|
|
@@ -84,6 +83,7 @@ import {
|
|
|
84
83
|
downloadManagedHuggingFaceGguf,
|
|
85
84
|
searchHuggingFaceGgufCandidates
|
|
86
85
|
} from "./huggingface-gguf.js";
|
|
86
|
+
import { createQuotaProbeRunner } from "./quota-probe-runner.js";
|
|
87
87
|
import {
|
|
88
88
|
CONFIG_VERSION,
|
|
89
89
|
DEFAULT_MODEL_ALIAS_ID,
|
|
@@ -869,10 +869,6 @@ function routeSnapshotDocument(configState) {
|
|
|
869
869
|
return configState.parseError ? null : (configState.normalizedConfig || buildDefaultConfigObject());
|
|
870
870
|
}
|
|
871
871
|
|
|
872
|
-
function isObjectRecord(value) {
|
|
873
|
-
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
874
|
-
}
|
|
875
|
-
|
|
876
872
|
function readConfiguredLlamacppRuntime(config = {}) {
|
|
877
873
|
const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
|
|
878
874
|
if (!runtime || typeof runtime !== "object") {
|
|
@@ -900,46 +896,17 @@ function readConfiguredLlamacppRuntime(config = {}) {
|
|
|
900
896
|
};
|
|
901
897
|
}
|
|
902
898
|
|
|
903
|
-
function buildLlamacppRuntimePayload(runtime = {}, validation = {}, candidates = []
|
|
899
|
+
function buildLlamacppRuntimePayload(runtime = {}, validation = {}, candidates = []) {
|
|
904
900
|
const selectedCommand = String(runtime.selectedCommand || runtime.manualCommand || runtime.command || "").trim();
|
|
905
|
-
const runtimeInstances = Array.isArray(registrySnapshot) ? registrySnapshot : [];
|
|
906
|
-
const healthyInstances = runtimeInstances.filter((entry) => entry?.healthy === true);
|
|
907
901
|
return {
|
|
908
902
|
...runtime,
|
|
909
903
|
selectedCommand,
|
|
910
904
|
selectedDirectory: selectedCommand ? path.dirname(selectedCommand) : "",
|
|
911
|
-
managedInstanceCount: runtimeInstances.length,
|
|
912
|
-
healthyInstanceCount: healthyInstances.length,
|
|
913
|
-
staleRuntimeCount: runtimeInstances.filter((entry) => entry?.healthy !== true).length,
|
|
914
|
-
instances: runtimeInstances,
|
|
915
905
|
...(validation && typeof validation === "object" ? validation : {}),
|
|
916
906
|
candidates
|
|
917
907
|
};
|
|
918
908
|
}
|
|
919
909
|
|
|
920
|
-
function buildRouteSnapshotDocument(configState, registrySnapshot = []) {
|
|
921
|
-
const document = routeSnapshotDocument(configState);
|
|
922
|
-
if (!isObjectRecord(document)) return document;
|
|
923
|
-
|
|
924
|
-
const runtimeInstances = Array.isArray(registrySnapshot) ? registrySnapshot : [];
|
|
925
|
-
if (!isObjectRecord(document.metadata?.localModels) && runtimeInstances.length === 0) {
|
|
926
|
-
return document;
|
|
927
|
-
}
|
|
928
|
-
|
|
929
|
-
const nextDocument = JSON.parse(JSON.stringify(document));
|
|
930
|
-
if (!isObjectRecord(nextDocument.metadata)) nextDocument.metadata = {};
|
|
931
|
-
if (!isObjectRecord(nextDocument.metadata.localModels)) nextDocument.metadata.localModels = {};
|
|
932
|
-
if (!isObjectRecord(nextDocument.metadata.localModels.runtime)) nextDocument.metadata.localModels.runtime = {};
|
|
933
|
-
|
|
934
|
-
nextDocument.metadata.localModels.runtime.llamacpp = buildLlamacppRuntimePayload(
|
|
935
|
-
readConfiguredLlamacppRuntime(nextDocument),
|
|
936
|
-
{},
|
|
937
|
-
[],
|
|
938
|
-
runtimeInstances
|
|
939
|
-
);
|
|
940
|
-
return nextDocument;
|
|
941
|
-
}
|
|
942
|
-
|
|
943
910
|
function updateLlamacppRuntimeConfig(config = {}, runtimePatch = {}) {
|
|
944
911
|
const nextConfig = JSON.parse(JSON.stringify(config || {}));
|
|
945
912
|
if (!nextConfig.metadata || typeof nextConfig.metadata !== "object" || Array.isArray(nextConfig.metadata)) {
|
|
@@ -1045,9 +1012,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
1045
1012
|
const stopManagedLlamacppRuntimeFn = typeof deps.stopManagedLlamacppRuntime === "function"
|
|
1046
1013
|
? deps.stopManagedLlamacppRuntime
|
|
1047
1014
|
: (callbacks = {}) => stopManagedLlamacppRuntime(callbacks);
|
|
1048
|
-
const getManagedLlamacppRuntimeSnapshotFn = typeof deps.getManagedLlamacppRuntimeSnapshot === "function"
|
|
1049
|
-
? deps.getManagedLlamacppRuntimeSnapshot
|
|
1050
|
-
: getManagedLlamacppRuntimeSnapshot;
|
|
1051
1015
|
const validateLlamacppCommandFn = typeof deps.validateLlamacppCommand === "function"
|
|
1052
1016
|
? deps.validateLlamacppCommand
|
|
1053
1017
|
: validateLlamacppCommand;
|
|
@@ -1072,6 +1036,8 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
1072
1036
|
requireAuth: routerRequireAuth === true
|
|
1073
1037
|
};
|
|
1074
1038
|
|
|
1039
|
+
const quotaProbeRunner = createQuotaProbeRunner({ fetchImpl: globalThis.fetch });
|
|
1040
|
+
|
|
1075
1041
|
async function readWebSearchState(config = null) {
|
|
1076
1042
|
if (!config || typeof config !== "object") return null;
|
|
1077
1043
|
const runtimeFlags = resolveRuntimeFlags({ runtime: "node" }, runtimeEnv);
|
|
@@ -2497,24 +2463,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
2497
2463
|
|
|
2498
2464
|
let routerRestartPromise = null;
|
|
2499
2465
|
|
|
2500
|
-
async function readManagedLlamacppRuntimeSnapshot() {
|
|
2501
|
-
try {
|
|
2502
|
-
const snapshot = await Promise.resolve(getManagedLlamacppRuntimeSnapshotFn());
|
|
2503
|
-
return Array.isArray(snapshot) ? snapshot : [];
|
|
2504
|
-
} catch {
|
|
2505
|
-
return [];
|
|
2506
|
-
}
|
|
2507
|
-
}
|
|
2508
|
-
|
|
2509
|
-
async function buildManagedLlamacppRuntimePayload(runtime = {}, validation = {}, candidates = []) {
|
|
2510
|
-
return buildLlamacppRuntimePayload(
|
|
2511
|
-
runtime,
|
|
2512
|
-
validation,
|
|
2513
|
-
candidates,
|
|
2514
|
-
await readManagedLlamacppRuntimeSnapshot()
|
|
2515
|
-
);
|
|
2516
|
-
}
|
|
2517
|
-
|
|
2518
2466
|
async function restartManagedRouterWithSettings(settings, {
|
|
2519
2467
|
reason = "Restarting managed router.",
|
|
2520
2468
|
configStateOverride = null
|
|
@@ -2570,7 +2518,6 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
2570
2518
|
const claudeCodeGlobal = await readClaudeCodeGlobalRoutingState(configLocalServer, configState.normalizedConfig);
|
|
2571
2519
|
const factoryDroidGlobal = await readFactoryDroidGlobalRoutingState(configLocalServer, configState.normalizedConfig);
|
|
2572
2520
|
const webSearch = await readWebSearchState(configState.normalizedConfig).catch(() => null);
|
|
2573
|
-
const managedLlamacppRegistrySnapshot = await readManagedLlamacppRuntimeSnapshot();
|
|
2574
2521
|
const ollamaConfig = configState.normalizedConfig?.ollama;
|
|
2575
2522
|
const ollamaBaseUrl = ollamaConfig?.baseUrl || "http://localhost:11434";
|
|
2576
2523
|
const ollamaInstallation = detectOllamaInstallation();
|
|
@@ -2593,7 +2540,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
2593
2540
|
},
|
|
2594
2541
|
config: {
|
|
2595
2542
|
...configState.summary,
|
|
2596
|
-
document:
|
|
2543
|
+
document: routeSnapshotDocument(configState),
|
|
2597
2544
|
localServer: configLocalServer
|
|
2598
2545
|
},
|
|
2599
2546
|
router: routerSnapshot,
|
|
@@ -3089,6 +3036,102 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3089
3036
|
return;
|
|
3090
3037
|
}
|
|
3091
3038
|
|
|
3039
|
+
// ── Quota Probe routes ──────────────────────────────────────────
|
|
3040
|
+
const quotaProbeTestMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/test$/);
|
|
3041
|
+
if (method === "POST" && quotaProbeTestMatch) {
|
|
3042
|
+
const providerId = decodeURIComponent(quotaProbeTestMatch[1]);
|
|
3043
|
+
const body = await readJsonBody(req);
|
|
3044
|
+
const configState = await readConfigState(configPath);
|
|
3045
|
+
const provider = (configState.normalizedConfig?.providers || []).find((entry) => entry.id === providerId);
|
|
3046
|
+
if (!provider) {
|
|
3047
|
+
sendJson(res, 404, { error: "Provider not found." });
|
|
3048
|
+
return;
|
|
3049
|
+
}
|
|
3050
|
+
const shortcodeCtx = {
|
|
3051
|
+
providerApiKey: resolveProviderApiKey(provider, process.env) || "",
|
|
3052
|
+
providerBaseUrl: provider.baseUrl || "",
|
|
3053
|
+
providerId: provider.id
|
|
3054
|
+
};
|
|
3055
|
+
const probeConfig = {
|
|
3056
|
+
...(provider.quotaProbe || {}),
|
|
3057
|
+
enabled: true,
|
|
3058
|
+
capKind: body.capKind || provider.quotaProbe?.capKind || "dollars",
|
|
3059
|
+
mode: body.mode || provider.quotaProbe?.mode || "http",
|
|
3060
|
+
http: body.http || provider.quotaProbe?.http,
|
|
3061
|
+
custom: body.custom || provider.quotaProbe?.custom
|
|
3062
|
+
};
|
|
3063
|
+
const tempRunner = createQuotaProbeRunner({ fetchImpl: globalThis.fetch });
|
|
3064
|
+
const now = Date.now();
|
|
3065
|
+
const startMs = now;
|
|
3066
|
+
try {
|
|
3067
|
+
const snapshot = await tempRunner.executeProbe({ providerId, probeConfig, shortcodeCtx, env: process.env, now });
|
|
3068
|
+
const latencyMs = Date.now() - startMs;
|
|
3069
|
+
sendJson(res, 200, { snapshot, raw: snapshot.raw, latencyMs, error: snapshot.error });
|
|
3070
|
+
} finally {
|
|
3071
|
+
tempRunner.dispose();
|
|
3072
|
+
}
|
|
3073
|
+
return;
|
|
3074
|
+
}
|
|
3075
|
+
|
|
3076
|
+
const quotaProbeRefreshMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/refresh$/);
|
|
3077
|
+
if (method === "POST" && quotaProbeRefreshMatch) {
|
|
3078
|
+
const providerId = decodeURIComponent(quotaProbeRefreshMatch[1]);
|
|
3079
|
+
const configState = await readConfigState(configPath);
|
|
3080
|
+
const provider = (configState.normalizedConfig?.providers || []).find((entry) => entry.id === providerId);
|
|
3081
|
+
if (!provider) {
|
|
3082
|
+
sendJson(res, 404, { error: "Provider not found." });
|
|
3083
|
+
return;
|
|
3084
|
+
}
|
|
3085
|
+
if (!provider.quotaProbe?.enabled) {
|
|
3086
|
+
sendJson(res, 400, { error: "Quota probe not enabled for this provider." });
|
|
3087
|
+
return;
|
|
3088
|
+
}
|
|
3089
|
+
const shortcodeCtx = {
|
|
3090
|
+
providerApiKey: resolveProviderApiKey(provider, process.env) || "",
|
|
3091
|
+
providerBaseUrl: provider.baseUrl || "",
|
|
3092
|
+
providerId: provider.id
|
|
3093
|
+
};
|
|
3094
|
+
const snapshot = await quotaProbeRunner.enqueueRefresh({
|
|
3095
|
+
providerId,
|
|
3096
|
+
probeConfig: provider.quotaProbe,
|
|
3097
|
+
shortcodeCtx,
|
|
3098
|
+
env: process.env,
|
|
3099
|
+
bypassCircuit: true
|
|
3100
|
+
});
|
|
3101
|
+
sendJson(res, 200, { snapshot });
|
|
3102
|
+
return;
|
|
3103
|
+
}
|
|
3104
|
+
|
|
3105
|
+
const quotaProbeSnapshotMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/snapshot$/);
|
|
3106
|
+
if (method === "GET" && quotaProbeSnapshotMatch) {
|
|
3107
|
+
const providerId = decodeURIComponent(quotaProbeSnapshotMatch[1]);
|
|
3108
|
+
sendJson(res, 200, { snapshot: quotaProbeRunner.getSnapshot(providerId) });
|
|
3109
|
+
return;
|
|
3110
|
+
}
|
|
3111
|
+
|
|
3112
|
+
const quotaProbeSaveMatch = requestUrl.pathname.match(/^\/api\/providers\/([^/]+)\/quota-probe\/save$/);
|
|
3113
|
+
if (method === "POST" && quotaProbeSaveMatch) {
|
|
3114
|
+
const providerId = decodeURIComponent(quotaProbeSaveMatch[1]);
|
|
3115
|
+
const body = await readJsonBody(req);
|
|
3116
|
+
const configState = await readConfigState(configPath);
|
|
3117
|
+
if (configState.parseError) {
|
|
3118
|
+
sendJson(res, 400, { error: `Config parse error: ${configState.parseError}` });
|
|
3119
|
+
return;
|
|
3120
|
+
}
|
|
3121
|
+
const rawConfig = configState.rawConfig || {};
|
|
3122
|
+
const providerList = Array.isArray(rawConfig.providers) ? rawConfig.providers : [];
|
|
3123
|
+
const providerIndex = providerList.findIndex((entry) => entry?.id === providerId);
|
|
3124
|
+
if (providerIndex === -1) {
|
|
3125
|
+
sendJson(res, 404, { error: "Provider not found." });
|
|
3126
|
+
return;
|
|
3127
|
+
}
|
|
3128
|
+
providerList[providerIndex] = { ...providerList[providerIndex], quotaProbe: body.quotaProbe || null };
|
|
3129
|
+
rawConfig.providers = providerList;
|
|
3130
|
+
const { snapshot } = await writeAndBroadcastConfig(rawConfig, { source: "quota-probe-save" });
|
|
3131
|
+
sendJson(res, 200, { ok: true, snapshot });
|
|
3132
|
+
return;
|
|
3133
|
+
}
|
|
3134
|
+
|
|
3092
3135
|
if (method === "POST" && requestUrl.pathname === "/api/config/test-provider") {
|
|
3093
3136
|
const body = await readJsonBody(req);
|
|
3094
3137
|
const endpoints = Array.isArray(body?.endpoints) ? body.endpoints.map((entry) => String(entry || "").trim()).filter(Boolean) : [];
|
|
@@ -3464,7 +3507,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3464
3507
|
});
|
|
3465
3508
|
|
|
3466
3509
|
sendJson(res, 200, {
|
|
3467
|
-
runtime:
|
|
3510
|
+
runtime: buildLlamacppRuntimePayload(configuredRuntime, {}, hydratedCandidates)
|
|
3468
3511
|
});
|
|
3469
3512
|
return;
|
|
3470
3513
|
}
|
|
@@ -3491,7 +3534,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3491
3534
|
if (!validation?.ok) {
|
|
3492
3535
|
sendJson(res, 400, {
|
|
3493
3536
|
error: validation?.errorMessage || `Failed validating llama.cpp runtime '${command}'.`,
|
|
3494
|
-
runtime:
|
|
3537
|
+
runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(configState.rawConfig || {}), validation)
|
|
3495
3538
|
});
|
|
3496
3539
|
return;
|
|
3497
3540
|
}
|
|
@@ -3507,7 +3550,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3507
3550
|
const configuredRuntime = readConfiguredLlamacppRuntime(savedConfig || {});
|
|
3508
3551
|
sendJson(res, 200, {
|
|
3509
3552
|
ok: true,
|
|
3510
|
-
runtime:
|
|
3553
|
+
runtime: buildLlamacppRuntimePayload(configuredRuntime, {
|
|
3511
3554
|
...validation,
|
|
3512
3555
|
status: configuredRuntime.status || "stopped"
|
|
3513
3556
|
})
|
|
@@ -3533,7 +3576,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3533
3576
|
});
|
|
3534
3577
|
sendJson(res, 200, {
|
|
3535
3578
|
ok: true,
|
|
3536
|
-
runtime:
|
|
3579
|
+
runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}))
|
|
3537
3580
|
});
|
|
3538
3581
|
return;
|
|
3539
3582
|
}
|
|
@@ -3579,7 +3622,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3579
3622
|
});
|
|
3580
3623
|
sendJson(res, 200, {
|
|
3581
3624
|
ok: true,
|
|
3582
|
-
runtime:
|
|
3625
|
+
runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}), {
|
|
3583
3626
|
...(validation && typeof validation === "object" ? validation : {}),
|
|
3584
3627
|
status: "running"
|
|
3585
3628
|
})
|
|
@@ -3615,7 +3658,7 @@ export async function startWebConsoleServer(options = {}, deps = {}) {
|
|
|
3615
3658
|
});
|
|
3616
3659
|
sendJson(res, 200, {
|
|
3617
3660
|
ok: true,
|
|
3618
|
-
runtime:
|
|
3661
|
+
runtime: buildLlamacppRuntimePayload(readConfiguredLlamacppRuntime(savedConfig || {}), {
|
|
3619
3662
|
status: "stopped"
|
|
3620
3663
|
})
|
|
3621
3664
|
});
|