@khanglvm/llm-router 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +5 -2
- package/package.json +1 -1
- package/src/node/dev-command.js +114 -0
- package/src/node/huggingface-gguf.js +12 -0
- package/src/node/llamacpp-managed-runtime.js +202 -0
- package/src/node/llamacpp-runtime-profile.js +133 -0
- package/src/node/llamacpp-runtime.js +256 -78
- package/src/node/local-models-service.js +25 -2
- package/src/node/local-server.js +60 -2
- package/src/node/web-console-client.js +20 -20
- package/src/node/web-console-server.js +64 -8
- package/src/node/web-console-styles.generated.js +1 -1
- package/src/node/web-console-ui/local-models-utils.js +33 -0
- package/src/runtime/handler/provider-call.js +36 -18
- package/src/runtime/handler/runtime-policy.js +4 -1
- package/src/runtime/local-models.js +36 -0
|
@@ -2,6 +2,11 @@ import path from "node:path";
|
|
|
2
2
|
import os from "node:os";
|
|
3
3
|
import { existsSync } from "node:fs";
|
|
4
4
|
import { spawn, spawnSync } from "node:child_process";
|
|
5
|
+
import { setTimeout as delay } from "node:timers/promises";
|
|
6
|
+
import { deriveLlamacppLaunchProfile } from "./llamacpp-runtime-profile.js";
|
|
7
|
+
import { createLlamacppManagedRuntimeRegistry } from "./llamacpp-managed-runtime.js";
|
|
8
|
+
import { listListeningPids as listListeningPidsForPort } from "./port-reclaim.js";
|
|
9
|
+
import { stopProcessByPid as stopProcessByPidForRuntime } from "./instance-state.js";
|
|
5
10
|
|
|
6
11
|
export const LLAMACPP_DEFAULT_HOST = "127.0.0.1";
|
|
7
12
|
export const LLAMACPP_DEFAULT_PORT = 39391;
|
|
@@ -16,8 +21,8 @@ const COMMON_SOURCE_BUILD_PATHS = Object.freeze([
|
|
|
16
21
|
"src/llama-cpp-turboquant/build/bin/llama-server",
|
|
17
22
|
"src/llama.cpp-turboquant/build/bin/llama-server"
|
|
18
23
|
]);
|
|
19
|
-
|
|
20
|
-
let
|
|
24
|
+
const managedLlamacppRuntimeRegistry = createLlamacppManagedRuntimeRegistry();
|
|
25
|
+
let inFlightConfiguredStartCount = 0;
|
|
21
26
|
|
|
22
27
|
function isPlainObject(value) {
|
|
23
28
|
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
@@ -39,6 +44,34 @@ function normalizePathEntries(entries) {
|
|
|
39
44
|
: [];
|
|
40
45
|
}
|
|
41
46
|
|
|
47
|
+
/**
 * Build the identity string used to tell launch profiles apart.
 *
 * The result joins the normalized command, host and port with `|`, followed by
 * the truthy launch arguments joined with the unit-separator character.
 *
 * @param {object} [options]
 * @param {string} [options.command] - Runtime executable.
 * @param {string} [options.host] - Host the runtime binds to.
 * @param {number|string} [options.port] - Port; falls back to LLAMACPP_DEFAULT_PORT via normalizePort.
 * @param {Array} [options.args] - Launch arguments; a non-array is treated as empty.
 * @returns {string} Profile identity string.
 */
function buildRuntimeProfileHash({ command, host, port, args = [] } = {}) {
  const commandPart = normalizeString(command);
  const hostPart = normalizeString(host);
  const portPart = String(normalizePort(port, LLAMACPP_DEFAULT_PORT));
  // Falsy entries are dropped before joining so they never contribute to the identity.
  const argsPart = (Array.isArray(args) ? args.filter(Boolean) : []).join("\u001f");
  return `${commandPart}|${hostPart}|${portPart}|${argsPart}`;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Report whether a tracked runtime instance still looks alive.
 *
 * An instance counts as alive only when it carries a child handle whose
 * `exitCode` is exactly `null` and whose `killed` flag is not `true`.
 *
 * @param {object} [instance] - Registry entry that may hold a `child` handle.
 * @returns {boolean} `true` when the child has neither exited nor been flagged as killed.
 */
function isManagedRuntimeAlive(instance) {
  const processHandle = instance?.child;
  if (!processHandle) {
    return false;
  }
  if (processHandle.exitCode !== null) {
    return false;
  }
  return processHandle.killed !== true;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Normalize the result of a "list listening pids" lookup.
 *
 * Accepted shapes:
 * - an object with `ok === false` -> `{ ok: false, pids: [] }` (failure is passed through as an object);
 * - an array of pid-like values -> array of positive integers;
 * - an object with a `pids` array -> array of positive integers;
 * - anything else -> `[]`.
 *
 * @param {*} result - Raw lookup result.
 * @returns {number[]|{ok: false, pids: number[]}} Cleaned pid list, or the failure marker.
 */
function normalizeListeningPidResult(result) {
  if (!result || typeof result !== "object") {
    return [];
  }
  // Checked before the array branch on purpose: arrays are objects too.
  if (result.ok === false) {
    return { ok: false, pids: [] };
  }

  const candidates = Array.isArray(result) ? result : result.pids;
  if (!Array.isArray(candidates)) {
    return [];
  }

  const pids = [];
  for (const value of candidates) {
    const pid = Number(value);
    if (Number.isInteger(pid) && pid > 0) {
      pids.push(pid);
    }
  }
  return pids;
}
|
|
74
|
+
|
|
42
75
|
function readConfiguredLlamacppRuntime(config) {
|
|
43
76
|
const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
|
|
44
77
|
if (!isPlainObject(runtime)) {
|
|
@@ -72,6 +105,8 @@ function buildPreloadModels(config) {
|
|
|
72
105
|
if (!modelPath) continue;
|
|
73
106
|
preloadModels.push({
|
|
74
107
|
variantId: normalizeString(variant.id),
|
|
108
|
+
variant,
|
|
109
|
+
baseModel,
|
|
75
110
|
modelPath,
|
|
76
111
|
contextWindow: Number.isFinite(Number(variant.contextWindow)) ? Number(variant.contextWindow) : undefined
|
|
77
112
|
});
|
|
@@ -79,6 +114,15 @@ function buildPreloadModels(config) {
|
|
|
79
114
|
return preloadModels;
|
|
80
115
|
}
|
|
81
116
|
|
|
117
|
+
/**
 * Resolve the host characteristics handed to the llama.cpp launch-profile derivation.
 *
 * Caller-supplied values win; anything missing falls back to the current process / OS.
 *
 * @param {object} [system] - Optional overrides (used for injection by callers).
 * @param {string} [system.platform] - Platform name; defaults to `process.platform`.
 * @param {boolean} [system.unifiedMemory] - Set to `true` to force unified memory.
 * @param {number} [system.totalMemoryBytes] - Total memory; must be a positive finite number to be used.
 * @returns {{platform: string, unifiedMemory: boolean, totalMemoryBytes: number}}
 */
function detectLlamacppSystemProfile(system = {}) {
  const platform = normalizeString(system?.platform) || process.platform;
  const reportedMemoryBytes = Number(system?.totalMemoryBytes);
  const hasReportedMemory = Number.isFinite(reportedMemoryBytes) && reportedMemoryBytes > 0;

  return {
    platform,
    // Infer from the resolved platform (not process.platform) so that an injected
    // platform and the unifiedMemory flag in the same profile cannot contradict
    // each other.
    unifiedMemory: system?.unifiedMemory === true || platform === "darwin",
    totalMemoryBytes: hasReportedMemory ? reportedMemoryBytes : os.totalmem()
  };
}
|
|
125
|
+
|
|
82
126
|
export function detectLlamacppCandidates({
|
|
83
127
|
envPathEntries = process.env.PATH?.split(path.delimiter) || [],
|
|
84
128
|
homeDir = os.homedir(),
|
|
@@ -122,16 +166,18 @@ export function buildLlamacppLaunchArgs({
|
|
|
122
166
|
command,
|
|
123
167
|
host = LLAMACPP_DEFAULT_HOST,
|
|
124
168
|
port = LLAMACPP_DEFAULT_PORT,
|
|
125
|
-
preloadModels = []
|
|
169
|
+
preloadModels = [],
|
|
170
|
+
launchProfile = null
|
|
126
171
|
} = {}) {
|
|
127
172
|
const firstModel = Array.isArray(preloadModels) ? preloadModels[0] : null;
|
|
128
173
|
const args = [
|
|
129
174
|
normalizeString(command),
|
|
130
175
|
"--host", normalizeString(host) || LLAMACPP_DEFAULT_HOST,
|
|
131
|
-
"--port", String(normalizePort(port, LLAMACPP_DEFAULT_PORT))
|
|
176
|
+
"--port", String(normalizePort(port, LLAMACPP_DEFAULT_PORT)),
|
|
177
|
+
...((Array.isArray(launchProfile?.args) ? launchProfile.args : []).filter(Boolean))
|
|
132
178
|
];
|
|
133
179
|
|
|
134
|
-
if (firstModel?.modelPath) {
|
|
180
|
+
if (!launchProfile && firstModel?.modelPath) {
|
|
135
181
|
args.push("-m", firstModel.modelPath);
|
|
136
182
|
if (Number.isFinite(Number(firstModel.contextWindow)) && Number(firstModel.contextWindow) > 0) {
|
|
137
183
|
args.push("-c", String(Math.floor(Number(firstModel.contextWindow))));
|
|
@@ -141,6 +187,31 @@ export function buildLlamacppLaunchArgs({
|
|
|
141
187
|
return args.filter(Boolean);
|
|
142
188
|
}
|
|
143
189
|
|
|
190
|
+
/**
 * Spawn a llama.cpp server process for the given launch profile.
 *
 * Host and port are normalized once, with the same helpers and defaults that
 * buildLlamacppLaunchArgs applies, and the normalized values are used both for
 * the launch arguments and for the returned `host` / `port` / `baseUrl`, so
 * the reported endpoint always matches what was actually launched.
 *
 * @param {object} [options]
 * @param {string} [options.command] - Runtime executable.
 * @param {string} [options.host] - Bind host; blank values fall back to LLAMACPP_DEFAULT_HOST.
 * @param {number|string} [options.port] - Bind port; invalid values fall back to LLAMACPP_DEFAULT_PORT.
 * @param {object} [options.launchProfile] - Profile whose `args` are appended to the launch arguments.
 * @param {object} [deps]
 * @param {Function} [deps.spawnImpl] - Replacement for `child_process.spawn` (injection point).
 * @returns {Promise<{pid: (number|undefined), child: object, host: string, port: *, baseUrl: string, args: string[]}>}
 */
export async function spawnManagedLlamacppRuntime({
  command,
  host = LLAMACPP_DEFAULT_HOST,
  port = LLAMACPP_DEFAULT_PORT,
  launchProfile
} = {}, {
  spawnImpl = spawn
} = {}) {
  const resolvedHost = normalizeString(host) || LLAMACPP_DEFAULT_HOST;
  const resolvedPort = normalizePort(port, LLAMACPP_DEFAULT_PORT);
  const args = buildLlamacppLaunchArgs({
    command,
    host: resolvedHost,
    port: resolvedPort,
    launchProfile
  });

  // NOTE(review): no "error" listener is attached to the child here. Per the
  // Node.js child_process docs a failed spawn is reported through an "error"
  // event, so callers presumably need to attach their own handler — confirm.
  const child = spawnImpl(args[0], args.slice(1), { stdio: "ignore" });

  return {
    pid: child?.pid,
    child,
    host: resolvedHost,
    port: resolvedPort,
    baseUrl: `http://${resolvedHost}:${resolvedPort}/v1`,
    args
  };
}
|
|
214
|
+
|
|
144
215
|
export function parseLlamacppValidationOutput(output = "") {
|
|
145
216
|
const text = String(output || "").trim();
|
|
146
217
|
const lowered = text.toLowerCase();
|
|
@@ -199,78 +270,142 @@ async function startConfiguredRuntime(config, {
|
|
|
199
270
|
requireAutostart = true
|
|
200
271
|
} = {}, {
|
|
201
272
|
spawnSyncImpl = spawnSync,
|
|
202
|
-
spawnImpl = spawn
|
|
273
|
+
spawnImpl = spawn,
|
|
274
|
+
system = undefined,
|
|
275
|
+
listListeningPids = undefined,
|
|
276
|
+
stopProcessByPid = undefined
|
|
203
277
|
} = {}) {
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
const errorMessage = "llama.cpp autostart is enabled, but no runtime command is configured.";
|
|
211
|
-
error(errorMessage);
|
|
212
|
-
return { ok: false, errorMessage };
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
if (managedLlamacppRuntime
|
|
216
|
-
&& managedLlamacppRuntime.command === runtime.command
|
|
217
|
-
&& managedLlamacppRuntime.host === runtime.host
|
|
218
|
-
&& managedLlamacppRuntime.port === runtime.port
|
|
219
|
-
&& managedLlamacppRuntime.child?.exitCode === null
|
|
220
|
-
&& managedLlamacppRuntime.child?.killed !== true) {
|
|
221
|
-
return { ok: true, alreadyRunning: true, runtime: managedLlamacppRuntime };
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
const validation = validateLlamacppCommand(runtime.command, { spawnSyncImpl });
|
|
225
|
-
if (!validation.ok) {
|
|
226
|
-
error(validation.errorMessage || `Failed validating llama.cpp runtime '${runtime.command}'.`);
|
|
227
|
-
return validation;
|
|
228
|
-
}
|
|
278
|
+
inFlightConfiguredStartCount += 1;
|
|
279
|
+
try {
|
|
280
|
+
const runtime = readConfiguredLlamacppRuntime(config);
|
|
281
|
+
if (requireAutostart && !runtime.startWithRouter) {
|
|
282
|
+
return { ok: true, skipped: true, reason: "autostart-disabled" };
|
|
283
|
+
}
|
|
229
284
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
preloadModels
|
|
236
|
-
});
|
|
285
|
+
if (!runtime.command) {
|
|
286
|
+
const errorMessage = "llama.cpp autostart is enabled, but no runtime command is configured.";
|
|
287
|
+
error(errorMessage);
|
|
288
|
+
return { ok: false, errorMessage };
|
|
289
|
+
}
|
|
237
290
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
const
|
|
241
|
-
|
|
291
|
+
const preloadModels = buildPreloadModels(config);
|
|
292
|
+
const firstModel = Array.isArray(preloadModels) ? preloadModels[0] : null;
|
|
293
|
+
const launchProfile = firstModel?.variant && firstModel?.baseModel
|
|
294
|
+
? deriveLlamacppLaunchProfile({
|
|
295
|
+
variant: firstModel.variant,
|
|
296
|
+
baseModel: firstModel.baseModel,
|
|
297
|
+
system: detectLlamacppSystemProfile(system)
|
|
298
|
+
})
|
|
299
|
+
: null;
|
|
300
|
+
const args = buildLlamacppLaunchArgs({
|
|
301
|
+
command: runtime.command,
|
|
302
|
+
host: runtime.host,
|
|
303
|
+
port: runtime.port,
|
|
304
|
+
preloadModels,
|
|
305
|
+
launchProfile
|
|
306
|
+
});
|
|
307
|
+
const variantKey = normalizeString(firstModel?.variant?.key || firstModel?.variantId) || "default";
|
|
308
|
+
const profileHash = buildRuntimeProfileHash({
|
|
309
|
+
command: runtime.command,
|
|
310
|
+
host: runtime.host,
|
|
311
|
+
port: runtime.port,
|
|
312
|
+
args: args.slice(1)
|
|
313
|
+
});
|
|
314
|
+
const listListeningPidsFn = typeof listListeningPids === "function"
|
|
315
|
+
? listListeningPids
|
|
316
|
+
: (port) => listListeningPidsForPort(port, { spawnSync: spawnSyncImpl });
|
|
317
|
+
const stopProcessByPidFn = typeof stopProcessByPid === "function"
|
|
318
|
+
? stopProcessByPid
|
|
319
|
+
: (pid) => stopProcessByPidForRuntime(pid);
|
|
320
|
+
await managedLlamacppRuntimeRegistry.reconcile({
|
|
321
|
+
listListeningPids: async (port) => normalizeListeningPidResult(await listListeningPidsFn(port)),
|
|
322
|
+
stopProcessByPid: async (pid) => stopProcessByPidFn(pid)
|
|
242
323
|
});
|
|
243
324
|
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
325
|
+
const existing = managedLlamacppRuntimeRegistry
|
|
326
|
+
.snapshot()
|
|
327
|
+
.find((instance) => (
|
|
328
|
+
instance.variantKey === variantKey
|
|
329
|
+
&& instance.profileHash === profileHash
|
|
330
|
+
&& isManagedRuntimeAlive(instance)
|
|
331
|
+
));
|
|
332
|
+
if (existing) {
|
|
333
|
+
return { ok: true, alreadyRunning: true, runtime: existing };
|
|
334
|
+
}
|
|
249
335
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
336
|
+
const validation = validateLlamacppCommand(runtime.command, { spawnSyncImpl });
|
|
337
|
+
if (!validation.ok) {
|
|
338
|
+
error(validation.errorMessage || `Failed validating llama.cpp runtime '${runtime.command}'.`);
|
|
339
|
+
return validation;
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
try {
|
|
343
|
+
const managedRuntime = await managedLlamacppRuntimeRegistry.ensureRuntimeForVariant({
|
|
344
|
+
variantKey,
|
|
345
|
+
profileHash,
|
|
346
|
+
launchArgs: args.slice(1),
|
|
347
|
+
preferredPort: runtime.port
|
|
348
|
+
}, {
|
|
349
|
+
spawnRuntime: async ({ port }) => new Promise((resolve, reject) => {
|
|
350
|
+
let settled = false;
|
|
351
|
+
const allocatedArgs = buildLlamacppLaunchArgs({
|
|
352
|
+
command: runtime.command,
|
|
353
|
+
host: runtime.host,
|
|
354
|
+
port,
|
|
355
|
+
preloadModels,
|
|
356
|
+
launchProfile
|
|
357
|
+
});
|
|
358
|
+
const child = spawnImpl(allocatedArgs[0], allocatedArgs.slice(1), {
|
|
359
|
+
stdio: "ignore"
|
|
360
|
+
});
|
|
361
|
+
const expectedInstanceId = `${variantKey}:${profileHash}:${port}`;
|
|
362
|
+
if (child && child.__llamacppManagedExitHookAttached !== true) {
|
|
363
|
+
child.__llamacppManagedExitHookAttached = true;
|
|
364
|
+
child.once("exit", () => {
|
|
365
|
+
void managedLlamacppRuntimeRegistry.untrackInstance(expectedInstanceId);
|
|
366
|
+
});
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
const settleResolve = (value) => {
|
|
370
|
+
if (settled) return;
|
|
371
|
+
settled = true;
|
|
372
|
+
resolve(value);
|
|
373
|
+
};
|
|
374
|
+
const settleReject = (reason) => {
|
|
375
|
+
if (settled) return;
|
|
376
|
+
settled = true;
|
|
377
|
+
reject(reason);
|
|
378
|
+
};
|
|
379
|
+
|
|
380
|
+
child.once("spawn", () => {
|
|
381
|
+
if (typeof child.unref === "function") child.unref();
|
|
382
|
+
settleResolve({
|
|
383
|
+
pid: child?.pid,
|
|
384
|
+
child,
|
|
385
|
+
command: runtime.command,
|
|
386
|
+
host: runtime.host,
|
|
387
|
+
port,
|
|
388
|
+
args: allocatedArgs,
|
|
389
|
+
baseUrl: `http://${runtime.host}:${port}/v1`
|
|
390
|
+
});
|
|
391
|
+
});
|
|
392
|
+
child.once("error", (spawnError) => {
|
|
393
|
+
settleReject(spawnError);
|
|
394
|
+
});
|
|
395
|
+
}),
|
|
396
|
+
waitForHealthy: async (instance) => instance
|
|
262
397
|
});
|
|
263
|
-
if (typeof child.unref === "function") child.unref();
|
|
264
|
-
line(`Started llama.cpp runtime on http://${runtime.host}:${runtime.port}${validation.isTurboQuant ? " (TurboQuant detected)" : ""}.`);
|
|
265
|
-
finish({ ok: true, runtime: managedLlamacppRuntime, validation });
|
|
266
|
-
});
|
|
267
398
|
|
|
268
|
-
|
|
399
|
+
line(`Started llama.cpp runtime on http://${managedRuntime.host}:${managedRuntime.port}${validation.isTurboQuant ? " (TurboQuant detected)" : ""}.`);
|
|
400
|
+
return { ok: true, runtime: managedRuntime, validation };
|
|
401
|
+
} catch (spawnError) {
|
|
269
402
|
const errorMessage = spawnError instanceof Error ? spawnError.message : String(spawnError);
|
|
270
403
|
error(`Failed starting llama.cpp runtime: ${errorMessage}`);
|
|
271
|
-
|
|
272
|
-
}
|
|
273
|
-
}
|
|
404
|
+
return { ok: false, errorMessage };
|
|
405
|
+
}
|
|
406
|
+
} finally {
|
|
407
|
+
inFlightConfiguredStartCount = Math.max(0, inFlightConfiguredStartCount - 1);
|
|
408
|
+
}
|
|
274
409
|
}
|
|
275
410
|
|
|
276
411
|
export async function ensureConfiguredLlamacppRuntimeStarted(config, callbacks = {}, deps = {}) {
|
|
@@ -287,23 +422,66 @@ export async function startConfiguredLlamacppRuntime(config, callbacks = {}, dep
|
|
|
287
422
|
}, deps);
|
|
288
423
|
}
|
|
289
424
|
|
|
425
|
+
/**
 * Return a JSON-safe copy of every instance currently held by the managed
 * runtime registry.
 *
 * The `child` handle is removed from each entry, and the remainder is
 * round-tripped through JSON so the result holds only plain serializable data.
 *
 * @returns {object[]} One plain object per registry instance.
 */
export function getManagedLlamacppRuntimeSnapshot() {
  const toSerializable = (instance) => {
    const withoutChild = Object.fromEntries(
      Object.entries(instance || {}).filter(([key]) => key !== "child")
    );
    return JSON.parse(JSON.stringify(withoutChild));
  };
  return managedLlamacppRuntimeRegistry.snapshot().map((instance) => toSerializable(instance));
}
|
|
431
|
+
|
|
290
432
|
/**
 * Stop every runtime tracked by the managed llama.cpp registry.
 *
 * Waits for in-flight configured starts to finish, then sends SIGTERM to each
 * instance owned by "llm-router" that exposes a `kill` function. Instances
 * that no longer look alive are untracked; the rest are counted as pending.
 *
 * @param {object} [callbacks]
 * @param {Function} [callbacks.line] - Receives informational messages.
 * @param {Function} [callbacks.error] - Receives failure messages.
 * @returns {Promise<object>} `{ ok, skipped, reason }` when nothing is tracked,
 *   otherwise `{ ok, stoppedCount, pendingExitCount }` plus `errorMessage`
 *   when at least one instance failed to stop.
 */
export async function stopManagedLlamacppRuntime({
  line = () => {},
  error = () => {}
} = {}) {
  // Let any configured start that is still running settle before inspecting the registry.
  while (inFlightConfiguredStartCount > 0) {
    await delay(0);
  }
  if (typeof managedLlamacppRuntimeRegistry.waitForInFlightStarts === "function") {
    await managedLlamacppRuntimeRegistry.waitForInFlightStarts();
  }

  const tracked = managedLlamacppRuntimeRegistry.snapshot();
  if (tracked.length === 0) {
    return { ok: true, skipped: true, reason: "not-running" };
  }

  const problems = [];
  let signaled = 0;
  let stillRunning = 0;

  for (const instance of tracked) {
    try {
      const canSignal = instance?.owner === "llm-router"
        && typeof instance?.child?.kill === "function";
      if (canSignal && instance.child.kill("SIGTERM") !== false) {
        signaled += 1;
      }

      // NOTE(review): isManagedRuntimeAlive treats `killed === true` as "not alive";
      // per the Node.js docs that flag only means a signal was delivered, so an
      // instance may be untracked here before its process has exited — confirm intent.
      if (isManagedRuntimeAlive(instance)) {
        stillRunning += 1;
      } else {
        await managedLlamacppRuntimeRegistry.untrackInstance(instance?.instanceId);
      }
    } catch (stopError) {
      const errorMessage = stopError instanceof Error ? stopError.message : String(stopError);
      problems.push(errorMessage);
      error(`Failed stopping llama.cpp runtime: ${errorMessage}`);
    }
  }

  if (signaled > 0) {
    const single = signaled === 1;
    let message;
    if (stillRunning === 0) {
      message = single
        ? "Stopped managed llama.cpp runtime."
        : `Stopped ${signaled} managed llama.cpp runtimes.`;
    } else {
      message = single
        ? "Stop signal sent to managed llama.cpp runtime; waiting for exit."
        : `Stop signal sent to ${signaled} managed llama.cpp runtimes; waiting for exits.`;
    }
    line(message);
  }

  const summary = {
    ok: problems.length === 0 && stillRunning === 0,
    stoppedCount: signaled,
    pendingExitCount: stillRunning
  };
  if (problems.length > 0) {
    summary.errorMessage = problems.join("; ");
  }
  return summary;
}
|
|
@@ -212,19 +212,42 @@ export async function saveLocalModelVariant(config, draft, {
|
|
|
212
212
|
activeVariants,
|
|
213
213
|
totalMemoryBytes: system.totalMemoryBytes
|
|
214
214
|
});
|
|
215
|
-
|
|
215
|
+
if (!decision.allowed) {
|
|
216
216
|
throw new Error(decision.reason);
|
|
217
217
|
}
|
|
218
218
|
}
|
|
219
219
|
|
|
220
|
+
const normalizedMetadata = normalizeLocalModelsMetadata({
|
|
221
|
+
variants: {
|
|
222
|
+
draft: normalizedDraft
|
|
223
|
+
}
|
|
224
|
+
});
|
|
225
|
+
const normalizedVariantDraft = Object.values(normalizedMetadata.variants)[0] || {};
|
|
226
|
+
const previousVariant = isPlainObject(next.metadata.localModels.variants[key])
|
|
227
|
+
? next.metadata.localModels.variants[key]
|
|
228
|
+
: {};
|
|
229
|
+
|
|
220
230
|
next.metadata.localModels.variants[key] = {
|
|
221
|
-
...
|
|
231
|
+
...previousVariant,
|
|
222
232
|
key,
|
|
223
233
|
baseModelId,
|
|
224
234
|
id: modelId,
|
|
225
235
|
name,
|
|
226
236
|
runtime,
|
|
227
237
|
preset: normalizeString(normalizedDraft.preset),
|
|
238
|
+
runtimeProfile: runtime === "llamacpp"
|
|
239
|
+
? normalizedVariantDraft.runtimeProfile
|
|
240
|
+
: undefined,
|
|
241
|
+
runtimeStatus: runtime === "llamacpp"
|
|
242
|
+
? (isPlainObject(previousVariant.runtimeStatus)
|
|
243
|
+
? previousVariant.runtimeStatus
|
|
244
|
+
: {
|
|
245
|
+
activeInstanceId: "",
|
|
246
|
+
lastFailure: null,
|
|
247
|
+
lastStartedAt: "",
|
|
248
|
+
lastHealthyAt: ""
|
|
249
|
+
})
|
|
250
|
+
: undefined,
|
|
228
251
|
enabled: normalizedDraft.enabled === true,
|
|
229
252
|
preload: normalizedDraft.preload === true,
|
|
230
253
|
contextWindow: Number.isFinite(Number(normalizedDraft.contextWindow)) ? Number(normalizedDraft.contextWindow) : undefined,
|
package/src/node/local-server.js
CHANGED
|
@@ -13,6 +13,10 @@ import { readActivityLogSettings } from "../shared/local-router-defaults.js";
|
|
|
13
13
|
import { appendActivityLogEntry, resolveActivityLogPath } from "./activity-log.js";
|
|
14
14
|
import { appendLargeRequestLogEntry, resolveLargeRequestLogPath } from "./large-request-log.js";
|
|
15
15
|
import { isLargeRequestLoggingEnabled } from "../runtime/handler/large-request-log.js";
|
|
16
|
+
import {
|
|
17
|
+
startConfiguredLlamacppRuntime,
|
|
18
|
+
stopManagedLlamacppRuntime
|
|
19
|
+
} from "./llamacpp-runtime.js";
|
|
16
20
|
|
|
17
21
|
const DEFAULT_CONFIG_RELOAD_DEBOUNCE_MS = 300;
|
|
18
22
|
const MAX_CONFIG_RELOAD_DEBOUNCE_MS = 5000;
|
|
@@ -34,6 +38,10 @@ function formatError(error) {
|
|
|
34
38
|
return error instanceof Error ? error.message : String(error);
|
|
35
39
|
}
|
|
36
40
|
|
|
41
|
+
/**
 * Coerce a value to a trimmed string.
 *
 * @param {*} value - Any value.
 * @returns {string} The trimmed string, or "" when `value` is not a string.
 */
function normalizeString(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
|
|
44
|
+
|
|
37
45
|
function createLiveConfigStore({
|
|
38
46
|
configPath,
|
|
39
47
|
watchConfig = true,
|
|
@@ -237,6 +245,39 @@ async function writeFetchResponseToNode(res, response) {
|
|
|
237
245
|
readable.pipe(res);
|
|
238
246
|
}
|
|
239
247
|
|
|
248
|
+
/**
 * Build a minimal config that targets exactly one llama.cpp variant.
 *
 * The result contains only the llama.cpp runtime settings, the variant's base
 * model, and the variant itself forced to `enabled: true` / `preload: true`.
 *
 * @param {object} config - Full router config; reads `metadata.localModels`.
 * @param {string} variantKey - Key of the variant under `metadata.localModels.variants`.
 * @returns {object|null} The targeted config, or `null` when the llama.cpp
 *   runtime settings, the variant (with `runtime === "llamacpp"`), or its base
 *   model cannot be found.
 */
function buildVariantLlamacppRuntimeConfig(config, variantKey) {
  // Own-property lookup only: a key such as "constructor" or "toString" must not
  // resolve to an inherited Object.prototype member and pass the existence guards.
  const readOwn = (container, key) => {
    if (container === null || typeof container !== "object") return undefined;
    return Object.prototype.hasOwnProperty.call(container, key) ? container[key] : undefined;
  };

  const localModels = config?.metadata?.localModels;
  const runtime = localModels?.runtime?.llamacpp;
  const normalizedVariantKey = normalizeString(variantKey);
  const variant = readOwn(localModels?.variants, normalizedVariantKey);
  if (!runtime || !variant || variant.runtime !== "llamacpp") return null;

  const baseModelId = normalizeString(variant?.baseModelId);
  const baseModel = readOwn(localModels?.library, baseModelId);
  if (!baseModel) return null;

  return {
    metadata: {
      localModels: {
        runtime: {
          llamacpp: { ...runtime }
        },
        library: {
          [baseModelId]: { ...baseModel }
        },
        variants: {
          [normalizedVariantKey]: {
            ...variant,
            enabled: true,
            preload: true
          }
        }
      }
    }
  };
}
|
|
280
|
+
|
|
240
281
|
export async function startLocalRouteServer({
|
|
241
282
|
port = FIXED_LOCAL_ROUTER_PORT,
|
|
242
283
|
host = FIXED_LOCAL_ROUTER_HOST,
|
|
@@ -248,7 +289,10 @@ export async function startLocalRouteServer({
|
|
|
248
289
|
validateConfig,
|
|
249
290
|
onConfigReload,
|
|
250
291
|
onConfigReloadError,
|
|
251
|
-
requireAuth = false
|
|
292
|
+
requireAuth = false,
|
|
293
|
+
createFetchHandlerImpl = createFetchHandler,
|
|
294
|
+
startConfiguredLlamacppRuntimeImpl = startConfiguredLlamacppRuntime,
|
|
295
|
+
stopManagedLlamacppRuntimeImpl = stopManagedLlamacppRuntime
|
|
252
296
|
} = {}) {
|
|
253
297
|
const reloadDebounceMs = resolveReloadDebounceMs(configReloadDebounceMs);
|
|
254
298
|
const resolvedActivityLogPath = resolveActivityLogPath(configPath, activityLogPath);
|
|
@@ -270,9 +314,22 @@ export async function startLocalRouteServer({
|
|
|
270
314
|
const initialConfig = await configStore.getConfig();
|
|
271
315
|
activityLogEnabled = readActivityLogSettings(initialConfig).enabled;
|
|
272
316
|
|
|
273
|
-
const fetchHandler =
|
|
317
|
+
const fetchHandler = createFetchHandlerImpl({
|
|
274
318
|
ignoreAuth: !requireAuth,
|
|
319
|
+
runtime: "node",
|
|
275
320
|
getConfig: () => configStore.getConfig(),
|
|
321
|
+
resolveLocalRuntimeBaseUrl: async ({ candidate }) => {
|
|
322
|
+
const variantKey = candidate?.model?.metadata?.localVariantKey;
|
|
323
|
+
const config = await configStore.getConfig();
|
|
324
|
+
const targetedConfig = buildVariantLlamacppRuntimeConfig(config, variantKey);
|
|
325
|
+
if (!targetedConfig) return "";
|
|
326
|
+
|
|
327
|
+
const started = await startConfiguredLlamacppRuntimeImpl(targetedConfig);
|
|
328
|
+
if (!started?.ok) {
|
|
329
|
+
throw new Error(started?.errorMessage || `Failed starting local runtime for ${normalizeString(variantKey) || "unknown variant"}.`);
|
|
330
|
+
}
|
|
331
|
+
return normalizeString(started?.runtime?.baseUrl);
|
|
332
|
+
},
|
|
276
333
|
defaultStateStoreBackend: "file",
|
|
277
334
|
onActivityLog: (entry) => {
|
|
278
335
|
if (!activityLogEnabled) return;
|
|
@@ -355,6 +412,7 @@ export async function startLocalRouteServer({
|
|
|
355
412
|
server.close = (callback) => {
|
|
356
413
|
shuttingDown = true;
|
|
357
414
|
Promise.resolve()
|
|
415
|
+
.then(() => stopManagedLlamacppRuntimeImpl().catch(() => {}))
|
|
358
416
|
.then(() => configStore.close())
|
|
359
417
|
.then(() => (typeof fetchHandler.close === "function" ? fetchHandler.close() : undefined))
|
|
360
418
|
.finally(() => {
|