@khanglvm/llm-router 2.4.1 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,309 @@
1
+ import path from "node:path";
2
+ import os from "node:os";
3
+ import { existsSync } from "node:fs";
4
+ import { spawn, spawnSync } from "node:child_process";
5
+
6
// Default bind address/port used when the config does not specify one.
export const LLAMACPP_DEFAULT_HOST = "127.0.0.1";
export const LLAMACPP_DEFAULT_PORT = 39391;
// Executable name looked up on each PATH entry during candidate detection.
const LLAMACPP_EXECUTABLE = "llama-server";
// Well-known Homebrew install locations probed in addition to PATH.
const FALLBACK_LLAMACPP_PATHS = Object.freeze([
  "/opt/homebrew/bin/llama-server",
  "/usr/local/bin/llama-server"
]);
// Common source-checkout build outputs, resolved relative to the home directory.
const COMMON_SOURCE_BUILD_PATHS = Object.freeze([
  "src/llama-cpp/build/bin/llama-server",
  "src/llama.cpp/build/bin/llama-server",
  "src/llama-cpp-turboquant/build/bin/llama-server",
  "src/llama.cpp-turboquant/build/bin/llama-server"
]);

// Handle for the single llama.cpp child process this module manages
// ({ child, command, host, port, args }), or null when none is running.
let managedLlamacppRuntime = null;
21
+
22
// True only for non-null, non-array objects (i.e. "plain" object shapes).
function isPlainObject(value) {
  if (!value) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
25
+
26
// Trims string input; any non-string collapses to the empty string.
function normalizeString(value) {
  if (typeof value !== "string") return "";
  return value.trim();
}
29
+
30
// Coerces a value to a valid TCP port (integer 1..65535); otherwise the fallback.
function normalizePort(value, fallback = LLAMACPP_DEFAULT_PORT) {
  const candidate = Number(value);
  const isValidPort = Number.isInteger(candidate) && candidate >= 1 && candidate <= 65535;
  return isValidPort ? candidate : fallback;
}
35
+
36
// Trims each entry and drops empties/non-strings; non-arrays yield [].
function normalizePathEntries(entries) {
  if (!Array.isArray(entries)) return [];
  const cleaned = [];
  for (const entry of entries) {
    const trimmed = normalizeString(entry);
    if (trimmed) cleaned.push(trimmed);
  }
  return cleaned;
}
41
+
42
/**
 * Extracts the llama.cpp runtime settings from the router config.
 * Falls back to disabled autostart plus default host/port when the
 * `metadata.localModels.runtime.llamacpp` node is absent or malformed.
 *
 * @param {object|null|undefined} config router configuration object
 * @returns {{ startWithRouter: boolean, command: string, host: string, port: number }}
 */
function readConfiguredLlamacppRuntime(config) {
  const runtime = config?.metadata?.localModels?.runtime?.llamacpp;
  if (!isPlainObject(runtime)) {
    return {
      startWithRouter: false,
      command: "",
      host: LLAMACPP_DEFAULT_HOST,
      port: LLAMACPP_DEFAULT_PORT
    };
  }

  // Several legacy key names are accepted for the command; first non-empty wins.
  const command = normalizeString(
    runtime.selectedCommand || runtime.manualCommand || runtime.command || runtime.path
  );
  const host = normalizeString(runtime.host) || LLAMACPP_DEFAULT_HOST;

  return {
    startWithRouter: runtime.startWithRouter === true,
    command,
    host,
    port: normalizePort(runtime.port, LLAMACPP_DEFAULT_PORT)
  };
}
60
+
61
/**
 * Collects the enabled llama.cpp variants flagged for preload, resolving each
 * variant's base model path from the local-model library.
 *
 * @param {object|null|undefined} config router configuration object
 * @returns {Array<{ variantId: string, modelPath: string, contextWindow: number|undefined }>}
 */
function buildPreloadModels(config) {
  const localModels = config?.metadata?.localModels;
  const library = localModels?.library;
  const variants = localModels?.variants;
  if (!isPlainObject(library) || !isPlainObject(variants)) return [];

  const models = [];
  for (const variant of Object.values(variants)) {
    if (!isPlainObject(variant)) continue;
    const wanted =
      variant.runtime === "llamacpp" && variant.preload === true && variant.enabled === true;
    if (!wanted) continue;
    // Variants without a resolvable base-model path cannot be preloaded.
    const modelPath = normalizeString(library[variant.baseModelId]?.path);
    if (!modelPath) continue;
    const rawContext = Number(variant.contextWindow);
    models.push({
      variantId: normalizeString(variant.id),
      modelPath,
      contextWindow: Number.isFinite(rawContext) ? rawContext : undefined
    });
  }
  return models;
}
81
+
82
/**
 * Probes PATH entries, Homebrew locations, and common source-build output
 * directories for a `llama-server` binary, deduplicating by absolute path.
 *
 * @param {object} [options]
 * @param {string[]} [options.envPathEntries] PATH entries to probe
 * @param {string} [options.homeDir] base for source-build paths
 * @param {Set<string>|null} [options.existingPaths] when a Set, used instead of
 *   existsSync for existence checks (lets callers/tests avoid filesystem I/O)
 * @returns {Array<{ id: string, label: string, path: string, source: string }>}
 */
export function detectLlamacppCandidates({
  envPathEntries = process.env.PATH?.split(path.delimiter) || [],
  homeDir = os.homedir(),
  existingPaths = null
} = {}) {
  // Assemble probe targets in priority order: PATH, Homebrew, source builds.
  const targets = [];
  for (const entry of normalizePathEntries(envPathEntries)) {
    targets.push({ path: path.join(entry, LLAMACPP_EXECUTABLE), source: "path" });
  }
  for (const entry of FALLBACK_LLAMACPP_PATHS) {
    targets.push({ path: entry, source: "homebrew" });
  }
  for (const entry of COMMON_SOURCE_BUILD_PATHS) {
    targets.push({ path: path.join(homeDir, entry), source: "source-build" });
  }

  const visited = new Set();
  const found = [];
  for (const target of targets) {
    const candidatePath = normalizeString(target.path);
    if (visited.has(candidatePath)) continue;
    visited.add(candidatePath);
    const present = existingPaths instanceof Set
      ? existingPaths.has(candidatePath)
      : existsSync(candidatePath);
    if (!present) continue;
    found.push({
      id: candidatePath,
      label: candidatePath,
      path: candidatePath,
      source: target.source
    });
  }

  return found;
}
120
+
121
/**
 * Builds the argv for launching llama-server: [command, --host, --port, ...]
 * plus `-m`/`-c` for the first preload model, if any. Empty segments are
 * filtered out, so a blank command yields an argv without the executable.
 *
 * @returns {string[]} argv where element 0 is the executable path
 */
export function buildLlamacppLaunchArgs({
  command,
  host = LLAMACPP_DEFAULT_HOST,
  port = LLAMACPP_DEFAULT_PORT,
  preloadModels = []
} = {}) {
  const args = [normalizeString(command)];
  args.push("--host", normalizeString(host) || LLAMACPP_DEFAULT_HOST);
  args.push("--port", String(normalizePort(port, LLAMACPP_DEFAULT_PORT)));

  // Only the first preload model is passed on the command line.
  const firstModel = Array.isArray(preloadModels) ? preloadModels[0] : null;
  if (firstModel?.modelPath) {
    args.push("-m", firstModel.modelPath);
    const context = Number(firstModel.contextWindow);
    if (Number.isFinite(context) && context > 0) {
      args.push("-c", String(Math.floor(context)));
    }
  }

  return args.filter(Boolean);
}
143
+
144
/**
 * Classifies `--help` output from a candidate binary. A binary is accepted
 * (`ok`) when it identifies as llama-server (by name, or by advertising the
 * server's characteristic flag set) AND supports both --host and --port.
 *
 * @param {string} [output] combined stdout/stderr text
 * @returns {{ ok: boolean, kind: string, supportsHost: boolean, supportsPort: boolean, isTurboQuant: boolean }}
 */
export function parseLlamacppValidationOutput(output = "") {
  const text = String(output || "").trim();
  const lowered = text.toLowerCase();

  const supportsHost = /(^|\s)--host(\s|$)/m.test(text);
  const supportsPort = /(^|\s)--port(\s|$)/m.test(text);
  const referencesModelFlag = /(^|\s)(-m,\s+)?--model(\s|$)/m.test(text);

  // Either the binary names itself, or the help text looks like server help.
  const looksLikeServerHelp = supportsHost && supportsPort && referencesModelFlag;
  let kind = "";
  if (lowered.includes("llama-server") || looksLikeServerHelp) {
    kind = "server";
  }

  return {
    ok: Boolean(kind) && supportsHost && supportsPort,
    kind,
    supportsHost,
    supportsPort,
    isTurboQuant: lowered.includes("turboquant") || /\bturbo[234]\b/.test(lowered)
  };
}
161
+
162
/**
 * Validates that `command` is a usable llama-server binary by running it with
 * `--help` and inspecting the output.
 *
 * @param {string} command path or name of the candidate binary
 * @param {object} [deps]
 * @param {Function} [deps.spawnSyncImpl] injectable spawnSync (for tests)
 * @returns {{ ok: boolean, errorMessage?: string } & Partial<ReturnType<typeof parseLlamacppValidationOutput>>}
 */
export function validateLlamacppCommand(command, { spawnSyncImpl = spawnSync } = {}) {
  const target = normalizeString(command);
  if (!target) {
    return {
      ok: false,
      errorMessage: "No llama.cpp command is configured."
    };
  }

  // spawnSync blocks the event loop until the child exits, so bound the probe:
  // a misbehaving binary that never exits on --help must not hang the router.
  // On timeout the child is killed and `result.error` carries ETIMEDOUT,
  // which flows through the existing error branch below.
  const result = spawnSyncImpl(target, ["--help"], {
    encoding: "utf8",
    timeout: 10000
  });
  if (result?.error) {
    return {
      ok: false,
      errorMessage: result.error instanceof Error ? result.error.message : String(result.error)
    };
  }

  // Some binaries print help to stderr, so inspect both streams.
  const parsed = parseLlamacppValidationOutput(`${result?.stdout || ""}\n${result?.stderr || ""}`);
  if (!parsed.ok) {
    return {
      ok: false,
      errorMessage: `Command '${target}' does not appear to be a compatible llama-server binary.`,
      ...parsed
    };
  }

  return {
    ok: true,
    ...parsed
  };
}
195
+
196
/**
 * Starts the configured llama.cpp runtime as a detached-ish child process and
 * records it in the module-level `managedLlamacppRuntime` handle.
 *
 * @param {object} config router configuration (read for runtime + preload models)
 * @param {object} [callbacks]
 * @param {Function} [callbacks.line] info logger
 * @param {Function} [callbacks.error] error logger
 * @param {boolean} [callbacks.requireAutostart] when true, skip unless the
 *   config enables `startWithRouter`
 * @param {object} [deps] injectable spawn/spawnSync implementations for tests
 * @returns {Promise<object>} one of:
 *   { ok:true, skipped:true, reason } | { ok:false, errorMessage } |
 *   { ok:true, alreadyRunning:true, runtime } | { ok:true, runtime, validation }
 */
async function startConfiguredRuntime(config, {
  line = () => {},
  error = () => {},
  requireAutostart = true
} = {}, {
  spawnSyncImpl = spawnSync,
  spawnImpl = spawn
} = {}) {
  const runtime = readConfiguredLlamacppRuntime(config);
  if (requireAutostart && !runtime.startWithRouter) {
    return { ok: true, skipped: true, reason: "autostart-disabled" };
  }

  if (!runtime.command) {
    const errorMessage = "llama.cpp autostart is enabled, but no runtime command is configured.";
    error(errorMessage);
    return { ok: false, errorMessage };
  }

  // Reuse the managed child only when it matches the requested configuration
  // and is still alive (exitCode === null means it has not exited yet).
  if (managedLlamacppRuntime
    && managedLlamacppRuntime.command === runtime.command
    && managedLlamacppRuntime.host === runtime.host
    && managedLlamacppRuntime.port === runtime.port
    && managedLlamacppRuntime.child?.exitCode === null
    && managedLlamacppRuntime.child?.killed !== true) {
    return { ok: true, alreadyRunning: true, runtime: managedLlamacppRuntime };
  }

  // Probe the binary with --help before launching it for real.
  const validation = validateLlamacppCommand(runtime.command, { spawnSyncImpl });
  if (!validation.ok) {
    error(validation.errorMessage || `Failed validating llama.cpp runtime '${runtime.command}'.`);
    return validation;
  }

  const preloadModels = buildPreloadModels(config);
  const args = buildLlamacppLaunchArgs({
    command: runtime.command,
    host: runtime.host,
    port: runtime.port,
    preloadModels
  });

  // Resolve on whichever of "spawn" / "error" fires first; the `settled`
  // flag guards against resolving twice.
  return new Promise((resolve) => {
    let settled = false;
    const child = spawnImpl(args[0], args.slice(1), {
      stdio: "ignore"
    });

    const finish = (result) => {
      if (settled) return;
      settled = true;
      resolve(result);
    };

    child.once("spawn", () => {
      // Record the managed handle only once the process actually spawned.
      managedLlamacppRuntime = {
        child,
        command: runtime.command,
        host: runtime.host,
        port: runtime.port,
        args
      };
      child.once("exit", () => {
        // Clear the handle only if it still refers to this child; a newer
        // runtime may have replaced it in the meantime.
        if (managedLlamacppRuntime?.child === child) {
          managedLlamacppRuntime = null;
        }
      });
      // unref() lets the router process exit without waiting on the child.
      if (typeof child.unref === "function") child.unref();
      line(`Started llama.cpp runtime on http://${runtime.host}:${runtime.port}${validation.isTurboQuant ? " (TurboQuant detected)" : ""}.`);
      // NOTE(review): "spawn" only means the process launched; a child that
      // exits immediately afterwards still resolves ok here.
      finish({ ok: true, runtime: managedLlamacppRuntime, validation });
    });

    child.once("error", (spawnError) => {
      const errorMessage = spawnError instanceof Error ? spawnError.message : String(spawnError);
      error(`Failed starting llama.cpp runtime: ${errorMessage}`);
      finish({ ok: false, errorMessage });
    });
  });
}
275
+
276
/**
 * Starts the configured llama.cpp runtime only when autostart is enabled in
 * the config (`requireAutostart: true`); otherwise the start is skipped.
 */
export async function ensureConfiguredLlamacppRuntimeStarted(config, callbacks = {}, deps = {}) {
  const options = { ...callbacks, requireAutostart: true };
  return startConfiguredRuntime(config, options, deps);
}
282
+
283
/**
 * Starts the configured llama.cpp runtime unconditionally
 * (`requireAutostart: false` bypasses the startWithRouter check).
 */
export async function startConfiguredLlamacppRuntime(config, callbacks = {}, deps = {}) {
  const options = { ...callbacks, requireAutostart: false };
  return startConfiguredRuntime(config, options, deps);
}
289
+
290
/**
 * Sends SIGTERM to the managed llama.cpp child, if one is tracked.
 * Clears the module-level handle before killing so concurrent callers
 * immediately see the runtime as stopped.
 *
 * @returns {Promise<{ ok: boolean, skipped?: boolean, reason?: string, errorMessage?: string }>}
 */
export async function stopManagedLlamacppRuntime({
  line = () => {},
  error = () => {}
} = {}) {
  const tracked = managedLlamacppRuntime;
  if (!tracked?.child) {
    return { ok: true, skipped: true, reason: "not-running" };
  }

  managedLlamacppRuntime = null;
  try {
    tracked.child.kill("SIGTERM");
    line("Stopped managed llama.cpp runtime.");
    return { ok: true };
  } catch (stopError) {
    const errorMessage = stopError instanceof Error ? stopError.message : String(stopError);
    error(`Failed stopping llama.cpp runtime: ${errorMessage}`);
    return { ok: false, errorMessage };
  }
}
@@ -0,0 +1,132 @@
1
+ import path from "node:path";
2
+ import { execFile } from "node:child_process";
3
+ import { promises as fs } from "node:fs";
4
+
5
// Matches GGUF model files by extension, case-insensitively.
const GGUF_PATTERN = /\.gguf$/i;
6
+
7
// Trims string input; non-strings collapse to the empty string.
function normalizeString(value) {
  if (typeof value === "string") return value.trim();
  return "";
}
10
+
11
// Shapes a scan result: absolute path, base name, and size when stats are known.
function formatScanEntry(filePath, stats = null) {
  const rawSize = Number(stats?.size);
  return {
    filePath,
    fileName: path.basename(filePath),
    sizeBytes: Number.isFinite(rawSize) ? rawSize : undefined
  };
}
18
+
19
/**
 * Recursively collects `.gguf` files beneath `targetPath` into `entries`.
 * A nonexistent/unreadable root still rejects (callers should see a bad
 * path), but failures on individual children — a file deleted between
 * readdir and stat, or a permission-denied subdirectory — are skipped so
 * one bad entry cannot abort the whole scan.
 *
 * @param {string} targetPath file or directory to scan
 * @param {Array<object>} [entries] accumulator, mutated and returned
 * @returns {Promise<Array<object>>} accumulated formatScanEntry results
 */
async function collectGgufFiles(targetPath, entries = []) {
  const stats = await fs.stat(targetPath);
  if (stats.isFile()) {
    if (GGUF_PATTERN.test(targetPath)) entries.push(formatScanEntry(targetPath, stats));
    return entries;
  }

  if (!stats.isDirectory()) return entries;

  const children = await fs.readdir(targetPath, { withFileTypes: true });
  for (const child of children) {
    const childPath = path.join(targetPath, child.name);
    if (child.isDirectory()) {
      try {
        await collectGgufFiles(childPath, entries);
      } catch {
        // Unreadable subtree (e.g. permission denied) — skip it, keep scanning.
      }
      continue;
    }
    if (!child.isFile() || !GGUF_PATTERN.test(child.name)) continue;
    try {
      const childStats = await fs.stat(childPath);
      entries.push(formatScanEntry(childPath, childStats));
    } catch {
      // File vanished or became unreadable after readdir — skip it.
    }
  }
  return entries;
}
41
+
42
/**
 * Builds the AppleScript lines (one per `-e` argument) for a native macOS
 * picker. "directory" shows a folder chooser, "runtime" a binary chooser,
 * anything else a GGUF file chooser. Cancel (-128) returns "" instead of
 * raising an error.
 */
function buildBrowseAppleScript(selection) {
  let chooser;
  if (selection === "directory") {
    chooser = "choose folder with prompt \"Select a folder to scan for GGUF files\"";
  } else if (selection === "runtime") {
    chooser = "choose file with prompt \"Select a llama.cpp runtime binary (llama-server)\"";
  } else {
    chooser = "choose file with prompt \"Select a GGUF file\"";
  }

  return [
    "try",
    `POSIX path of (${chooser})`,
    "on error number -128",
    "return \"\"",
    "end try"
  ];
}
71
+
72
/**
 * Opens a native macOS file/folder picker via osascript and returns the
 * chosen POSIX path. On other platforms, or when the user cancels (empty
 * osascript output), resolves with `canceled: true`.
 *
 * @param {object} [options]
 * @param {string} [options.selection] "file" | "directory" | "runtime"
 * @param {object} [deps] injectable platform/execFile (for tests)
 * @returns {Promise<{ canceled: boolean, selection: string, path?: string, reason?: string }>}
 */
export async function browseForLocalModelPath({
  selection = "file"
} = {}, {
  platform = process.platform,
  execFileImpl = execFile
} = {}) {
  // The picker is driven by AppleScript, so bail out on other platforms.
  if (platform !== "darwin") {
    return {
      canceled: true,
      reason: "Native local-model browse is currently available on macOS only.",
      selection
    };
  }

  const osascriptArgs = buildBrowseAppleScript(selection).flatMap((scriptLine) => ["-e", scriptLine]);
  const result = await runExecFile(execFileImpl, "osascript", osascriptArgs, { encoding: "utf8" });
  const chosenPath = normalizeString(result?.stdout || "");

  return chosenPath
    ? { canceled: false, selection, path: chosenPath }
    : { canceled: true, selection };
}
100
+
101
/**
 * Scans a file or directory for GGUF models and returns the matches
 * sorted by file name. Blank/non-string input yields an empty list.
 *
 * @param {string} targetPath file or directory to scan
 * @returns {Promise<Array<object>>}
 */
export async function scanLocalModelPath(targetPath) {
  const resolvedPath = normalizeString(targetPath);
  if (!resolvedPath) return [];

  const matches = await collectGgufFiles(resolvedPath);
  matches.sort((left, right) => left.fileName.localeCompare(right.fileName));
  return matches;
}
108
/**
 * Runs a command through `execFileImpl` and resolves with { stdout, stderr },
 * adapting between callback-style and promise-style implementations.
 *
 * Dispatch order matters: Node's real `execFile` is matched by identity first,
 * presumably because its reported `.length` does not reflect the optional
 * callback parameter and the arity sniff below would misclassify it — TODO
 * confirm. Custom impls declaring >= 4 parameters are treated as
 * callback-style; anything else is assumed to return a promise directly.
 *
 * @param {Function} execFileImpl execFile-compatible implementation
 * @param {string} command executable to run
 * @param {string[]} args argument vector
 * @param {object} options execFile options (e.g. { encoding })
 * @returns {Promise<{ stdout: string, stderr: string }>}
 * @throws {Error} when execFileImpl is not a function
 */
async function runExecFile(execFileImpl, command, args, options) {
  // Node's built-in execFile: wrap its callback in a promise.
  if (execFileImpl === execFile) {
    return new Promise((resolve, reject) => {
      execFile(command, args, options, (error, stdout, stderr) => {
        if (error) reject(error);
        else resolve({ stdout, stderr });
      });
    });
  }

  if (typeof execFileImpl !== "function") {
    throw new Error("execFile implementation is required.");
  }

  // Declared arity >= 4 implies the impl expects a (error, stdout, stderr) callback.
  if (execFileImpl.length >= 4) {
    return new Promise((resolve, reject) => {
      execFileImpl(command, args, options, (error, stdout, stderr) => {
        if (error) reject(error);
        else resolve({ stdout, stderr });
      });
    });
  }

  // Otherwise assume a promise-returning implementation.
  return execFileImpl(command, args, options);
}
@@ -0,0 +1,39 @@
1
// Coerces to a finite positive number; anything else (NaN, Infinity, <= 0) is 0.
function normalizePositiveNumber(value) {
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}
6
+
7
/**
 * Estimates the memory footprint of a model variant: file size, plus a
 * per-context-token reservation (163840 bytes per token — heuristic constant,
 * TODO confirm), plus a 15% overhead when the variant is preloaded.
 */
function calculateEstimatedBytes(variant = {}) {
  const sizeBytes = normalizePositiveNumber(variant.sizeBytes);
  const contextBytes = normalizePositiveNumber(variant.contextWindow) * 163840;
  let preloadPenalty = 0;
  if (variant.preload === true) {
    preloadPenalty = Math.floor(sizeBytes * 0.15);
  }
  return sizeBytes + contextBytes + preloadPenalty;
}
14
+
15
/**
 * Rates how a variant's estimated footprint fits system memory.
 * Budgets are only enforced on macOS with unified memory: above 82% of total
 * memory is "over-budget", above 72% is "tight"; everything else is "safe".
 *
 * @returns {{ fit: "safe"|"tight"|"over-budget", estimatedBytes: number }}
 */
export function classifyVariantCapacity(variant, system = {}) {
  const estimatedBytes = calculateEstimatedBytes(variant);
  const totalMemoryBytes = normalizePositiveNumber(system.totalMemoryBytes);
  const enforceBudget = system.platform === "darwin" && system.unifiedMemory === true;

  if (enforceBudget) {
    if (estimatedBytes > Math.floor(totalMemoryBytes * 0.82)) {
      return { fit: "over-budget", estimatedBytes };
    }
    if (estimatedBytes > Math.floor(totalMemoryBytes * 0.72)) {
      return { fit: "tight", estimatedBytes };
    }
  }
  return { fit: "safe", estimatedBytes };
}
29
+
30
/**
 * Decides whether activating `candidate` keeps total active-variant memory
 * within the safe budget (72% of total memory).
 *
 * @returns {{ allowed: boolean, reason: string }} reason is "" when allowed
 */
export function canActivateVariant({ candidate, activeVariants, totalMemoryBytes }) {
  const safeBudget = Math.floor(normalizePositiveNumber(totalMemoryBytes) * 0.72);

  let committedBytes = 0;
  for (const variant of Array.isArray(activeVariants) ? activeVariants : []) {
    committedBytes += normalizePositiveNumber(variant?.estimatedBytes);
  }
  const projectedBytes = committedBytes + normalizePositiveNumber(candidate?.estimatedBytes);

  if (projectedBytes <= safeBudget) {
    return { allowed: true, reason: "" };
  }
  return { allowed: false, reason: "Enabling this variant would exceed the local capacity budget." };
}