@khanglvm/llm-router 2.4.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,132 @@
1
+ import path from "node:path";
2
+ import { execFile } from "node:child_process";
3
+ import { promises as fs } from "node:fs";
4
+
5
/** Case-insensitive matcher for names that end with the `.gguf` extension. */
const GGUF_PATTERN = new RegExp("\\.gguf$", "i");
6
+
7
/**
 * Coerces an arbitrary value to a trimmed string.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string} The trimmed text, or "" when `value` is not a string.
 */
function normalizeString(value) {
  if (typeof value !== "string") return "";
  return value.trim();
}
10
+
11
/**
 * Builds the scan result record for one file.
 *
 * @param {string} filePath - Path of the file being reported.
 * @param {{ size?: unknown } | null} [stats] - Stat-like object; only `size` is read.
 * @returns {{ filePath: string, fileName: string, sizeBytes: number | undefined }}
 *   `sizeBytes` is `undefined` when `stats.size` does not coerce to a finite number.
 */
function formatScanEntry(filePath, stats = null) {
  const numericSize = Number(stats?.size);
  const entry = {
    filePath,
    fileName: path.basename(filePath),
    sizeBytes: undefined
  };
  if (Number.isFinite(numericSize)) {
    entry.sizeBytes = numericSize;
  }
  return entry;
}
18
+
19
/**
 * Recursively collects `.gguf` files located at or below `targetPath`.
 *
 * When `targetPath` is a file it is reported only if its name matches
 * `GGUF_PATTERN`. When it is a directory, every nested real directory is
 * walked. Symbolic links whose name matches the pattern are resolved and
 * reported when they point at a regular file; symlinked directories are not
 * followed, so link cycles cannot cause unbounded recursion.
 *
 * @param {string} targetPath - File or directory to inspect.
 * @param {Array<object>} [entries] - Accumulator that results are pushed into.
 * @returns {Promise<Array<object>>} The same `entries` array, for chaining.
 * @throws Propagates errors from `fs.stat` / `fs.readdir` on real files and
 *   directories (for example a missing `targetPath`).
 */
async function collectGgufFiles(targetPath, entries = []) {
  const stats = await fs.stat(targetPath);
  if (stats.isFile()) {
    if (GGUF_PATTERN.test(targetPath)) entries.push(formatScanEntry(targetPath, stats));
    return entries;
  }

  if (!stats.isDirectory()) return entries;

  const children = await fs.readdir(targetPath, { withFileTypes: true });
  for (const child of children) {
    const childPath = path.join(targetPath, child.name);
    if (child.isDirectory()) {
      await collectGgufFiles(childPath, entries);
      continue;
    }
    if (!GGUF_PATTERN.test(child.name)) continue;

    if (child.isFile()) {
      entries.push(formatScanEntry(childPath, await fs.stat(childPath)));
      continue;
    }

    // A Dirent describes the link itself, so isFile() is false for a symlinked
    // model file. Resolve the link target with stat() so such files are not
    // silently dropped from the scan.
    if (child.isSymbolicLink()) {
      let linkedStats;
      try {
        linkedStats = await fs.stat(childPath);
      } catch {
        // Dangling or unreadable link: links were never reported before, so
        // skip it instead of failing the whole scan.
        continue;
      }
      if (linkedStats.isFile()) entries.push(formatScanEntry(childPath, linkedStats));
    }
  }
  return entries;
}
41
+
42
/**
 * Produces the AppleScript source lines for a native chooser dialog.
 *
 * Every variant shares the same frame: the chooser runs inside a `try`, and
 * error number -128 is turned into an empty string result.
 *
 * @param {string} selection - "directory" for a folder chooser, "runtime" for
 *   the runtime-binary file chooser; any other value yields the GGUF file chooser.
 * @returns {string[]} Script lines, one statement per element.
 */
function buildBrowseAppleScript(selection) {
  let chooserLine;
  switch (selection) {
    case "directory":
      chooserLine = "POSIX path of (choose folder with prompt \"Select a folder to scan for GGUF files\")";
      break;
    case "runtime":
      chooserLine = "POSIX path of (choose file with prompt \"Select a llama.cpp runtime binary (llama-server)\")";
      break;
    default:
      chooserLine = "POSIX path of (choose file with prompt \"Select a GGUF file\")";
  }

  return [
    "try",
    chooserLine,
    "on error number -128",
    "return \"\"",
    "end try"
  ];
}
71
+
72
/**
 * Opens a native chooser dialog (via `osascript`) and reports the picked path.
 *
 * @param {{ selection?: string }} [request] - `selection` picks the dialog
 *   variant built by `buildBrowseAppleScript`; defaults to "file".
 * @param {{ platform?: string, execFileImpl?: Function }} [deps] - Overridable
 *   platform name and process launcher (defaults: `process.platform`, `execFile`).
 * @returns {Promise<object>} `{ canceled: true, reason, selection }` on
 *   non-darwin platforms, `{ canceled: true, selection }` when the trimmed
 *   output is empty, otherwise `{ canceled: false, selection, path }`.
 */
export async function browseForLocalModelPath({
  selection = "file"
} = {}, {
  platform = process.platform,
  execFileImpl = execFile
} = {}) {
  const isMac = platform === "darwin";
  if (!isMac) {
    return {
      canceled: true,
      reason: "Native local-model browse is currently available on macOS only.",
      selection
    };
  }

  // osascript takes each script statement as its own `-e` argument.
  const osascriptArgs = [];
  for (const scriptLine of buildBrowseAppleScript(selection)) {
    osascriptArgs.push("-e", scriptLine);
  }

  const execResult = await runExecFile(execFileImpl, "osascript", osascriptArgs, { encoding: "utf8" });
  const chosenPath = normalizeString(execResult?.stdout || "");

  return chosenPath
    ? { canceled: false, selection, path: chosenPath }
    : { canceled: true, selection };
}
100
+
101
/**
 * Scans a file or directory for GGUF files and returns them ordered by file name.
 *
 * @param {unknown} targetPath - Path to scan; non-string or blank input yields `[]`.
 * @returns {Promise<Array<object>>} Entries from `collectGgufFiles`, sorted with
 *   `localeCompare` on `fileName`.
 */
export async function scanLocalModelPath(targetPath) {
  const cleanedPath = normalizeString(targetPath);
  if (cleanedPath === "") return [];

  const found = await collectGgufFiles(cleanedPath);
  found.sort((a, b) => a.fileName.localeCompare(b.fileName));
  return found;
}
108
/**
 * Runs a command through an `execFile`-style launcher and resolves with its output.
 *
 * Two launcher shapes are supported: callback style (the built-in `execFile`,
 * or any function declaring at least four parameters) and promise style
 * (fewer than four declared parameters, whose return value is passed through).
 *
 * @param {Function} execFileImpl - Launcher to invoke.
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments for the executable.
 * @param {object} options - Options forwarded to the launcher.
 * @returns {Promise<*>} `{ stdout, stderr }` for callback-style launchers;
 *   whatever the launcher returns for promise-style ones.
 * @throws {Error} When `execFileImpl` is not a function, or when the
 *   callback-style launcher reports an error.
 */
async function runExecFile(execFileImpl, command, args, options) {
  const isBuiltin = execFileImpl === execFile;
  if (!isBuiltin && typeof execFileImpl !== "function") {
    throw new Error("execFile implementation is required.");
  }

  // Declared arity is the only signal used to tell callback-style from
  // promise-style launchers.
  const usesCallback = isBuiltin || execFileImpl.length >= 4;
  if (!usesCallback) {
    return execFileImpl(command, args, options);
  }

  return new Promise((resolve, reject) => {
    execFileImpl(command, args, options, (error, stdout, stderr) => {
      if (error) {
        reject(error);
        return;
      }
      resolve({ stdout, stderr });
    });
  });
}
@@ -0,0 +1,39 @@
1
/**
 * Coerces a value to a strictly positive finite number.
 *
 * @param {unknown} value - Candidate value, converted with `Number()`.
 * @returns {number} The numeric value, or 0 when it is not finite or not above zero.
 */
function normalizePositiveNumber(value) {
  const numeric = Number(value);
  return Number.isFinite(numeric) && numeric > 0 ? numeric : 0;
}
6
+
7
/**
 * Estimates the memory footprint of a variant in bytes.
 *
 * The estimate is `sizeBytes + contextWindow * 163840`, plus 15% of
 * `sizeBytes` (rounded down) when `preload` is exactly `true`.
 * NOTE(review): 163840 looks like a per-context-token byte cost — confirm.
 *
 * @param {{ sizeBytes?: unknown, contextWindow?: unknown, preload?: unknown }} [variant]
 * @returns {number} Estimated bytes; invalid numeric inputs count as 0.
 */
function calculateEstimatedBytes(variant = {}) {
  const modelBytes = normalizePositiveNumber(variant.sizeBytes);
  const contextTokens = normalizePositiveNumber(variant.contextWindow);

  let total = modelBytes + contextTokens * 163840;
  if (variant.preload === true) {
    total += Math.floor(modelBytes * 0.15);
  }
  return total;
}
14
+
15
/**
 * Classifies how a variant's estimated footprint fits the machine's memory.
 *
 * Budgets apply only when `system.platform === "darwin"` and
 * `system.unifiedMemory === true`: above 82% of total memory is
 * "over-budget", above 72% is "tight". Everything else is "safe".
 *
 * @param {object} variant - Passed to `calculateEstimatedBytes`.
 * @param {{ platform?: string, unifiedMemory?: boolean, totalMemoryBytes?: unknown }} [system]
 * @returns {{ fit: "safe" | "tight" | "over-budget", estimatedBytes: number }}
 */
export function classifyVariantCapacity(variant, system = {}) {
  const estimatedBytes = calculateEstimatedBytes(variant);
  const memoryBytes = normalizePositiveNumber(system.totalMemoryBytes);
  const appliesBudget = system.platform === "darwin" && system.unifiedMemory === true;

  let fit = "safe";
  if (appliesBudget) {
    if (estimatedBytes > Math.floor(memoryBytes * 0.82)) {
      fit = "over-budget";
    } else if (estimatedBytes > Math.floor(memoryBytes * 0.72)) {
      fit = "tight";
    }
  }
  return { fit, estimatedBytes };
}
29
+
30
/**
 * Decides whether a candidate variant can be activated next to the ones already active.
 *
 * The candidate is allowed when the sum of all `estimatedBytes` (active plus
 * candidate) does not exceed 72% of `totalMemoryBytes`, rounded down.
 *
 * @param {{ candidate?: { estimatedBytes?: unknown }, activeVariants?: Array<object>, totalMemoryBytes?: unknown }} input
 *   A non-array `activeVariants` is treated as empty.
 * @returns {{ allowed: boolean, reason: string }} `reason` is "" when allowed.
 */
export function canActivateVariant({ candidate, activeVariants, totalMemoryBytes }) {
  const budget = Math.floor(normalizePositiveNumber(totalMemoryBytes) * 0.72);

  let projectedBytes = 0;
  if (Array.isArray(activeVariants)) {
    for (const active of activeVariants) {
      projectedBytes += normalizePositiveNumber(active?.estimatedBytes);
    }
  }
  projectedBytes += normalizePositiveNumber(candidate?.estimatedBytes);

  if (projectedBytes > budget) {
    return { allowed: false, reason: "Enabling this variant would exceed the local capacity budget." };
  }
  return { allowed: true, reason: "" };
}
@@ -0,0 +1,238 @@
1
+ import os from "node:os";
2
+ import path from "node:path";
3
+ import { promises as fs } from "node:fs";
4
+ import { normalizeLocalModelsMetadata } from "../runtime/local-models.js";
5
+ import { canActivateVariant, classifyVariantCapacity } from "./local-model-capacity.js";
6
+
7
/**
 * Reports whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` for objects other than arrays and `null`.
 */
function isPlainObject(value) {
  if (!value) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
10
+
11
/**
 * Returns a deep copy of a config object so callers can edit it freely.
 *
 * @param {unknown} config - Config to copy.
 * @returns {object} A `structuredClone` of `config`, or `{}` when `config` is
 *   not a non-array object.
 */
function cloneConfig(config) {
  if (!isPlainObject(config)) return {};
  return structuredClone(config);
}
14
+
15
/**
 * Returns the trimmed form of a string input.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string} Trimmed text; "" for any non-string input.
 */
function normalizeString(value) {
  const isText = typeof value === "string";
  if (!isText) {
    return "";
  }
  return value.trim();
}
18
+
19
/**
 * Guards a metadata argument so it can be spread safely.
 *
 * @param {unknown} metadata - Caller-supplied metadata.
 * @returns {object} `metadata` itself when it is a non-array object, else `{}`.
 */
function normalizeManagedMetadata(metadata) {
  if (isPlainObject(metadata)) {
    return metadata;
  }
  return {};
}
22
+
23
/**
 * Produces a cloned config whose `metadata.localModels` section is normalized.
 *
 * @param {unknown} config - Source config; it is cloned, not edited.
 * @returns {object} The clone, with `metadata` forced to an object and
 *   `metadata.localModels` replaced by the result of
 *   `normalizeLocalModelsMetadata`.
 */
function ensureLocalModelsState(config) {
  const draft = cloneConfig(config);
  if (!isPlainObject(draft.metadata)) {
    draft.metadata = {};
  }
  draft.metadata.localModels = normalizeLocalModelsMetadata(draft.metadata.localModels);
  return draft;
}
29
+
30
/**
 * Default existence probe, based on `fs.access`.
 *
 * @param {string} filePath - Path to check.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds; `false` on any
 *   failure (the error itself is intentionally not surfaced).
 */
async function defaultPathExists(filePath) {
  let reachable = true;
  try {
    await fs.access(filePath);
  } catch {
    reachable = false;
  }
  return reachable;
}
38
+
39
/**
 * Resolves the directory used for managed local models.
 *
 * @param {{ homeDir?: string }} [options] - `homeDir` defaults to `os.homedir()`.
 * @returns {string} `<homeDir>/.llm-router/local-models`, joined with `path.join`.
 */
export function getManagedLocalModelsDir({ homeDir = os.homedir() } = {}) {
  const segments = [homeDir, ".llm-router", "local-models"];
  return path.join(...segments);
}
42
+
43
/**
 * Registers (or overwrites) an attached, non-managed llama.cpp base model in a
 * cloned config.
 *
 * @param {object} config - Existing config; left untouched.
 * @param {object} [model]
 * @param {string} model.id - Library key; required after trimming.
 * @param {string} [model.displayName] - Label; falls back to the id when blank.
 * @param {string} model.filePath - Model file path; required after trimming.
 * @param {object} [model.metadata] - Stored as-is when it is a non-array
 *   object, otherwise replaced with `{}`.
 * @returns {Promise<object>} The updated config clone.
 * @throws {Error} When the id or the path is missing.
 */
export async function registerAttachedLlamacppModel(config, {
  id,
  displayName,
  filePath,
  metadata = {}
} = {}) {
  const baseModelId = normalizeString(id);
  const modelPath = normalizeString(filePath);
  const label = normalizeString(displayName);

  if (!baseModelId) {
    throw new Error("Attached local model id is required.");
  }
  if (!modelPath) {
    throw new Error("Attached local model path is required.");
  }

  const next = ensureLocalModelsState(config);
  const libraryEntry = {
    id: baseModelId,
    source: "llamacpp-attached",
    displayName: label || baseModelId,
    path: modelPath,
    availability: "available",
    metadata: isPlainObject(metadata) ? metadata : {},
    managed: false
  };
  next.metadata.localModels.library[baseModelId] = libraryEntry;

  return next;
}
69
+
70
/**
 * Registers (or overwrites) a managed llama.cpp base model in a cloned config.
 *
 * The stored metadata is the caller's metadata with `repo` and `file` always
 * overwritten by the trimmed arguments, and `sizeBytes` overwritten only when
 * the argument coerces to a finite number.
 *
 * @param {object} config - Existing config; left untouched.
 * @param {object} [model]
 * @param {string} model.id - Library key; required after trimming.
 * @param {string} [model.displayName] - Label; falls back to the id when blank.
 * @param {string} model.filePath - Model file path; required after trimming.
 * @param {string} [model.repo] - Recorded in metadata as `repo`.
 * @param {string} [model.file] - Recorded in metadata as `file`.
 * @param {unknown} [model.sizeBytes] - Recorded in metadata when finite.
 * @param {object} [model.metadata] - Extra metadata to merge underneath.
 * @returns {Promise<object>} The updated config clone.
 * @throws {Error} When the id or the path is missing.
 */
export async function registerManagedLlamacppModel(config, {
  id,
  displayName,
  filePath,
  repo = "",
  file = "",
  sizeBytes = undefined,
  metadata = {}
} = {}) {
  const baseModelId = normalizeString(id);
  const modelPath = normalizeString(filePath);
  const label = normalizeString(displayName);

  if (!baseModelId) {
    throw new Error("Managed local model id is required.");
  }
  if (!modelPath) {
    throw new Error("Managed local model path is required.");
  }

  const next = ensureLocalModelsState(config);

  const mergedMetadata = {
    ...normalizeManagedMetadata(metadata),
    repo: normalizeString(repo),
    file: normalizeString(file)
  };
  const numericSize = Number(sizeBytes);
  if (Number.isFinite(numericSize)) {
    mergedMetadata.sizeBytes = numericSize;
  }

  next.metadata.localModels.library[baseModelId] = {
    id: baseModelId,
    source: "llamacpp-managed",
    displayName: label || baseModelId,
    path: modelPath,
    availability: "available",
    metadata: mergedMetadata,
    managed: true
  };

  return next;
}
104
+
105
/**
 * Refreshes the `availability` flag of every base model and variant in a
 * cloned config.
 *
 * A base model is "available" when it has a non-blank path and `pathExists`
 * resolves truthy for it, otherwise "stale". Each variant then copies the
 * availability of its base model, or becomes "stale" when the base model is
 * unknown.
 *
 * @param {object} config - Existing config; left untouched.
 * @param {{ pathExists?: (path: string) => Promise<boolean> | boolean }} [deps]
 *   Existence probe; defaults to `defaultPathExists`. Paths are probed one at
 *   a time, in library order.
 * @returns {Promise<object>} The updated config clone.
 */
export async function reconcileLocalModelPaths(config, {
  pathExists = defaultPathExists
} = {}) {
  const next = ensureLocalModelsState(config);
  const { library, variants } = next.metadata.localModels;

  for (const entry of Object.values(library)) {
    const entryPath = normalizeString(entry?.path);
    const isPresent = entryPath ? await pathExists(entryPath) : false;
    entry.availability = isPresent ? "available" : "stale";
  }

  for (const variant of Object.values(variants)) {
    variant.availability = library[variant?.baseModelId]?.availability || "stale";
  }

  return next;
}
129
+
130
/**
 * Removes a base model and every variant that references it, in a cloned config.
 *
 * @param {object} config - Existing config; left untouched.
 * @param {string} baseModelId - Library key to remove; a blank id is a no-op.
 * @returns {Promise<object>} The updated config clone.
 */
export async function removeLocalBaseModel(config, baseModelId) {
  const targetId = normalizeString(baseModelId);
  const next = ensureLocalModelsState(config);
  if (!targetId) {
    return next;
  }

  const localModels = next.metadata.localModels;
  delete localModels.library[targetId];

  for (const variantKey of Object.keys(localModels.variants)) {
    if (localModels.variants[variantKey]?.baseModelId === targetId) {
      delete localModels.variants[variantKey];
    }
  }

  return next;
}
145
+
146
/**
 * Points a base model at a new file path and marks it, together with all of
 * its variants, as "available" in a cloned config.
 *
 * @param {object} config - Existing config; left untouched.
 * @param {string} baseModelId - Library key of the model to update.
 * @param {string} filePath - New model path.
 * @returns {Promise<object>} The updated config clone.
 * @throws {Error} When the id or path is blank, or the base model is unknown.
 */
export async function updateLocalBaseModelPath(config, baseModelId, filePath) {
  const targetId = normalizeString(baseModelId);
  const nextPath = normalizeString(filePath);
  const next = ensureLocalModelsState(config);

  if (!targetId) {
    throw new Error("Base model id is required.");
  }
  if (!nextPath) {
    throw new Error("Updated local model path is required.");
  }

  const localModels = next.metadata.localModels;
  const baseModel = localModels.library[targetId];
  if (!baseModel) {
    throw new Error(`Base model '${targetId}' was not found.`);
  }

  Object.assign(baseModel, { path: nextPath, availability: "available" });

  Object.values(localModels.variants)
    .filter((variant) => variant?.baseModelId === targetId)
    .forEach((variant) => {
      variant.availability = "available";
    });

  return next;
}
170
+
171
/**
 * Creates or updates a local model variant in a cloned config.
 *
 * The variant's capacity is classified with `classifyVariantCapacity` from
 * the base model's `metadata.sizeBytes`. When the draft asks for `enabled` or
 * `preload` on a darwin unified-memory system, `canActivateVariant` must
 * allow it given all other enabled or preloaded variants.
 *
 * @param {object} config - Existing config; left untouched.
 * @param {object} draft - Variant fields. `key` (or `id` as a fallback),
 *   `baseModelId`, `id`, `name` and `runtime` must be non-blank strings.
 * @param {{ system?: { platform?: string, unifiedMemory?: boolean, totalMemoryBytes?: unknown } }} [options]
 * @returns {Promise<object>} The updated config clone.
 * @throws {Error} When a required field is missing, the base model is
 *   unknown, or activation is rejected (the rejection reason is the message).
 */
export async function saveLocalModelVariant(config, draft, {
  system = {}
} = {}) {
  const next = ensureLocalModelsState(config);
  const input = isPlainObject(draft) ? draft : {};

  const key = normalizeString(input.key || input.id);
  const baseModelId = normalizeString(input.baseModelId);
  const modelId = normalizeString(input.id);
  const name = normalizeString(input.name);
  const runtime = normalizeString(input.runtime);

  const missingRequired = [key, baseModelId, modelId, name, runtime].some((field) => !field);
  if (missingRequired) {
    throw new Error("key, baseModelId, id, name, and runtime are required.");
  }

  const localModels = next.metadata.localModels;
  const baseModel = localModels.library[baseModelId];
  if (!baseModel) {
    throw new Error(`Base model '${baseModelId}' was not found.`);
  }

  const wantsEnabled = input.enabled === true;
  const wantsPreload = input.preload === true;

  const capacity = classifyVariantCapacity({
    sizeBytes: Number(baseModel?.metadata?.sizeBytes || 0),
    contextWindow: input.contextWindow,
    preload: wantsPreload
  }, system);

  const enforcesBudget = system?.platform === "darwin" && system?.unifiedMemory === true;
  if ((wantsEnabled || wantsPreload) && enforcesBudget) {
    // Gather every other variant that currently counts against the budget;
    // the variant being saved is excluded so an update does not count twice.
    const activeVariants = [];
    for (const other of Object.values(localModels.variants)) {
      if (other?.key === key) continue;
      if (other?.enabled !== true && other?.preload !== true) continue;
      activeVariants.push({
        estimatedBytes: Number(other?.estimatedBytes || 0),
        preload: other?.preload === true
      });
    }

    const decision = canActivateVariant({
      candidate: {
        estimatedBytes: capacity.estimatedBytes,
        preload: wantsPreload
      },
      activeVariants,
      totalMemoryBytes: system.totalMemoryBytes
    });
    if (!decision.allowed) {
      throw new Error(decision.reason);
    }
  }

  const previous = localModels.variants[key];
  const numericContext = Number(input.contextWindow);

  // Fields of an existing variant survive unless they are overwritten below.
  localModels.variants[key] = {
    ...(isPlainObject(previous) ? previous : {}),
    key,
    baseModelId,
    id: modelId,
    name,
    runtime,
    preset: normalizeString(input.preset),
    enabled: wantsEnabled,
    preload: wantsPreload,
    contextWindow: Number.isFinite(numericContext) ? numericContext : undefined,
    capabilities: isPlainObject(input.capabilities) ? input.capabilities : undefined,
    availability: normalizeString(baseModel?.availability) || "available",
    capacityState: capacity.fit,
    estimatedBytes: capacity.estimatedBytes
  };

  return next;
}
@@ -14,6 +14,7 @@ import { startLocalRouteServer } from "./local-server.js";
14
14
  import { startRouterSupervisor } from "./router-supervisor.js";
15
15
  import { reclaimPort, stopStartupManagedListener } from "./port-reclaim.js";
16
16
  import { installStartup, startupStatus } from "./startup-manager.js";
17
+ import { ensureConfiguredLlamacppRuntimeStarted, stopManagedLlamacppRuntime } from "./llamacpp-runtime.js";
17
18
  import { configHasProvider, sanitizeConfigForDisplay } from "../runtime/config.js";
18
19
 
19
20
  function summarizeConfig(config, configPath) {
@@ -888,6 +889,12 @@ async function runRouterSupervisorCommand(options = {}) {
888
889
  const startRouterSupervisorFn = typeof options.startRouterSupervisor === "function"
889
890
  ? options.startRouterSupervisor
890
891
  : (startOptions) => startRouterSupervisor(startOptions, options);
892
+ const ensureConfiguredLlamacppRuntimeStartedFn = typeof options.ensureConfiguredLlamacppRuntimeStarted === "function"
893
+ ? options.ensureConfiguredLlamacppRuntimeStarted
894
+ : ensureConfiguredLlamacppRuntimeStarted;
895
+ const stopManagedLlamacppRuntimeFn = typeof options.stopManagedLlamacppRuntime === "function"
896
+ ? options.stopManagedLlamacppRuntime
897
+ : stopManagedLlamacppRuntime;
891
898
 
892
899
  if (!(await configFileExists(configPath))) {
893
900
  return {
@@ -956,6 +963,8 @@ async function runRouterSupervisorCommand(options = {}) {
956
963
  };
957
964
  }
958
965
 
966
+ await ensureConfiguredLlamacppRuntimeStartedFn(config, { line, error });
967
+
959
968
  const requestedStartArgs = {
960
969
  configPath,
961
970
  host,
@@ -1103,6 +1112,7 @@ async function runRouterSupervisorCommand(options = {}) {
1103
1112
  if (shutdownPromise) return shutdownPromise;
1104
1113
  shuttingDown = true;
1105
1114
  shutdownPromise = (async () => {
1115
+ await stopManagedLlamacppRuntimeFn({ line, error });
1106
1116
  await new Promise((resolve) => server.close(() => resolve()));
1107
1117
  await clearRuntimeStateFn({ pid: process.pid });
1108
1118
  })();
@@ -1127,6 +1137,8 @@ async function runRouterSupervisorCommand(options = {}) {
1127
1137
  await donePromise;
1128
1138
  if (shutdownPromise) {
1129
1139
  await shutdownPromise;
1140
+ } else {
1141
+ await stopManagedLlamacppRuntimeFn({ line, error });
1130
1142
  }
1131
1143
 
1132
1144
  process.removeListener("SIGINT", handleSigInt);