offgrid-ai 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,209 @@
1
+ // HuggingFace model download helpers.
2
+ // Uses the Python huggingface_hub package (the standard, maintained downloader)
3
+ // to download models into the standard HF cache directory.
4
+ // Downloads go to ~/.cache/huggingface/hub, NOT a custom offgrid-ai folder.
5
+
6
+ import { execFile } from "node:child_process";
7
+ import { promisify } from "node:util";
8
+ import { join, dirname } from "node:path";
9
+ import { mkdir } from "node:fs/promises";
10
+ import { fileURLToPath } from "node:url";
11
+ import { HF_HUB_DIR } from "./config.mjs";
12
+
// Promise-returning variant of execFile, used for short one-shot probes.
const execFileAsync = promisify(execFile);

// Location of the bundled Python download helper: ../resources/hf-download.py
// relative to the directory containing this module.
const HF_DOWNLOAD_SCRIPT = join(
  dirname(fileURLToPath(import.meta.url)),
  "..",
  "resources",
  "hf-download.py",
);

/**
 * Probe for a python3 interpreter that can import huggingface_hub.
 *
 * @returns {Promise<boolean>} true when the probe prints a non-empty version
 *   string; false when python3 cannot be run, the import fails, or nothing is
 *   printed.
 */
export async function hasHuggingfaceHub() {
  const probe = "import huggingface_hub; print(huggingface_hub.__version__)";
  try {
    const result = await execFileAsync("python3", ["-c", probe]);
    const version = result.stdout.trim();
    return Boolean(version);
  } catch {
    // Any failure (missing interpreter, ImportError, non-zero exit) means "no".
    return false;
  }
}
26
+
/**
 * Parse a HuggingFace reference into a repo id and an optional file path.
 *
 * Accepted forms:
 *   - File URL:   https://huggingface.co/<repo>/(resolve|blob|raw)/<rev>/<path>
 *   - Browse URL: https://huggingface.co/<repo>/tree/<rev>[/<dir>]  (repo only)
 *   - Repo URL:   https://huggingface.co/<org>/<name>[/<path>]
 *   - Shorthand:  <org>/<name>[/<path>]
 *
 * URL path segments are percent-decoded so the filename can be compared with
 * the paths the tree API returns. The <rev> segment of a URL is NOT returned:
 * the other helpers in this module always list and link against "main".
 *
 * @param {string} input - URL or shorthand reference.
 * @returns {{ repo: string, filename: string | undefined }}
 * @throws {TypeError} when input is not a string.
 * @throws {Error} when the reference does not contain at least org/name.
 */
export function parseHfRef(input) {
  if (typeof input !== "string") {
    throw new TypeError(`Invalid HuggingFace reference: expected a string, got ${typeof input}.`);
  }
  const trimmed = input.trim();

  if (trimmed.startsWith("https://huggingface.co/")) {
    // Malformed escapes are kept verbatim rather than failing the whole parse.
    const decodeSegment = (segment) => {
      try {
        return decodeURIComponent(segment);
      } catch {
        return segment;
      }
    };
    const pathParts = new URL(trimmed).pathname.split("/").filter(Boolean).map(decodeSegment);

    // The web UI and CDN insert "<marker>/<rev>" between the repo id and the
    // file path. Index 2 (org/name/<marker>) is tried before index 1
    // (single-segment repo id) so a repo literally named like a marker wins.
    const fileMarkers = new Set(["resolve", "blob", "raw"]);
    const isMarker = (segment) => segment === "tree" || fileMarkers.has(segment);
    const markerIdx = [2, 1].find((idx) => isMarker(pathParts[idx]) && pathParts[idx + 1] !== undefined);
    if (markerIdx !== undefined) {
      const repo = pathParts.slice(0, markerIdx).join("/");
      // "tree" URLs point at a directory listing, never at a single file.
      const filename = pathParts[markerIdx] === "tree"
        ? undefined
        : pathParts.slice(markerIdx + 2).join("/") || undefined;
      return { repo, filename };
    }

    if (pathParts.length >= 2) {
      return {
        repo: pathParts.slice(0, 2).join("/"),
        filename: pathParts.length > 2 ? pathParts.slice(2).join("/") : undefined,
      };
    }
    throw new Error(`Invalid HuggingFace URL: ${input}`);
  }

  const parts = trimmed.split("/").filter(Boolean);
  if (parts.length < 2) {
    throw new Error(`Invalid HuggingFace reference: "${input}". Expected at least org/name.`);
  }
  return {
    repo: parts.slice(0, 2).join("/"),
    filename: parts.length > 2 ? parts.slice(2).join("/") : undefined,
  };
}
59
+
/**
 * Look up a single file of a HuggingFace repo in the repo's tree listing and
 * describe it for download.
 *
 * @param {string} ref - reference understood by parseHfRef (must name a file).
 * @param {object} [options]
 * @param {Function} [options.fetchImpl] - fetch implementation (defaults to globalThis.fetch).
 * @returns {Promise<{ repo: string, filename: string, url: string, sizeBytes: number, sha256: string, relativePath: string }>}
 * @throws {Error} when the tree has no file entry with that exact path.
 */
export async function resolveGgufFile(ref, { fetchImpl = globalThis.fetch } = {}) {
  const parsed = parseHfRef(ref);
  const listing = await getHfTree(parsed.repo, { fetchImpl });

  const match = listing.find((item) => item.path === parsed.filename && item.type === "file");
  if (!match) {
    throw new Error(`File '${parsed.filename}' not found in HuggingFace repo '${parsed.repo}'.`);
  }

  // LFS metadata is preferred for size and hash when the entry carries it;
  // otherwise fall back to the plain size and an empty hash.
  const sizeBytes = match.lfs?.size ?? match.size ?? 0;
  const sha256 = match.lfs?.oid ?? "";

  return {
    repo: parsed.repo,
    filename: parsed.filename,
    url: `https://huggingface.co/${parsed.repo}/resolve/main/${parsed.filename}`,
    sizeBytes,
    sha256,
    relativePath: parsed.filename,
  };
}
75
+
/**
 * List every downloadable model file of a repo, as reported by the HF tree API.
 * Dot-files (which includes .gitattributes) and README.md are left out.
 *
 * @param {string} repo - repo id, e.g. "org/name".
 * @param {object} [options]
 * @param {Function} [options.fetchImpl] - fetch implementation (defaults to globalThis.fetch).
 * @returns {Promise<Array<{ repo: string, filename: string, url: string, sizeBytes: number, sha256: string, relativePath: string }>>}
 */
export async function resolveMlxRepo(repo, { fetchImpl = globalThis.fetch } = {}) {
  const listing = await getHfTree(repo, { fetchImpl });

  const descriptors = [];
  for (const item of listing) {
    if (item.type !== "file") continue;
    const skipped = item.path.startsWith(".") || item.path === ".gitattributes" || item.path === "README.md";
    if (skipped) continue;

    descriptors.push({
      repo,
      filename: item.path,
      url: `https://huggingface.co/${repo}/resolve/main/${item.path}`,
      // Prefer LFS metadata; fall back to the plain size and an empty hash.
      sizeBytes: item.lfs?.size ?? item.size ?? 0,
      sha256: item.lfs?.oid ?? "",
      relativePath: item.path,
    });
  }
  return descriptors;
}
91
+
/**
 * Fetch the recursive file listing of a HuggingFace model repo.
 *
 * The listing is paginated: when a response carries a `Link: <url>; rel="next"`
 * header, the next page is fetched and appended, so large repos are not
 * silently truncated to their first page. Each request gets its own 10 s
 * timeout.
 *
 * @param {string} repo - repo id, e.g. "org/name".
 * @param {object} [options]
 * @param {string} [options.branch="main"] - revision to list.
 * @param {Function} [options.fetchImpl] - fetch implementation (defaults to globalThis.fetch).
 * @returns {Promise<object[]>} tree entries exactly as returned by the API.
 * @throws {Error} on a non-2xx response or a body that is not a JSON array.
 */
async function getHfTree(repo, { branch = "main", fetchImpl = globalThis.fetch } = {}) {
  // Extracts the rel="next" target of a Link header, resolved against the
  // current page URL; returns null when there is no further page.
  const nextPageUrl = (linkHeader, currentUrl) => {
    if (!linkHeader) return null;
    const match = /<([^>]+)>\s*;\s*rel="?next"?/i.exec(linkHeader);
    return match ? new URL(match[1], currentUrl).href : null;
  };

  const entries = [];
  const visited = new Set();
  let pageUrl = `https://huggingface.co/api/models/${repo}/tree/${branch}?recursive=true`;

  // `visited` guards against a server that keeps pointing back at the same page.
  while (pageUrl && !visited.has(pageUrl)) {
    visited.add(pageUrl);
    const response = await fetchImpl(pageUrl, { signal: AbortSignal.timeout(10000) });
    if (!response.ok) throw new Error(`HuggingFace API error: HTTP ${response.status} for ${repo}`);

    const page = await response.json();
    if (!Array.isArray(page)) {
      throw new Error(`HuggingFace API error: unexpected tree response for ${repo}`);
    }
    for (const entry of page) entries.push(entry);

    // Minimal fetch doubles may not provide a headers object at all.
    pageUrl = nextPageUrl(response.headers?.get?.("link"), pageUrl);
  }
  return entries;
}
98
+
/**
 * Resolve a user-provided HF reference into a download plan.
 *
 * Selection rules:
 *   1. The reference names a `.gguf` file -> plan for exactly that file.
 *   2. Otherwise, the repo contains `.gguf` files -> plan for the first one
 *      in the order the tree listing returns them.
 *   3. Otherwise -> treat the repo as MLX and plan every file except
 *      dot-files and README.md.
 *
 * The repo tree is fetched once and every file descriptor is built from that
 * single listing, which avoids a second API round-trip per call. Descriptors
 * use the repo id parsed from `input` directly, so single-segment repo ids are
 * not mangled by rebuilding and re-parsing a "repo/path" string.
 *
 * @param {string} input - URL, org/name, or org/name/path reference.
 * @param {object} [options]
 * @param {Function} [options.fetchImpl] - fetch implementation (defaults to globalThis.fetch).
 * @returns {Promise<{ id: string, repo: string, format: "gguf" | "mlx", files: object[], totalSizeBytes: number }>}
 * @throws {Error} when an explicitly named .gguf file is not in the repo.
 */
export async function resolveHfDownload(input, { fetchImpl = globalThis.fetch } = {}) {
  const { repo, filename } = parseHfRef(input);
  const id = repo.split("/").pop() ?? repo;

  const tree = await getHfTree(repo, { fetchImpl });
  const fileEntries = tree.filter((f) => f.type === "file");

  // Builds the per-file descriptor; LFS metadata wins over the plain size.
  const describe = (entry) => ({
    repo,
    filename: entry.path,
    url: `https://huggingface.co/${repo}/resolve/main/${entry.path}`,
    sizeBytes: entry.lfs?.size ?? entry.size ?? 0,
    sha256: entry.lfs?.oid ?? "",
    relativePath: entry.path,
  });
  const ggufPlan = (entry) => {
    const file = describe(entry);
    return { id, repo, format: "gguf", files: [file], totalSizeBytes: file.sizeBytes };
  };

  if (filename && filename.endsWith(".gguf")) {
    const entry = fileEntries.find((f) => f.path === filename);
    if (!entry) throw new Error(`File '${filename}' not found in HuggingFace repo '${repo}'.`);
    return ggufPlan(entry);
  }

  const firstGguf = fileEntries.find((f) => f.path.endsWith(".gguf"));
  if (firstGguf) return ggufPlan(firstGguf);

  const files = fileEntries
    .filter((f) => !f.path.startsWith(".") && f.path !== "README.md")
    .map(describe);
  return {
    id,
    repo,
    format: "mlx",
    files,
    totalSizeBytes: files.reduce((sum, f) => sum + f.sizeBytes, 0),
  };
}
137
+
/**
 * Download a resolved model into the HF hub cache by running the bundled
 * Python helper script with python3.
 *
 * The helper is expected to write newline-delimited JSON events to stdout:
 *   { type: "progress", downloadedBytes, file }
 *   { type: "complete", localDir }
 *   { type: "error", message }
 * Lines that are not valid JSON are ignored.
 *
 * @param {object} model - from resolveHfDownload
 * @param {object} options
 * @param {function} options.onProgress - ({ downloadedBytes, totalBytes, percentage, file }) => void
 * @returns {Promise<{ localDir: string, format: string }>}
 *   Rejects when the helper reports an error, cannot be started, exits with a
 *   non-zero status, or exits cleanly without ever reporting completion.
 */
export async function downloadToHfCache(model, options = {}) {
  await mkdir(HF_HUB_DIR, { recursive: true });

  const args = [HF_DOWNLOAD_SCRIPT, "--repo", model.repo, "--cache-dir", HF_HUB_DIR];
  if (model.format === "gguf") {
    args.push("--file", model.files[0].filename);
  }

  const onProgress = options.onProgress ?? (() => {});
  const totalBytes = model.totalSizeBytes;
  const STDERR_TAIL_CHARS = 4096;

  return new Promise((resolve, reject) => {
    // execFile enforces maxBuffer (default 1 MiB) on stdout/stderr even when
    // no callback is given and terminates the child once it is exceeded. A
    // long download can emit more progress output than that, so lift the cap.
    const child = execFile("python3", args, { env: process.env, maxBuffer: Infinity });

    let stdoutBuf = "";
    let stderrTail = "";
    let downloadedBytes = 0;
    let currentFile = null;
    let sawTerminalEvent = false;

    // Events arrive one per line. The try also shields the stream handler from
    // malformed events and from exceptions thrown by the onProgress callback.
    const handleLine = (line) => {
      if (!line) return;
      try {
        const event = JSON.parse(line);
        if (event.type === "progress") {
          downloadedBytes = event.downloadedBytes ?? downloadedBytes;
          currentFile = event.file ?? currentFile;
          onProgress({
            downloadedBytes,
            totalBytes,
            // An unknown or zero total would otherwise yield NaN or Infinity.
            percentage: totalBytes > 0
              ? Math.min(100, Math.round((downloadedBytes / totalBytes) * 100))
              : 0,
            file: currentFile,
          });
        } else if (event.type === "complete") {
          sawTerminalEvent = true;
          resolve({ localDir: event.localDir, format: model.format });
        } else if (event.type === "error") {
          sawTerminalEvent = true;
          reject(new Error(event.message));
        }
      } catch {
        // Ignore non-JSON output (progress bars, etc.)
      }
    };

    // Buffer and split on complete newlines so an event split across chunk
    // boundaries isn't silently dropped.
    child.stdout?.on("data", (chunk) => {
      stdoutBuf += String(chunk);
      let nl;
      while ((nl = stdoutBuf.indexOf("\n")) !== -1) {
        handleLine(stdoutBuf.slice(0, nl));
        stdoutBuf = stdoutBuf.slice(nl + 1);
      }
    });

    // stderr is mostly progress-bar noise; keep only a short tail so a failure
    // can report the helper's last message.
    child.stderr?.on("data", (chunk) => {
      stderrTail = (stderrTail + String(chunk)).slice(-STDERR_TAIL_CHARS);
    });

    child.on("error", reject);

    // "close" fires after the stdio streams have ended, so every stdout line
    // has been handled by the time the outcome is decided here.
    child.on("close", (code, signal) => {
      // Flush any final line that lacked a trailing newline.
      if (stdoutBuf.trim()) handleLine(stdoutBuf.trim());

      if (code !== 0) {
        const lastStderrLine = stderrTail
          .split(/\r\n|\n|\r/)
          .map((text) => text.trim())
          .filter(Boolean)
          .pop();
        const via = signal ? ` (signal ${signal})` : "";
        const detail = lastStderrLine ? `: ${lastStderrLine}` : "";
        reject(new Error(`Download failed with exit code ${code}${via}${detail}`));
      } else if (!sawTerminalEvent) {
        // A clean exit with no "complete" event would leave the promise
        // pending forever.
        reject(new Error("Download helper exited without reporting completion."));
      }
    });
  });
}
package/src/managed.mjs CHANGED
@@ -2,7 +2,7 @@ import { existsSync } from "node:fs";
2
2
  import { BACKENDS } from "./backends.mjs";
3
3
  import { commandExists } from "./exec.mjs";
4
4
 
5
- export const MANAGED_BACKEND_IDS = ["ollama", "omlx"];
5
+ export const MANAGED_BACKEND_IDS = ["omlx"];
6
6
 
7
7
  export async function scanManagedModels() {
8
8
  const results = [];
@@ -22,10 +22,6 @@ export function hasLmStudioInstalled() {
22
22
  return existsSync("/Applications/LM Studio.app");
23
23
  }
24
24
 
25
- export function hasOllamaInstalled() {
26
- return commandExists("ollama");
27
- }
28
-
29
25
  export function hasOmlxInstalled() {
30
26
  return commandExists("omlx");
31
27
  }
@@ -0,0 +1,290 @@
1
+ // MLX model discovery + metadata — scans configured model directories for MLX
2
+ // model directories and parses their config.json.
3
+ // Ported from deprecated-offgrid-desktop/src/main/model-discovery.ts +
4
+ // mlx-metadata.ts (MLX subset only).
5
+ //
6
+ // This runs ALONGSIDE offgrid-ai's existing GGUF scan (scan.mjs scanGgufModels)
7
+ // — it does not replace it. The picker (main.mjs) will merge GGUF + MLX lists.
8
+ //
9
+ // An MLX model directory is one containing config.json + one or more
10
+ // *.safetensors files. HuggingFace Hub cache layout (models--org--name) is
11
+ // detected and scanned specially.
12
+
13
+ import { readdir, stat, readFile } from "node:fs/promises";
14
+ import { existsSync } from "node:fs";
15
+ import { join, basename } from "node:path";
16
+ import { homedir } from "node:os";
17
+ import { getModelScanDirs } from "./config.mjs";
18
+ import { inferSourceLabel, MIN_MODEL_SIZE_BYTES, EMBEDDING_MODEL_TYPES } from "./discovery-shared.mjs";
19
+
// ── Folder → backend mapping ──────────────────────────────────────────────
// Models under the oMLX folder belong to the oMLX managed backend only and are
// never served by mlx-vlm. Every other scan dir is routed by format
// (GGUF → llama.cpp, MLX → mlx-vlm), so the mlx-vlm scan covers all configured
// dirs except this one.
const OMLX_MODELS_DIR = join(homedir(), ".omlx", "models");

/** True when p is the oMLX models folder itself or a path underneath it. */
function isOmlxFolder(p) {
  if (p === OMLX_MODELS_DIR) return true;
  return p.startsWith(`${OMLX_MODELS_DIR}/`);
}
29
+
30
+ // ── MLX directory detection ───────────────────────────────────────────────
31
+
/**
 * Decide whether a directory is an MLX model directory: it must hold a
 * config.json and at least one entry whose name ends in ".safetensors".
 * An unreadable directory counts as "no".
 */
async function isMlxModelDir(dir) {
  const hasConfig = existsSync(join(dir, "config.json"));
  if (!hasConfig) return false;

  let names;
  try {
    names = await readdir(dir);
  } catch {
    return false;
  }

  for (const name of names) {
    if (name.endsWith(".safetensors")) return true;
  }
  return false;
}
42
+
/**
 * Total size in bytes of the *.safetensors files directly inside dir.
 * Returns 0 when the directory cannot be listed or any of the files cannot be
 * stat'ed.
 */
async function getMlxDirSizeBytes(dir) {
  try {
    const names = await readdir(dir);

    // Start every stat up front so they run concurrently.
    const pending = [];
    for (const name of names) {
      if (!name.endsWith(".safetensors")) continue;
      pending.push(stat(join(dir, name)).then((info) => info.size));
    }

    let total = 0;
    for (const size of await Promise.all(pending)) {
      total += size;
    }
    return total;
  } catch {
    return 0;
  }
}
58
+
59
+ // ── Recursive MLX scanner ─────────────────────────────────────────────────
60
+
/**
 * Walk rootDir looking for MLX model directories and return one model entry
 * per directory that passes the size and embedding filters.
 *
 * The walk descends at most maxDepth levels, skips dot-entries and README.md,
 * and does not descend into a directory once it is recognised as a model.
 * GGUF files are not collected here.
 */
async function scanDirRecursiveForMlx(rootDir, sourceLabel, maxDepth = 3) {
  const found = [];
  if (!existsSync(rootDir)) return found;

  // Records modelDir unless it is too small or is an embedding model.
  const collect = async (modelDir, label) => {
    const sizeBytes = await getMlxDirSizeBytes(modelDir);
    if (sizeBytes < MIN_MODEL_SIZE_BYTES) return;
    if (await isEmbeddingMlxModel(join(modelDir, "config.json"))) return;
    found.push(makeMlxModel(modelDir, label, sizeBytes, sourceLabel, rootDir));
  };

  const visit = async (dir, depth) => {
    if (depth > maxDepth) return;

    let children;
    try {
      children = await readdir(dir, { withFileTypes: true });
    } catch {
      return;
    }

    // A non-root directory that is itself a model is recorded, not descended.
    if (depth > 0 && await isMlxModelDir(dir)) {
      await collect(dir, basename(dir));
      return;
    }

    for (const child of children) {
      const { name } = child;
      if (name.startsWith(".") || name === "README.md" || name === ".gitattributes") continue;
      if (!child.isDirectory()) continue;

      const childPath = join(dir, name);
      if (await isMlxModelDir(childPath)) {
        await collect(childPath, name);
      } else {
        await visit(childPath, depth + 1);
      }
    }
  };

  await visit(rootDir, 0);
  return found;
}
106
+
107
+ // ── HuggingFace Hub layout ────────────────────────────────────────────────
108
+
/**
 * Heuristic for a HuggingFace Hub cache directory: true when dir contains at
 * least one sub-directory whose name starts with "models--". A missing or
 * unreadable directory yields false.
 */
async function looksLikeHfHub(dir) {
  if (!existsSync(dir)) return false;

  let children;
  try {
    children = await readdir(dir, { withFileTypes: true });
  } catch {
    return false;
  }

  for (const child of children) {
    if (child.isDirectory() && child.name.startsWith("models--")) return true;
  }
  return false;
}
119
+
/**
 * Scan an HF Hub cache dir for MLX model dirs.
 * HF layout: models--org--name/snapshots/hash/files
 *
 * For each `models--*` entry, the first snapshot that is a valid MLX model
 * directory is used. Models below MIN_MODEL_SIZE_BYTES and embedding models
 * are skipped.
 *
 * Failures are isolated per entry: a `models--*` directory that cannot be read
 * is skipped on its own instead of ending the scan, so one broken cache entry
 * no longer hides every model listed after it.
 *
 * @param {string} dir - hub cache directory.
 * @param {string} sourceLabel - label stored on each result and used as the id prefix.
 * @returns {Promise<object[]>} model entries; empty when dir is missing or unreadable.
 */
async function scanHfHubForMlx(dir, sourceLabel) {
  if (!existsSync(dir)) return [];

  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    // Can't list the hub directory at all.
    return [];
  }

  const models = [];
  for (const entry of entries) {
    if (!entry.isDirectory() || !entry.name.startsWith("models--")) continue;
    try {
      // "models--org--name" -> "org/name"
      const label = entry.name.slice("models--".length).split("--").join("/");
      const snapshotsDir = join(dir, entry.name, "snapshots");
      if (!existsSync(snapshotsDir)) continue;

      // Follow symlinks (HF hub uses them). A model can have several
      // snapshots, some incomplete or empty, so check each one and take the
      // first that is a valid MLX model dir.
      const snapshots = await readdir(snapshotsDir, { withFileTypes: true });
      let snapshotPath = null;
      for (const snap of snapshots) {
        if (!snap.isDirectory() && !snap.isSymbolicLink()) continue;
        const candidate = join(snapshotsDir, snap.name);
        const info = await stat(candidate).catch(() => null);
        if (info?.isDirectory() && await isMlxModelDir(candidate)) {
          snapshotPath = candidate;
          break;
        }
      }
      if (!snapshotPath) continue;

      const sizeBytes = await getMlxDirSizeBytes(snapshotPath);
      if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
      if (await isEmbeddingMlxModel(join(snapshotPath, "config.json"))) continue;

      models.push({
        id: `${sourceLabel}:${entry.name}`,
        label,
        path: snapshotPath,
        filePath: snapshotPath,
        sizeBytes,
        backend: "mlx-vlm",
        format: "mlx",
        source: sourceLabel,
      });
    } catch {
      // This entry is unreadable; keep scanning the remaining ones.
    }
  }
  return models;
}
168
+
169
+ // ── Embedding model filtering for MLX ─────────────────────────────────────
170
+
/**
 * Decide from a config.json whether the model is an embedding model.
 *
 * True when the model_type (taken from text_config when present, otherwise
 * from the top level) is in EMBEDDING_MODEL_TYPES, or when the first entry of
 * `architectures`, lower-cased, is in that set or contains "bert". A missing,
 * unreadable, or unparsable config yields false.
 */
async function isEmbeddingMlxModel(configPath) {
  if (!existsSync(configPath)) return false;
  try {
    const parsed = JSON.parse(await readFile(configPath, "utf-8"));

    const typeSource = parsed.text_config ?? parsed;
    const declaredType = String(typeSource.model_type ?? "").toLowerCase();
    if (EMBEDDING_MODEL_TYPES.has(declaredType)) return true;

    const firstArch = Array.isArray(parsed.architectures) ? parsed.architectures[0] : "";
    const archKey = String(firstArch).toLowerCase();
    if (EMBEDDING_MODEL_TYPES.has(archKey)) return true;
    return archKey.includes("bert");
  } catch {
    return false;
  }
}
185
+
186
+ // ── MLX model entry builder ───────────────────────────────────────────────
187
+
/**
 * Build the catalog entry for an MLX model directory found by a directory scan.
 *
 * The id is `<sourceLabel>:<path of dir relative to rootDir>`. The root prefix
 * is stripped only when dir actually starts with it, and a trailing "/" on
 * rootDir is tolerated, so a scan dir configured as "/models/" yields the same
 * ids as "/models". When dir is not under rootDir, the full dir is used.
 *
 * @param {string} dir - model directory.
 * @param {string} label - display label.
 * @param {number} sizeBytes - size of the model in bytes.
 * @param {string} sourceLabel - label of the scan dir the model came from.
 * @param {string} rootDir - the scan dir that was walked.
 * @returns {{ id: string, label: string, path: string, filePath: string, sizeBytes: number, backend: string, format: string, source: string }}
 */
function makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir) {
  const rootPrefix = rootDir.endsWith("/") ? rootDir : `${rootDir}/`;
  const relativeDir = dir.startsWith(rootPrefix) ? dir.slice(rootPrefix.length) : dir;
  return {
    id: `${sourceLabel}:${relativeDir}`,
    label,
    path: dir,
    filePath: dir,
    sizeBytes,
    backend: "mlx-vlm",
    format: "mlx",
    source: sourceLabel,
  };
}
200
+
201
+ // ── Public API ─────────────────────────────────────────────────────────────
202
+
/**
 * Discover MLX models across the scan directories.
 *
 * Uses `dirs` when given, otherwise the directories from getModelScanDirs().
 * The oMLX folder is always excluded (it is oMLX-exclusive). Directories that
 * look like an HF Hub cache are scanned with the hub scanner, all others with
 * the recursive scanner. The merged result keeps the first entry seen for each
 * filePath.
 *
 * @param {string[]} [dirs] - directories to scan instead of the configured ones.
 * @returns {Promise<object[]>} flat, de-duplicated list of model entries.
 */
export async function scanMlxModels(dirs) {
  const configured = dirs ?? await getModelScanDirs();
  const eligible = configured.filter((candidate) => !isOmlxFolder(candidate));

  const perDir = await Promise.all(
    eligible.map(async (dir) => {
      const label = inferSourceLabel(dir);
      const isHub = await looksLikeHfHub(dir);
      return isHub ? scanHfHubForMlx(dir, label) : scanDirRecursiveForMlx(dir, label);
    }),
  );

  // A Map keeps insertion order, so the first entry per filePath is retained
  // and the overall order is unchanged.
  const byPath = new Map();
  for (const model of perDir.flat()) {
    if (!byPath.has(model.filePath)) byPath.set(model.filePath, model);
  }
  return [...byPath.values()];
}
227
+
228
+ // ── MLX capability detection ─────────────────────────────────────────────
229
+
/**
 * Read <modelDir>/config.json and derive the model's capabilities from it.
 *
 * @param {string} modelDir - MLX model directory.
 * @returns {Promise<{ thinking: boolean, vision: boolean, contextLength: number | null, architecture: string | null }>}
 *   All-negative defaults when the config is missing, unreadable, unparsable,
 *   or capability detection throws.
 */
export async function detectMlxCapabilities(modelDir) {
  const unknown = () => ({ thinking: false, vision: false, contextLength: null, architecture: null });

  const configPath = join(modelDir, "config.json");
  if (!existsSync(configPath)) return unknown();

  try {
    const raw = await readFile(configPath, "utf-8");
    return detectMlxCapabilitiesFromConfig(JSON.parse(raw), modelDir);
  } catch {
    return unknown();
  }
}
244
+
/**
 * Derive capabilities from an already-parsed MLX config.json.
 *
 * - vision: a vision_config is present, any of the image/video/vision-start
 *   token ids is set, or a model_type (top-level or text_config) contains
 *   "vl" or "vision".
 * - thinking: the model name (config._name_or_path, else the basename of
 *   modelDir) matches a known thinking-model family.
 * - architecture: first entry of config.architectures, or null.
 * - contextLength: largest positive number among max_position_embeddings and
 *   sliding_window (text_config and top level), or null when none is set.
 *
 * @param {object} config - parsed config.json.
 * @param {string} [modelDir] - model directory, used only as a name fallback.
 * @returns {{ thinking: boolean, vision: boolean, contextLength: number | null, architecture: string | null }}
 */
export function detectMlxCapabilitiesFromConfig(config, modelDir) {
  const textConfig = config.text_config ?? config;

  const label = String(config._name_or_path ?? basename(modelDir ?? ""));
  const name = label.toLowerCase();

  const typeHints = [
    String(config.model_type ?? "").toLowerCase(),
    String(textConfig.model_type ?? "").toLowerCase(),
  ];
  const tokenHints = [config.image_token_id, config.video_token_id, config.vision_start_token_id];
  const vision =
    Boolean(config.vision_config) ||
    tokenHints.some((tokenId) => tokenId != null) ||
    typeHints.some((type) => type.includes("vl") || type.includes("vision"));

  const thinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(`${name} ${label}`);

  const architecture = (Array.isArray(config.architectures) ? config.architectures[0] : undefined) ?? null;

  let contextLength = null;
  const lengthSources = [
    textConfig.max_position_embeddings,
    textConfig.sliding_window,
    config.max_position_embeddings,
    config.sliding_window,
  ];
  for (const value of lengthSources) {
    if (typeof value !== "number" || !(value > 0)) continue;
    if (contextLength === null || value > contextLength) contextLength = value;
  }

  return { thinking, vision, contextLength, architecture };
}
280
+
/**
 * Choose a default context length for an MLX model from its trained context
 * and the machine's RAM.
 *
 * With no usable trained context the result is 8192. Otherwise the trained
 * context is capped at 4096 below 12 GB, 8192 below 16 GB, and 16384 below
 * 32 GB; at 32 GB and above it is returned unchanged.
 *
 * @param {number} trainedCtx - context length the model was trained with.
 * @param {number} ramGb - RAM in GB.
 * @returns {number}
 */
export function defaultMlxContextLength(trainedCtx, ramGb) {
  if (!trainedCtx || trainedCtx <= 0) return 8192;

  // [exclusive RAM limit in GB, context cap], smallest tier first.
  const tiers = [
    [12, 4096],
    [16, 8192],
    [32, 16384],
  ];
  for (const [ramLimitGb, cap] of tiers) {
    if (ramGb < ramLimitGb) return Math.min(trainedCtx, cap);
  }
  return trainedCtx;
}
@@ -0,0 +1,93 @@
1
+ // mlx-vlm server flag computation — pure functions, no side effects.
2
+ // Ported from deprecated-offgrid-desktop/src/main/server-flags.ts (MLX subset).
3
+ //
4
+ // Benchmark-informed decisions (see sidequests/mlx-backend-benchmark/RESULTS.md):
5
+ // - mlx-vlm requires APC_ENABLED=1 env var (86x TTFT improvement) — set at spawn
6
+ // time in process.mjs, NOT here (this module only computes args).
7
+ // - mlx-vlm uses a strict=False wrapper script for shared-KV architectures
8
+ // (Gemma 4-class). Safe for all models — strict=False is a no-op for models
9
+ // that load fine with strict=True.
10
+ // - mlx-vlm uses --enable-thinking for thinking-mode control.
11
+ // - mlx-vlm uses --max-kv-size for the KV cache / context window.
12
+ //
13
+ // Only the mlx-vlm-relevant logic is ported here. offgrid-ai's existing GGUF
14
+ // flag logic (autodetect.mjs / profile-setup.mjs / estimate.mjs) is unchanged.
15
+
16
+ import { fileURLToPath } from "node:url";
17
+ import { dirname, join } from "node:path";
18
+
// Bytes per mebibyte.
const MB = 1024 * 1024;

/** Default port for the local model server. Matches the desktop's DEFAULT_PORT. */
export const DEFAULT_PORT = 18080;

/** Path of the bundled strict=False wrapper script: ../resources/mlxvlm-server-wrapper.py relative to this module's directory. */
export const MLX_VLM_WRAPPER = join(
  dirname(fileURLToPath(import.meta.url)),
  "..",
  "resources",
  "mlxvlm-server-wrapper.py",
);

/** mlx-vlm overhead factor applied to the weight size (covers KV cache, activations, APC cache; benchmark-validated). */
const MLX_VLM_OVERHEAD_MULTIPLIER = 1.5;

/** Fixed server process overhead in MB. */
const PROCESS_OVERHEAD_MB = 200;

/**
 * Estimate mlx-vlm memory usage in MB as
 * (model size in MB × MLX_VLM_OVERHEAD_MULTIPLIER) + PROCESS_OVERHEAD_MB,
 * rounded to the nearest integer.
 *
 * The multiplier comes from sidequests/mlx-backend-benchmark/RESULTS.md.
 * GGUF/llama-server estimation uses the detailed path in estimate.mjs instead.
 *
 * @param {number} fileSizeBytes - model size on disk (sum of MLX safetensors).
 * @returns {number} estimated memory in MB.
 */
export function estimateMemoryMb(fileSizeBytes) {
  const weightsMb = fileSizeBytes / MB;
  const estimateMb = weightsMb * MLX_VLM_OVERHEAD_MULTIPLIER + PROCESS_OVERHEAD_MB;
  return Math.round(estimateMb);
}
46
+
/**
 * Build the argument list for launching the mlx-vlm server.
 *
 * The list starts with the wrapper script path (MLX_VLM_WRAPPER), because the
 * binary that runs it is "python3" (resolved by backendBinaryFor in
 * backends.mjs). The server is bound to 127.0.0.1. There is intentionally no
 * raw mlx_vlm.server path: the wrapper applies strict=False loading and the
 * BatchRotatingKVCache.merge() fix needed by shared-KV architectures
 * (Gemma 4-class).
 *
 * The mandatory APC_ENABLED=1 environment variable is NOT produced here; it is
 * set at spawn time in process.mjs.
 *
 * @param {string} modelPath - path to the MLX model directory.
 * @param {object} [options]
 * @param {number} [options.port] - port (default DEFAULT_PORT).
 * @param {number} [options.ctxSize] - context window (passed as --max-kv-size).
 * @param {boolean} [options.thinkingEnabled=true] - whether to enable thinking.
 * @returns {{ args: string[], port: number }}
 */
export function computeMlxVlmFlags(modelPath, options = {}) {
  const port = options.port ?? DEFAULT_PORT;
  const wantsThinking = options.thinkingEnabled ?? true;
  const { ctxSize } = options;

  // Optional flag groups; each is empty when the feature is off.
  const thinkingFlags = wantsThinking ? ["--enable-thinking"] : [];
  // mlx-vlm sizes its KV cache / context window through --max-kv-size.
  const contextFlags = ctxSize && ctxSize > 0 ? ["--max-kv-size", String(ctxSize)] : [];

  return {
    args: [
      MLX_VLM_WRAPPER,
      "--model", modelPath,
      "--host", "127.0.0.1",
      "--port", String(port),
      ...thinkingFlags,
      ...contextFlags,
    ],
    port,
  };
}
@@ -1,29 +1,34 @@
1
1
  import { scanGgufModels, matchDrafter } from "./scan.mjs";
2
2
  import { loadProfiles, normalizeProfile, sanitizeProfileId } from "./profiles.mjs";
3
3
  import { scanManagedModels } from "./managed.mjs";
4
+ import { scanMlxModels } from "./mlx-discovery.mjs";
4
5
  import { isProfileFileMissing } from "./model-summary.mjs";
5
6
 
6
7
  export async function loadModelCatalog() {
7
- const [profiles, { models: ggufModels, drafters }, managedModels] = await Promise.all([
8
+ const [profiles, { models: ggufModels, drafters }, managedModels, mlxModels] = await Promise.all([
8
9
  loadProfiles(),
9
10
  scanGgufModels(),
10
11
  scanManagedModels(),
12
+ scanMlxModels(),
11
13
  ]);
12
- return normalizeCatalog({ profiles, ggufModels, drafters, managedModels });
14
+ return normalizeCatalog({ profiles, ggufModels, drafters, managedModels, mlxModels });
13
15
  }
14
16
 
15
17
  export function normalizeCatalog(catalog) {
16
18
  if (catalog.newModels && catalog.managedItems) return catalog;
17
- const { profiles, ggufModels, drafters, managedModels } = catalog;
19
+ const { profiles, ggufModels, drafters, managedModels, mlxModels = [] } = catalog;
18
20
  const profiledPaths = new Set(profiles.map((profile) => profile.modelPath).filter(Boolean));
19
- const newModels = ggufModels.filter((model) => !profiledPaths.has(model.path));
21
+ const newModels = [
22
+ ...ggufModels.filter((model) => !profiledPaths.has(model.path)),
23
+ ...mlxModels.filter((model) => !profiledPaths.has(model.path)),
24
+ ];
20
25
  const managedItems = [];
21
26
  for (const { backendId, models, status } of managedModels) {
22
27
  if (status === "unavailable") continue;
23
28
  const profiledAliases = new Set(
24
29
  profiles
25
30
  .filter((profile) => profile.backend === backendId)
26
- .map((profile) => backendId === "ollama" ? `ollama:${profile.ollamaModel ?? profile.modelAlias}` : `omlx:${profile.omlxModel ?? profile.modelAlias}`),
31
+ .map((profile) => `omlx:${profile.omlxModel ?? profile.modelAlias}`),
27
32
  );
28
33
  for (const model of models) {
29
34
  if (!profiledAliases.has(`${backendId}:${model.id}`)) managedItems.push({ model, backendId });
@@ -66,8 +71,9 @@ export function createManagedProfile(model, backendId) {
66
71
  id: `${backendId}-${sanitizeProfileId(model.id)}`,
67
72
  label: model.label,
68
73
  backend: backendId,
74
+ source: backendId,
69
75
  modelAlias: model.aliasSuggestion,
70
- ...(backendId === "ollama" ? { ollamaModel: model.id } : {}),
76
+ modelSizeBytes: model.sizeBytes || 0,
71
77
  ...(backendId === "omlx" ? { omlxModel: model.id } : {}),
72
78
  });
73
79
  }