offgrid-ai 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,9 +68,8 @@ const TAG_TOKENS = [
68
68
  /**
69
69
  * Parse a raw model identifier into a structured display name.
70
70
  *
71
- * @param {string} rawId The raw identifier: GGUF filename (no .gguf),
72
- * Ollama model name, or oMLX model id.
73
- * @param {"local-gguf"|"ollama"|"omlx"} source Where this name came from.
71
+ * @param {string} rawId The raw identifier: GGUF filename (no .gguf) or oMLX model id.
72
+ * @param {"local-gguf"|"omlx"} source Where this name came from.
74
73
  * @returns {{ publisher: string|null, model: string, params: string|null,
75
74
  * quant: string|null, tags: string[], display: string,
76
75
  * sort: string, id: string }}
@@ -87,18 +86,7 @@ export function parseModelName(rawId, source) {
87
86
  name = rawId.slice(slashIdx + 1);
88
87
  }
89
88
 
90
- // 2. For Ollama, split on : to separate model from tag (e.g. "gemma3:4b")
91
- // The tag after : is a model size/variant identifier — not a GGUF quant.
92
- let ollamaTag = null;
93
- if (source === "ollama") {
94
- const colonIdx = name.lastIndexOf(":");
95
- if (colonIdx !== -1) {
96
- ollamaTag = name.slice(colonIdx + 1);
97
- name = name.slice(0, colonIdx);
98
- }
99
- }
100
-
101
- // 3. Extract quant (GGUF quantization suffix)
89
+ // 2. Extract quant (GGUF quantization suffix)
102
90
  let quant = null;
103
91
  for (const pattern of QUANT_PATTERNS) {
104
92
  const match = name.match(pattern);
@@ -125,13 +113,7 @@ export function parseModelName(rawId, source) {
125
113
  // Clean up leftover separators
126
114
  name = name.replace(/[-_]{2,}/g, "-").replace(/^[-_]+|[-_]+$/g, "");
127
115
 
128
- // 5. For Ollama, re-attach the tag as part of the model name
129
- // (Ollama tags like "4b" or "30b-a3b" are size variants, not quants)
130
- if (ollamaTag) {
131
- name = name + "-" + ollamaTag;
132
- }
133
-
134
- // 6. Title-case the remaining model name
116
+ // 5. Title-case the remaining model name
135
117
  let model = titleCaseModel(name);
136
118
 
137
119
  // If nothing is left after parsing, fall back to the raw name
@@ -139,13 +121,13 @@ export function parseModelName(rawId, source) {
139
121
  model = rawId.includes("/") ? rawId : rawId.replace(/[-_]/g, " ");
140
122
  }
141
123
 
142
- // 7. Extract params (size like 30B, 12B) for sort/filter convenience
124
+ // 6. Extract params (size like 30B, 12B) for sort/filter convenience
143
125
  const params = extractParams(model);
144
126
 
145
- // 8. Build display string
127
+ // 7. Build display string
146
128
  const display = buildDisplay(publisher, model, tags, quant);
147
129
 
148
- // 9. Build sort key (lowercase, no publisher, for alphabetical ordering)
130
+ // 8. Build sort key (lowercase, no publisher, for alphabetical ordering)
149
131
  const sort = model.toLowerCase().replace(/[-_]/g, " ");
150
132
 
151
133
  return { publisher, model, params, quant, tags, display, sort, id };
@@ -1,7 +1,8 @@
1
1
  import { existsSync, statSync } from "node:fs";
2
- import { BACKENDS, backendFor } from "./backends.mjs";
2
+ import { basename, dirname } from "node:path";
3
+ import { backendFor } from "./backends.mjs";
3
4
  import { readCommandArgv } from "./profiles.mjs";
4
- import { isProfileRunning, isProfileServerUp } from "./process.mjs";
5
+ import { isProfileRunning } from "./process.mjs";
5
6
  import { buildPrettyCommand } from "./command.mjs";
6
7
  import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
7
8
  import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
@@ -11,6 +12,7 @@ import { findBenchmarkRepo } from "./benchmark.mjs";
11
12
 
12
13
  const OPTION_SEPARATOR = pc.dim(" │ ");
13
14
  const OPTION_STATUS_WIDTH = 10;
15
+ const OPTION_BACKEND_WIDTH = 14;
14
16
  const OPTION_SOURCE_WIDTH = 14;
15
17
  const OPTION_CTX_WIDTH = 5;
16
18
 
@@ -25,7 +27,7 @@ function optionPad(text, color, width) {
25
27
  function optionStatusTag(kind) {
26
28
  const statuses = {
27
29
  running: ["RUNNING", pc.green],
28
- serverup: ["SERVER UP", pc.yellow],
30
+ serverup: ["READY", pc.blue],
29
31
  ready: ["READY", pc.blue],
30
32
  missing: ["MISSING", pc.red],
31
33
  setup: ["SETUP", pc.yellow],
@@ -34,17 +36,70 @@ function optionStatusTag(kind) {
34
36
  return optionPad(text, color, OPTION_STATUS_WIDTH);
35
37
  }
36
38
 
37
- function optionSourceTag(sourceId, label) {
39
+ function optionSourceTag(sourceId) {
40
+ const label = formatSourceLabel(sourceId);
38
41
  const colors = {
39
- "llama-cpp": pc.cyan,
40
- "llama-cpp-mtp": pc.blue,
41
- ollama: pc.green,
42
+ huggingface: pc.cyan,
43
+ lmstudio: pc.blue,
42
44
  omlx: pc.magenta,
45
+ "llama.cpp": pc.cyan,
43
46
  gguf: pc.cyan,
47
+ mlx: pc.yellow,
48
+ "mlx-vlm": pc.yellow,
44
49
  };
45
50
  return optionPad(label, colors[sourceId] ?? pc.dim, OPTION_SOURCE_WIDTH);
46
51
  }
47
52
 
53
+ function optionBackendTag(backendId) {
54
+ const backend = backendId ? backendFor(backendId) : null;
55
+ const label = backend?.label ?? backendId ?? "unknown";
56
+ const colors = {
57
+ "llama-cpp": pc.cyan,
58
+ "llama-cpp-mtp": pc.blue,
59
+ omlx: pc.magenta,
60
+ "mlx-vlm": pc.yellow,
61
+ };
62
+ return optionPad(label, colors[backendId] ?? pc.dim, OPTION_BACKEND_WIDTH);
63
+ }
64
+
65
+ function formatSourceLabel(sourceId) {
66
+ if (!sourceId) return "unknown";
67
+ const map = {
68
+ huggingface: "HuggingFace",
69
+ lmstudio: "LM Studio",
70
+ omlx: "oMLX",
71
+ "llama.cpp": "llama.cpp",
72
+ gguf: "GGUF file",
73
+ mlx: "MLX",
74
+ "mlx-vlm": "MLX",
75
+ };
76
+ return map[sourceId] ?? String(sourceId);
77
+ }
78
+
79
+ function inferSourceFromPath(modelPath) {
80
+ if (!modelPath) return null;
81
+ const normalized = modelPath.toLowerCase().replace(/\\/g, "/");
82
+ if (normalized.includes("/.omlx/models")) return "omlx";
83
+ if (normalized.includes("/.lmstudio/models")) return "lmstudio";
84
+ if (normalized.includes("/.cache/huggingface")) return "huggingface";
85
+ if (normalized.includes("/.cache/llama.cpp")) return "llama.cpp";
86
+ const parent = basename(dirname(modelPath));
87
+ if (parent && parent !== ".") return parent.replace(/^\./, "");
88
+ return null;
89
+ }
90
+
91
+ function discoverySourceForProfile(profile) {
92
+ const backend = backendFor(profile.backend);
93
+ if (backend.type === "managed-server") return backend.id;
94
+ if (profile.source && profile.source !== "local-gguf") return profile.source;
95
+ return inferSourceFromPath(profile.modelPath);
96
+ }
97
+
98
+ function discoverySourceForItem(item) {
99
+ if (item.type === "profile") return discoverySourceForProfile(item.profile);
100
+ return item.model?.source ?? null;
101
+ }
102
+
48
103
  function optionCtxLabel(item) {
49
104
  if (item.type === "profile" && item.profile.flags?.ctxSize) {
50
105
  return optionPad(`${(item.profile.flags.ctxSize / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
@@ -52,12 +107,18 @@ function optionCtxLabel(item) {
52
107
  return optionPad("—", null, OPTION_CTX_WIDTH);
53
108
  }
54
109
 
55
- function optionSizeLabel(item) {
110
+ function optionSizeLabel(item, managedModels) {
56
111
  if (item.type === "profile") {
57
112
  if (item.fileMissing) return "—";
113
+ if (item.profile.modelSizeBytes) return formatBytes(item.profile.modelSizeBytes);
58
114
  if (item.profile.modelPath && existsSync(item.profile.modelPath)) {
59
- return formatBytes(statSync(item.profile.modelPath).size);
115
+ const s = statSync(item.profile.modelPath);
116
+ // Only stat regular files — a modelPath that is a directory (MLX)
117
+ // reports the dir entry size, not the model size.
118
+ if (s.isFile()) return formatBytes(s.size);
60
119
  }
120
+ const managedSize = managedProfileSizeBytes(item.profile, managedModels);
121
+ if (managedSize) return formatBytes(managedSize);
61
122
  return "—";
62
123
  }
63
124
  if (item.type === "new") {
@@ -76,17 +137,18 @@ export function modelNameWidth(items) {
76
137
  return Math.max(20, maxName + 2);
77
138
  }
78
139
 
79
- function optionLabel({ status, source, name, ctx, size, nameWidth }) {
80
- return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
140
+ function optionLabel({ status, backend, source, name, ctx, size, nameWidth }) {
141
+ return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
81
142
  }
82
143
 
83
- export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
144
+ export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth, managedModels }) {
145
+ const sourceId = discoverySourceForItem(item) ?? "unknown";
146
+ const backendId = inferBackendId(item);
84
147
  if (item.type === "profile") {
85
148
  const backend = backendFor(item.profile.backend);
86
149
  const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
87
- const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
88
150
  const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
89
- const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
151
+ const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : "ready";
90
152
  const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
91
153
  const hint = drafterMissing ? "MTP drafter missing — reconfigure"
92
154
  : modelMissing ? `${backend.label} model no longer available`
@@ -95,11 +157,12 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
95
157
  value: itemKey(item),
96
158
  label: optionLabel({
97
159
  status: optionStatusTag(status),
98
- source: optionSourceTag(item.profile.backend, backend.label),
160
+ backend: optionBackendTag(backendId),
161
+ source: optionSourceTag(sourceId),
99
162
  name: item.profile.label,
100
163
  nameWidth,
101
164
  ctx: optionCtxLabel(item),
102
- size: optionSizeLabel(item),
165
+ size: optionSizeLabel(item, managedModels),
103
166
  }),
104
167
  ...(hint ? { hint: pc.red(hint) } : {}),
105
168
  };
@@ -109,41 +172,60 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
109
172
  value: itemKey(item),
110
173
  label: optionLabel({
111
174
  status: optionStatusTag("setup"),
112
- source: optionSourceTag("gguf", "GGUF file"),
175
+ backend: optionBackendTag(backendId),
176
+ source: optionSourceTag(sourceId),
113
177
  name: item.model.label,
114
178
  nameWidth,
115
179
  ctx: optionCtxLabel(item),
116
- size: optionSizeLabel(item),
180
+ size: optionSizeLabel(item, managedModels),
117
181
  }),
118
182
  };
119
183
  }
120
- const backend = BACKENDS[item.backendId];
121
184
  return {
122
185
  value: itemKey(item),
123
186
  label: optionLabel({
124
187
  status: optionStatusTag("setup"),
125
- source: optionSourceTag(item.backendId, backend.label),
188
+ backend: optionBackendTag(backendId),
189
+ source: optionSourceTag(sourceId),
126
190
  name: item.model.label,
127
191
  nameWidth,
128
192
  ctx: optionCtxLabel(item),
129
- size: optionSizeLabel(item),
193
+ size: optionSizeLabel(item, managedModels),
130
194
  }),
131
195
  };
132
196
  }
133
197
 
134
- export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
198
+ function managedProfileSizeBytes(profile, managedModels) {
199
+ if (!managedModels || !Array.isArray(managedModels)) return null;
200
+ const backend = backendFor(profile.backend);
201
+ if (backend.type !== "managed-server") return null;
202
+ const backendModels = managedModels.find((m) => m.backendId === profile.backend)?.models ?? [];
203
+ const modelId = profile.omlxModel ?? null;
204
+ if (!modelId) return null;
205
+ const model = backendModels.find((m) => m.id === modelId);
206
+ return model?.sizeBytes || null;
207
+ }
208
+
209
+ function inferBackendId(item) {
210
+ if (item.type === "profile") return item.profile.backend;
211
+ if (item.type === "managed") return item.backendId;
212
+ // new model: derive from format
213
+ if (item.model?.format === "mlx") return "mlx-vlm";
214
+ if (item.model?.backend) return item.model.backend;
215
+ return "llama-cpp";
216
+ }
217
+
218
+ export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds = new Set()) {
135
219
  const profiles = normalized.profiles;
136
220
  const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
137
221
  const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
138
- const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
222
+ const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)).length;
139
223
  const runningCount = runningProfilesNow.length;
140
- const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
141
224
  const missingCount = profiles.filter(isMissing).length;
142
225
  const setupCount = normalized.newModels.length + normalized.managedItems.length;
143
226
 
144
227
  const countParts = [];
145
228
  if (runningCount > 0) countParts.push(pc.green(`${runningCount} running`));
146
- if (serverUpCount > 0) countParts.push(pc.yellow(`${serverUpCount} server up, model not loaded`));
147
229
  if (readyCount > 0) countParts.push(pc.blue(`${readyCount} model${readyCount === 1 ? "" : "s"} ready`));
148
230
  if (missingCount > 0) countParts.push(pc.red(`${missingCount} model${missingCount === 1 ? "" : "s"} missing`));
149
231
  if (setupCount > 0) countParts.push(pc.yellow(`${setupCount} model${setupCount === 1 ? "" : "s"} need${setupCount === 1 ? "s" : ""} setup`));
@@ -166,11 +248,10 @@ export async function printProfileDetails(profile) {
166
248
  const backend = backendFor(profile.backend);
167
249
  const isManaged = backend.type === "managed-server";
168
250
  const running = await isProfileRunning(profile);
169
- const serverUp = !running && isManaged && await isProfileServerUp(profile);
170
251
  const fileMissing = !isManaged && isProfileFileMissing(profile);
171
252
  console.log("\n" + renderSection("Model overview", renderRows([
172
253
  ["Name", pc.bold(profile.label)],
173
- ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : serverUp ? pc.yellow("Server up, model not loaded") : pc.blue("Ready")],
254
+ ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : pc.blue("Ready")],
174
255
  ["Details", profileDetailParts(profile, { fileMissing }).join(pc.dim(" · "))],
175
256
  ["Server", fileMissing ? pc.red(profile.baseUrl) : profile.baseUrl],
176
257
  ])));
@@ -219,6 +300,29 @@ export function printGgufModelDetails(model, drafter) {
219
300
  console.log("\n" + renderSection("Model details", renderRows(detailRows), { columns: 110 }));
220
301
  }
221
302
 
303
+ export async function printMlxModelDetails(model) {
304
+ const { detectMlxCapabilities } = await import("./mlx-discovery.mjs");
305
+ const caps = await detectMlxCapabilities(model.filePath ?? model.path);
306
+ const parts = [];
307
+ if (caps.architecture) parts.push(caps.architecture);
308
+ if (caps.thinking) parts.push("thinking");
309
+ if (caps.vision) parts.push("vision");
310
+ const summary = parts.length > 0 ? parts.join(pc.dim(" · ")) : "standard MLX";
311
+ console.log("\n" + renderSection("Downloaded model", renderRows([
312
+ ["Name", pc.bold(model.label)],
313
+ ["Status", pc.yellow("Needs one-time setup")],
314
+ ["Details", summary],
315
+ ])));
316
+ console.log("\n" + renderSection("Model details", renderRows([
317
+ ["Model dir", model.path],
318
+ ["Backend", "mlx-vlm"],
319
+ ["Source", formatSourceLabel(model.source)],
320
+ ["Detected", summary],
321
+ ["Size", formatBytes(model.sizeBytes)],
322
+ ["Context", caps.contextLength ? `${caps.contextLength.toLocaleString()} trained` : "unknown"],
323
+ ]), { columns: 110 }));
324
+ }
325
+
222
326
  export function printManagedModelDetails(model, backend) {
223
327
  console.log("\n" + renderSection(`${backend.label} model`, renderRows([
224
328
  ["Name", pc.bold(model.label)],
package/src/process.mjs CHANGED
@@ -35,11 +35,13 @@ async function startLocalServer(profile) {
35
35
 
36
36
  // Build argv: binary + command.json args
37
37
  const argv = [...commandArgv];
38
+ // mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
39
+ const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
38
40
 
39
41
  const rawFd = openSync(rawLogPath, "a");
40
42
  let child;
41
43
  try {
42
- child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd] });
44
+ child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd], env });
43
45
  } finally {
44
46
  closeSync(rawFd);
45
47
  }
@@ -96,16 +98,134 @@ export async function stopProfile(profile) {
96
98
  await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopReason: "pid-not-running" });
97
99
  return { stopped: false, message: `${profile.id} pid ${state.pid} is no longer running.` };
98
100
  }
101
+ const pid = state.pid;
99
102
  try {
100
- try {
101
- process.kill(-state.pid, "SIGTERM");
102
- } catch {
103
- process.kill(state.pid, "SIGTERM");
104
- }
105
- await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: "SIGTERM" });
106
- return { stopped: true, message: `Stopped ${profile.id} pid ${state.pid}` };
103
+ const signal = await terminateProcess(pid);
104
+ await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: signal });
105
+ return { stopped: true, message: `Stopped ${profile.id} pid ${pid}` };
107
106
  } catch (error) {
108
- return { stopped: false, message: `Could not stop pid ${state.pid}: ${error.message}` };
107
+ return { stopped: false, message: `Could not stop pid ${pid}: ${error.message}` };
108
+ }
109
+ }
110
+
111
+ // Reliably terminate a detached local-server process group: SIGTERM with a
112
+ // grace period for graceful shutdown (lets mlx-vlm/llama-server release the
113
+ // model), then SIGKILL if still alive. Guarantees the model is unloaded when a
114
+ // profile stops — consistent across backends (llama-server exits on SIGTERM;
115
+ // mlx-vlm/uvicorn often does not, hence the SIGKILL fallback).
116
+ async function terminateProcess(pid) {
117
+ const signalGroup = (sig) => {
118
+ try { process.kill(-pid, sig); }
119
+ catch { process.kill(pid, sig); } // not a group leader — kill the proc itself
120
+ };
121
+ signalGroup("SIGTERM");
122
+ for (let i = 0; i < 50; i++) { // 5s grace for graceful shutdown
123
+ if (await processGone(pid)) return "SIGTERM";
124
+ await sleep(100);
125
+ }
126
+ signalGroup("SIGKILL");
127
+ for (let i = 0; i < 30; i++) { // 3s for SIGKILL to take effect
128
+ if (await processGone(pid)) return "SIGKILL";
129
+ await sleep(100);
130
+ }
131
+ throw new Error(`pid ${pid} did not exit after SIGKILL`);
132
+ }
133
+
134
+ // True if the process is dead (or a zombie about to be reaped).
135
+ async function processGone(pid) {
136
+ try { process.kill(pid, 0); }
137
+ catch { return true; } // no such process
138
+ // Alive to signal(0) — but a detached setsid child can briefly appear as a
139
+ // zombie before launchd reaps it. Treat zombie as gone.
140
+ try {
141
+ const { stdout } = await execFileAsync("ps", ["-o", "stat=", "-p", String(pid)]);
142
+ return /^Z/.test(stdout.trim());
143
+ } catch {
144
+ return false;
145
+ }
146
+ }
147
+
148
+ // ── Unload model from a managed server (oMLX) ─────────────────────────────
149
+ // Counterpart to stopProfile for local-server backends: stopProfile kills the
150
+ // server process (which unloads the model); unloadModelFromServer asks a
151
+ // managed server to release the model from memory via its HTTP API, leaving the
152
+ // server itself running. Together they give a consistent UX: quitting Pi
153
+ // unloads the model regardless of backend type.
154
+
155
+ export async function unloadModelFromServer(profile) {
156
+ const backend = backendFor(profile.backend);
157
+
158
+ if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
159
+ // llama.cpp unloads when the server process exits; no HTTP unload API exists.
160
+ // If offgrid-ai started the server, stopProfile already handled it.
161
+ return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
162
+ }
163
+
164
+ if (backend.id === "omlx") {
165
+ return await unloadOmlxModel(profile);
166
+ }
167
+
168
+ if (backend.id === "mlx-vlm") {
169
+ // mlx-vlm is a local-server backend — stopProfile handles unload by killing
170
+ // the process. No HTTP unload API.
171
+ return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
172
+ }
173
+
174
+ return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
175
+ }
176
+
177
+ async function unloadOmlxModel(profile) {
178
+ const baseUrl = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
179
+ const adminUrl = `${baseUrl}/admin/api/models`;
180
+ const modelId = profile.modelAlias || profile.omlxModel || profile.id;
181
+
182
+ try {
183
+ const ids = await serverModelIds(profile.baseUrl);
184
+ const match = ids.find((id) => id.toLowerCase() === modelId.toLowerCase());
185
+ const targetId = match ?? modelId;
186
+
187
+ const response = await fetch(`${adminUrl}/${encodeURIComponent(targetId)}/unload`, {
188
+ method: "POST",
189
+ headers: { "Content-Type": "application/json" },
190
+ signal: AbortSignal.timeout(30000),
191
+ });
192
+
193
+ if (response.ok) {
194
+ return { unloaded: true, backend: "omlx", modelId: targetId };
195
+ }
196
+
197
+ const detail = await responseErrorDetail(response);
198
+
199
+ if (response.status === 400 && /not loaded/i.test(detail)) {
200
+ return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
201
+ }
202
+
203
+ if (response.status === 401 || response.status === 403) {
204
+ return {
205
+ unloaded: false,
206
+ backend: "omlx",
207
+ modelId: targetId,
208
+ error: "oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or unload manually from the admin panel.",
209
+ };
210
+ }
211
+
212
+ return { unloaded: false, backend: "omlx", modelId: targetId, error: `HTTP ${response.status}: ${detail}` };
213
+ } catch (err) {
214
+ if (err?.name === "AbortError" || err?.name === "TimeoutError") {
215
+ return { unloaded: false, backend: "omlx", modelId, error: "Unload request timed out. The model may still be unloading in the background." };
216
+ }
217
+ return { unloaded: false, backend: "omlx", modelId, error: err.message };
218
+ }
219
+ }
220
+
221
+ async function responseErrorDetail(response) {
222
+ const text = await response.text().catch(() => "");
223
+ if (!text) return "";
224
+ try {
225
+ const body = JSON.parse(text);
226
+ return body?.detail ?? body?.message ?? text;
227
+ } catch {
228
+ return text;
109
229
  }
110
230
  }
111
231
 
@@ -126,7 +246,6 @@ export async function isProfileServerUp(profile) {
126
246
 
127
247
  export async function modelLoadedOnServer(profile) {
128
248
  const backend = backendFor(profile.backend);
129
- if (backend.id === "ollama") return modelIdsMatch(await ollamaLoadedModelIds(profile), expectedModelIds(profile));
130
249
  if (backend.id === "omlx") return modelIdsMatch(await omlxLoadedModelIds(profile), expectedModelIds(profile));
131
250
  const { matches } = await serverMatchesProfile(profile);
132
251
  return matches;
@@ -134,9 +253,6 @@ export async function modelLoadedOnServer(profile) {
134
253
 
135
254
  export async function modelAvailableOnServer(profile) {
136
255
  const backend = backendFor(profile.backend);
137
- if (backend.id === "ollama") {
138
- return modelIdsMatch(await ollamaAvailableModelIds(profile), expectedModelIds(profile));
139
- }
140
256
  if (backend.id === "omlx") {
141
257
  // /v1/models lists discovered models; an ID must exist there to be usable.
142
258
  return modelIdsMatch(await serverModelIds(profile.baseUrl), expectedModelIds(profile));
@@ -217,24 +333,6 @@ export async function serverModelIds(baseUrl) {
217
333
  .filter(Boolean);
218
334
  }
219
335
 
220
- async function ollamaLoadedModelIds(profile) {
221
- const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/ps`);
222
- if (!result.ok) return [];
223
- return (Array.isArray(result.data?.models) ? result.data.models : [])
224
- .flatMap((model) => [model?.name, model?.model])
225
- .map((id) => String(id ?? "").trim())
226
- .filter(Boolean);
227
- }
228
-
229
- async function ollamaAvailableModelIds(profile) {
230
- const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/tags`);
231
- if (!result.ok) return [];
232
- return (Array.isArray(result.data?.models) ? result.data.models : [])
233
- .flatMap((model) => [model?.name, model?.model])
234
- .map((id) => String(id ?? "").trim())
235
- .filter(Boolean);
236
- }
237
-
238
336
  async function omlxLoadedModelIds(profile) {
239
337
  const statusResult = await fetchJson(`${profile.baseUrl.replace(/\/+$/u, "")}/models/status`);
240
338
  const fromStatus = statusResult.ok
@@ -305,7 +403,6 @@ function expectedModelIds(profile) {
305
403
  return [
306
404
  profile.modelAlias,
307
405
  profile.label,
308
- profile.ollamaModel,
309
406
  profile.omlxModel,
310
407
  profile.modelPath,
311
408
  fileName,
@@ -8,6 +8,7 @@ import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
8
8
  import { detectCapabilities } from "./autodetect.mjs";
9
9
  import { matchDrafter } from "./scan.mjs";
10
10
  import { scanGgufModels } from "./scan.mjs";
11
+ import { estimateMemoryMb } from "./mlx-flags.mjs";
11
12
 
12
13
  const execFileAsync = promisify(execFile);
13
14
 
@@ -297,3 +298,107 @@ function detectionSummary(caps) {
297
298
  function samplingSummary(flags) {
298
299
  return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
299
300
  }
301
+
302
+ // ── MLX profile configuration ─────────────────────────────────────────────
303
+
304
+ /**
305
+ * Interactive configuration for an mlx-vlm profile.
306
+ */
307
+ export async function configureMlxProfile(prompt, profile) {
308
+ let configured = profile;
309
+
310
+ console.log("");
311
+ console.log(renderSection("Model setup", renderRows([
312
+ ["Model", pc.bold(profile.label)],
313
+ ["Detected", mlxDetectionSummary(configured.capabilities)],
314
+ ["Context", String(configured.flags.ctxSize) + " tokens"],
315
+ ])));
316
+ console.log(pc.dim("Larger context windows use more memory. You can edit the profile later if needed.\n"));
317
+
318
+ if (configured.capabilities.vision) {
319
+ console.log(renderSection("Vision detected", renderRows([
320
+ ["Capability", "image / multimodal input"],
321
+ ["Note", "mlx-vlm loads vision from the model directory automatically."],
322
+ ])));
323
+ }
324
+
325
+ if (configured.capabilities.thinking) {
326
+ console.log("");
327
+ console.log(renderSection("Thinking mode", renderRows([
328
+ ["Flag", "--enable-thinking"],
329
+ ["Default", "on for Qwen 3 / Gemma 4 / DeepSeek-R class models"],
330
+ ])));
331
+ const useThinking = await prompt.yesNo("Enable thinking mode?", true);
332
+ configured = await applyMlxThinkingToggle(configured, useThinking);
333
+ }
334
+
335
+ const ctxSize = await prompt.number("Context window tokens", configured.flags.ctxSize, 1024, 1048576);
336
+ configured = applyMlxContextSize(configured, ctxSize);
337
+
338
+ console.log("\n" + renderMlxMemoryEstimate(configured));
339
+
340
+ console.log("");
341
+ console.log(renderSection("Defaults", renderRows([
342
+ ["Backend", configured.backend],
343
+ ["Endpoint", configured.baseUrl],
344
+ ["Context", String(configured.flags.ctxSize) + " tokens"],
345
+ ["Thinking", configured.capabilities.thinking && configured.commandArgv.includes("--enable-thinking") ? "on" : "off"],
346
+ ["Vision", configured.capabilities.vision ? "yes" : "no"],
347
+ ])));
348
+
349
+ if (!(await prompt.yesNo("Save profile with these settings?", true))) return null;
350
+ return configured;
351
+ }
352
+
353
+ async function applyMlxThinkingToggle(profile, enabled) {
354
+ if (!profile.capabilities.thinking) return profile;
355
+ const { computeMlxVlmFlags } = await import("./mlx-flags.mjs");
356
+ const { args } = computeMlxVlmFlags(profile.modelPath, {
357
+ port: profile.flags.port,
358
+ ctxSize: profile.flags.ctxSize,
359
+ thinkingEnabled: enabled,
360
+ });
361
+ return {
362
+ ...profile,
363
+ commandArgv: args,
364
+ capabilities: { ...profile.capabilities, thinkingEnabled: enabled },
365
+ };
366
+ }
367
+
368
+ function applyMlxContextSize(profile, ctxSize) {
369
+ const flags = { ...profile.flags, ctxSize };
370
+ const next = {
371
+ ...profile,
372
+ flags,
373
+ baseUrl: baseUrlForFlags(flags),
374
+ };
375
+ const idx = next.commandArgv.indexOf("--max-kv-size");
376
+ if (idx !== -1 && next.commandArgv[idx + 1] != null) {
377
+ next.commandArgv[idx + 1] = String(ctxSize);
378
+ } else if (ctxSize && ctxSize > 0) {
379
+ next.commandArgv.push("--max-kv-size", String(ctxSize));
380
+ }
381
+ return next;
382
+ }
383
+
384
+ function renderMlxMemoryEstimate(profile) {
385
+ const modelBytes = profile.modelSizeBytes || 0;
386
+ if (!modelBytes) {
387
+ return renderSection("Memory estimate", pc.dim("Model size unknown — save the profile to estimate."));
388
+ }
389
+ const totalMb = estimateMemoryMb(modelBytes);
390
+ const overheadBytes = Math.max(0, totalMb * 1024 * 1024 - modelBytes);
391
+ return renderSection("Memory estimate", renderRows([
392
+ ["Estimated total", pc.bold(`~${formatBytes(totalMb * 1024 * 1024)}`)],
393
+ ["Model", formatBytes(modelBytes)],
394
+ ["Overhead", `~${formatBytes(overheadBytes)} (KV cache, APC, runtime)`],
395
+ ]));
396
+ }
397
+
398
+ function mlxDetectionSummary(caps) {
399
+ const parts = [];
400
+ if (caps.architecture) parts.push(caps.architecture);
401
+ if (caps.thinking) parts.push("thinking");
402
+ if (caps.vision) parts.push("vision");
403
+ return parts.length > 0 ? parts.join(" · ") : "standard MLX";
404
+ }