offgrid-ai 0.9.6 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,35 +1,41 @@
1
1
  import { scanGgufModels, matchDrafter } from "./scan.mjs";
2
2
  import { loadProfiles, normalizeProfile, sanitizeProfileId } from "./profiles.mjs";
3
3
  import { scanManagedModels } from "./managed.mjs";
4
+ import { scanMlxModels } from "./mlx-discovery.mjs";
4
5
  import { isProfileFileMissing } from "./model-summary.mjs";
6
+ import { backendFor } from "./backends.mjs";
5
7
 
6
8
  export async function loadModelCatalog() {
7
- const [profiles, { models: ggufModels, drafters }, managedModels] = await Promise.all([
9
+ const [profiles, { models: ggufModels, drafters }, managedModels, mlxModels] = await Promise.all([
8
10
  loadProfiles(),
9
11
  scanGgufModels(),
10
12
  scanManagedModels(),
13
+ scanMlxModels(),
11
14
  ]);
12
- return normalizeCatalog({ profiles, ggufModels, drafters, managedModels });
15
+ return normalizeCatalog({ profiles, ggufModels, drafters, managedModels, mlxModels });
13
16
  }
14
17
 
15
18
  export function normalizeCatalog(catalog) {
16
19
  if (catalog.newModels && catalog.managedItems) return catalog;
17
- const { profiles, ggufModels, drafters, managedModels } = catalog;
20
+ const { profiles, ggufModels, drafters, managedModels, mlxModels = [] } = catalog;
18
21
  const profiledPaths = new Set(profiles.map((profile) => profile.modelPath).filter(Boolean));
19
- const newModels = ggufModels.filter((model) => !profiledPaths.has(model.path));
22
+ const newModels = [
23
+ ...ggufModels.filter((model) => !profiledPaths.has(model.path)),
24
+ ...mlxModels.filter((model) => !profiledPaths.has(model.path)),
25
+ ];
20
26
  const managedItems = [];
21
27
  for (const { backendId, models, status } of managedModels) {
22
28
  if (status === "unavailable") continue;
23
29
  const profiledAliases = new Set(
24
30
  profiles
25
31
  .filter((profile) => profile.backend === backendId)
26
- .map((profile) => backendId === "ollama" ? `ollama:${profile.ollamaModel ?? profile.modelAlias}` : `omlx:${profile.omlxModel ?? profile.modelAlias}`),
32
+ .map((profile) => `omlx:${profile.omlxModel ?? profile.modelAlias}`),
27
33
  );
28
34
  for (const model of models) {
29
35
  if (!profiledAliases.has(`${backendId}:${model.id}`)) managedItems.push({ model, backendId });
30
36
  }
31
37
  }
32
- return { profiles, ggufModels, drafters, managedModels, newModels, managedItems };
38
+ return { profiles, ggufModels, drafters, managedModels, mlxModels, newModels, managedItems };
33
39
  }
34
40
 
35
41
  export function itemKey(item) {
@@ -51,13 +57,73 @@ function compareRecency(a, b) {
51
57
  }
52
58
 
53
59
  export function buildCatalogItems(normalized) {
54
- const { profiles, newModels, managedItems, drafters } = normalized;
55
- const profileItems = profiles.map((profile) => ({ type: "profile", profile, label: profile.label, fileMissing: isProfileFileMissing(profile) }));
60
+ const { profiles, newModels, managedItems, drafters, ggufModels = [], mlxModels = [], managedModels = [] } = normalized;
61
+
62
+ // Lookup maps for enriching profile items with scan data (size + context).
63
+ const scanByPath = new Map();
64
+ for (const m of ggufModels) scanByPath.set(m.path, m);
65
+ for (const m of mlxModels) scanByPath.set(m.filePath ?? m.path, m);
66
+
67
+ const managedByKey = new Map();
68
+ for (const { backendId, models } of managedModels) {
69
+ for (const m of models) managedByKey.set(`${backendId}:${m.id}`, m);
70
+ }
71
+
72
+ const profileItems = profiles.map((profile) => {
73
+ const item = { type: "profile", profile, label: profile.label, fileMissing: isProfileFileMissing(profile) };
74
+
75
+ // Resolve size: profile.modelSizeBytes → scan lookup → managed lookup
76
+ let sizeBytes = profile.modelSizeBytes || 0;
77
+ if (!sizeBytes && profile.modelPath) {
78
+ const scanModel = scanByPath.get(profile.modelPath);
79
+ if (scanModel?.sizeBytes) sizeBytes = scanModel.sizeBytes;
80
+ }
81
+ if (!sizeBytes) {
82
+ const backend = backendFor(profile.backend);
83
+ if (backend.type === "managed-server" && profile.omlxModel) {
84
+ const managedModel = managedByKey.get(`${profile.backend}:${profile.omlxModel}`);
85
+ if (managedModel?.sizeBytes) sizeBytes = managedModel.sizeBytes;
86
+ }
87
+ }
88
+ item.sizeBytes = sizeBytes || null;
89
+
90
+ // Resolve context: flags.ctxSize (configured) → capabilities.ctxSize (trained) → scan → managed
91
+ let contextLength = profile.flags?.ctxSize ?? null;
92
+ if (!contextLength) contextLength = profile.capabilities?.ctxSize ?? null;
93
+ if (!contextLength && profile.modelPath) {
94
+ const scanModel = scanByPath.get(profile.modelPath);
95
+ if (scanModel?.contextLength) contextLength = scanModel.contextLength;
96
+ }
97
+ if (!contextLength) {
98
+ const backend = backendFor(profile.backend);
99
+ if (backend.type === "managed-server" && profile.omlxModel) {
100
+ const managedModel = managedByKey.get(`${profile.backend}:${profile.omlxModel}`);
101
+ if (managedModel?.contextLength) contextLength = managedModel.contextLength;
102
+ }
103
+ }
104
+ item.contextLength = contextLength;
105
+
106
+ return item;
107
+ });
56
108
  profileItems.sort(compareRecency);
57
109
  return [
58
110
  ...profileItems,
59
- ...newModels.map((model) => ({ type: "new", model, label: model.label, drafter: matchDrafter(model.path, drafters) })),
60
- ...managedItems.map(({ model, backendId }) => ({ type: "managed", model, backendId, label: model.label })),
111
+ ...newModels.map((model) => ({
112
+ type: "new",
113
+ model,
114
+ label: model.label,
115
+ drafter: matchDrafter(model.path, drafters),
116
+ sizeBytes: model.sizeBytes || null,
117
+ contextLength: model.contextLength ?? null,
118
+ })),
119
+ ...managedItems.map(({ model, backendId }) => ({
120
+ type: "managed",
121
+ model,
122
+ backendId,
123
+ label: model.label,
124
+ sizeBytes: model.sizeBytes || null,
125
+ contextLength: model.contextLength ?? null,
126
+ })),
61
127
  ];
62
128
  }
63
129
 
@@ -66,8 +132,9 @@ export function createManagedProfile(model, backendId) {
66
132
  id: `${backendId}-${sanitizeProfileId(model.id)}`,
67
133
  label: model.label,
68
134
  backend: backendId,
135
+ source: backendId,
69
136
  modelAlias: model.aliasSuggestion,
70
- ...(backendId === "ollama" ? { ollamaModel: model.id } : {}),
137
+ modelSizeBytes: model.sizeBytes || 0,
71
138
  ...(backendId === "omlx" ? { omlxModel: model.id } : {}),
72
139
  });
73
140
  }
@@ -68,9 +68,8 @@ const TAG_TOKENS = [
68
68
  /**
69
69
  * Parse a raw model identifier into a structured display name.
70
70
  *
71
- * @param {string} rawId The raw identifier: GGUF filename (no .gguf),
72
- * Ollama model name, or oMLX model id.
73
- * @param {"local-gguf"|"ollama"|"omlx"} source Where this name came from.
71
+ * @param {string} rawId The raw identifier: GGUF filename (no .gguf) or oMLX model id.
72
+ * @param {"local-gguf"|"omlx"} source Where this name came from.
74
73
  * @returns {{ publisher: string|null, model: string, params: string|null,
75
74
  * quant: string|null, tags: string[], display: string,
76
75
  * sort: string, id: string }}
@@ -87,18 +86,7 @@ export function parseModelName(rawId, source) {
87
86
  name = rawId.slice(slashIdx + 1);
88
87
  }
89
88
 
90
- // 2. For Ollama, split on : to separate model from tag (e.g. "gemma3:4b")
91
- // The tag after : is a model size/variant identifier — not a GGUF quant.
92
- let ollamaTag = null;
93
- if (source === "ollama") {
94
- const colonIdx = name.lastIndexOf(":");
95
- if (colonIdx !== -1) {
96
- ollamaTag = name.slice(colonIdx + 1);
97
- name = name.slice(0, colonIdx);
98
- }
99
- }
100
-
101
- // 3. Extract quant (GGUF quantization suffix)
89
+ // 2. Extract quant (GGUF quantization suffix)
102
90
  let quant = null;
103
91
  for (const pattern of QUANT_PATTERNS) {
104
92
  const match = name.match(pattern);
@@ -125,13 +113,7 @@ export function parseModelName(rawId, source) {
125
113
  // Clean up leftover separators
126
114
  name = name.replace(/[-_]{2,}/g, "-").replace(/^[-_]+|[-_]+$/g, "");
127
115
 
128
- // 5. For Ollama, re-attach the tag as part of the model name
129
- // (Ollama tags like "4b" or "30b-a3b" are size variants, not quants)
130
- if (ollamaTag) {
131
- name = name + "-" + ollamaTag;
132
- }
133
-
134
- // 6. Title-case the remaining model name
116
+ // 5. Title-case the remaining model name
135
117
  let model = titleCaseModel(name);
136
118
 
137
119
  // If nothing is left after parsing, fall back to the raw name
@@ -139,13 +121,13 @@ export function parseModelName(rawId, source) {
139
121
  model = rawId.includes("/") ? rawId : rawId.replace(/[-_]/g, " ");
140
122
  }
141
123
 
142
- // 7. Extract params (size like 30B, 12B) for sort/filter convenience
124
+ // 6. Extract params (size like 30B, 12B) for sort/filter convenience
143
125
  const params = extractParams(model);
144
126
 
145
- // 8. Build display string
127
+ // 7. Build display string
146
128
  const display = buildDisplay(publisher, model, tags, quant);
147
129
 
148
- // 9. Build sort key (lowercase, no publisher, for alphabetical ordering)
130
+ // 8. Build sort key (lowercase, no publisher, for alphabetical ordering)
149
131
  const sort = model.toLowerCase().replace(/[-_]/g, " ");
150
132
 
151
133
  return { publisher, model, params, quant, tags, display, sort, id };
@@ -1,7 +1,8 @@
1
1
  import { existsSync, statSync } from "node:fs";
2
- import { BACKENDS, backendFor } from "./backends.mjs";
2
+ import { basename, dirname } from "node:path";
3
+ import { backendFor } from "./backends.mjs";
3
4
  import { readCommandArgv } from "./profiles.mjs";
4
- import { isProfileRunning, isProfileServerUp } from "./process.mjs";
5
+ import { isProfileRunning } from "./process.mjs";
5
6
  import { buildPrettyCommand } from "./command.mjs";
6
7
  import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
7
8
  import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
@@ -11,6 +12,7 @@ import { findBenchmarkRepo } from "./benchmark.mjs";
11
12
 
12
13
  const OPTION_SEPARATOR = pc.dim(" │ ");
13
14
  const OPTION_STATUS_WIDTH = 10;
15
+ const OPTION_BACKEND_WIDTH = 14;
14
16
  const OPTION_SOURCE_WIDTH = 14;
15
17
  const OPTION_CTX_WIDTH = 5;
16
18
 
@@ -25,7 +27,7 @@ function optionPad(text, color, width) {
25
27
  function optionStatusTag(kind) {
26
28
  const statuses = {
27
29
  running: ["RUNNING", pc.green],
28
- serverup: ["SERVER UP", pc.yellow],
30
+ serverup: ["READY", pc.blue],
29
31
  ready: ["READY", pc.blue],
30
32
  missing: ["MISSING", pc.red],
31
33
  setup: ["SETUP", pc.yellow],
@@ -34,38 +36,80 @@ function optionStatusTag(kind) {
34
36
  return optionPad(text, color, OPTION_STATUS_WIDTH);
35
37
  }
36
38
 
37
- function optionSourceTag(sourceId, label) {
39
+ function optionSourceTag(sourceId) {
40
+ const label = formatSourceLabel(sourceId);
38
41
  const colors = {
39
- "llama-cpp": pc.cyan,
40
- "llama-cpp-mtp": pc.blue,
41
- ollama: pc.green,
42
+ huggingface: pc.cyan,
43
+ lmstudio: pc.blue,
42
44
  omlx: pc.magenta,
45
+ "llama.cpp": pc.cyan,
43
46
  gguf: pc.cyan,
47
+ mlx: pc.yellow,
48
+ "mlx-vlm": pc.yellow,
44
49
  };
45
50
  return optionPad(label, colors[sourceId] ?? pc.dim, OPTION_SOURCE_WIDTH);
46
51
  }
47
52
 
53
+ function optionBackendTag(backendId) {
54
+ const backend = backendId ? backendFor(backendId) : null;
55
+ const label = backend?.label ?? backendId ?? "unknown";
56
+ const colors = {
57
+ "llama-cpp": pc.cyan,
58
+ "llama-cpp-mtp": pc.blue,
59
+ omlx: pc.magenta,
60
+ "mlx-vlm": pc.yellow,
61
+ };
62
+ return optionPad(label, colors[backendId] ?? pc.dim, OPTION_BACKEND_WIDTH);
63
+ }
64
+
65
+ function formatSourceLabel(sourceId) {
66
+ if (!sourceId) return "unknown";
67
+ const map = {
68
+ huggingface: "HuggingFace",
69
+ lmstudio: "LM Studio",
70
+ omlx: "oMLX",
71
+ "llama.cpp": "llama.cpp",
72
+ gguf: "GGUF file",
73
+ mlx: "MLX",
74
+ "mlx-vlm": "MLX",
75
+ };
76
+ return map[sourceId] ?? String(sourceId);
77
+ }
78
+
79
+ function inferSourceFromPath(modelPath) {
80
+ if (!modelPath) return null;
81
+ const normalized = modelPath.toLowerCase().replace(/\\/g, "/");
82
+ if (normalized.includes("/.omlx/models")) return "omlx";
83
+ if (normalized.includes("/.lmstudio/models")) return "lmstudio";
84
+ if (normalized.includes("/.cache/huggingface")) return "huggingface";
85
+ if (normalized.includes("/.cache/llama.cpp")) return "llama.cpp";
86
+ const parent = basename(dirname(modelPath));
87
+ if (parent && parent !== ".") return parent.replace(/^\./, "");
88
+ return null;
89
+ }
90
+
91
+ function discoverySourceForProfile(profile) {
92
+ const backend = backendFor(profile.backend);
93
+ if (backend.type === "managed-server") return backend.id;
94
+ if (profile.source && profile.source !== "local-gguf") return profile.source;
95
+ return inferSourceFromPath(profile.modelPath);
96
+ }
97
+
98
+ function discoverySourceForItem(item) {
99
+ if (item.type === "profile") return discoverySourceForProfile(item.profile);
100
+ return item.model?.source ?? null;
101
+ }
102
+
48
103
  function optionCtxLabel(item) {
49
- if (item.type === "profile" && item.profile.flags?.ctxSize) {
50
- return optionPad(`${(item.profile.flags.ctxSize / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
104
+ if (item.contextLength) {
105
+ return optionPad(`${(item.contextLength / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
51
106
  }
52
107
  return optionPad("—", null, OPTION_CTX_WIDTH);
53
108
  }
54
109
 
55
110
  function optionSizeLabel(item) {
56
- if (item.type === "profile") {
57
- if (item.fileMissing) return "—";
58
- if (item.profile.modelPath && existsSync(item.profile.modelPath)) {
59
- return formatBytes(statSync(item.profile.modelPath).size);
60
- }
61
- return "—";
62
- }
63
- if (item.type === "new") {
64
- return formatBytes(item.model.sizeBytes);
65
- }
66
- // managed
67
- if (item.model.sizeBytes) return formatBytes(item.model.sizeBytes);
68
- if (item.model.quant) return item.model.quant;
111
+ if (item.type === "profile" && item.fileMissing) return "—";
112
+ if (item.sizeBytes) return formatBytes(item.sizeBytes);
69
113
  return "—";
70
114
  }
71
115
 
@@ -76,17 +120,18 @@ export function modelNameWidth(items) {
76
120
  return Math.max(20, maxName + 2);
77
121
  }
78
122
 
79
- function optionLabel({ status, source, name, ctx, size, nameWidth }) {
80
- return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
123
+ function optionLabel({ status, backend, source, name, ctx, size, nameWidth }) {
124
+ return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
81
125
  }
82
126
 
83
- export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
127
+ export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth }) {
128
+ const sourceId = discoverySourceForItem(item) ?? "unknown";
129
+ const backendId = inferBackendId(item);
84
130
  if (item.type === "profile") {
85
131
  const backend = backendFor(item.profile.backend);
86
132
  const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
87
- const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
88
133
  const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
89
- const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
134
+ const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : "ready";
90
135
  const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
91
136
  const hint = drafterMissing ? "MTP drafter missing — reconfigure"
92
137
  : modelMissing ? `${backend.label} model no longer available`
@@ -95,7 +140,8 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
95
140
  value: itemKey(item),
96
141
  label: optionLabel({
97
142
  status: optionStatusTag(status),
98
- source: optionSourceTag(item.profile.backend, backend.label),
143
+ backend: optionBackendTag(backendId),
144
+ source: optionSourceTag(sourceId),
99
145
  name: item.profile.label,
100
146
  nameWidth,
101
147
  ctx: optionCtxLabel(item),
@@ -109,7 +155,8 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
109
155
  value: itemKey(item),
110
156
  label: optionLabel({
111
157
  status: optionStatusTag("setup"),
112
- source: optionSourceTag("gguf", "GGUF file"),
158
+ backend: optionBackendTag(backendId),
159
+ source: optionSourceTag(sourceId),
113
160
  name: item.model.label,
114
161
  nameWidth,
115
162
  ctx: optionCtxLabel(item),
@@ -117,12 +164,12 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
117
164
  }),
118
165
  };
119
166
  }
120
- const backend = BACKENDS[item.backendId];
121
167
  return {
122
168
  value: itemKey(item),
123
169
  label: optionLabel({
124
170
  status: optionStatusTag("setup"),
125
- source: optionSourceTag(item.backendId, backend.label),
171
+ backend: optionBackendTag(backendId),
172
+ source: optionSourceTag(sourceId),
126
173
  name: item.model.label,
127
174
  nameWidth,
128
175
  ctx: optionCtxLabel(item),
@@ -131,19 +178,26 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
131
178
  };
132
179
  }
133
180
 
134
- export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
181
+ function inferBackendId(item) {
182
+ if (item.type === "profile") return item.profile.backend;
183
+ if (item.type === "managed") return item.backendId;
184
+ // new model: derive from format
185
+ if (item.model?.format === "mlx") return "mlx-vlm";
186
+ if (item.model?.backend) return item.model.backend;
187
+ return "llama-cpp";
188
+ }
189
+
190
+ export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds = new Set()) {
135
191
  const profiles = normalized.profiles;
136
192
  const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
137
193
  const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
138
- const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
194
+ const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)).length;
139
195
  const runningCount = runningProfilesNow.length;
140
- const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
141
196
  const missingCount = profiles.filter(isMissing).length;
142
197
  const setupCount = normalized.newModels.length + normalized.managedItems.length;
143
198
 
144
199
  const countParts = [];
145
200
  if (runningCount > 0) countParts.push(pc.green(`${runningCount} running`));
146
- if (serverUpCount > 0) countParts.push(pc.yellow(`${serverUpCount} server up, model not loaded`));
147
201
  if (readyCount > 0) countParts.push(pc.blue(`${readyCount} model${readyCount === 1 ? "" : "s"} ready`));
148
202
  if (missingCount > 0) countParts.push(pc.red(`${missingCount} model${missingCount === 1 ? "" : "s"} missing`));
149
203
  if (setupCount > 0) countParts.push(pc.yellow(`${setupCount} model${setupCount === 1 ? "" : "s"} need${setupCount === 1 ? "s" : ""} setup`));
@@ -166,11 +220,10 @@ export async function printProfileDetails(profile) {
166
220
  const backend = backendFor(profile.backend);
167
221
  const isManaged = backend.type === "managed-server";
168
222
  const running = await isProfileRunning(profile);
169
- const serverUp = !running && isManaged && await isProfileServerUp(profile);
170
223
  const fileMissing = !isManaged && isProfileFileMissing(profile);
171
224
  console.log("\n" + renderSection("Model overview", renderRows([
172
225
  ["Name", pc.bold(profile.label)],
173
- ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : serverUp ? pc.yellow("Server up, model not loaded") : pc.blue("Ready")],
226
+ ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : pc.blue("Ready")],
174
227
  ["Details", profileDetailParts(profile, { fileMissing }).join(pc.dim(" · "))],
175
228
  ["Server", fileMissing ? pc.red(profile.baseUrl) : profile.baseUrl],
176
229
  ])));
@@ -185,7 +238,7 @@ export async function printProfileDetails(profile) {
185
238
  detailRows.push(
186
239
  ["Local file", fileMissing ? pc.red(`${profile.modelPath} (not found)`) : profile.modelPath ?? "unknown"],
187
240
  ["Vision file", profile.mmprojPath ? (existsSync(profile.mmprojPath) ? profile.mmprojPath : pc.red(`${profile.mmprojPath} (not found)`)) : "none"],
188
- ["Model size", profile.modelPath && existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
241
+ ["Model size", profile.modelSizeBytes ? formatBytes(profile.modelSizeBytes) : (profile.modelPath && existsSync(profile.modelPath) && statSync(profile.modelPath).isFile() ? formatBytes(statSync(profile.modelPath).size) : "unknown")],
189
242
  );
190
243
  if (profile.drafterPath) {
191
244
  detailRows.push(["Drafter", existsSync(profile.drafterPath) ? profile.drafterPath : pc.red(`${profile.drafterPath} (not found)`)]);
@@ -219,6 +272,29 @@ export function printGgufModelDetails(model, drafter) {
219
272
  console.log("\n" + renderSection("Model details", renderRows(detailRows), { columns: 110 }));
220
273
  }
221
274
 
275
+ export async function printMlxModelDetails(model) {
276
+ const { detectMlxCapabilities } = await import("./mlx-discovery.mjs");
277
+ const caps = await detectMlxCapabilities(model.filePath ?? model.path);
278
+ const parts = [];
279
+ if (caps.architecture) parts.push(caps.architecture);
280
+ if (caps.thinking) parts.push("thinking");
281
+ if (caps.vision) parts.push("vision");
282
+ const summary = parts.length > 0 ? parts.join(pc.dim(" · ")) : "standard MLX";
283
+ console.log("\n" + renderSection("Downloaded model", renderRows([
284
+ ["Name", pc.bold(model.label)],
285
+ ["Status", pc.yellow("Needs one-time setup")],
286
+ ["Details", summary],
287
+ ])));
288
+ console.log("\n" + renderSection("Model details", renderRows([
289
+ ["Model dir", model.path],
290
+ ["Backend", "mlx-vlm"],
291
+ ["Source", formatSourceLabel(model.source)],
292
+ ["Detected", summary],
293
+ ["Size", formatBytes(model.sizeBytes)],
294
+ ["Context", caps.contextLength ? `${caps.contextLength.toLocaleString()} trained` : "unknown"],
295
+ ]), { columns: 110 }));
296
+ }
297
+
222
298
  export function printManagedModelDetails(model, backend) {
223
299
  console.log("\n" + renderSection(`${backend.label} model`, renderRows([
224
300
  ["Name", pc.bold(model.label)],
package/src/process.mjs CHANGED
@@ -35,11 +35,13 @@ async function startLocalServer(profile) {
35
35
 
36
36
  // Build argv: binary + command.json args
37
37
  const argv = [...commandArgv];
38
+ // mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
39
+ const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
38
40
 
39
41
  const rawFd = openSync(rawLogPath, "a");
40
42
  let child;
41
43
  try {
42
- child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd] });
44
+ child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd], env });
43
45
  } finally {
44
46
  closeSync(rawFd);
45
47
  }
@@ -96,16 +98,134 @@ export async function stopProfile(profile) {
96
98
  await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopReason: "pid-not-running" });
97
99
  return { stopped: false, message: `${profile.id} pid ${state.pid} is no longer running.` };
98
100
  }
101
+ const pid = state.pid;
99
102
  try {
100
- try {
101
- process.kill(-state.pid, "SIGTERM");
102
- } catch {
103
- process.kill(state.pid, "SIGTERM");
104
- }
105
- await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: "SIGTERM" });
106
- return { stopped: true, message: `Stopped ${profile.id} pid ${state.pid}` };
103
+ const signal = await terminateProcess(pid);
104
+ await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: signal });
105
+ return { stopped: true, message: `Stopped ${profile.id} pid ${pid}` };
107
106
  } catch (error) {
108
- return { stopped: false, message: `Could not stop pid ${state.pid}: ${error.message}` };
107
+ return { stopped: false, message: `Could not stop pid ${pid}: ${error.message}` };
108
+ }
109
+ }
110
+
111
+ // Reliably terminate a detached local-server process group: SIGTERM with a
112
+ // grace period for graceful shutdown (lets mlx-vlm/llama-server release the
113
+ // model), then SIGKILL if still alive. Guarantees the model is unloaded when a
114
+ // profile stops — consistent across backends (llama-server exits on SIGTERM;
115
+ // mlx-vlm/uvicorn often does not, hence the SIGKILL fallback).
116
+ async function terminateProcess(pid) {
117
+ const signalGroup = (sig) => {
118
+ try { process.kill(-pid, sig); }
119
+ catch { process.kill(pid, sig); } // not a group leader — kill the proc itself
120
+ };
121
+ signalGroup("SIGTERM");
122
+ for (let i = 0; i < 50; i++) { // 5s grace for graceful shutdown
123
+ if (await processGone(pid)) return "SIGTERM";
124
+ await sleep(100);
125
+ }
126
+ signalGroup("SIGKILL");
127
+ for (let i = 0; i < 30; i++) { // 3s for SIGKILL to take effect
128
+ if (await processGone(pid)) return "SIGKILL";
129
+ await sleep(100);
130
+ }
131
+ throw new Error(`pid ${pid} did not exit after SIGKILL`);
132
+ }
133
+
134
+ // True if the process is dead (or a zombie about to be reaped).
135
+ async function processGone(pid) {
136
+ try { process.kill(pid, 0); }
137
+ catch { return true; } // no such process
138
+ // Alive to signal(0) — but a detached setsid child can briefly appear as a
139
+ // zombie before launchd reaps it. Treat zombie as gone.
140
+ try {
141
+ const { stdout } = await execFileAsync("ps", ["-o", "stat=", "-p", String(pid)]);
142
+ return /^Z/.test(stdout.trim());
143
+ } catch {
144
+ return false;
145
+ }
146
+ }
147
+
148
+ // ── Unload model from a managed server (oMLX) ─────────────────────────────
149
+ // Counterpart to stopProfile for local-server backends: stopProfile kills the
150
+ // server process (which unloads the model); unloadModelFromServer asks a
151
+ // managed server to release the model from memory via its HTTP API, leaving the
152
+ // server itself running. Together they give a consistent UX: quitting Pi
153
+ // unloads the model regardless of backend type.
154
+
155
+ export async function unloadModelFromServer(profile) {
156
+ const backend = backendFor(profile.backend);
157
+
158
+ if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
159
+ // llama.cpp unloads when the server process exits; no HTTP unload API exists.
160
+ // If offgrid-ai started the server, stopProfile already handled it.
161
+ return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
162
+ }
163
+
164
+ if (backend.id === "omlx") {
165
+ return await unloadOmlxModel(profile);
166
+ }
167
+
168
+ if (backend.id === "mlx-vlm") {
169
+ // mlx-vlm is a local-server backend — stopProfile handles unload by killing
170
+ // the process. No HTTP unload API.
171
+ return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
172
+ }
173
+
174
+ return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
175
+ }
176
+
177
+ async function unloadOmlxModel(profile) {
178
+ const baseUrl = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
179
+ const adminUrl = `${baseUrl}/admin/api/models`;
180
+ const modelId = profile.modelAlias || profile.omlxModel || profile.id;
181
+
182
+ try {
183
+ const ids = await serverModelIds(profile.baseUrl);
184
+ const match = ids.find((id) => id.toLowerCase() === modelId.toLowerCase());
185
+ const targetId = match ?? modelId;
186
+
187
+ const response = await fetch(`${adminUrl}/${encodeURIComponent(targetId)}/unload`, {
188
+ method: "POST",
189
+ headers: { "Content-Type": "application/json" },
190
+ signal: AbortSignal.timeout(30000),
191
+ });
192
+
193
+ if (response.ok) {
194
+ return { unloaded: true, backend: "omlx", modelId: targetId };
195
+ }
196
+
197
+ const detail = await responseErrorDetail(response);
198
+
199
+ if (response.status === 400 && /not loaded/i.test(detail)) {
200
+ return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
201
+ }
202
+
203
+ if (response.status === 401 || response.status === 403) {
204
+ return {
205
+ unloaded: false,
206
+ backend: "omlx",
207
+ modelId: targetId,
208
+ error: "oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or unload manually from the admin panel.",
209
+ };
210
+ }
211
+
212
+ return { unloaded: false, backend: "omlx", modelId: targetId, error: `HTTP ${response.status}: ${detail}` };
213
+ } catch (err) {
214
+ if (err?.name === "AbortError" || err?.name === "TimeoutError") {
215
+ return { unloaded: false, backend: "omlx", modelId, error: "Unload request timed out. The model may still be unloading in the background." };
216
+ }
217
+ return { unloaded: false, backend: "omlx", modelId, error: err.message };
218
+ }
219
+ }
220
+
221
+ async function responseErrorDetail(response) {
222
+ const text = await response.text().catch(() => "");
223
+ if (!text) return "";
224
+ try {
225
+ const body = JSON.parse(text);
226
+ return body?.detail ?? body?.message ?? text;
227
+ } catch {
228
+ return text;
109
229
  }
110
230
  }
111
231
 
@@ -126,7 +246,6 @@ export async function isProfileServerUp(profile) {
126
246
 
127
247
  export async function modelLoadedOnServer(profile) {
128
248
  const backend = backendFor(profile.backend);
129
- if (backend.id === "ollama") return modelIdsMatch(await ollamaLoadedModelIds(profile), expectedModelIds(profile));
130
249
  if (backend.id === "omlx") return modelIdsMatch(await omlxLoadedModelIds(profile), expectedModelIds(profile));
131
250
  const { matches } = await serverMatchesProfile(profile);
132
251
  return matches;
@@ -134,9 +253,6 @@ export async function modelLoadedOnServer(profile) {
134
253
 
135
254
  export async function modelAvailableOnServer(profile) {
136
255
  const backend = backendFor(profile.backend);
137
- if (backend.id === "ollama") {
138
- return modelIdsMatch(await ollamaAvailableModelIds(profile), expectedModelIds(profile));
139
- }
140
256
  if (backend.id === "omlx") {
141
257
  // /v1/models lists discovered models; an ID must exist there to be usable.
142
258
  return modelIdsMatch(await serverModelIds(profile.baseUrl), expectedModelIds(profile));
@@ -217,24 +333,6 @@ export async function serverModelIds(baseUrl) {
217
333
  .filter(Boolean);
218
334
  }
219
335
 
220
- async function ollamaLoadedModelIds(profile) {
221
- const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/ps`);
222
- if (!result.ok) return [];
223
- return (Array.isArray(result.data?.models) ? result.data.models : [])
224
- .flatMap((model) => [model?.name, model?.model])
225
- .map((id) => String(id ?? "").trim())
226
- .filter(Boolean);
227
- }
228
-
229
- async function ollamaAvailableModelIds(profile) {
230
- const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/tags`);
231
- if (!result.ok) return [];
232
- return (Array.isArray(result.data?.models) ? result.data.models : [])
233
- .flatMap((model) => [model?.name, model?.model])
234
- .map((id) => String(id ?? "").trim())
235
- .filter(Boolean);
236
- }
237
-
238
336
  async function omlxLoadedModelIds(profile) {
239
337
  const statusResult = await fetchJson(`${profile.baseUrl.replace(/\/+$/u, "")}/models/status`);
240
338
  const fromStatus = statusResult.ok
@@ -305,7 +403,6 @@ function expectedModelIds(profile) {
305
403
  return [
306
404
  profile.modelAlias,
307
405
  profile.label,
308
- profile.ollamaModel,
309
406
  profile.omlxModel,
310
407
  profile.modelPath,
311
408
  fileName,