offgrid-ai 0.9.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ // mlx-vlm server flag computation — pure functions, no side effects.
2
+ // Ported from deprecated-offgrid-desktop/src/main/server-flags.ts (MLX subset).
3
+ //
4
+ // Benchmark-informed decisions (see sidequests/mlx-backend-benchmark/RESULTS.md):
5
+ // - mlx-vlm requires APC_ENABLED=1 env var (86x TTFT improvement) — set at spawn
6
+ // time in process.mjs, NOT here (this module only computes args).
7
+ // - mlx-vlm uses a strict=False wrapper script for shared-KV architectures
8
+ // (Gemma 4-class). Safe for all models — strict=False is a no-op for models
9
+ // that load fine with strict=True.
10
+ // - mlx-vlm uses --enable-thinking for thinking-mode control.
11
+ // - mlx-vlm uses --max-kv-size for the KV cache / context window.
12
+ //
13
+ // Only the mlx-vlm-relevant logic is ported here. offgrid-ai's existing GGUF
14
+ // flag logic (autodetect.mjs / profile-setup.mjs / estimate.mjs) is unchanged.
15
+
16
+ import { fileURLToPath } from "node:url";
17
+ import { dirname, join } from "node:path";
18
+
19
const MB = 1024 ** 2;

/** Default port for the local model server. Matches the desktop's DEFAULT_PORT. */
export const DEFAULT_PORT = 18080;

/** Resolved path to the bundled strict=False wrapper script (sibling of src/). */
export const MLX_VLM_WRAPPER = join(dirname(fileURLToPath(import.meta.url)), "..", "resources", "mlxvlm-server-wrapper.py");

/** Overhead multiplier for mlx-vlm: weights × 1.5 (covers KV cache, activations, APC cache; benchmark-validated). */
const MLX_VLM_OVERHEAD_MULTIPLIER = 1.5;

/** Server process overhead in MB. */
const PROCESS_OVERHEAD_MB = 200;

/**
 * Estimate mlx-vlm memory usage (MB): model weights × 1.5 + process overhead.
 *
 * The 1.5 multiplier covers KV cache, activations, and APC cache overhead
 * (benchmark-validated; see sidequests/mlx-backend-benchmark/RESULTS.md).
 * GGUF/llama-server estimation uses the detailed path in estimate.mjs.
 *
 * A missing, non-numeric, non-finite, or negative size is treated as 0 bytes,
 * so the result is always a finite number (at least the process overhead)
 * instead of NaN leaking into downstream memory comparisons.
 *
 * @param {number} fileSizeBytes - model size on disk (sum of MLX safetensors).
 * @returns {number} estimated memory in MB.
 */
export function estimateMemoryMb(fileSizeBytes) {
  // Number() keeps numeric strings working exactly as the bare division did.
  const bytes = Number(fileSizeBytes);
  const weightsMb = Number.isFinite(bytes) && bytes > 0 ? bytes / MB : 0;
  return Math.round(weightsMb * MLX_VLM_OVERHEAD_MULTIPLIER + PROCESS_OVERHEAD_MB);
}

/**
 * Normalize a requested context size into a whole, positive token count.
 *
 * @param {unknown} ctxSize - raw option value (number or numeric string).
 * @returns {number|null} truncated positive integer, or null when unusable.
 */
function normalizeCtxSize(ctxSize) {
  const value = typeof ctxSize === "string" ? Number(ctxSize) : ctxSize;
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  // Drop any fractional part so the CLI never receives e.g. "8192.7".
  const tokens = Math.trunc(value);
  return tokens >= 1 ? tokens : null;
}

/**
 * Compute mlx-vlm server arguments.
 *
 * mlx-vlm is the MLX-native server (benchmark-validated best throughput + memory
 * efficiency on Apple Silicon). Invoked via the strict=False wrapper script for
 * compatibility with shared-KV architectures (Gemma 4-class).
 *
 * The APC_ENABLED=1 env var is MANDATORY but is set at spawn time in
 * process.mjs, not in args.
 *
 * The wrapper script (resources/mlxvlm-server-wrapper.py) applies strict=False
 * model loading + the BatchRotatingKVCache.merge() fix, both required for
 * shared-KV architectures (Gemma 4-class). It is resolved to a real path via
 * MLX_VLM_WRAPPER; there is intentionally no raw-mlx_vlm.server path.
 *
 * @param {string} modelPath - path to the MLX model directory.
 * @param {object} [options]
 * @param {number} [options.port] - port (default DEFAULT_PORT).
 * @param {number} [options.ctxSize] - context window (passed as --max-kv-size).
 *   Only finite values >= 1 are forwarded, truncated to a whole number;
 *   anything else omits the flag.
 * @param {boolean} [options.thinkingEnabled=true] - whether to enable thinking.
 * @returns {{ args: string[], port: number }}
 */
export function computeMlxVlmFlags(modelPath, options = {}) {
  const port = options.port ?? DEFAULT_PORT;
  const thinkingEnabled = options.thinkingEnabled ?? true;

  // The binary is "python3" (resolved by backendBinaryFor in backends.mjs); the
  // wrapper path is the first arg.
  const args = [
    MLX_VLM_WRAPPER,
    "--model", modelPath,
    "--host", "127.0.0.1",
    "--port", String(port),
  ];

  if (thinkingEnabled) {
    args.push("--enable-thinking");
  }

  // Context size: mlx-vlm uses --max-kv-size for the KV cache / context window.
  const maxKvSize = normalizeCtxSize(options.ctxSize);
  if (maxKvSize !== null) {
    args.push("--max-kv-size", String(maxKvSize));
  }

  return { args, port };
}
@@ -1,29 +1,34 @@
1
1
  import { scanGgufModels, matchDrafter } from "./scan.mjs";
2
2
  import { loadProfiles, normalizeProfile, sanitizeProfileId } from "./profiles.mjs";
3
3
  import { scanManagedModels } from "./managed.mjs";
4
+ import { scanMlxModels } from "./mlx-discovery.mjs";
4
5
  import { isProfileFileMissing } from "./model-summary.mjs";
5
6
 
6
7
  export async function loadModelCatalog() {
7
- const [profiles, { models: ggufModels, drafters }, managedModels] = await Promise.all([
8
+ const [profiles, { models: ggufModels, drafters }, managedModels, mlxModels] = await Promise.all([
8
9
  loadProfiles(),
9
10
  scanGgufModels(),
10
11
  scanManagedModels(),
12
+ scanMlxModels(),
11
13
  ]);
12
- return normalizeCatalog({ profiles, ggufModels, drafters, managedModels });
14
+ return normalizeCatalog({ profiles, ggufModels, drafters, managedModels, mlxModels });
13
15
  }
14
16
 
15
17
  export function normalizeCatalog(catalog) {
16
18
  if (catalog.newModels && catalog.managedItems) return catalog;
17
- const { profiles, ggufModels, drafters, managedModels } = catalog;
19
+ const { profiles, ggufModels, drafters, managedModels, mlxModels = [] } = catalog;
18
20
  const profiledPaths = new Set(profiles.map((profile) => profile.modelPath).filter(Boolean));
19
- const newModels = ggufModels.filter((model) => !profiledPaths.has(model.path));
21
+ const newModels = [
22
+ ...ggufModels.filter((model) => !profiledPaths.has(model.path)),
23
+ ...mlxModels.filter((model) => !profiledPaths.has(model.path)),
24
+ ];
20
25
  const managedItems = [];
21
26
  for (const { backendId, models, status } of managedModels) {
22
27
  if (status === "unavailable") continue;
23
28
  const profiledAliases = new Set(
24
29
  profiles
25
30
  .filter((profile) => profile.backend === backendId)
26
- .map((profile) => backendId === "ollama" ? `ollama:${profile.ollamaModel ?? profile.modelAlias}` : `omlx:${profile.omlxModel ?? profile.modelAlias}`),
31
+ .map((profile) => `omlx:${profile.omlxModel ?? profile.modelAlias}`),
27
32
  );
28
33
  for (const model of models) {
29
34
  if (!profiledAliases.has(`${backendId}:${model.id}`)) managedItems.push({ model, backendId });
@@ -66,8 +71,9 @@ export function createManagedProfile(model, backendId) {
66
71
  id: `${backendId}-${sanitizeProfileId(model.id)}`,
67
72
  label: model.label,
68
73
  backend: backendId,
74
+ source: backendId,
69
75
  modelAlias: model.aliasSuggestion,
70
- ...(backendId === "ollama" ? { ollamaModel: model.id } : {}),
76
+ modelSizeBytes: model.sizeBytes || 0,
71
77
  ...(backendId === "omlx" ? { omlxModel: model.id } : {}),
72
78
  });
73
79
  }
@@ -68,9 +68,8 @@ const TAG_TOKENS = [
68
68
  /**
69
69
  * Parse a raw model identifier into a structured display name.
70
70
  *
71
- * @param {string} rawId The raw identifier: GGUF filename (no .gguf),
72
- * Ollama model name, or oMLX model id.
73
- * @param {"local-gguf"|"ollama"|"omlx"} source Where this name came from.
71
+ * @param {string} rawId The raw identifier: GGUF filename (no .gguf) or oMLX model id.
72
+ * @param {"local-gguf"|"omlx"} source Where this name came from.
74
73
  * @returns {{ publisher: string|null, model: string, params: string|null,
75
74
  * quant: string|null, tags: string[], display: string,
76
75
  * sort: string, id: string }}
@@ -87,18 +86,7 @@ export function parseModelName(rawId, source) {
87
86
  name = rawId.slice(slashIdx + 1);
88
87
  }
89
88
 
90
- // 2. For Ollama, split on : to separate model from tag (e.g. "gemma3:4b")
91
- // The tag after : is a model size/variant identifier — not a GGUF quant.
92
- let ollamaTag = null;
93
- if (source === "ollama") {
94
- const colonIdx = name.lastIndexOf(":");
95
- if (colonIdx !== -1) {
96
- ollamaTag = name.slice(colonIdx + 1);
97
- name = name.slice(0, colonIdx);
98
- }
99
- }
100
-
101
- // 3. Extract quant (GGUF quantization suffix)
89
+ // 2. Extract quant (GGUF quantization suffix)
102
90
  let quant = null;
103
91
  for (const pattern of QUANT_PATTERNS) {
104
92
  const match = name.match(pattern);
@@ -125,13 +113,7 @@ export function parseModelName(rawId, source) {
125
113
  // Clean up leftover separators
126
114
  name = name.replace(/[-_]{2,}/g, "-").replace(/^[-_]+|[-_]+$/g, "");
127
115
 
128
- // 5. For Ollama, re-attach the tag as part of the model name
129
- // (Ollama tags like "4b" or "30b-a3b" are size variants, not quants)
130
- if (ollamaTag) {
131
- name = name + "-" + ollamaTag;
132
- }
133
-
134
- // 6. Title-case the remaining model name
116
+ // 5. Title-case the remaining model name
135
117
  let model = titleCaseModel(name);
136
118
 
137
119
  // If nothing is left after parsing, fall back to the raw name
@@ -139,13 +121,13 @@ export function parseModelName(rawId, source) {
139
121
  model = rawId.includes("/") ? rawId : rawId.replace(/[-_]/g, " ");
140
122
  }
141
123
 
142
- // 7. Extract params (size like 30B, 12B) for sort/filter convenience
124
+ // 6. Extract params (size like 30B, 12B) for sort/filter convenience
143
125
  const params = extractParams(model);
144
126
 
145
- // 8. Build display string
127
+ // 7. Build display string
146
128
  const display = buildDisplay(publisher, model, tags, quant);
147
129
 
148
- // 9. Build sort key (lowercase, no publisher, for alphabetical ordering)
130
+ // 8. Build sort key (lowercase, no publisher, for alphabetical ordering)
149
131
  const sort = model.toLowerCase().replace(/[-_]/g, " ");
150
132
 
151
133
  return { publisher, model, params, quant, tags, display, sort, id };
@@ -1,7 +1,8 @@
1
1
  import { existsSync, statSync } from "node:fs";
2
- import { BACKENDS, backendFor } from "./backends.mjs";
2
+ import { basename, dirname } from "node:path";
3
+ import { backendFor } from "./backends.mjs";
3
4
  import { readCommandArgv } from "./profiles.mjs";
4
- import { isProfileRunning, isProfileServerUp } from "./process.mjs";
5
+ import { isProfileRunning } from "./process.mjs";
5
6
  import { buildPrettyCommand } from "./command.mjs";
6
7
  import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
7
8
  import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
@@ -11,6 +12,7 @@ import { findBenchmarkRepo } from "./benchmark.mjs";
11
12
 
12
13
  const OPTION_SEPARATOR = pc.dim(" │ ");
13
14
  const OPTION_STATUS_WIDTH = 10;
15
+ const OPTION_BACKEND_WIDTH = 14;
14
16
  const OPTION_SOURCE_WIDTH = 14;
15
17
  const OPTION_CTX_WIDTH = 5;
16
18
 
@@ -25,7 +27,7 @@ function optionPad(text, color, width) {
25
27
  function optionStatusTag(kind) {
26
28
  const statuses = {
27
29
  running: ["RUNNING", pc.green],
28
- serverup: ["SERVER UP", pc.yellow],
30
+ serverup: ["READY", pc.blue],
29
31
  ready: ["READY", pc.blue],
30
32
  missing: ["MISSING", pc.red],
31
33
  setup: ["SETUP", pc.yellow],
@@ -34,17 +36,70 @@ function optionStatusTag(kind) {
34
36
  return optionPad(text, color, OPTION_STATUS_WIDTH);
35
37
  }
36
38
 
37
- function optionSourceTag(sourceId, label) {
39
+ function optionSourceTag(sourceId) {
40
+ const label = formatSourceLabel(sourceId);
38
41
  const colors = {
39
- "llama-cpp": pc.cyan,
40
- "llama-cpp-mtp": pc.blue,
41
- ollama: pc.green,
42
+ huggingface: pc.cyan,
43
+ lmstudio: pc.blue,
42
44
  omlx: pc.magenta,
45
+ "llama.cpp": pc.cyan,
43
46
  gguf: pc.cyan,
47
+ mlx: pc.yellow,
48
+ "mlx-vlm": pc.yellow,
44
49
  };
45
50
  return optionPad(label, colors[sourceId] ?? pc.dim, OPTION_SOURCE_WIDTH);
46
51
  }
47
52
 
53
/**
 * Build the fixed-width, colored "backend" column for a picker row.
 *
 * The label is the backend's own `label` when `backendFor` provides one,
 * otherwise the raw id, otherwise "unknown". Ids without a dedicated color
 * are rendered dim.
 *
 * @param {string} [backendId] - backend identifier (e.g. "llama-cpp", "omlx").
 * @returns {string} padded, colored cell text.
 */
function optionBackendTag(backendId) {
  const palette = {
    "llama-cpp": pc.cyan,
    "llama-cpp-mtp": pc.blue,
    omlx: pc.magenta,
    "mlx-vlm": pc.yellow,
  };

  // Only look the backend up when an id was actually supplied.
  const descriptor = backendId ? backendFor(backendId) : null;
  let text = descriptor?.label;
  if (text === undefined || text === null) {
    text = backendId ?? "unknown";
  }

  const colorize = palette[backendId] ?? pc.dim;
  return optionPad(text, colorize, OPTION_BACKEND_WIDTH);
}
64
+
65
/** Human-readable labels for the known discovery sources. */
const SOURCE_LABELS = new Map([
  ["huggingface", "HuggingFace"],
  ["lmstudio", "LM Studio"],
  ["omlx", "oMLX"],
  ["llama.cpp", "llama.cpp"],
  ["gguf", "GGUF file"],
  ["mlx", "MLX"],
  ["mlx-vlm", "MLX"],
]);

/**
 * Turn a discovery-source id into the text shown in the "source" column.
 *
 * Known ids map to a friendly label; any other id is shown as-is. A Map is
 * used instead of a plain-object lookup so ids that collide with
 * Object.prototype members ("constructor", "toString", "__proto__", ...)
 * — which can occur because sources may be inferred from directory names —
 * still yield a string rather than an inherited function or object.
 *
 * @param {string|null|undefined} sourceId - source identifier.
 * @returns {string} display label; "unknown" when no id is given.
 */
function formatSourceLabel(sourceId) {
  if (!sourceId) return "unknown";
  return SOURCE_LABELS.get(sourceId) ?? String(sourceId);
}
78
+
79
/**
 * Guess where a model came from by inspecting its filesystem path.
 *
 * The path is matched case-insensitively (with backslashes treated as
 * forward slashes) against a list of well-known cache locations. When none
 * match, the name of the containing directory is used, minus a single
 * leading dot.
 *
 * @param {string|null|undefined} modelPath - path to the model file or dir.
 * @returns {string|null} a source id, or null when nothing can be inferred.
 */
function inferSourceFromPath(modelPath) {
  if (!modelPath) return null;

  const haystack = modelPath.toLowerCase().replace(/\\/g, "/");
  // Checked in order; the first marker found wins.
  const knownLocations = [
    ["/.omlx/models", "omlx"],
    ["/.lmstudio/models", "lmstudio"],
    ["/.cache/huggingface", "huggingface"],
    ["/.cache/llama.cpp", "llama.cpp"],
  ];
  for (const [marker, sourceId] of knownLocations) {
    if (haystack.includes(marker)) return sourceId;
  }

  // Fall back to the containing directory's name.
  const containingDir = basename(dirname(modelPath));
  if (!containingDir || containingDir === ".") return null;
  return containingDir.replace(/^\./, "");
}
90
+
91
/**
 * Determine the discovery source shown for a saved profile.
 *
 * Precedence: a "managed-server" backend reports its own id; otherwise an
 * explicit `profile.source` (anything other than "local-gguf") is used;
 * otherwise the source is inferred from `profile.modelPath`.
 *
 * @param {object} profile - saved profile (reads backend, source, modelPath).
 * @returns {string|null} source id, or null when it cannot be determined.
 */
function discoverySourceForProfile(profile) {
  const backend = backendFor(profile.backend);
  const isManaged = backend.type === "managed-server";
  if (isManaged) return backend.id;

  const declared = profile.source;
  const hasExplicitSource = Boolean(declared) && declared !== "local-gguf";
  return hasExplicitSource ? declared : inferSourceFromPath(profile.modelPath);
}
97
+
98
/**
 * Determine the discovery source for any picker item.
 *
 * Profile items defer to `discoverySourceForProfile`; every other item type
 * uses the `source` recorded on its model, or null when there is none.
 *
 * @param {object} item - picker item (reads type, profile, model).
 * @returns {string|null} source id, or null when unknown.
 */
function discoverySourceForItem(item) {
  return item.type === "profile"
    ? discoverySourceForProfile(item.profile)
    : (item.model?.source ?? null);
}
102
+
48
103
  function optionCtxLabel(item) {
49
104
  if (item.type === "profile" && item.profile.flags?.ctxSize) {
50
105
  return optionPad(`${(item.profile.flags.ctxSize / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
@@ -52,12 +107,18 @@ function optionCtxLabel(item) {
52
107
  return optionPad("—", null, OPTION_CTX_WIDTH);
53
108
  }
54
109
 
55
- function optionSizeLabel(item) {
110
+ function optionSizeLabel(item, managedModels) {
56
111
  if (item.type === "profile") {
57
112
  if (item.fileMissing) return "—";
113
+ if (item.profile.modelSizeBytes) return formatBytes(item.profile.modelSizeBytes);
58
114
  if (item.profile.modelPath && existsSync(item.profile.modelPath)) {
59
- return formatBytes(statSync(item.profile.modelPath).size);
115
+ const s = statSync(item.profile.modelPath);
116
+ // Only stat regular files — a modelPath that is a directory (MLX)
117
+ // reports the dir entry size, not the model size.
118
+ if (s.isFile()) return formatBytes(s.size);
60
119
  }
120
+ const managedSize = managedProfileSizeBytes(item.profile, managedModels);
121
+ if (managedSize) return formatBytes(managedSize);
61
122
  return "—";
62
123
  }
63
124
  if (item.type === "new") {
@@ -76,26 +137,34 @@ export function modelNameWidth(items) {
76
137
  return Math.max(20, maxName + 2);
77
138
  }
78
139
 
79
- function optionLabel({ status, source, name, ctx, size, nameWidth }) {
80
- return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
140
+ function optionLabel({ status, backend, source, name, ctx, size, nameWidth }) {
141
+ return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
81
142
  }
82
143
 
83
- export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth }) {
144
+ export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth, managedModels }) {
145
+ const sourceId = discoverySourceForItem(item) ?? "unknown";
146
+ const backendId = inferBackendId(item);
84
147
  if (item.type === "profile") {
85
148
  const backend = backendFor(item.profile.backend);
86
149
  const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
87
- const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
88
- const status = item.fileMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
150
+ const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
151
+ const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : "ready";
152
+ const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
153
+ const hint = drafterMissing ? "MTP drafter missing — reconfigure"
154
+ : modelMissing ? `${backend.label} model no longer available`
155
+ : undefined;
89
156
  return {
90
157
  value: itemKey(item),
91
158
  label: optionLabel({
92
159
  status: optionStatusTag(status),
93
- source: optionSourceTag(item.profile.backend, backend.label),
160
+ backend: optionBackendTag(backendId),
161
+ source: optionSourceTag(sourceId),
94
162
  name: item.profile.label,
95
163
  nameWidth,
96
164
  ctx: optionCtxLabel(item),
97
- size: optionSizeLabel(item),
165
+ size: optionSizeLabel(item, managedModels),
98
166
  }),
167
+ ...(hint ? { hint: pc.red(hint) } : {}),
99
168
  };
100
169
  }
101
170
  if (item.type === "new") {
@@ -103,41 +172,60 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
103
172
  value: itemKey(item),
104
173
  label: optionLabel({
105
174
  status: optionStatusTag("setup"),
106
- source: optionSourceTag("gguf", "GGUF file"),
175
+ backend: optionBackendTag(backendId),
176
+ source: optionSourceTag(sourceId),
107
177
  name: item.model.label,
108
178
  nameWidth,
109
179
  ctx: optionCtxLabel(item),
110
- size: optionSizeLabel(item),
180
+ size: optionSizeLabel(item, managedModels),
111
181
  }),
112
182
  };
113
183
  }
114
- const backend = BACKENDS[item.backendId];
115
184
  return {
116
185
  value: itemKey(item),
117
186
  label: optionLabel({
118
187
  status: optionStatusTag("setup"),
119
- source: optionSourceTag(item.backendId, backend.label),
188
+ backend: optionBackendTag(backendId),
189
+ source: optionSourceTag(sourceId),
120
190
  name: item.model.label,
121
191
  nameWidth,
122
192
  ctx: optionCtxLabel(item),
123
- size: optionSizeLabel(item),
193
+ size: optionSizeLabel(item, managedModels),
124
194
  }),
125
195
  };
126
196
  }
127
197
 
128
- export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set()) {
198
/**
 * Look up the size of a managed-server profile's model in the managed-model
 * listings.
 *
 * @param {object} profile - saved profile (reads backend, omlxModel).
 * @param {Array<{backendId: string, models?: Array<{id: string, sizeBytes?: number}>}>} [managedModels]
 *   per-backend model listings.
 * @returns {number|null} size in bytes, or null when the listings are not an
 *   array, the backend is not a "managed-server", the profile has no
 *   `omlxModel`, or the model is not listed / has no (non-zero) size.
 */
function managedProfileSizeBytes(profile, managedModels) {
  if (!Array.isArray(managedModels)) return null;
  if (backendFor(profile.backend).type !== "managed-server") return null;

  const listing = managedModels.find((entry) => entry.backendId === profile.backend);
  const candidates = listing?.models ?? [];

  const wantedId = profile.omlxModel ?? null;
  if (!wantedId) return null;

  const match = candidates.find((candidate) => candidate.id === wantedId);
  return match?.sizeBytes || null;
}
208
+
209
/**
 * Work out which backend id a picker item belongs to.
 *
 * Profiles and managed items carry their backend explicitly. For any other
 * item (a newly discovered model) the backend is derived from the model:
 * MLX-format models use "mlx-vlm", otherwise the model's own `backend` if
 * set, otherwise "llama-cpp".
 *
 * @param {object} item - picker item (reads type, profile, backendId, model).
 * @returns {string} backend id.
 */
function inferBackendId(item) {
  switch (item.type) {
    case "profile":
      return item.profile.backend;
    case "managed":
      return item.backendId;
    default:
      break;
  }

  // New model: derive from format, then any backend recorded on the model.
  const discovered = item.model;
  if (discovered?.format === "mlx") return "mlx-vlm";
  return discovered?.backend || "llama-cpp";
}
217
+
218
+ export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds = new Set()) {
129
219
  const profiles = normalized.profiles;
130
220
  const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
131
- const isMissing = (p) => isProfileFileMissing(p);
132
- const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
221
+ const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
222
+ const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)).length;
133
223
  const runningCount = runningProfilesNow.length;
134
- const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
135
224
  const missingCount = profiles.filter(isMissing).length;
136
225
  const setupCount = normalized.newModels.length + normalized.managedItems.length;
137
226
 
138
227
  const countParts = [];
139
228
  if (runningCount > 0) countParts.push(pc.green(`${runningCount} running`));
140
- if (serverUpCount > 0) countParts.push(pc.yellow(`${serverUpCount} server up, model not loaded`));
141
229
  if (readyCount > 0) countParts.push(pc.blue(`${readyCount} model${readyCount === 1 ? "" : "s"} ready`));
142
230
  if (missingCount > 0) countParts.push(pc.red(`${missingCount} model${missingCount === 1 ? "" : "s"} missing`));
143
231
  if (setupCount > 0) countParts.push(pc.yellow(`${setupCount} model${setupCount === 1 ? "" : "s"} need${setupCount === 1 ? "s" : ""} setup`));
@@ -160,11 +248,10 @@ export async function printProfileDetails(profile) {
160
248
  const backend = backendFor(profile.backend);
161
249
  const isManaged = backend.type === "managed-server";
162
250
  const running = await isProfileRunning(profile);
163
- const serverUp = !running && isManaged && await isProfileServerUp(profile);
164
251
  const fileMissing = !isManaged && isProfileFileMissing(profile);
165
252
  console.log("\n" + renderSection("Model overview", renderRows([
166
253
  ["Name", pc.bold(profile.label)],
167
- ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : serverUp ? pc.yellow("Server up, model not loaded") : pc.blue("Ready")],
254
+ ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : pc.blue("Ready")],
168
255
  ["Details", profileDetailParts(profile, { fileMissing }).join(pc.dim(" · "))],
169
256
  ["Server", fileMissing ? pc.red(profile.baseUrl) : profile.baseUrl],
170
257
  ])));
@@ -213,6 +300,29 @@ export function printGgufModelDetails(model, drafter) {
213
300
  console.log("\n" + renderSection("Model details", renderRows(detailRows), { columns: 110 }));
214
301
  }
215
302
 
303
/**
 * Print the overview and detail sections for a discovered MLX model that has
 * not been set up yet.
 *
 * Capabilities are obtained from `detectMlxCapabilities` (loaded on demand
 * from ./mlx-discovery.mjs) using `model.filePath`, falling back to
 * `model.path`. Output goes to the console.
 *
 * @param {object} model - discovered model (reads label, path, filePath,
 *   source, sizeBytes).
 * @returns {Promise<void>}
 */
export async function printMlxModelDetails(model) {
  const { detectMlxCapabilities } = await import("./mlx-discovery.mjs");
  const caps = await detectMlxCapabilities(model.filePath ?? model.path);

  // Collect whichever traits were detected, in display order.
  const traits = [
    caps.architecture,
    caps.thinking ? "thinking" : null,
    caps.vision ? "vision" : null,
  ].filter(Boolean);
  const summary = traits.length > 0 ? traits.join(pc.dim(" · ")) : "standard MLX";

  const overviewRows = [
    ["Name", pc.bold(model.label)],
    ["Status", pc.yellow("Needs one-time setup")],
    ["Details", summary],
  ];
  console.log("\n" + renderSection("Downloaded model", renderRows(overviewRows)));

  const contextText = caps.contextLength
    ? `${caps.contextLength.toLocaleString()} trained`
    : "unknown";
  const detailRows = [
    ["Model dir", model.path],
    ["Backend", "mlx-vlm"],
    ["Source", formatSourceLabel(model.source)],
    ["Detected", summary],
    ["Size", formatBytes(model.sizeBytes)],
    ["Context", contextText],
  ];
  console.log("\n" + renderSection("Model details", renderRows(detailRows), { columns: 110 }));
}
325
+
216
326
  export function printManagedModelDetails(model, backend) {
217
327
  console.log("\n" + renderSection(`${backend.label} model`, renderRows([
218
328
  ["Name", pc.bold(model.label)],