npm - offgrid-ai - Versions diffs - 0.9.6 → 0.10.0 - Mend

offgrid-ai 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/README.md +6 -6
package/package.json +4 -3
package/resources/hf-download.py +79 -0
package/resources/mlxvlm-server-wrapper.py +112 -0
package/resources/recommendations.json +60 -0
package/src/backend-installers.mjs +1 -16
package/src/backends.mjs +17 -45
package/src/benchmark/finalize.mjs +3 -90
package/src/benchmark/flow.mjs +3 -4
package/src/benchmark/metrics.mjs +0 -44
package/src/benchmark/prepare.mjs +1 -1
package/src/benchmark.mjs +3 -1
package/src/commands/main.mjs +3 -5
package/src/commands/models.mjs +21 -18
package/src/commands/onboard.mjs +67 -9
package/src/commands/run.mjs +20 -5
package/src/commands/status.mjs +1 -1
package/src/config.mjs +11 -2
package/src/discovery-shared.mjs +44 -0
package/src/hardware.mjs +49 -0
package/src/harness-pi.mjs +25 -11
package/src/huggingface.mjs +209 -0
package/src/managed.mjs +1 -5
package/src/mlx-discovery.mjs +290 -0
package/src/mlx-flags.mjs +93 -0
package/src/model-catalog.mjs +12 -6
package/src/model-name.mjs +7 -25
package/src/model-presenters.mjs +131 -27
package/src/process.mjs +129 -32
package/src/profile-setup.mjs +105 -0
package/src/profiles.mjs +30 -0
package/src/recommendations.mjs +56 -14
package/src/scan.mjs +39 -8

package/src/model-name.mjs CHANGED

@@ -68,9 +68,8 @@ const TAG_TOKENS = [
 /**
  * Parse a raw model identifier into a structured display name.
  *
- * @param {string} rawId  The raw identifier: GGUF filename (no .gguf),
- *                        Ollama model name, or oMLX model id.
- * @param {"local-gguf"|"ollama"|"omlx"} source  Where this name came from.
+ * @param {string} rawId  The raw identifier: GGUF filename (no .gguf) or oMLX model id.
+ * @param {"local-gguf"|"omlx"} source  Where this name came from.
  * @returns {{ publisher: string|null, model: string, params: string|null,
  *             quant: string|null, tags: string[], display: string,
  *             sort: string, id: string }}
@@ -87,18 +86,7 @@ export function parseModelName(rawId, source) {
     name = rawId.slice(slashIdx + 1);
   }
-  // 2. For Ollama, split on : to separate model from tag (e.g. "gemma3:4b")
-  //    The tag after : is a model size/variant identifier — not a GGUF quant.
-  let ollamaTag = null;
-  if (source === "ollama") {
-    const colonIdx = name.lastIndexOf(":");
-    if (colonIdx !== -1) {
-      ollamaTag = name.slice(colonIdx + 1);
-      name = name.slice(0, colonIdx);
-    }
-  }
-  // 3. Extract quant (GGUF quantization suffix)
+  // 2. Extract quant (GGUF quantization suffix)
   let quant = null;
   for (const pattern of QUANT_PATTERNS) {
     const match = name.match(pattern);
@@ -125,13 +113,7 @@ export function parseModelName(rawId, source) {
   // Clean up leftover separators
   name = name.replace(/[-_]{2,}/g, "-").replace(/^[-_]+|[-_]+$/g, "");
-  // 5. For Ollama, re-attach the tag as part of the model name
-  //    (Ollama tags like "4b" or "30b-a3b" are size variants, not quants)
-  if (ollamaTag) {
-    name = name + "-" + ollamaTag;
-  }
-  // 6. Title-case the remaining model name
+  // 5. Title-case the remaining model name
   let model = titleCaseModel(name);
   // If nothing is left after parsing, fall back to the raw name
@@ -139,13 +121,13 @@ export function parseModelName(rawId, source) {
     model = rawId.includes("/") ? rawId : rawId.replace(/[-_]/g, " ");
   }
-  // 7. Extract params (size like 30B, 12B) for sort/filter convenience
+  // 6. Extract params (size like 30B, 12B) for sort/filter convenience
   const params = extractParams(model);
-  // 8. Build display string
+  // 7. Build display string
   const display = buildDisplay(publisher, model, tags, quant);
-  // 9. Build sort key (lowercase, no publisher, for alphabetical ordering)
+  // 8. Build sort key (lowercase, no publisher, for alphabetical ordering)
   const sort = model.toLowerCase().replace(/[-_]/g, " ");
   return { publisher, model, params, quant, tags, display, sort, id };

package/src/model-presenters.mjs CHANGED

@@ -1,7 +1,8 @@
 import { existsSync, statSync } from "node:fs";
-import { BACKENDS, backendFor } from "./backends.mjs";
+import { basename, dirname } from "node:path";
+import { backendFor } from "./backends.mjs";
 import { readCommandArgv } from "./profiles.mjs";
-import { isProfileRunning, isProfileServerUp } from "./process.mjs";
+import { isProfileRunning } from "./process.mjs";
 import { buildPrettyCommand } from "./command.mjs";
 import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
 import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
@@ -11,6 +12,7 @@ import { findBenchmarkRepo } from "./benchmark.mjs";
 const OPTION_SEPARATOR = pc.dim("  │  ");
 const OPTION_STATUS_WIDTH = 10;
+const OPTION_BACKEND_WIDTH = 14;
 const OPTION_SOURCE_WIDTH = 14;
 const OPTION_CTX_WIDTH = 5;
@@ -25,7 +27,7 @@ function optionPad(text, color, width) {
 function optionStatusTag(kind) {
   const statuses = {
     running: ["RUNNING", pc.green],
-    serverup: ["SERVER UP", pc.yellow],
+    serverup: ["READY", pc.blue],
     ready: ["READY", pc.blue],
     missing: ["MISSING", pc.red],
     setup: ["SETUP", pc.yellow],
@@ -34,17 +36,70 @@ function optionStatusTag(kind) {
   return optionPad(text, color, OPTION_STATUS_WIDTH);
 }
-function optionSourceTag(sourceId, label) {
+function optionSourceTag(sourceId) {
+  const label = formatSourceLabel(sourceId);
   const colors = {
-    "llama-cpp": pc.cyan,
-    "llama-cpp-mtp": pc.blue,
-    ollama: pc.green,
+    huggingface: pc.cyan,
+    lmstudio: pc.blue,
     omlx: pc.magenta,
+    "llama.cpp": pc.cyan,
     gguf: pc.cyan,
+    mlx: pc.yellow,
+    "mlx-vlm": pc.yellow,
   };
   return optionPad(label, colors[sourceId] ?? pc.dim, OPTION_SOURCE_WIDTH);
 }
+function optionBackendTag(backendId) {
+  const backend = backendId ? backendFor(backendId) : null;
+  const label = backend?.label ?? backendId ?? "unknown";
+  const colors = {
+    "llama-cpp": pc.cyan,
+    "llama-cpp-mtp": pc.blue,
+    omlx: pc.magenta,
+    "mlx-vlm": pc.yellow,
+  };
+  return optionPad(label, colors[backendId] ?? pc.dim, OPTION_BACKEND_WIDTH);
+}
+function formatSourceLabel(sourceId) {
+  if (!sourceId) return "unknown";
+  const map = {
+    huggingface: "HuggingFace",
+    lmstudio: "LM Studio",
+    omlx: "oMLX",
+    "llama.cpp": "llama.cpp",
+    gguf: "GGUF file",
+    mlx: "MLX",
+    "mlx-vlm": "MLX",
+  };
+  return map[sourceId] ?? String(sourceId);
+}
+function inferSourceFromPath(modelPath) {
+  if (!modelPath) return null;
+  const normalized = modelPath.toLowerCase().replace(/\\/g, "/");
+  if (normalized.includes("/.omlx/models")) return "omlx";
+  if (normalized.includes("/.lmstudio/models")) return "lmstudio";
+  if (normalized.includes("/.cache/huggingface")) return "huggingface";
+  if (normalized.includes("/.cache/llama.cpp")) return "llama.cpp";
+  const parent = basename(dirname(modelPath));
+  if (parent && parent !== ".") return parent.replace(/^\./, "");
+  return null;
+}
+function discoverySourceForProfile(profile) {
+  const backend = backendFor(profile.backend);
+  if (backend.type === "managed-server") return backend.id;
+  if (profile.source && profile.source !== "local-gguf") return profile.source;
+  return inferSourceFromPath(profile.modelPath);
+}
+function discoverySourceForItem(item) {
+  if (item.type === "profile") return discoverySourceForProfile(item.profile);
+  return item.model?.source ?? null;
+}
 function optionCtxLabel(item) {
   if (item.type === "profile" && item.profile.flags?.ctxSize) {
     return optionPad(`${(item.profile.flags.ctxSize / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
@@ -52,12 +107,18 @@ function optionCtxLabel(item) {
   return optionPad("—", null, OPTION_CTX_WIDTH);
 }
-function optionSizeLabel(item) {
+function optionSizeLabel(item, managedModels) {
   if (item.type === "profile") {
     if (item.fileMissing) return "—";
+    if (item.profile.modelSizeBytes) return formatBytes(item.profile.modelSizeBytes);
     if (item.profile.modelPath && existsSync(item.profile.modelPath)) {
-      return formatBytes(statSync(item.profile.modelPath).size);
+      const s = statSync(item.profile.modelPath);
+      // Only stat regular files — a modelPath that is a directory (MLX)
+      // reports the dir entry size, not the model size.
+      if (s.isFile()) return formatBytes(s.size);
     }
+    const managedSize = managedProfileSizeBytes(item.profile, managedModels);
+    if (managedSize) return formatBytes(managedSize);
     return "—";
   }
   if (item.type === "new") {
@@ -76,17 +137,18 @@ export function modelNameWidth(items) {
   return Math.max(20, maxName + 2);
 }
-function optionLabel({ status, source, name, ctx, size, nameWidth }) {
-  return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
+function optionLabel({ status, backend, source, name, ctx, size, nameWidth }) {
+  return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
 }
-export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
+export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth, managedModels }) {
+  const sourceId = discoverySourceForItem(item) ?? "unknown";
+  const backendId = inferBackendId(item);
   if (item.type === "profile") {
     const backend = backendFor(item.profile.backend);
     const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
-    const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
     const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
-    const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
+    const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : "ready";
     const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
     const hint = drafterMissing ? "MTP drafter missing — reconfigure"
       : modelMissing ? `${backend.label} model no longer available`
@@ -95,11 +157,12 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
       value: itemKey(item),
       label: optionLabel({
         status: optionStatusTag(status),
-        source: optionSourceTag(item.profile.backend, backend.label),
+        backend: optionBackendTag(backendId),
+        source: optionSourceTag(sourceId),
         name: item.profile.label,
         nameWidth,
         ctx: optionCtxLabel(item),
-        size: optionSizeLabel(item),
+        size: optionSizeLabel(item, managedModels),
       }),
       ...(hint ? { hint: pc.red(hint) } : {}),
     };
@@ -109,41 +172,60 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, model
       value: itemKey(item),
       label: optionLabel({
         status: optionStatusTag("setup"),
-        source: optionSourceTag("gguf", "GGUF file"),
+        backend: optionBackendTag(backendId),
+        source: optionSourceTag(sourceId),
         name: item.model.label,
         nameWidth,
         ctx: optionCtxLabel(item),
-        size: optionSizeLabel(item),
+        size: optionSizeLabel(item, managedModels),
       }),
     };
   }
-  const backend = BACKENDS[item.backendId];
   return {
     value: itemKey(item),
     label: optionLabel({
       status: optionStatusTag("setup"),
-      source: optionSourceTag(item.backendId, backend.label),
+      backend: optionBackendTag(backendId),
+      source: optionSourceTag(sourceId),
       name: item.model.label,
       nameWidth,
       ctx: optionCtxLabel(item),
-      size: optionSizeLabel(item),
+      size: optionSizeLabel(item, managedModels),
     }),
   };
 }
-export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
+function managedProfileSizeBytes(profile, managedModels) {
+  if (!managedModels || !Array.isArray(managedModels)) return null;
+  const backend = backendFor(profile.backend);
+  if (backend.type !== "managed-server") return null;
+  const backendModels = managedModels.find((m) => m.backendId === profile.backend)?.models ?? [];
+  const modelId = profile.omlxModel ?? null;
+  if (!modelId) return null;
+  const model = backendModels.find((m) => m.id === modelId);
+  return model?.sizeBytes || null;
+}
+function inferBackendId(item) {
+  if (item.type === "profile") return item.profile.backend;
+  if (item.type === "managed") return item.backendId;
+  // new model: derive from format
+  if (item.model?.format === "mlx") return "mlx-vlm";
+  if (item.model?.backend) return item.model.backend;
+  return "llama-cpp";
+}
+export function printWorkspaceHeader(normalized, runningProfilesNow, modelMissingIds = new Set()) {
   const profiles = normalized.profiles;
   const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
   const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
-  const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
+  const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p)).length;
   const runningCount = runningProfilesNow.length;
-  const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
   const missingCount = profiles.filter(isMissing).length;
   const setupCount = normalized.newModels.length + normalized.managedItems.length;
   const countParts = [];
   if (runningCount > 0) countParts.push(pc.green(`${runningCount} running`));
-  if (serverUpCount > 0) countParts.push(pc.yellow(`${serverUpCount} server up, model not loaded`));
   if (readyCount > 0) countParts.push(pc.blue(`${readyCount} model${readyCount === 1 ? "" : "s"} ready`));
   if (missingCount > 0) countParts.push(pc.red(`${missingCount} model${missingCount === 1 ? "" : "s"} missing`));
   if (setupCount > 0) countParts.push(pc.yellow(`${setupCount} model${setupCount === 1 ? "" : "s"} need${setupCount === 1 ? "s" : ""} setup`));
@@ -166,11 +248,10 @@ export async function printProfileDetails(profile) {
   const backend = backendFor(profile.backend);
   const isManaged = backend.type === "managed-server";
   const running = await isProfileRunning(profile);
-  const serverUp = !running && isManaged && await isProfileServerUp(profile);
   const fileMissing = !isManaged && isProfileFileMissing(profile);
   console.log("\n" + renderSection("Model overview", renderRows([
     ["Name", pc.bold(profile.label)],
-    ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : serverUp ? pc.yellow("Server up, model not loaded") : pc.blue("Ready")],
+    ["Status", fileMissing ? pc.red("File missing") : running ? pc.green("Running now") : pc.blue("Ready")],
     ["Details", profileDetailParts(profile, { fileMissing }).join(pc.dim(" · "))],
     ["Server", fileMissing ? pc.red(profile.baseUrl) : profile.baseUrl],
   ])));
@@ -219,6 +300,29 @@ export function printGgufModelDetails(model, drafter) {
   console.log("\n" + renderSection("Model details", renderRows(detailRows), { columns: 110 }));
 }
+export async function printMlxModelDetails(model) {
+  const { detectMlxCapabilities } = await import("./mlx-discovery.mjs");
+  const caps = await detectMlxCapabilities(model.filePath ?? model.path);
+  const parts = [];
+  if (caps.architecture) parts.push(caps.architecture);
+  if (caps.thinking) parts.push("thinking");
+  if (caps.vision) parts.push("vision");
+  const summary = parts.length > 0 ? parts.join(pc.dim(" · ")) : "standard MLX";
+  console.log("\n" + renderSection("Downloaded model", renderRows([
+    ["Name", pc.bold(model.label)],
+    ["Status", pc.yellow("Needs one-time setup")],
+    ["Details", summary],
+  ])));
+  console.log("\n" + renderSection("Model details", renderRows([
+    ["Model dir", model.path],
+    ["Backend", "mlx-vlm"],
+    ["Source", formatSourceLabel(model.source)],
+    ["Detected", summary],
+    ["Size", formatBytes(model.sizeBytes)],
+    ["Context", caps.contextLength ? `${caps.contextLength.toLocaleString()} trained` : "unknown"],
+  ]), { columns: 110 }));
+}
 export function printManagedModelDetails(model, backend) {
   console.log("\n" + renderSection(`${backend.label} model`, renderRows([
     ["Name", pc.bold(model.label)],

package/src/process.mjs CHANGED

@@ -35,11 +35,13 @@ async function startLocalServer(profile) {
   // Build argv: binary + command.json args
   const argv = [...commandArgv];
+  // mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
+  const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
   const rawFd = openSync(rawLogPath, "a");
   let child;
   try {
-    child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd] });
+    child = spawn(binary, argv, { detached: true, stdio: ["ignore", rawFd, rawFd], env });
   } finally {
     closeSync(rawFd);
   }
@@ -96,16 +98,134 @@ export async function stopProfile(profile) {
     await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopReason: "pid-not-running" });
     return { stopped: false, message: `${profile.id} pid ${state.pid} is no longer running.` };
   }
+  const pid = state.pid;
   try {
-    try {
-      process.kill(-state.pid, "SIGTERM");
-    } catch {
-      process.kill(state.pid, "SIGTERM");
-    }
-    await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: "SIGTERM" });
-    return { stopped: true, message: `Stopped ${profile.id} pid ${state.pid}` };
+    const signal = await terminateProcess(pid);
+    await writeState(profile.id, { ...state, pid: null, stoppedAt: new Date().toISOString(), stopSignal: signal });
+    return { stopped: true, message: `Stopped ${profile.id} pid ${pid}` };
   } catch (error) {
-    return { stopped: false, message: `Could not stop pid ${state.pid}: ${error.message}` };
+    return { stopped: false, message: `Could not stop pid ${pid}: ${error.message}` };
+  }
+}
+// Reliably terminate a detached local-server process group: SIGTERM with a
+// grace period for graceful shutdown (lets mlx-vlm/llama-server release the
+// model), then SIGKILL if still alive. Guarantees the model is unloaded when a
+// profile stops — consistent across backends (llama-server exits on SIGTERM;
+// mlx-vlm/uvicorn often does not, hence the SIGKILL fallback).
+async function terminateProcess(pid) {
+  const signalGroup = (sig) => {
+    try { process.kill(-pid, sig); }
+    catch { process.kill(pid, sig); } // not a group leader — kill the proc itself
+  };
+  signalGroup("SIGTERM");
+  for (let i = 0; i < 50; i++) { // 5s grace for graceful shutdown
+    if (await processGone(pid)) return "SIGTERM";
+    await sleep(100);
+  }
+  signalGroup("SIGKILL");
+  for (let i = 0; i < 30; i++) { // 3s for SIGKILL to take effect
+    if (await processGone(pid)) return "SIGKILL";
+    await sleep(100);
+  }
+  throw new Error(`pid ${pid} did not exit after SIGKILL`);
+}
+// True if the process is dead (or a zombie about to be reaped).
+async function processGone(pid) {
+  try { process.kill(pid, 0); }
+  catch { return true; } // no such process
+  // Alive to signal(0) — but a detached setsid child can briefly appear as a
+  // zombie before launchd reaps it. Treat zombie as gone.
+  try {
+    const { stdout } = await execFileAsync("ps", ["-o", "stat=", "-p", String(pid)]);
+    return /^Z/.test(stdout.trim());
+  } catch {
+    return false;
+  }
+}
+// ── Unload model from a managed server (oMLX) ─────────────────────────────
+// Counterpart to stopProfile for local-server backends: stopProfile kills the
+// server process (which unloads the model); unloadModelFromServer asks a
+// managed server to release the model from memory via its HTTP API, leaving the
+// server itself running. Together they give a consistent UX: quitting Pi
+// unloads the model regardless of backend type.
+export async function unloadModelFromServer(profile) {
+  const backend = backendFor(profile.backend);
+  if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
+    // llama.cpp unloads when the server process exits; no HTTP unload API exists.
+    // If offgrid-ai started the server, stopProfile already handled it.
+    return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
+  }
+  if (backend.id === "omlx") {
+    return await unloadOmlxModel(profile);
+  }
+  if (backend.id === "mlx-vlm") {
+    // mlx-vlm is a local-server backend — stopProfile handles unload by killing
+    // the process. No HTTP unload API.
+    return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
+  }
+  return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
+}
+async function unloadOmlxModel(profile) {
+  const baseUrl = profile.baseUrl?.replace(/\/v1\/?$/u, "") || "";
+  const adminUrl = `${baseUrl}/admin/api/models`;
+  const modelId = profile.modelAlias || profile.omlxModel || profile.id;
+  try {
+    const ids = await serverModelIds(profile.baseUrl);
+    const match = ids.find((id) => id.toLowerCase() === modelId.toLowerCase());
+    const targetId = match ?? modelId;
+    const response = await fetch(`${adminUrl}/${encodeURIComponent(targetId)}/unload`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      signal: AbortSignal.timeout(30000),
+    });
+    if (response.ok) {
+      return { unloaded: true, backend: "omlx", modelId: targetId };
+    }
+    const detail = await responseErrorDetail(response);
+    if (response.status === 400 && /not loaded/i.test(detail)) {
+      return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
+    }
+    if (response.status === 401 || response.status === 403) {
+      return {
+        unloaded: false,
+        backend: "omlx",
+        modelId: targetId,
+        error: "oMLX admin authentication required. Enable skip_api_key_verification in oMLX settings, or unload manually from the admin panel.",
+      };
+    }
+    return { unloaded: false, backend: "omlx", modelId: targetId, error: `HTTP ${response.status}: ${detail}` };
+  } catch (err) {
+    if (err?.name === "AbortError" || err?.name === "TimeoutError") {
+      return { unloaded: false, backend: "omlx", modelId, error: "Unload request timed out. The model may still be unloading in the background." };
+    }
+    return { unloaded: false, backend: "omlx", modelId, error: err.message };
+  }
+}
+async function responseErrorDetail(response) {
+  const text = await response.text().catch(() => "");
+  if (!text) return "";
+  try {
+    const body = JSON.parse(text);
+    return body?.detail ?? body?.message ?? text;
+  } catch {
+    return text;
   }
 }
@@ -126,7 +246,6 @@ export async function isProfileServerUp(profile) {
 export async function modelLoadedOnServer(profile) {
   const backend = backendFor(profile.backend);
-  if (backend.id === "ollama") return modelIdsMatch(await ollamaLoadedModelIds(profile), expectedModelIds(profile));
   if (backend.id === "omlx") return modelIdsMatch(await omlxLoadedModelIds(profile), expectedModelIds(profile));
   const { matches } = await serverMatchesProfile(profile);
   return matches;
@@ -134,9 +253,6 @@ export async function modelLoadedOnServer(profile) {
 export async function modelAvailableOnServer(profile) {
   const backend = backendFor(profile.backend);
-  if (backend.id === "ollama") {
-    return modelIdsMatch(await ollamaAvailableModelIds(profile), expectedModelIds(profile));
-  }
   if (backend.id === "omlx") {
     // /v1/models lists discovered models; an ID must exist there to be usable.
     return modelIdsMatch(await serverModelIds(profile.baseUrl), expectedModelIds(profile));
@@ -217,24 +333,6 @@ export async function serverModelIds(baseUrl) {
     .filter(Boolean);
 }
-async function ollamaLoadedModelIds(profile) {
-  const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/ps`);
-  if (!result.ok) return [];
-  return (Array.isArray(result.data?.models) ? result.data.models : [])
-    .flatMap((model) => [model?.name, model?.model])
-    .map((id) => String(id ?? "").trim())
-    .filter(Boolean);
-}
-async function ollamaAvailableModelIds(profile) {
-  const result = await fetchJson(`${apiRootUrl(profile.baseUrl)}/api/tags`);
-  if (!result.ok) return [];
-  return (Array.isArray(result.data?.models) ? result.data.models : [])
-    .flatMap((model) => [model?.name, model?.model])
-    .map((id) => String(id ?? "").trim())
-    .filter(Boolean);
-}
 async function omlxLoadedModelIds(profile) {
   const statusResult = await fetchJson(`${profile.baseUrl.replace(/\/+$/u, "")}/models/status`);
   const fromStatus = statusResult.ok
@@ -305,7 +403,6 @@ function expectedModelIds(profile) {
   return [
     profile.modelAlias,
     profile.label,
-    profile.ollamaModel,
     profile.omlxModel,
     profile.modelPath,
     fileName,

package/src/profile-setup.mjs CHANGED

@@ -8,6 +8,7 @@ import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
 import { detectCapabilities } from "./autodetect.mjs";
 import { matchDrafter } from "./scan.mjs";
 import { scanGgufModels } from "./scan.mjs";
+import { estimateMemoryMb } from "./mlx-flags.mjs";
 const execFileAsync = promisify(execFile);
@@ -297,3 +298,107 @@ function detectionSummary(caps) {
 function samplingSummary(flags) {
   return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
 }
+// ── MLX profile configuration ─────────────────────────────────────────────
+/**
+ * Interactive configuration for an mlx-vlm profile.
+ */
+export async function configureMlxProfile(prompt, profile) {
+  let configured = profile;
+  console.log("");
+  console.log(renderSection("Model setup", renderRows([
+    ["Model", pc.bold(profile.label)],
+    ["Detected", mlxDetectionSummary(configured.capabilities)],
+    ["Context", String(configured.flags.ctxSize) + " tokens"],
+  ])));
+  console.log(pc.dim("Larger context windows use more memory. You can edit the profile later if needed.\n"));
+  if (configured.capabilities.vision) {
+    console.log(renderSection("Vision detected", renderRows([
+      ["Capability", "image / multimodal input"],
+      ["Note", "mlx-vlm loads vision from the model directory automatically."],
+    ])));
+  }
+  if (configured.capabilities.thinking) {
+    console.log("");
+    console.log(renderSection("Thinking mode", renderRows([
+      ["Flag", "--enable-thinking"],
+      ["Default", "on for Qwen 3 / Gemma 4 / DeepSeek-R class models"],
+    ])));
+    const useThinking = await prompt.yesNo("Enable thinking mode?", true);
+    configured = await applyMlxThinkingToggle(configured, useThinking);
+  }
+  const ctxSize = await prompt.number("Context window tokens", configured.flags.ctxSize, 1024, 1048576);
+  configured = applyMlxContextSize(configured, ctxSize);
+  console.log("\n" + renderMlxMemoryEstimate(configured));
+  console.log("");
+  console.log(renderSection("Defaults", renderRows([
+    ["Backend", configured.backend],
+    ["Endpoint", configured.baseUrl],
+    ["Context", String(configured.flags.ctxSize) + " tokens"],
+    ["Thinking", configured.capabilities.thinking && configured.commandArgv.includes("--enable-thinking") ? "on" : "off"],
+    ["Vision", configured.capabilities.vision ? "yes" : "no"],
+  ])));
+  if (!(await prompt.yesNo("Save profile with these settings?", true))) return null;
+  return configured;
+}
+async function applyMlxThinkingToggle(profile, enabled) {
+  if (!profile.capabilities.thinking) return profile;
+  const { computeMlxVlmFlags } = await import("./mlx-flags.mjs");
+  const { args } = computeMlxVlmFlags(profile.modelPath, {
+    port: profile.flags.port,
+    ctxSize: profile.flags.ctxSize,
+    thinkingEnabled: enabled,
+  });
+  return {
+    ...profile,
+    commandArgv: args,
+    capabilities: { ...profile.capabilities, thinkingEnabled: enabled },
+  };
+}
+function applyMlxContextSize(profile, ctxSize) {
+  const flags = { ...profile.flags, ctxSize };
+  const next = {
+    ...profile,
+    flags,
+    baseUrl: baseUrlForFlags(flags),
+  };
+  const idx = next.commandArgv.indexOf("--max-kv-size");
+  if (idx !== -1 && next.commandArgv[idx + 1] != null) {
+    next.commandArgv[idx + 1] = String(ctxSize);
+  } else if (ctxSize && ctxSize > 0) {
+    next.commandArgv.push("--max-kv-size", String(ctxSize));
+  }
+  return next;
+}
+function renderMlxMemoryEstimate(profile) {
+  const modelBytes = profile.modelSizeBytes || 0;
+  if (!modelBytes) {
+    return renderSection("Memory estimate", pc.dim("Model size unknown — save the profile to estimate."));
+  }
+  const totalMb = estimateMemoryMb(modelBytes);
+  const overheadBytes = Math.max(0, totalMb * 1024 * 1024 - modelBytes);
+  return renderSection("Memory estimate", renderRows([
+    ["Estimated total", pc.bold(`~${formatBytes(totalMb * 1024 * 1024)}`)],
+    ["Model", formatBytes(modelBytes)],
+    ["Overhead", `~${formatBytes(overheadBytes)} (KV cache, APC, runtime)`],
+  ]));
+}
+function mlxDetectionSummary(caps) {
+  const parts = [];
+  if (caps.architecture) parts.push(caps.architecture);
+  if (caps.thinking) parts.push("thinking");
+  if (caps.vision) parts.push("vision");
+  return parts.length > 0 ? parts.join(" · ") : "standard MLX";
+}