npm - offgrid-ai - Versions diffs - 0.15.9 → 0.16.3 - Mend

offgrid-ai 0.15.9 → 0.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +4 -4
package/package.json +1 -1
package/src/autodetect.mjs +1 -1
package/src/backends.mjs +4 -41
package/src/benchmark/flow.mjs +14 -13
package/src/benchmark/metrics.mjs +14 -20
package/src/commands/main.mjs +7 -7
package/src/commands/models.mjs +8 -21
package/src/commands/onboard.mjs +10 -43
package/src/commands/run.mjs +1 -1
package/src/commands/status.mjs +19 -0
package/src/config.mjs +48 -2
package/src/harness-pi.mjs +5 -7
package/src/managed.mjs +3 -3
package/src/mlx-discovery.mjs +77 -258
package/src/model-catalog.mjs +9 -14
package/src/model-presenters.mjs +0 -30
package/src/omlx-runtime.mjs +232 -0
package/src/process.mjs +87 -48
package/src/profile-setup.mjs +50 -113
package/src/profiles.mjs +12 -28
package/src/ui.mjs +2 -19
package/resources/mlxvlm-server-wrapper.py +0 -112
package/src/mlx-flags.mjs +0 -100

package/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 # offgrid-ai
-**Helper CLI for running local AI models on Mac with llama-server, mlx-vlm, and oMLX.**
+**Helper CLI for running local AI models on Mac with llama-server and oMLX.**
 [![node](https://img.shields.io/badge/node-20%2B-3c873a)](package.json)
 [![platform](https://img.shields.io/badge/platform-macOS%20%7C%20Linux-blue)]()
@@ -12,7 +12,7 @@
 ## What is offgrid-ai?
-offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server, mlx-vlm, or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity, while still providing a powerful and flexible way to run local models.
+offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity, while still providing a powerful and flexible way to run local models.
 This is the recommended workflow:
@@ -23,8 +23,8 @@ This is the recommended workflow:
 ## Core Features
 - Auto-detects available models from LM Studio, oMLX, and HuggingFace
 - Auto-detects MTP (multi-token prediction) or QAT (quantization aware training) models, and applies the correct flags for llama.cpp
-- Auto-applies the optimal flags for the model type (llama.cpp server flags, mlx-vlm APC/thinking/context flags)
-- Start / stop local servers automatically for chat sessions (llama-server and mlx-vlm)
+- Auto-applies the optimal flags for the model type (llama.cpp server flags, oMLX auto-start and cache management)
+- Start / stop local servers automatically for chat sessions (llama-server and oMLX)
 ## Quick start

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "offgrid-ai",
-  "version": "0.15.9",
+  "version": "0.16.3",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",

package/src/autodetect.mjs CHANGED Viewed

@@ -55,7 +55,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
   const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
   const flags = {
-    ...defaultFlagsForBackend(mtp ? "llama-cpp-mtp" : "llama-cpp"),
+    ...defaultFlagsForBackend("llama-cpp"),
     ctxSize: capabilities.ctxSize,
     flashAttention: "on",
     cacheTypeK: isLowMem ? "f16" : "bf16",

package/src/backends.mjs CHANGED Viewed

@@ -1,14 +1,12 @@
 import { findLlamaServer } from "./config.mjs";
 import { scanGgufModels } from "./scan.mjs";
 import { parseModelName } from "./model-name.mjs";
-import { scanMlxModels, scanOmlxModelSizes, lookupOmlxModelInfo } from "./mlx-discovery.mjs";
-import { DEFAULT_PORT as MLX_VLM_PORT } from "./mlx-flags.mjs";
+import { scanOmlxModelSizes, lookupOmlxModelInfo } from "./mlx-discovery.mjs";
 // ── Backend definitions ────────────────────────────────────────────────────
 export const LOCAL_HOST = "127.0.0.1";
 export const LLAMA_CPP_PORT = 8080;
-export const LLAMA_CPP_MTP_PORT = 8081;
 export const OMLX_PORT = 8000;
 export function baseUrlFor({ host = LOCAL_HOST, port, path = "/v1" }) {
@@ -31,17 +29,6 @@ export const BACKENDS = {
     needsCommandFile: true,
     scanModels: async () => (await scanGgufModels()).models,
   },
-  "llama-cpp-mtp": {
-    id: "llama-cpp-mtp",
-    label: "llama.cpp MTP",
-    type: "local-server",
-    providerId: "llama-cpp-mtp",
-    defaultHost: LOCAL_HOST,
-    defaultPort: LLAMA_CPP_MTP_PORT,
-    defaultBaseUrl: baseUrlFor({ port: LLAMA_CPP_MTP_PORT }),
-    needsCommandFile: true,
-    scanModels: async () => (await scanGgufModels()).models,
-  },
   "omlx": {
     id: "omlx",
     label: "oMLX",
@@ -54,17 +41,6 @@ export const BACKENDS = {
     needsCommandFile: false,
     scanModels: () => scanOmlxModels(),
   },
-  "mlx-vlm": {
-    id: "mlx-vlm",
-    label: "mlx-vlm",
-    type: "local-server",
-    providerId: "mlx-vlm",
-    defaultHost: LOCAL_HOST,
-    defaultPort: MLX_VLM_PORT,
-    defaultBaseUrl: baseUrlFor({ port: MLX_VLM_PORT }),
-    needsCommandFile: true,
-    scanModels: async () => scanMlxModels(),
-  },
 };
 export function backendFor(backendId) {
@@ -75,10 +51,8 @@ export function backendFor(backendId) {
 export async function backendBinaryFor(backendId) {
   const backend = BACKENDS[backendId ?? "llama-cpp"];
-  if (backend.id === "mlx-vlm") return "python3"; // mlx-vlm spawns via python3 + the strict=False wrapper
   if (backend.type === "managed-server") return null;
-  const discovered = await findLlamaServer();
-  return discovered; // null means "not found — trigger onboarding"
+  return await findLlamaServer();
 }
 export function defaultFlagsForBackend(backendId) {
@@ -96,21 +70,15 @@ async function scanOmlxModels() {
   const body = await response.json();
   if (!Array.isArray(body?.data)) return [];
-  // The oMLX API doesn't return model sizes or publishers — look them up from disk.
   const infoMap = await scanOmlxModelSizes();
-  // The oMLX API can return the same model multiple times with different
-  // ID formats (e.g. "Qwen3.6-35B-A3B-OptiQ-4bit" and
-  // "mlx-community--Qwen3.6-35B-A3B-OptiQ-4bit"). Deduplicate by the
-  // normalized full name (publisher/model with / separator), keeping
-  // the first entry (which has the most complete metadata).
+  // Deduplicate by normalized full name (publisher/model with / separator)
   const seen = new Set();
   const deduped = [];
   for (const model of body.data.filter(isChatOmlxModel)) {
     const info = lookupOmlxModelInfo(model.id, infoMap);
     const hasPublisher = model.id.includes("/") || model.id.includes("--");
     const fullName = (!hasPublisher && info?.publisher) ? `${info.publisher}/${model.id}` : model.id;
-    // Normalize: convert -- separator to / for dedup comparison
     const normalized = fullName.replace(/--/g, "/");
     if (seen.has(normalized)) continue;
     seen.add(normalized);
@@ -137,15 +105,10 @@ async function scanOmlxModels() {
     }).sort((a, b) => a.label.localeCompare(b.label));
 }
-// ── Labels ──────────────────────────────────────────────────────────────
 function isChatOmlxModel(model) {
   if (typeof model?.id !== "string" || !model.id.trim()) return false;
   const type = String(model.type ?? model.model_type ?? "").toLowerCase();
   if (["embedding", "embeddings", "reranker", "tool", "converter", "markitdown"].includes(type)) return false;
   if (Object.hasOwn(model, "max_model_len") && model.max_model_len === null) return false;
   return true;
-}
-// (ollamaLabel and omlxLabel removed — parseModelName in model-name.mjs is the single path)
-// (Ollama backend removed — offgrid-ai now uses llama-server + mlx-vlm + oMLX)
+}

package/src/benchmark/flow.mjs CHANGED Viewed

@@ -15,7 +15,7 @@ import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
 function benchmarkModelSource(profile) {
   if (!profile) return "cloud";
-  return profile.providerId === "llama-cpp-mtp" ? "llama-cpp-mtp" : profile.backend === "omlx" ? "omlx" : "llama-cpp";
+  return profile.backend === "omlx" ? "omlx" : "llama-cpp";
 }
 async function chooseBenchmarkAction(prompt, canRun) {
@@ -150,16 +150,22 @@ async function selectBenchmark(prompt, repoPath) {
   return { kind, benchmark };
 }
-// ── Benchmark from a selected profile (from model picker) ────────────────
+// ── Shared benchmark setup ───────────────────────────────────────────────
-export async function benchmarkForProfile(profile) {
+async function benchmarkSetup() {
   await ensureDirs();
   const prompt = createPrompt();
-  try {
-    const repoPath = await linkBenchmarkRepo(prompt);
-    if (!repoPath) return;
+  const repoPath = await linkBenchmarkRepo(prompt);
+  if (!repoPath) return { prompt, repoPath: null, selected: null };
+  const selected = await selectBenchmark(prompt, repoPath);
+  return { prompt, repoPath, selected };
+}
+// ── Benchmark from a selected profile (from model picker) ────────────────
-    const selected = await selectBenchmark(prompt, repoPath);
+export async function benchmarkForProfile(profile) {
+  const { prompt, repoPath, selected } = await benchmarkSetup();
+  try {
     if (!selected) return;
     const { kind, benchmark: selectedBenchmark } = selected;
@@ -185,13 +191,8 @@ export async function benchmarkForProfile(profile) {
 // ── Standalone benchmark flow (offgrid-ai benchmark) ──────────────────────
 export async function benchmarkFlow() {
-  await ensureDirs();
-  const prompt = createPrompt();
+  const { prompt, repoPath, selected } = await benchmarkSetup();
   try {
-    const repoPath = await linkBenchmarkRepo(prompt);
-    if (!repoPath) return;
-    const selected = await selectBenchmark(prompt, repoPath);
     if (!selected) return;
     const { kind, benchmark: selectedBenchmark } = selected;

package/src/benchmark/metrics.mjs CHANGED Viewed

@@ -9,7 +9,7 @@ const SPEED_QUERY_MAX_TOKENS = 64;
 export async function queryServerMetrics(profile) {
   const backend = backendFor(profile.backend);
-  if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
+  if (backend.id === "llama-cpp") {
     return await queryLlamaCppMetrics(profile);
   }
   if (backend.id === "omlx") {
@@ -19,12 +19,13 @@ export async function queryServerMetrics(profile) {
   throw new Error(`Unsupported backend for benchmark speed metrics: ${backend.id}`);
 }
-async function queryLlamaCppMetrics(profile) {
+async function speedQueryFetch(profile, { stream = false, streamOptions = null, errorLabel = "speed query" } = {}) {
   const body = {
     model: profile.modelAlias,
     messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
-    stream: false,
+    stream,
     max_tokens: SPEED_QUERY_MAX_TOKENS,
+    ...(streamOptions ? { stream_options: streamOptions } : {}),
   };
   const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
@@ -35,9 +36,15 @@ async function queryLlamaCppMetrics(profile) {
   });
   if (!response.ok) {
-    throw new Error(`llama.cpp speed query failed: ${response.status} ${response.statusText}`);
+    throw new Error(`${errorLabel} failed: ${response.status} ${response.statusText}`);
   }
+  return response;
+}
+async function queryLlamaCppMetrics(profile) {
+  const response = await speedQueryFetch(profile, { errorLabel: "llama.cpp speed query" });
   const data = await response.json();
   const timings = data.timings;
   if (!timings || typeof timings.prompt_per_second !== "number" || typeof timings.predicted_per_second !== "number") {
@@ -60,25 +67,12 @@ async function queryLlamaCppMetrics(profile) {
 }
 async function queryOmlxMetrics(profile) {
-  const body = {
-    model: profile.modelAlias,
-    messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
+  const response = await speedQueryFetch(profile, {
     stream: true,
-    stream_options: { include_usage: true },
-    max_tokens: SPEED_QUERY_MAX_TOKENS,
-  };
-  const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
-    method: "POST",
-    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify(body),
-    signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
+    streamOptions: { include_usage: true },
+    errorLabel: "oMLX speed query",
   });
-  if (!response.ok) {
-    throw new Error(`oMLX speed query failed: ${response.status} ${response.statusText}`);
-  }
   const text = await response.text();
   let usage = null;
   for (const line of text.split("\n").reverse()) {

package/src/commands/main.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 import { findLlamaServer, ensureDirs } from "../config.mjs";
 import { backendFor } from "../backends.mjs";
 import { scanGgufModels } from "../scan.mjs";
-import { scanMlxModels } from "../mlx-discovery.mjs";
 import { loadProfiles } from "../profiles.mjs";
 import { hasPi } from "../harness-pi.mjs";
 import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
-import { hasLmStudioInstalled, hasOmlxInstalled, scanManagedModels } from "../managed.mjs";
+import { offerManagedOmlxUpdate, hasOmlx } from "../omlx-runtime.mjs";
+import { hasLmStudioInstalled, scanManagedModels } from "../managed.mjs";
 import { recommendedModel } from "../recommendations.mjs";
 import { pc, startInteractive, createPrompt } from "../ui.mjs";
 import { onboardFlow } from "./onboard.mjs";
@@ -19,6 +19,7 @@ export async function mainFlow() {
     const runtimePrompt = createPrompt();
     try {
       await offerManagedLlamaRuntimeUpdate(runtimePrompt);
+      await offerManagedOmlxUpdate(runtimePrompt);
     } finally {
       runtimePrompt.close();
     }
@@ -27,10 +28,9 @@ export async function mainFlow() {
   const llamaBinary = await findLlamaServer();
   const { models: ggufModels, drafters } = await scanGgufModels();
   const managedModels = await scanManagedModels();
-  const mlxModels = await scanMlxModels();
   const profiles = await loadProfiles();
   const hasAnyBackend = llamaBinary || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
-  const hasAnyModels = ggufModels.length > 0 || mlxModels.length > 0 || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
+  const hasAnyModels = ggufModels.length > 0 || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
   const piInstalled = await hasPi();
   const needsLlama = ggufModels.length > 0 || profiles.some((profile) => backendFor(profile.backend).type === "local-server");
@@ -58,14 +58,14 @@ export async function mainFlow() {
   if (!process.stdin.isTTY) return await statusCommand();
   startInteractive("offgrid-ai");
-  return await modelCommandCenter({ profiles, ggufModels, managedModels, mlxModels, drafters });
+  return await modelCommandCenter({ profiles, ggufModels, managedModels, drafters });
 }
 async function printNoModelsHelp(llamaBinary) {
   console.log(pc.yellow("No models found."));
   console.log(pc.dim("You need to download a model to use offgrid-ai.\n"));
-  const omlxInstalled = await hasOmlxInstalled();
+  const omlxInstalled = await hasOmlx();
   const lmStudioInstalled = hasLmStudioInstalled();
   const hasBackends = llamaBinary || omlxInstalled || lmStudioInstalled;
   if (!hasBackends) {
@@ -86,4 +86,4 @@ async function printNoModelsHelp(llamaBinary) {
     console.log(pc.dim(`  Recommended: ${model.label}`));
   }
   if (omlxInstalled) console.log(pc.bold("  omlx start"));
-}
+}

package/src/commands/models.mjs CHANGED Viewed

@@ -3,10 +3,10 @@ import { backendFor, BACKENDS } from "../backends.mjs";
 import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
 import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
 import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
-import { configureLocalProfile } from "../profile-setup.mjs";
+import { configureLocalProfile, configureManagedProfile } from "../profile-setup.mjs";
 import { pc, startInteractive, createPrompt, modelSelect } from "../ui.mjs";
 import { buildCatalogItems, createManagedProfile, itemKey, loadModelCatalog, normalizeCatalog } from "../model-catalog.mjs";
-import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails, printMlxModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
+import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
 import { runProfile } from "./run.mjs";
 const { stripVTControlCharacters } = await import("node:util");
@@ -83,9 +83,7 @@ export async function modelCommandCenter(initialCatalog) {
   const groups = [];
   const backendColors = {
-    "mlx-vlm": pc.yellow,
     "llama-cpp": pc.cyan,
-    "llama-cpp-mtp": pc.blue,
     omlx: pc.magenta,
   };
   for (const { backendId, sourceId, items } of byBackend.values()) {
@@ -185,7 +183,6 @@ async function performAction(prompt, action, item) {
   if (action === "inspect") {
     if (item.type === "profile") return await printProfileDetails(await readProfile(item.profile.id));
     if (item.type === "managed") return printManagedModelDetails(item.model, BACKENDS[item.backendId]);
-    if (item.model?.format === "mlx") return await printMlxModelDetails(item.model);
     return printGgufModelDetails(item.model, item.drafter);
   }
   if (action === "benchmark") {
@@ -197,7 +194,7 @@ async function performAction(prompt, action, item) {
     return await benchmarkFlow();
   }
   if (action === "run") return await runItem(item);
-  if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item, action);
+  if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item);
   if (action === "remove" && item.type === "profile") return await removeProfileInteractive(item.profile.id);
 }
@@ -209,30 +206,20 @@ function printProfileSaved(id) {
   console.log(pc.dim(`  Profile: ${profileJsonPath(id)}`));
 }
-async function setupItem(prompt, item, action) {
+async function setupItem(prompt, item) {
   if (item.type === "profile") {
     const configured = await configureLocalProfile(prompt, await readProfile(item.profile.id));
     if (!configured) return;
-    await saveProfile(configured, { writeCommand: true });
+    await saveProfile(configured);
     await syncPiConfig(configured);
     printProfileSaved(configured.id);
     return;
   }
   if (item.type === "managed") {
     const profile = createManagedProfile(item.model, item.backendId);
-    await saveProfile(profile);
-    await syncPiConfig(profile);
-    printProfileSaved(profile.id);
-    return;
-  }
-  // MLX models: build a mlx-vlm profile and run interactive config.
-  if (item.model.format === "mlx") {
-    const { createProfileFromMlxModel } = await import("../profiles.mjs");
-    const { configureMlxProfile } = await import("../profile-setup.mjs");
-    const profile = await createProfileFromMlxModel(item.model);
-    const configured = await configureMlxProfile(prompt, profile);
+    const configured = await configureManagedProfile(prompt, profile);
     if (!configured) return;
-    await saveProfile(configured, { writeCommand: true });
+    await saveProfile(configured);
     await syncPiConfig(configured);
     printProfileSaved(configured.id);
     return;
@@ -240,7 +227,7 @@ async function setupItem(prompt, item, action) {
   const profile = await createProfileFromModel(item.model, null, item.drafter?.path);
   const configured = await configureLocalProfile(prompt, profile);
   if (!configured) return;
-  await saveProfile(configured, { writeCommand: action === "reconfigure" });
+  await saveProfile(configured);
   await syncPiConfig(configured);
   printProfileSaved(configured.id);
 }

package/src/commands/onboard.mjs CHANGED Viewed

@@ -1,10 +1,9 @@
-import { existsSync } from "node:fs";
-import { ensureDirs, findLlamaServer, hasHomebrew, HF_HUB_DIR } from "../config.mjs";
+import { ensureDirs, findLlamaServer, ensureHomebrewFor, HF_HUB_DIR } from "../config.mjs";
 import { BACKENDS } from "../backends.mjs";
 import { scanGgufModels } from "../scan.mjs";
-import { scanMlxModels } from "../mlx-discovery.mjs";
 import { hasPi } from "../harness-pi.mjs";
 import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
+import { ensureOmlxRuntime } from "../omlx-runtime.mjs";
 import { scanManagedModels } from "../managed.mjs";
 import { BACKEND_INSTALL_CHOICES, BACKEND_INSTALLERS } from "../backend-installers.mjs";
 import { recommendedModel, selectFormat, allFittingModels } from "../recommendations.mjs";
@@ -25,18 +24,18 @@ export async function onboardFlow() {
     console.log(pc.dim("Let's make sure you have everything you need to run local models.\n"));
     const llamaBinary = await ensureLlamaRuntime(prompt);
+    await ensureOmlxRuntime(prompt, run);
     if (!(await ensurePi(prompt, run))) return;
-    const [{ models: ggufModels }, managedModels, mlxModels] = await Promise.all([
+    const [{ models: ggufModels }, managedModels] = await Promise.all([
       scanGgufModels(),
       scanManagedModels(),
-      scanMlxModels(),
     ]);
     const totalManaged = managedModels.reduce((sum, item) => sum + item.models.length, 0);
-    const hasModels = ggufModels.length > 0 || totalManaged > 0 || mlxModels.length > 0;
+    const hasModels = ggufModels.length > 0 || totalManaged > 0;
     if (hasModels) {
-      printFoundModels(ggufModels, managedModels, mlxModels, llamaBinary);
+      printFoundModels(ggufModels, managedModels, llamaBinary);
     } else {
       const canDownload = await hasHuggingfaceHub();
       if (canDownload) {
@@ -96,14 +95,11 @@ async function ensurePi(prompt, run) {
   return true;
 }
-function printFoundModels(ggufModels, managedModels, mlxModels, llamaBinary) {
+function printFoundModels(ggufModels, managedModels, llamaBinary) {
   if (ggufModels.length > 0) {
     console.log(pc.green(`✓ Found ${ggufModels.length} GGUF model${ggufModels.length === 1 ? "" : "s"}`));
     if (!llamaBinary) console.log(pc.yellow("Install the managed llama.cpp runtime to run these GGUF models."));
   }
-  if (mlxModels.length > 0) {
-    console.log(pc.green(`✓ Found ${mlxModels.length} MLX model${mlxModels.length === 1 ? "" : "s"}`));
-  }
   for (const { backendId, models, status, reason } of managedModels) {
     if (status === "unavailable") {
       console.log(pc.yellow(`${BACKENDS[backendId].label}: unavailable${reason ? ` — ${reason}` : ""}`));
@@ -117,7 +113,7 @@ async function offerModelDownload(prompt) {
   const hardware = detectHardware();
   const candidates = allFittingModels(hardware)
     .map((entry) => ({ entry, format: selectFormat(entry, hardware) }))
-    .filter((item) => item.format != null);
+    .filter((item) => item.format === "gguf");
   if (candidates.length === 0) {
     console.log(pc.yellow("No curated models fit your hardware."));
     return false;
@@ -134,7 +130,7 @@ async function offerModelDownload(prompt) {
   const shouldDownload = await prompt.yesNo("Download " + primary.entry.label + " (" + primary.format + ")?", true);
   if (!shouldDownload) return false;
-  const hfRef = primary.format === "mlx" ? primary.entry.mlx : primary.entry.gguf;
+  const hfRef = primary.entry.gguf;
   try {
     const plan = await resolveHfDownload(hfRef);
     console.log(pc.dim("Total size: " + formatBytes(plan.totalSizeBytes)));
@@ -174,35 +170,6 @@ async function offerBackendInstall(prompt, run) {
   await installBackend(prompt, run, choice, model);
 }
-async function ensureHomebrewFor(prompt, run, label) {
-  if (await hasHomebrew()) return true;
-  const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
-  if (!install) {
-    console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
-    return false;
-  }
-  console.log(pc.cyan("Installing Homebrew..."));
-  try {
-    await run("/bin/bash", ["-c", "NONINTERACTIVE=1 /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""], "Homebrew");
-    for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
-      if (existsSync(path)) {
-        process.env.PATH = `${path}:${process.env.PATH}`;
-        break;
-      }
-    }
-  } catch {
-    console.log(pc.red("✗ Homebrew installation failed."));
-    console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
-    return false;
-  }
-  if (!(await hasHomebrew())) {
-    console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
-    return false;
-  }
-  console.log(pc.green("✓ Homebrew found"));
-  return true;
-}
 async function installBackend(prompt, run, backendId, model) {
   const installer = BACKEND_INSTALLERS[backendId];
   if (!(await ensureHomebrewFor(prompt, run, installer.label))) return;
@@ -236,4 +203,4 @@ async function installAllBackends(prompt, run, model) {
 async function runInstallerCommands(run, installer) {
   for (const [cmd, args, label] of installer.commands) await run(cmd, args, label);
-}
+}

package/src/commands/run.mjs CHANGED Viewed

@@ -77,7 +77,7 @@ async function ensureLocalServer(profile, backend, options) {
       console.log(pc.yellow("Vision projector is not supported by this llama.cpp build. Retrying text-only."));
       console.log(pc.dim("Update llama.cpp later to re-enable vision for this model."));
       const textOnly = textOnlyProfile(profile);
-      await saveProfile(textOnly, { writeCommand: true });
+      await saveProfile(textOnly);
       return { handled: true, result: await runProfile(textOnly, { ...options, textOnlyRetry: true }) };
     }
     throw err;

package/src/commands/status.mjs CHANGED Viewed

@@ -2,6 +2,10 @@ import { ensureDirs } from "../config.mjs";
 import { backendFor } from "../backends.mjs";
 import { loadProfiles } from "../profiles.mjs";
 import { profileRuntimeStatus } from "../process.mjs";
+import { existsSync } from "node:fs";
+import { execFileSync } from "node:child_process";
+import { homedir } from "node:os";
+import { join } from "node:path";
 import { pc, renderRows, renderCard } from "../ui.mjs";
 export async function statusCommand() {
@@ -38,6 +42,21 @@ export async function statusCommand() {
   console.log(renderCard("Status", renderRows(summaryRows), { formatBorder: running.length > 0 ? pc.green : pc.dim }));
+  // Show oMLX cache disk usage if cache exists
+  const omlxCacheDir = join(homedir(), ".omlx", "cache");
+  if (existsSync(omlxCacheDir)) {
+    try {
+      const duOutput = execFileSync("du", ["-sh", omlxCacheDir], { encoding: "utf8" });
+      const cacheSize = duOutput.split(/\s+/)[0];
+      console.log("\n" + renderCard("oMLX cache", renderRows([
+        ["Location", pc.dim(omlxCacheDir)],
+        ["Disk usage", pc.bold(cacheSize)],
+      ]), { formatBorder: pc.magenta }));
+    } catch {
+      // du not available — skip
+    }
+  }
   if (managedUpMissing.length > 0 || managedUpNotLoaded.length > 0) {
     const detailRows = [];
     for (const { profile, status } of [...managedUpMissing, ...managedUpNotLoaded]) {

package/src/config.mjs CHANGED Viewed

@@ -3,6 +3,7 @@ import { existsSync } from "node:fs";
 import { homedir } from "node:os";
 import { dirname, join } from "node:path";
 import { readFile, writeFile } from "node:fs/promises";
+import { pc } from "./ui.mjs";
 // ── Base directories ──────────────────────────────────────────────────────
@@ -18,8 +19,8 @@ export const MANAGED_LLAMA_SERVER = join(RUNTIME_DIR, "bin", "llama-server");
 // HuggingFace hub cache: $HF_HUB_CACHE, else $HF_HOME/hub, else
 // ~/.cache/huggingface/hub. This is where huggingface_hub stores
 // models--org--name/... and where offgrid-ai scans + downloads. Pointing at the
-// hub (not the HF root) keeps the HF-hub MLX/GGUF scanners and the downloader
-// on the same layout.
+// hub (not the HF root) keeps the GGUF scanner and the downloader on the
+// same layout.
 export const HF_HUB_DIR = process.env.HF_HUB_CACHE
   || (process.env.HF_HOME ? join(process.env.HF_HOME, "hub") : join(homedir(), ".cache", "huggingface", "hub"));
@@ -124,4 +125,49 @@ export async function hasHomebrew() {
   } catch {
     return false;
   }
+}
+/**
+ * Install Homebrew non-interactively and add it to PATH for this process.
+ * Returns true if Homebrew is available after installation.
+ */
+export async function installHomebrew(run) {
+  await run("/bin/bash", ["-c", 'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'], "Homebrew");
+  for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
+    if (existsSync(path)) {
+      process.env.PATH = `${path}:${process.env.PATH}`;
+      break;
+    }
+  }
+  return await hasHomebrew();
+}
+/**
+ * Ensure Homebrew is installed, prompting the user if necessary.
+ * @param {object} prompt - UI prompt interface (needs yesNo)
+ * @param {function} run - runCommand function for verbose command execution
+ * @param {string} label - what we're installing (for the prompt message)
+ * @returns {Promise<boolean>} true if Homebrew is available
+ */
+export async function ensureHomebrewFor(prompt, run, label) {
+  if (await hasHomebrew()) return true;
+  const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
+  if (!install) {
+    console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
+    return false;
+  }
+  console.log(pc.cyan("Installing Homebrew..."));
+  try {
+    const success = await installHomebrew(run);
+    if (!success) {
+      console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
+      return false;
+    }
+  } catch {
+    console.log(pc.red("✗ Homebrew installation failed."));
+    console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
+    return false;
+  }
+  console.log(pc.green("✓ Homebrew found"));
+  return true;
 }