offgrid-ai 0.15.9 → 0.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # offgrid-ai
4
4
 
5
- **Helper CLI for running local AI models on Mac with llama-server, mlx-vlm, and oMLX.**
5
+ **Helper CLI for running local AI models on Mac with llama-server and oMLX.**
6
6
 
7
7
  [![node](https://img.shields.io/badge/node-20%2B-3c873a)](package.json)
8
8
  [![platform](https://img.shields.io/badge/platform-macOS%20%7C%20Linux-blue)]()
@@ -12,7 +12,7 @@
12
12
 
13
13
  ## What is offgrid-ai?
14
14
 
15
- offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server, mlx-vlm, or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity, while still providing a powerful and flexible way to run local models.
15
+ offgrid-ai is a command-line tool that lets you run AI models locally. Running local models with llama-server or oMLX has a steep learning curve compared to cloud-based models, so offgrid-ai is designed to abstract away the complexity, while still providing a powerful and flexible way to run local models.
16
16
 
17
17
  This is the recommended workflow:
18
18
 
@@ -23,8 +23,8 @@ This is the recommended workflow:
23
23
  ## Core Features
24
24
  - Auto-detects available models from LM Studio, oMLX, and HuggingFace
25
25
  - Auto-detects MTP (multi-token prediction) or QAT (quantization aware training) models, and applies the correct flags for llama.cpp
26
- - Auto-applies the optimal flags for the model type (llama.cpp server flags, mlx-vlm APC/thinking/context flags)
27
- - Start / stop local servers automatically for chat sessions (llama-server and mlx-vlm)
26
+ - Auto-applies the optimal flags for the model type (llama.cpp server flags, oMLX auto-start and cache management)
27
+ - Start / stop local servers automatically for chat sessions (llama-server and oMLX)
28
28
 
29
29
  ## Quick start
30
30
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.15.9",
3
+ "version": "0.16.3",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -55,7 +55,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
55
55
  const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
56
56
 
57
57
  const flags = {
58
- ...defaultFlagsForBackend(mtp ? "llama-cpp-mtp" : "llama-cpp"),
58
+ ...defaultFlagsForBackend("llama-cpp"),
59
59
  ctxSize: capabilities.ctxSize,
60
60
  flashAttention: "on",
61
61
  cacheTypeK: isLowMem ? "f16" : "bf16",
package/src/backends.mjs CHANGED
@@ -1,14 +1,12 @@
1
1
  import { findLlamaServer } from "./config.mjs";
2
2
  import { scanGgufModels } from "./scan.mjs";
3
3
  import { parseModelName } from "./model-name.mjs";
4
- import { scanMlxModels, scanOmlxModelSizes, lookupOmlxModelInfo } from "./mlx-discovery.mjs";
5
- import { DEFAULT_PORT as MLX_VLM_PORT } from "./mlx-flags.mjs";
4
+ import { scanOmlxModelSizes, lookupOmlxModelInfo } from "./mlx-discovery.mjs";
6
5
 
7
6
  // ── Backend definitions ────────────────────────────────────────────────────
8
7
 
9
8
  export const LOCAL_HOST = "127.0.0.1";
10
9
  export const LLAMA_CPP_PORT = 8080;
11
- export const LLAMA_CPP_MTP_PORT = 8081;
12
10
  export const OMLX_PORT = 8000;
13
11
 
14
12
  export function baseUrlFor({ host = LOCAL_HOST, port, path = "/v1" }) {
@@ -31,17 +29,6 @@ export const BACKENDS = {
31
29
  needsCommandFile: true,
32
30
  scanModels: async () => (await scanGgufModels()).models,
33
31
  },
34
- "llama-cpp-mtp": {
35
- id: "llama-cpp-mtp",
36
- label: "llama.cpp MTP",
37
- type: "local-server",
38
- providerId: "llama-cpp-mtp",
39
- defaultHost: LOCAL_HOST,
40
- defaultPort: LLAMA_CPP_MTP_PORT,
41
- defaultBaseUrl: baseUrlFor({ port: LLAMA_CPP_MTP_PORT }),
42
- needsCommandFile: true,
43
- scanModels: async () => (await scanGgufModels()).models,
44
- },
45
32
  "omlx": {
46
33
  id: "omlx",
47
34
  label: "oMLX",
@@ -54,17 +41,6 @@ export const BACKENDS = {
54
41
  needsCommandFile: false,
55
42
  scanModels: () => scanOmlxModels(),
56
43
  },
57
- "mlx-vlm": {
58
- id: "mlx-vlm",
59
- label: "mlx-vlm",
60
- type: "local-server",
61
- providerId: "mlx-vlm",
62
- defaultHost: LOCAL_HOST,
63
- defaultPort: MLX_VLM_PORT,
64
- defaultBaseUrl: baseUrlFor({ port: MLX_VLM_PORT }),
65
- needsCommandFile: true,
66
- scanModels: async () => scanMlxModels(),
67
- },
68
44
  };
69
45
 
70
46
  export function backendFor(backendId) {
@@ -75,10 +51,8 @@ export function backendFor(backendId) {
75
51
 
76
52
  export async function backendBinaryFor(backendId) {
77
53
  const backend = BACKENDS[backendId ?? "llama-cpp"];
78
- if (backend.id === "mlx-vlm") return "python3"; // mlx-vlm spawns via python3 + the strict=False wrapper
79
54
  if (backend.type === "managed-server") return null;
80
- const discovered = await findLlamaServer();
81
- return discovered; // null means "not found — trigger onboarding"
55
+ return await findLlamaServer();
82
56
  }
83
57
 
84
58
  export function defaultFlagsForBackend(backendId) {
@@ -96,21 +70,15 @@ async function scanOmlxModels() {
96
70
  const body = await response.json();
97
71
  if (!Array.isArray(body?.data)) return [];
98
72
 
99
- // The oMLX API doesn't return model sizes or publishers — look them up from disk.
100
73
  const infoMap = await scanOmlxModelSizes();
101
74
 
102
- // The oMLX API can return the same model multiple times with different
103
- // ID formats (e.g. "Qwen3.6-35B-A3B-OptiQ-4bit" and
104
- // "mlx-community--Qwen3.6-35B-A3B-OptiQ-4bit"). Deduplicate by the
105
- // normalized full name (publisher/model with / separator), keeping
106
- // the first entry (which has the most complete metadata).
75
+ // Deduplicate by normalized full name (publisher/model with / separator)
107
76
  const seen = new Set();
108
77
  const deduped = [];
109
78
  for (const model of body.data.filter(isChatOmlxModel)) {
110
79
  const info = lookupOmlxModelInfo(model.id, infoMap);
111
80
  const hasPublisher = model.id.includes("/") || model.id.includes("--");
112
81
  const fullName = (!hasPublisher && info?.publisher) ? `${info.publisher}/${model.id}` : model.id;
113
- // Normalize: convert -- separator to / for dedup comparison
114
82
  const normalized = fullName.replace(/--/g, "/");
115
83
  if (seen.has(normalized)) continue;
116
84
  seen.add(normalized);
@@ -137,15 +105,10 @@ async function scanOmlxModels() {
137
105
  }).sort((a, b) => a.label.localeCompare(b.label));
138
106
  }
139
107
 
140
- // ── Labels ──────────────────────────────────────────────────────────────
141
-
142
108
  function isChatOmlxModel(model) {
143
109
  if (typeof model?.id !== "string" || !model.id.trim()) return false;
144
110
  const type = String(model.type ?? model.model_type ?? "").toLowerCase();
145
111
  if (["embedding", "embeddings", "reranker", "tool", "converter", "markitdown"].includes(type)) return false;
146
112
  if (Object.hasOwn(model, "max_model_len") && model.max_model_len === null) return false;
147
113
  return true;
148
- }
149
-
150
- // (ollamaLabel and omlxLabel removed — parseModelName in model-name.mjs is the single path)
151
- // (Ollama backend removed — offgrid-ai now uses llama-server + mlx-vlm + oMLX)
114
+ }
@@ -15,7 +15,7 @@ import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
15
15
 
16
16
  function benchmarkModelSource(profile) {
17
17
  if (!profile) return "cloud";
18
- return profile.providerId === "llama-cpp-mtp" ? "llama-cpp-mtp" : profile.backend === "omlx" ? "omlx" : "llama-cpp";
18
+ return profile.backend === "omlx" ? "omlx" : "llama-cpp";
19
19
  }
20
20
 
21
21
  async function chooseBenchmarkAction(prompt, canRun) {
@@ -150,16 +150,22 @@ async function selectBenchmark(prompt, repoPath) {
150
150
  return { kind, benchmark };
151
151
  }
152
152
 
153
- // ── Benchmark from a selected profile (from model picker) ────────────────
153
+ // ── Shared benchmark setup ───────────────────────────────────────────────
154
154
 
155
- export async function benchmarkForProfile(profile) {
155
+ async function benchmarkSetup() {
156
156
  await ensureDirs();
157
157
  const prompt = createPrompt();
158
- try {
159
- const repoPath = await linkBenchmarkRepo(prompt);
160
- if (!repoPath) return;
158
+ const repoPath = await linkBenchmarkRepo(prompt);
159
+ if (!repoPath) return { prompt, repoPath: null, selected: null };
160
+ const selected = await selectBenchmark(prompt, repoPath);
161
+ return { prompt, repoPath, selected };
162
+ }
163
+
164
+ // ── Benchmark from a selected profile (from model picker) ────────────────
161
165
 
162
- const selected = await selectBenchmark(prompt, repoPath);
166
+ export async function benchmarkForProfile(profile) {
167
+ const { prompt, repoPath, selected } = await benchmarkSetup();
168
+ try {
163
169
  if (!selected) return;
164
170
  const { kind, benchmark: selectedBenchmark } = selected;
165
171
 
@@ -185,13 +191,8 @@ export async function benchmarkForProfile(profile) {
185
191
  // ── Standalone benchmark flow (offgrid-ai benchmark) ──────────────────────
186
192
 
187
193
  export async function benchmarkFlow() {
188
- await ensureDirs();
189
- const prompt = createPrompt();
194
+ const { prompt, repoPath, selected } = await benchmarkSetup();
190
195
  try {
191
- const repoPath = await linkBenchmarkRepo(prompt);
192
- if (!repoPath) return;
193
-
194
- const selected = await selectBenchmark(prompt, repoPath);
195
196
  if (!selected) return;
196
197
  const { kind, benchmark: selectedBenchmark } = selected;
197
198
 
@@ -9,7 +9,7 @@ const SPEED_QUERY_MAX_TOKENS = 64;
9
9
  export async function queryServerMetrics(profile) {
10
10
  const backend = backendFor(profile.backend);
11
11
 
12
- if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
12
+ if (backend.id === "llama-cpp") {
13
13
  return await queryLlamaCppMetrics(profile);
14
14
  }
15
15
  if (backend.id === "omlx") {
@@ -19,12 +19,13 @@ export async function queryServerMetrics(profile) {
19
19
  throw new Error(`Unsupported backend for benchmark speed metrics: ${backend.id}`);
20
20
  }
21
21
 
22
- async function queryLlamaCppMetrics(profile) {
22
+ async function speedQueryFetch(profile, { stream = false, streamOptions = null, errorLabel = "speed query" } = {}) {
23
23
  const body = {
24
24
  model: profile.modelAlias,
25
25
  messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
26
- stream: false,
26
+ stream,
27
27
  max_tokens: SPEED_QUERY_MAX_TOKENS,
28
+ ...(streamOptions ? { stream_options: streamOptions } : {}),
28
29
  };
29
30
 
30
31
  const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
@@ -35,9 +36,15 @@ async function queryLlamaCppMetrics(profile) {
35
36
  });
36
37
 
37
38
  if (!response.ok) {
38
- throw new Error(`llama.cpp speed query failed: ${response.status} ${response.statusText}`);
39
+ throw new Error(`${errorLabel} failed: ${response.status} ${response.statusText}`);
39
40
  }
40
41
 
42
+ return response;
43
+ }
44
+
45
+ async function queryLlamaCppMetrics(profile) {
46
+ const response = await speedQueryFetch(profile, { errorLabel: "llama.cpp speed query" });
47
+
41
48
  const data = await response.json();
42
49
  const timings = data.timings;
43
50
  if (!timings || typeof timings.prompt_per_second !== "number" || typeof timings.predicted_per_second !== "number") {
@@ -60,25 +67,12 @@ async function queryLlamaCppMetrics(profile) {
60
67
  }
61
68
 
62
69
  async function queryOmlxMetrics(profile) {
63
- const body = {
64
- model: profile.modelAlias,
65
- messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
70
+ const response = await speedQueryFetch(profile, {
66
71
  stream: true,
67
- stream_options: { include_usage: true },
68
- max_tokens: SPEED_QUERY_MAX_TOKENS,
69
- };
70
-
71
- const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
72
- method: "POST",
73
- headers: { "Content-Type": "application/json" },
74
- body: JSON.stringify(body),
75
- signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
72
+ streamOptions: { include_usage: true },
73
+ errorLabel: "oMLX speed query",
76
74
  });
77
75
 
78
- if (!response.ok) {
79
- throw new Error(`oMLX speed query failed: ${response.status} ${response.statusText}`);
80
- }
81
-
82
76
  const text = await response.text();
83
77
  let usage = null;
84
78
  for (const line of text.split("\n").reverse()) {
@@ -1,11 +1,11 @@
1
1
  import { findLlamaServer, ensureDirs } from "../config.mjs";
2
2
  import { backendFor } from "../backends.mjs";
3
3
  import { scanGgufModels } from "../scan.mjs";
4
- import { scanMlxModels } from "../mlx-discovery.mjs";
5
4
  import { loadProfiles } from "../profiles.mjs";
6
5
  import { hasPi } from "../harness-pi.mjs";
7
6
  import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
8
- import { hasLmStudioInstalled, hasOmlxInstalled, scanManagedModels } from "../managed.mjs";
7
+ import { offerManagedOmlxUpdate, hasOmlx } from "../omlx-runtime.mjs";
8
+ import { hasLmStudioInstalled, scanManagedModels } from "../managed.mjs";
9
9
  import { recommendedModel } from "../recommendations.mjs";
10
10
  import { pc, startInteractive, createPrompt } from "../ui.mjs";
11
11
  import { onboardFlow } from "./onboard.mjs";
@@ -19,6 +19,7 @@ export async function mainFlow() {
19
19
  const runtimePrompt = createPrompt();
20
20
  try {
21
21
  await offerManagedLlamaRuntimeUpdate(runtimePrompt);
22
+ await offerManagedOmlxUpdate(runtimePrompt);
22
23
  } finally {
23
24
  runtimePrompt.close();
24
25
  }
@@ -27,10 +28,9 @@ export async function mainFlow() {
27
28
  const llamaBinary = await findLlamaServer();
28
29
  const { models: ggufModels, drafters } = await scanGgufModels();
29
30
  const managedModels = await scanManagedModels();
30
- const mlxModels = await scanMlxModels();
31
31
  const profiles = await loadProfiles();
32
32
  const hasAnyBackend = llamaBinary || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
33
- const hasAnyModels = ggufModels.length > 0 || mlxModels.length > 0 || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
33
+ const hasAnyModels = ggufModels.length > 0 || managedModels.some((item) => item.status === "ok" && item.models.length > 0);
34
34
 
35
35
  const piInstalled = await hasPi();
36
36
  const needsLlama = ggufModels.length > 0 || profiles.some((profile) => backendFor(profile.backend).type === "local-server");
@@ -58,14 +58,14 @@ export async function mainFlow() {
58
58
  if (!process.stdin.isTTY) return await statusCommand();
59
59
 
60
60
  startInteractive("offgrid-ai");
61
- return await modelCommandCenter({ profiles, ggufModels, managedModels, mlxModels, drafters });
61
+ return await modelCommandCenter({ profiles, ggufModels, managedModels, drafters });
62
62
  }
63
63
 
64
64
  async function printNoModelsHelp(llamaBinary) {
65
65
  console.log(pc.yellow("No models found."));
66
66
  console.log(pc.dim("You need to download a model to use offgrid-ai.\n"));
67
67
 
68
- const omlxInstalled = await hasOmlxInstalled();
68
+ const omlxInstalled = await hasOmlx();
69
69
  const lmStudioInstalled = hasLmStudioInstalled();
70
70
  const hasBackends = llamaBinary || omlxInstalled || lmStudioInstalled;
71
71
  if (!hasBackends) {
@@ -86,4 +86,4 @@ async function printNoModelsHelp(llamaBinary) {
86
86
  console.log(pc.dim(` Recommended: ${model.label}`));
87
87
  }
88
88
  if (omlxInstalled) console.log(pc.bold(" omlx start"));
89
- }
89
+ }
@@ -3,10 +3,10 @@ import { backendFor, BACKENDS } from "../backends.mjs";
3
3
  import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
4
4
  import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
5
5
  import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
6
- import { configureLocalProfile } from "../profile-setup.mjs";
6
+ import { configureLocalProfile, configureManagedProfile } from "../profile-setup.mjs";
7
7
  import { pc, startInteractive, createPrompt, modelSelect } from "../ui.mjs";
8
8
  import { buildCatalogItems, createManagedProfile, itemKey, loadModelCatalog, normalizeCatalog } from "../model-catalog.mjs";
9
- import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails, printMlxModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
9
+ import { modelSelectOption, modelNameWidth, inferBackendId, formatSourceLabel, discoverySourceForItem, printGgufModelDetails, printManagedModelDetails, printWorkspaceHeader, printBenchmarkLine, printProfileDetails } from "../model-presenters.mjs";
10
10
  import { runProfile } from "./run.mjs";
11
11
 
12
12
  const { stripVTControlCharacters } = await import("node:util");
@@ -83,9 +83,7 @@ export async function modelCommandCenter(initialCatalog) {
83
83
 
84
84
  const groups = [];
85
85
  const backendColors = {
86
- "mlx-vlm": pc.yellow,
87
86
  "llama-cpp": pc.cyan,
88
- "llama-cpp-mtp": pc.blue,
89
87
  omlx: pc.magenta,
90
88
  };
91
89
  for (const { backendId, sourceId, items } of byBackend.values()) {
@@ -185,7 +183,6 @@ async function performAction(prompt, action, item) {
185
183
  if (action === "inspect") {
186
184
  if (item.type === "profile") return await printProfileDetails(await readProfile(item.profile.id));
187
185
  if (item.type === "managed") return printManagedModelDetails(item.model, BACKENDS[item.backendId]);
188
- if (item.model?.format === "mlx") return await printMlxModelDetails(item.model);
189
186
  return printGgufModelDetails(item.model, item.drafter);
190
187
  }
191
188
  if (action === "benchmark") {
@@ -197,7 +194,7 @@ async function performAction(prompt, action, item) {
197
194
  return await benchmarkFlow();
198
195
  }
199
196
  if (action === "run") return await runItem(item);
200
- if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item, action);
197
+ if (action === "reconfigure" || action === "setup") return await setupItem(prompt, item);
201
198
  if (action === "remove" && item.type === "profile") return await removeProfileInteractive(item.profile.id);
202
199
  }
203
200
 
@@ -209,30 +206,20 @@ function printProfileSaved(id) {
209
206
  console.log(pc.dim(` Profile: ${profileJsonPath(id)}`));
210
207
  }
211
208
 
212
- async function setupItem(prompt, item, action) {
209
+ async function setupItem(prompt, item) {
213
210
  if (item.type === "profile") {
214
211
  const configured = await configureLocalProfile(prompt, await readProfile(item.profile.id));
215
212
  if (!configured) return;
216
- await saveProfile(configured, { writeCommand: true });
213
+ await saveProfile(configured);
217
214
  await syncPiConfig(configured);
218
215
  printProfileSaved(configured.id);
219
216
  return;
220
217
  }
221
218
  if (item.type === "managed") {
222
219
  const profile = createManagedProfile(item.model, item.backendId);
223
- await saveProfile(profile);
224
- await syncPiConfig(profile);
225
- printProfileSaved(profile.id);
226
- return;
227
- }
228
- // MLX models: build a mlx-vlm profile and run interactive config.
229
- if (item.model.format === "mlx") {
230
- const { createProfileFromMlxModel } = await import("../profiles.mjs");
231
- const { configureMlxProfile } = await import("../profile-setup.mjs");
232
- const profile = await createProfileFromMlxModel(item.model);
233
- const configured = await configureMlxProfile(prompt, profile);
220
+ const configured = await configureManagedProfile(prompt, profile);
234
221
  if (!configured) return;
235
- await saveProfile(configured, { writeCommand: true });
222
+ await saveProfile(configured);
236
223
  await syncPiConfig(configured);
237
224
  printProfileSaved(configured.id);
238
225
  return;
@@ -240,7 +227,7 @@ async function setupItem(prompt, item, action) {
240
227
  const profile = await createProfileFromModel(item.model, null, item.drafter?.path);
241
228
  const configured = await configureLocalProfile(prompt, profile);
242
229
  if (!configured) return;
243
- await saveProfile(configured, { writeCommand: action === "reconfigure" });
230
+ await saveProfile(configured);
244
231
  await syncPiConfig(configured);
245
232
  printProfileSaved(configured.id);
246
233
  }
@@ -1,10 +1,9 @@
1
- import { existsSync } from "node:fs";
2
- import { ensureDirs, findLlamaServer, hasHomebrew, HF_HUB_DIR } from "../config.mjs";
1
+ import { ensureDirs, findLlamaServer, ensureHomebrewFor, HF_HUB_DIR } from "../config.mjs";
3
2
  import { BACKENDS } from "../backends.mjs";
4
3
  import { scanGgufModels } from "../scan.mjs";
5
- import { scanMlxModels } from "../mlx-discovery.mjs";
6
4
  import { hasPi } from "../harness-pi.mjs";
7
5
  import { offerManagedLlamaRuntimeUpdate } from "../runtime.mjs";
6
+ import { ensureOmlxRuntime } from "../omlx-runtime.mjs";
8
7
  import { scanManagedModels } from "../managed.mjs";
9
8
  import { BACKEND_INSTALL_CHOICES, BACKEND_INSTALLERS } from "../backend-installers.mjs";
10
9
  import { recommendedModel, selectFormat, allFittingModels } from "../recommendations.mjs";
@@ -25,18 +24,18 @@ export async function onboardFlow() {
25
24
  console.log(pc.dim("Let's make sure you have everything you need to run local models.\n"));
26
25
 
27
26
  const llamaBinary = await ensureLlamaRuntime(prompt);
27
+ await ensureOmlxRuntime(prompt, run);
28
28
  if (!(await ensurePi(prompt, run))) return;
29
29
 
30
- const [{ models: ggufModels }, managedModels, mlxModels] = await Promise.all([
30
+ const [{ models: ggufModels }, managedModels] = await Promise.all([
31
31
  scanGgufModels(),
32
32
  scanManagedModels(),
33
- scanMlxModels(),
34
33
  ]);
35
34
  const totalManaged = managedModels.reduce((sum, item) => sum + item.models.length, 0);
36
- const hasModels = ggufModels.length > 0 || totalManaged > 0 || mlxModels.length > 0;
35
+ const hasModels = ggufModels.length > 0 || totalManaged > 0;
37
36
 
38
37
  if (hasModels) {
39
- printFoundModels(ggufModels, managedModels, mlxModels, llamaBinary);
38
+ printFoundModels(ggufModels, managedModels, llamaBinary);
40
39
  } else {
41
40
  const canDownload = await hasHuggingfaceHub();
42
41
  if (canDownload) {
@@ -96,14 +95,11 @@ async function ensurePi(prompt, run) {
96
95
  return true;
97
96
  }
98
97
 
99
- function printFoundModels(ggufModels, managedModels, mlxModels, llamaBinary) {
98
+ function printFoundModels(ggufModels, managedModels, llamaBinary) {
100
99
  if (ggufModels.length > 0) {
101
100
  console.log(pc.green(`✓ Found ${ggufModels.length} GGUF model${ggufModels.length === 1 ? "" : "s"}`));
102
101
  if (!llamaBinary) console.log(pc.yellow("Install the managed llama.cpp runtime to run these GGUF models."));
103
102
  }
104
- if (mlxModels.length > 0) {
105
- console.log(pc.green(`✓ Found ${mlxModels.length} MLX model${mlxModels.length === 1 ? "" : "s"}`));
106
- }
107
103
  for (const { backendId, models, status, reason } of managedModels) {
108
104
  if (status === "unavailable") {
109
105
  console.log(pc.yellow(`${BACKENDS[backendId].label}: unavailable${reason ? ` — ${reason}` : ""}`));
@@ -117,7 +113,7 @@ async function offerModelDownload(prompt) {
117
113
  const hardware = detectHardware();
118
114
  const candidates = allFittingModels(hardware)
119
115
  .map((entry) => ({ entry, format: selectFormat(entry, hardware) }))
120
- .filter((item) => item.format != null);
116
+ .filter((item) => item.format === "gguf");
121
117
  if (candidates.length === 0) {
122
118
  console.log(pc.yellow("No curated models fit your hardware."));
123
119
  return false;
@@ -134,7 +130,7 @@ async function offerModelDownload(prompt) {
134
130
  const shouldDownload = await prompt.yesNo("Download " + primary.entry.label + " (" + primary.format + ")?", true);
135
131
  if (!shouldDownload) return false;
136
132
 
137
- const hfRef = primary.format === "mlx" ? primary.entry.mlx : primary.entry.gguf;
133
+ const hfRef = primary.entry.gguf;
138
134
  try {
139
135
  const plan = await resolveHfDownload(hfRef);
140
136
  console.log(pc.dim("Total size: " + formatBytes(plan.totalSizeBytes)));
@@ -174,35 +170,6 @@ async function offerBackendInstall(prompt, run) {
174
170
  await installBackend(prompt, run, choice, model);
175
171
  }
176
172
 
177
- async function ensureHomebrewFor(prompt, run, label) {
178
- if (await hasHomebrew()) return true;
179
- const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
180
- if (!install) {
181
- console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
182
- return false;
183
- }
184
- console.log(pc.cyan("Installing Homebrew..."));
185
- try {
186
- await run("/bin/bash", ["-c", "NONINTERACTIVE=1 /bin/bash -c \"$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)\""], "Homebrew");
187
- for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
188
- if (existsSync(path)) {
189
- process.env.PATH = `${path}:${process.env.PATH}`;
190
- break;
191
- }
192
- }
193
- } catch {
194
- console.log(pc.red("✗ Homebrew installation failed."));
195
- console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
196
- return false;
197
- }
198
- if (!(await hasHomebrew())) {
199
- console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
200
- return false;
201
- }
202
- console.log(pc.green("✓ Homebrew found"));
203
- return true;
204
- }
205
-
206
173
  async function installBackend(prompt, run, backendId, model) {
207
174
  const installer = BACKEND_INSTALLERS[backendId];
208
175
  if (!(await ensureHomebrewFor(prompt, run, installer.label))) return;
@@ -236,4 +203,4 @@ async function installAllBackends(prompt, run, model) {
236
203
 
237
204
  async function runInstallerCommands(run, installer) {
238
205
  for (const [cmd, args, label] of installer.commands) await run(cmd, args, label);
239
- }
206
+ }
@@ -77,7 +77,7 @@ async function ensureLocalServer(profile, backend, options) {
77
77
  console.log(pc.yellow("Vision projector is not supported by this llama.cpp build. Retrying text-only."));
78
78
  console.log(pc.dim("Update llama.cpp later to re-enable vision for this model."));
79
79
  const textOnly = textOnlyProfile(profile);
80
- await saveProfile(textOnly, { writeCommand: true });
80
+ await saveProfile(textOnly);
81
81
  return { handled: true, result: await runProfile(textOnly, { ...options, textOnlyRetry: true }) };
82
82
  }
83
83
  throw err;
@@ -2,6 +2,10 @@ import { ensureDirs } from "../config.mjs";
2
2
  import { backendFor } from "../backends.mjs";
3
3
  import { loadProfiles } from "../profiles.mjs";
4
4
  import { profileRuntimeStatus } from "../process.mjs";
5
+ import { existsSync } from "node:fs";
6
+ import { execFileSync } from "node:child_process";
7
+ import { homedir } from "node:os";
8
+ import { join } from "node:path";
5
9
  import { pc, renderRows, renderCard } from "../ui.mjs";
6
10
 
7
11
  export async function statusCommand() {
@@ -38,6 +42,21 @@ export async function statusCommand() {
38
42
 
39
43
  console.log(renderCard("Status", renderRows(summaryRows), { formatBorder: running.length > 0 ? pc.green : pc.dim }));
40
44
 
45
+ // Show oMLX cache disk usage if cache exists
46
+ const omlxCacheDir = join(homedir(), ".omlx", "cache");
47
+ if (existsSync(omlxCacheDir)) {
48
+ try {
49
+ const duOutput = execFileSync("du", ["-sh", omlxCacheDir], { encoding: "utf8" });
50
+ const cacheSize = duOutput.split(/\s+/)[0];
51
+ console.log("\n" + renderCard("oMLX cache", renderRows([
52
+ ["Location", pc.dim(omlxCacheDir)],
53
+ ["Disk usage", pc.bold(cacheSize)],
54
+ ]), { formatBorder: pc.magenta }));
55
+ } catch {
56
+ // du not available — skip
57
+ }
58
+ }
59
+
41
60
  if (managedUpMissing.length > 0 || managedUpNotLoaded.length > 0) {
42
61
  const detailRows = [];
43
62
  for (const { profile, status } of [...managedUpMissing, ...managedUpNotLoaded]) {
package/src/config.mjs CHANGED
@@ -3,6 +3,7 @@ import { existsSync } from "node:fs";
3
3
  import { homedir } from "node:os";
4
4
  import { dirname, join } from "node:path";
5
5
  import { readFile, writeFile } from "node:fs/promises";
6
+ import { pc } from "./ui.mjs";
6
7
 
7
8
  // ── Base directories ──────────────────────────────────────────────────────
8
9
 
@@ -18,8 +19,8 @@ export const MANAGED_LLAMA_SERVER = join(RUNTIME_DIR, "bin", "llama-server");
18
19
  // HuggingFace hub cache: $HF_HUB_CACHE, else $HF_HOME/hub, else
19
20
  // ~/.cache/huggingface/hub. This is where huggingface_hub stores
20
21
  // models--org--name/... and where offgrid-ai scans + downloads. Pointing at the
21
- // hub (not the HF root) keeps the HF-hub MLX/GGUF scanners and the downloader
22
- // on the same layout.
22
+ // hub (not the HF root) keeps the GGUF scanner and the downloader on the
23
+ // same layout.
23
24
  export const HF_HUB_DIR = process.env.HF_HUB_CACHE
24
25
  || (process.env.HF_HOME ? join(process.env.HF_HOME, "hub") : join(homedir(), ".cache", "huggingface", "hub"));
25
26
 
@@ -124,4 +125,49 @@ export async function hasHomebrew() {
124
125
  } catch {
125
126
  return false;
126
127
  }
128
+ }
129
+
130
+ /**
131
+ * Install Homebrew non-interactively and add it to PATH for this process.
132
+ * Returns true if Homebrew is available after installation.
133
+ */
134
+ export async function installHomebrew(run) {
135
+ await run("/bin/bash", ["-c", 'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'], "Homebrew");
136
+ for (const path of ["/opt/homebrew/bin", "/usr/local/bin"]) {
137
+ if (existsSync(path)) {
138
+ process.env.PATH = `${path}:${process.env.PATH}`;
139
+ break;
140
+ }
141
+ }
142
+ return await hasHomebrew();
143
+ }
144
+
145
+ /**
146
+ * Ensure Homebrew is installed, prompting the user if necessary.
147
+ * @param {object} prompt - UI prompt interface (needs yesNo)
148
+ * @param {function} run - runCommand function for verbose command execution
149
+ * @param {string} label - what we're installing (for the prompt message)
150
+ * @returns {Promise<boolean>} true if Homebrew is available
151
+ */
152
+ export async function ensureHomebrewFor(prompt, run, label) {
153
+ if (await hasHomebrew()) return true;
154
+ const install = await prompt.yesNo(`Homebrew is needed to install ${label}. Install Homebrew now?`, true);
155
+ if (!install) {
156
+ console.log(pc.dim(`Install ${label} manually, or install Homebrew from https://brew.sh and run offgrid-ai again.`));
157
+ return false;
158
+ }
159
+ console.log(pc.cyan("Installing Homebrew..."));
160
+ try {
161
+ const success = await installHomebrew(run);
162
+ if (!success) {
163
+ console.log(pc.red("Homebrew was installed but not found on PATH. Restart your terminal and run offgrid-ai again."));
164
+ return false;
165
+ }
166
+ } catch {
167
+ console.log(pc.red("✗ Homebrew installation failed."));
168
+ console.log(pc.dim("Install it manually from https://brew.sh, then run offgrid-ai again."));
169
+ return false;
170
+ }
171
+ console.log(pc.green("✓ Homebrew found"));
172
+ return true;
127
173
  }