offgrid-ai 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.10.1",
3
+ "version": "0.11.0",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
package/src/backends.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  import { findLlamaServer } from "./config.mjs";
2
2
  import { scanGgufModels } from "./scan.mjs";
3
3
  import { parseModelName } from "./model-name.mjs";
4
- import { scanMlxModels } from "./mlx-discovery.mjs";
4
+ import { scanMlxModels, scanOmlxModelSizes, lookupOmlxModelSize } from "./mlx-discovery.mjs";
5
5
  import { DEFAULT_PORT as MLX_VLM_PORT } from "./mlx-flags.mjs";
6
6
 
7
7
  // ── Backend definitions ────────────────────────────────────────────────────
@@ -95,19 +95,27 @@ async function scanOmlxModels() {
95
95
  }
96
96
  const body = await response.json();
97
97
  if (!Array.isArray(body?.data)) return [];
98
+
99
+ // The oMLX API doesn't return model sizes — look them up from disk.
100
+ const sizeMap = await scanOmlxModelSizes();
101
+
98
102
  return body.data
99
103
  .filter((model) => isChatOmlxModel(model))
100
- .map((model) => ({
101
- id: model.id,
102
- label: parseModelName(model.id, "omlx").display,
103
- aliasSuggestion: model.id,
104
- sizeBytes: model.size ?? 0,
105
- contextLength: model.max_model_len ?? null,
106
- quant: null,
107
- family: null,
108
- backend: "omlx",
109
- source: "omlx",
110
- })).sort((a, b) => a.label.localeCompare(b.label));
104
+ .map((model) => {
105
+ const sizeFromDisk = lookupOmlxModelSize(model.id, sizeMap);
106
+ const parsed = parseModelName(model.id, "omlx");
107
+ return {
108
+ id: model.id,
109
+ label: parsed.display,
110
+ aliasSuggestion: model.id,
111
+ sizeBytes: sizeFromDisk ?? (model.size ?? 0),
112
+ contextLength: model.max_model_len ?? null,
113
+ quant: parsed.quant,
114
+ family: null,
115
+ backend: "omlx",
116
+ source: "omlx",
117
+ };
118
+ }).sort((a, b) => a.label.localeCompare(b.label));
111
119
  }
112
120
 
113
121
  // ── Labels ──────────────────────────────────────────────────────────────
@@ -16,6 +16,7 @@ import { join, basename } from "node:path";
16
16
  import { homedir } from "node:os";
17
17
  import { getModelScanDirs } from "./config.mjs";
18
18
  import { inferSourceLabel, MIN_MODEL_SIZE_BYTES, EMBEDDING_MODEL_TYPES } from "./discovery-shared.mjs";
19
+ import { parseModelName } from "./model-name.mjs";
19
20
 
20
21
  // ── Folder → backend mapping ──────────────────────────────────────────────
21
22
  // The oMLX folder is oMLX-exclusive: models there are served by the oMLX
@@ -81,7 +82,8 @@ async function scanDirRecursiveForMlx(rootDir, sourceLabel, maxDepth = 3) {
81
82
  if (sizeBytes < MIN_MODEL_SIZE_BYTES) return;
82
83
  if (await isEmbeddingMlxModel(join(dir, "config.json"))) return;
83
84
  const caps = await detectMlxCapabilities(dir);
84
- models.push(makeMlxModel(dir, basename(dir), sizeBytes, sourceLabel, rootDir, caps.contextLength));
85
+ const { display, quant } = parseModelName(basename(dir), sourceLabel);
86
+ models.push(makeMlxModel(dir, display, sizeBytes, sourceLabel, rootDir, caps.contextLength, quant));
85
87
  return;
86
88
  }
87
89
 
@@ -94,7 +96,12 @@ async function scanDirRecursiveForMlx(rootDir, sourceLabel, maxDepth = 3) {
94
96
  if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
95
97
  if (await isEmbeddingMlxModel(join(fullPath, "config.json"))) continue;
96
98
  const caps = await detectMlxCapabilities(fullPath);
97
- models.push(makeMlxModel(fullPath, entry.name, sizeBytes, sourceLabel, rootDir, caps.contextLength));
99
+ // Extract publisher from parent dir (LM Studio: publisher/model-dir)
100
+ const relParts = fullPath.slice(rootDir.length + 1).split("/");
101
+ const publisher = (sourceLabel === "lmstudio" && relParts.length >= 2) ? relParts[0] : null;
102
+ const rawLabel = publisher ? `${publisher}/${entry.name}` : entry.name;
103
+ const { display, quant } = parseModelName(rawLabel, sourceLabel);
104
+ models.push(makeMlxModel(fullPath, display, sizeBytes, sourceLabel, rootDir, caps.contextLength, quant));
98
105
  } else {
99
106
  await walk(fullPath, depth + 1);
100
107
  }
@@ -151,13 +158,16 @@ async function scanHfHubForMlx(dir, sourceLabel) {
151
158
  const sizeBytes = await getMlxDirSizeBytes(snapshotPath);
152
159
  if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
153
160
  if (await isEmbeddingMlxModel(join(snapshotPath, "config.json"))) continue;
161
+ const caps = await detectMlxCapabilities(snapshotPath);
162
+ const { display, quant } = parseModelName(label, sourceLabel);
154
163
  models.push({
155
164
  id: `${sourceLabel}:${entry.name}`,
156
- label,
165
+ label: display,
157
166
  path: snapshotPath,
158
167
  filePath: snapshotPath,
159
168
  sizeBytes,
160
- contextLength: (await detectMlxCapabilities(snapshotPath)).contextLength,
169
+ contextLength: caps.contextLength,
170
+ quant,
161
171
  backend: "mlx-vlm",
162
172
  format: "mlx",
163
173
  source: sourceLabel,
@@ -188,7 +198,7 @@ async function isEmbeddingMlxModel(configPath) {
188
198
 
189
199
  // ── MLX model entry builder ───────────────────────────────────────────────
190
200
 
191
- function makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir, contextLength = null) {
201
+ function makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir, contextLength = null, quant = null) {
192
202
  return {
193
203
  id: `${sourceLabel}:${dir.replace(rootDir + "/", "")}`,
194
204
  label,
@@ -196,6 +206,7 @@ function makeMlxModel(dir, label, sizeBytes, sourceLabel, rootDir, contextLength
196
206
  filePath: dir,
197
207
  sizeBytes,
198
208
  contextLength,
209
+ quant,
199
210
  backend: "mlx-vlm",
200
211
  format: "mlx",
201
212
  source: sourceLabel,
@@ -291,4 +302,51 @@ export function defaultMlxContextLength(trainedCtx, ramGb) {
291
302
  if (ramGb < 16) return Math.min(trainedCtx, 8192);
292
303
  if (ramGb < 32) return Math.min(trainedCtx, 16384);
293
304
  return trainedCtx;
305
+ }
306
+
307
+ // ── oMLX model size lookup (from disk) ────────────────────────────────────
308
+
309
/**
 * Build a directory-name → size lookup for the models stored under
 * OMLX_MODELS_DIR. The oMLX API doesn't return model sizes, so they are
 * measured on disk instead.
 *
 * Every sub-directory that `isMlxModelDir` accepts is measured with
 * `getMlxDirSizeBytes` and recorded under its own directory name (not its
 * full path); any other sub-directory is descended into. Directories that
 * share a name overwrite each other, so the last one visited wins.
 *
 * @returns {Promise<Map<string, number>>} Directory name → size in bytes.
 *   Empty when OMLX_MODELS_DIR does not exist. Entries whose measured size is
 *   not greater than zero are left out.
 */
export async function scanOmlxModelSizes() {
  const sizes = new Map();
  if (!existsSync(OMLX_MODELS_DIR)) return sizes;

  const visit = async (dir) => {
    let children;
    try {
      children = await readdir(dir, { withFileTypes: true });
    } catch {
      // Best effort: a directory that cannot be listed is simply skipped.
      return;
    }

    const subdirs = children.filter((child) => child.isDirectory());
    for (const { name } of subdirs) {
      const childPath = join(dir, name);
      if (!(await isMlxModelDir(childPath))) {
        // Not a model itself — keep looking further down.
        await visit(childPath);
        continue;
      }
      const bytes = await getMlxDirSizeBytes(childPath);
      if (bytes > 0) sizes.set(name, bytes);
    }
  };

  await visit(OMLX_MODELS_DIR);
  return sizes;
}
340
+
341
/**
 * Look up a model's size by its oMLX API id.
 *
 * Candidates are tried in this order and the first one present in `sizeMap`
 * wins:
 *   1. the id itself,
 *   2. the part after the first `--` (oMLX org--name format),
 *   3. the part after the first `/` (HF org/name format),
 *   4. the part after the last `/`, when the id has more than one slash.
 *      The size map is keyed by directory name, which cannot contain `/`,
 *      so candidate 3 alone can never match such an id.
 *
 * @param {string} modelId - Model id as reported by the oMLX API.
 * @param {Map<string, number>} sizeMap - Directory name → size in bytes.
 * @returns {number|null} The stored size, or null when no candidate matches
 *   (including when `modelId` is not a string and is not itself a key).
 */
export function lookupOmlxModelSize(modelId, sizeMap) {
  if (sizeMap.has(modelId)) return sizeMap.get(modelId);
  // Nothing to derive from a non-string id; report "unknown" instead of throwing.
  if (typeof modelId !== "string") return null;

  const candidates = [];

  const dashIdx = modelId.indexOf("--");
  if (dashIdx >= 0) candidates.push(modelId.slice(dashIdx + 2));

  const slashIdx = modelId.indexOf("/");
  if (slashIdx >= 0) {
    candidates.push(modelId.slice(slashIdx + 1));
    const lastSlashIdx = modelId.lastIndexOf("/");
    if (lastSlashIdx !== slashIdx) candidates.push(modelId.slice(lastSlashIdx + 1));
  }

  for (const candidate of candidates) {
    if (sizeMap.has(candidate)) return sizeMap.get(candidate);
  }
  return null;
}
@@ -72,6 +72,27 @@ export function buildCatalogItems(normalized) {
72
72
  const profileItems = profiles.map((profile) => {
73
73
  const item = { type: "profile", profile, label: profile.label, fileMissing: isProfileFileMissing(profile) };
74
74
 
75
+ // Resolve label + quant from scan data (re-parse for consistency)
76
+ let quant = profile.capabilities?.quant ?? null;
77
+ if (profile.modelPath) {
78
+ const scanModel = scanByPath.get(profile.modelPath);
79
+ if (scanModel) {
80
+ item.label = scanModel.label; // re-parsed label (publisher/model-name)
81
+ if (scanModel.quant) quant = scanModel.quant;
82
+ }
83
+ }
84
+ if (!quant) {
85
+ const backend = backendFor(profile.backend);
86
+ if (backend.type === "managed-server" && profile.omlxModel) {
87
+ const managedModel = managedByKey.get(`${profile.backend}:${profile.omlxModel}`);
88
+ if (managedModel) {
89
+ item.label = managedModel.label;
90
+ if (managedModel.quant) quant = managedModel.quant;
91
+ }
92
+ }
93
+ }
94
+ item.quant = quant;
95
+
75
96
  // Resolve size: profile.modelSizeBytes → scan lookup → managed lookup
76
97
  let sizeBytes = profile.modelSizeBytes || 0;
77
98
  if (!sizeBytes && profile.modelPath) {
@@ -115,6 +136,7 @@ export function buildCatalogItems(normalized) {
115
136
  drafter: matchDrafter(model.path, drafters),
116
137
  sizeBytes: model.sizeBytes || null,
117
138
  contextLength: model.contextLength ?? null,
139
+ quant: model.quant ?? null,
118
140
  })),
119
141
  ...managedItems.map(({ model, backendId }) => ({
120
142
  type: "managed",
@@ -123,6 +145,7 @@ export function buildCatalogItems(normalized) {
123
145
  label: model.label,
124
146
  sizeBytes: model.sizeBytes || null,
125
147
  contextLength: model.contextLength ?? null,
148
+ quant: model.quant ?? null,
126
149
  })),
127
150
  ];
128
151
  }
@@ -54,6 +54,7 @@ const QUANT_PATTERNS = [
54
54
  /[-_]Q\d_[01]/i,
55
55
  /[-_]F(?:16|32)/i,
56
56
  /[-_]BF16/i,
57
+ /[-_]\d+bit\b/i,
57
58
  ];
58
59
 
59
60
  // ── Tag tokens extracted from the name ──────────────────────────────────
@@ -77,13 +78,20 @@ const TAG_TOKENS = [
77
78
  export function parseModelName(rawId, source) {
78
79
  const id = rawId; // never modify the raw id
79
80
 
80
- // 1. Extract publisher (anything before the first /)
81
+ // 1. Extract publisher (anything before the first /, or -- for oMLX)
81
82
  let publisher = null;
82
83
  let name = rawId;
83
84
  const slashIdx = rawId.indexOf("/");
84
85
  if (slashIdx !== -1) {
85
86
  publisher = rawId.slice(0, slashIdx);
86
87
  name = rawId.slice(slashIdx + 1);
88
+ } else if (source === "omlx") {
89
+ // oMLX uses org--name format
90
+ const dashIdx = rawId.indexOf("--");
91
+ if (dashIdx !== -1) {
92
+ publisher = rawId.slice(0, dashIdx);
93
+ name = rawId.slice(dashIdx + 2);
94
+ }
87
95
  }
88
96
 
89
97
  // 2. Extract quant (GGUF quantization suffix)
@@ -125,7 +133,7 @@ export function parseModelName(rawId, source) {
125
133
  const params = extractParams(model);
126
134
 
127
135
  // 7. Build display string
128
- const display = buildDisplay(publisher, model, tags, quant);
136
+ const display = buildDisplay(publisher, model, tags);
129
137
 
130
138
  // 8. Build sort key (lowercase, no publisher, for alphabetical ordering)
131
139
  const sort = model.toLowerCase().replace(/[-_]/g, " ");
@@ -135,20 +143,12 @@ export function parseModelName(rawId, source) {
135
143
 
136
144
  // ── Display builder ────────────────────────────────────────────────────
137
145
 
138
- function buildDisplay(publisher, model, tags, quant) {
139
- const parts = [];
140
- if (publisher) {
141
- parts.push(publisher);
142
- }
146
+ function buildDisplay(publisher, model, tags) {
143
147
  let modelPart = model;
144
148
  if (tags.length > 0) {
145
149
  modelPart += ` (${tags.join(", ")})`;
146
150
  }
147
- parts.push(modelPart);
148
- if (quant) {
149
- parts.push(quant);
150
- }
151
- return parts.join(" › ");
151
+ return publisher ? `${publisher}/${modelPart}` : modelPart;
152
152
  }
153
153
 
154
154
  // ── Params extraction ──────────────────────────────────────────────────
@@ -11,9 +11,10 @@ import { DATA_DIR } from "./config.mjs";
11
11
  import { findBenchmarkRepo } from "./benchmark.mjs";
12
12
 
13
13
  const OPTION_SEPARATOR = pc.dim(" │ ");
14
- const OPTION_STATUS_WIDTH = 10;
14
+ const OPTION_STATUS_WIDTH = 12;
15
15
  const OPTION_BACKEND_WIDTH = 14;
16
16
  const OPTION_SOURCE_WIDTH = 14;
17
+ const OPTION_QUANT_WIDTH = 10;
17
18
  const OPTION_CTX_WIDTH = 5;
18
19
 
19
20
  const { stripVTControlCharacters } = await import("node:util");
@@ -30,7 +31,7 @@ function optionStatusTag(kind) {
30
31
  serverup: ["READY", pc.blue],
31
32
  ready: ["READY", pc.blue],
32
33
  missing: ["MISSING", pc.red],
33
- setup: ["SETUP", pc.yellow],
34
+ setup: ["NEEDS SETUP", pc.yellow],
34
35
  };
35
36
  const [text, color] = statuses[kind] ?? [kind, pc.dim];
36
37
  return optionPad(text, color, OPTION_STATUS_WIDTH);
@@ -100,7 +101,15 @@ function discoverySourceForItem(item) {
100
101
  return item.model?.source ?? null;
101
102
  }
102
103
 
104
/**
 * Build the quant column cell for a catalog item: the item's `quant` value
 * when it is truthy, otherwise an em-dash placeholder. The text is passed to
 * `optionPad` with no colour and the OPTION_QUANT_WIDTH column width.
 */
function optionQuantLabel(item) {
  const text = item.quant || "—";
  return optionPad(text, null, OPTION_QUANT_WIDTH);
}
108
+
103
109
  function optionCtxLabel(item) {
110
+ // Context window is a configured value — only profiles (READY/RUNNING)
111
+ // have one. SETUP items (new/managed) show "—".
112
+ if (item.type !== "profile") return optionPad("—", null, OPTION_CTX_WIDTH);
104
113
  if (item.contextLength) {
105
114
  return optionPad(`${(item.contextLength / 1000).toFixed(0)}k`, null, OPTION_CTX_WIDTH);
106
115
  }
@@ -120,8 +129,8 @@ export function modelNameWidth(items) {
120
129
  return Math.max(20, maxName + 2);
121
130
  }
122
131
 
123
- function optionLabel({ status, backend, source, name, ctx, size, nameWidth }) {
124
- return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
132
+ function optionLabel({ status, backend, source, name, quant, ctx, size, nameWidth }) {
133
+ return [status, backend, source, pc.bold(optionPad(name, null, nameWidth)), quant, ctx, pc.dim(size)].join(OPTION_SEPARATOR);
125
134
  }
126
135
 
127
136
  export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, nameWidth }) {
@@ -142,8 +151,9 @@ export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, n
142
151
  status: optionStatusTag(status),
143
152
  backend: optionBackendTag(backendId),
144
153
  source: optionSourceTag(sourceId),
145
- name: item.profile.label,
154
+ name: item.label,
146
155
  nameWidth,
156
+ quant: optionQuantLabel(item),
147
157
  ctx: optionCtxLabel(item),
148
158
  size: optionSizeLabel(item),
149
159
  }),
@@ -157,8 +167,9 @@ export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, n
157
167
  status: optionStatusTag("setup"),
158
168
  backend: optionBackendTag(backendId),
159
169
  source: optionSourceTag(sourceId),
160
- name: item.model.label,
170
+ name: item.label,
161
171
  nameWidth,
172
+ quant: optionQuantLabel(item),
162
173
  ctx: optionCtxLabel(item),
163
174
  size: optionSizeLabel(item),
164
175
  }),
@@ -170,8 +181,9 @@ export function modelSelectOption(item, { runningProfilesNow, modelMissingIds, n
170
181
  status: optionStatusTag("setup"),
171
182
  backend: optionBackendTag(backendId),
172
183
  source: optionSourceTag(sourceId),
173
- name: item.model.label,
184
+ name: item.label,
174
185
  nameWidth,
186
+ quant: optionQuantLabel(item),
175
187
  ctx: optionCtxLabel(item),
176
188
  size: optionSizeLabel(item),
177
189
  }),
package/src/scan.mjs CHANGED
@@ -52,8 +52,6 @@ async function scanOneDir(root, sourceLabel = "local-gguf") {
52
52
  const name = basename(path).replace(/\.gguf$/i, "");
53
53
  const sizeBytes = statSync(path).size;
54
54
  if (sizeBytes < MIN_MODEL_SIZE_BYTES) continue;
55
- const parsed = parseModelName(name, "local-gguf");
56
-
57
55
  // Read GGUF metadata to detect drafter architecture and embeddings
58
56
  const meta = safeReadGgufMetadata(path);
59
57
  const architecture = typeof meta["general.architecture"] === "string" ? meta["general.architecture"] : null;
@@ -61,6 +59,12 @@ async function scanOneDir(root, sourceLabel = "local-gguf") {
61
59
  ? meta[`${architecture}.context_length`]
62
60
  : null;
63
61
 
62
+ // Extract publisher from GGUF metadata (repo_url or quantized_by),
63
+ // falling back to directory structure (LM Studio: publisher/model-dir/file).
64
+ const publisher = publisherFromGgufMeta(meta) ?? publisherFromPath(path, root, sourceLabel);
65
+
66
+ const parsed = parseModelName(publisher ? `${publisher}/${name}` : name, sourceLabel);
67
+
64
68
  if (isEmbeddingArchitecture(architecture, name)) continue;
65
69
 
66
70
  if (architecture === "gemma4-assistant" || architecture === "gemma4_assistant") {
@@ -189,4 +193,37 @@ function safeReadGgufMetadata(path) {
189
193
  } catch {
190
194
  return {};
191
195
  }
196
+ }
197
+
198
+ // ── Publisher extraction ─────────────────────────────────────────────────
199
+
200
/**
 * Derive a publisher name from GGUF metadata.
 *
 * Prefers the first path segment after `huggingface.co/` in
 * `general.repo_url` (returned as written). Otherwise falls back to
 * `general.quantized_by`, trimmed and lower-cased. Returns null when neither
 * field yields a non-empty string.
 */
function publisherFromGgufMeta(meta) {
  const { "general.repo_url": repoUrl, "general.quantized_by": quantizedBy } = meta;

  if (typeof repoUrl === "string") {
    const owner = /huggingface\.co\/([^/?#]+)/i.exec(repoUrl)?.[1];
    if (owner !== undefined) return owner;
  }

  const quantizer = typeof quantizedBy === "string" ? quantizedBy.trim() : "";
  return quantizer === "" ? null : quantizer.toLowerCase();
}
213
+
214
/**
 * Extract a publisher from a model file's location relative to the scan root.
 *
 * Recognised layouts:
 *   - HF hub:    models--org--name/snapshots/hash/file  → "org"
 *   - LM Studio: publisher/model-dir/file.gguf (3+ segments, only when
 *                `sourceLabel` is "lmstudio")           → "publisher"
 *
 * @param {string} filePath - Path of the model file; expected to live under
 *   `scanRoot` (the relative part is taken by length, not by prefix match).
 * @param {string} scanRoot - Directory that was scanned. A trailing `/` or
 *   `\` is tolerated.
 * @param {string} sourceLabel - Source label of the scan.
 * @returns {string|null} The publisher, or null when the layout gives none.
 */
function publisherFromPath(filePath, scanRoot, sourceLabel) {
  // Drop trailing separators so the "+ 1" below skips exactly the separator
  // between root and relative path, not the first character of the path.
  const root = scanRoot.replace(/[\\/]+$/, "");
  const rel = filePath.slice(root.length + 1).replace(/\\/g, "/");
  const parts = rel.split("/"); // always has at least one element
  const first = parts[0];

  // HF hub: models--org--name/snapshots/hash/file
  if (first.startsWith("models--")) {
    const after = first.slice("models--".length);
    const dashIdx = after.indexOf("--");
    return dashIdx > 0 ? after.slice(0, dashIdx) : null;
  }

  // LM Studio: publisher/model-dir/file.gguf (3+ parts)
  if (sourceLabel === "lmstudio" && parts.length >= 3) return first;
  return null;
}