offgrid-ai 0.3.15 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.3.15",
3
+ "version": "0.3.17",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -7,20 +7,26 @@ import { readGgufMetadata } from "./gguf.mjs";
7
7
  export function detectCapabilities(modelPath, mmprojPath) {
8
8
  const meta = safeReadGgufMetadata(modelPath);
9
9
  const name = basename(modelPath).toLowerCase();
10
+ const pathHints = String(modelPath).toLowerCase();
10
11
 
11
12
  // Architecture
12
13
  const architecture = meta["general.architecture"] ?? null;
13
14
 
14
15
  // Thinking / reasoning mode
15
16
  const hasThinkingKwargs = meta["chat_template_kwargs"] !== undefined;
16
- const nameHintsThinking = /qwen3|gemma-4|gemma4|deepseek-r[12]/i.test(name);
17
+ const nameHintsThinking = /qwen3|qwen3\.\d|gemma-4|gemma4|deepseek-r[12]/i.test(pathHints);
17
18
  const thinking = hasThinkingKwargs || nameHintsThinking;
18
19
 
20
+ // Quantization-aware / imatrix quantization hints. These mostly affect
21
+ // display and defaults transparency; llama-server does not need a QAT flag.
22
+ const qat = /qat|imatrix|i-?matrix/i.test(pathHints) || Object.keys(meta).some((key) => key.startsWith("quantize.imatrix."));
23
+
19
24
  // Vision — mmproj present
20
25
  const vision = Boolean(mmprojPath && existsSync(mmprojPath));
21
26
 
22
- // MTP (multi-token prediction) — detect speculative decoding
23
- const mtp = /mtp/i.test(name) || architecture === "qwen3";
27
+ // MTP (multi-token prediction) — detect speculative decoding.
28
+ // Do not treat all Qwen models as MTP; require an explicit filename or metadata hint.
29
+ const mtp = /\bmtp\b|draft-mtp|multi-token/i.test(pathHints) || Object.keys(meta).some((key) => /mtp|draft|speculative/i.test(key));
24
30
 
25
31
  // Quantization
26
32
  const quant = name.match(/(Q\d_K_[A-Z]+|UD-[A-Z0-9_]+)/i)?.[1] ?? null;
@@ -31,7 +37,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
31
37
  : undefined;
32
38
  const ctxSize = metaCtx ?? (thinking ? 80000 : 32768);
33
39
 
34
- return { architecture, thinking, vision, mtp, quant, metaCtx, ctxSize, meta };
40
+ return { architecture, thinking, vision, mtp, qat, quant, metaCtx, ctxSize, meta };
35
41
  }
36
42
 
37
43
  // ── Compute llama-server flags from capabilities ───────────────────────────
@@ -42,7 +48,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
42
48
 
43
49
  const flags = {
44
50
  host: "127.0.0.1",
45
- port: 8080,
51
+ port: mtp ? 8081 : 8080,
46
52
  ctxSize: capabilities.ctxSize,
47
53
  flashAttention: "on",
48
54
  cacheTypeK: isLowMem ? "f16" : "bf16",
package/src/cli.mjs CHANGED
@@ -14,6 +14,7 @@ import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand,
14
14
  import { removeInstallerPathEntries } from "./shell-path.mjs";
15
15
  import { configureLocalProfile } from "./profile-setup.mjs";
16
16
  import { buildPrettyCommand } from "./command.mjs";
17
+ import { detectCapabilities } from "./autodetect.mjs";
17
18
 
18
19
  // ── Entry point ────────────────────────────────────────────────────────────
19
20
 
@@ -153,76 +154,15 @@ export async function mainFlow() {
153
154
  return;
154
155
  }
155
156
 
156
- // 6. Interactive: pick an action
157
+ // 6. Interactive: one command center after onboarding.
157
158
  startInteractive("offgrid-ai");
158
- const prompt = createPrompt();
159
- try {
160
- // Show what we found
161
- const profiledPaths = new Set(profiles.map((p) => p.modelPath).filter(Boolean));
162
- const newModels = ggufModels.filter((m) => !profiledPaths.has(m.path));
163
-
164
- // Managed backend models
165
- const managedItems = [];
166
- for (const { backendId, models } of managedModels) {
167
- const profiledAliases = new Set(
168
- profiles.filter((p) => p.backend === backendId).map((p) => backendId === "ollama" ? `ollama:${p.ollamaModel ?? p.modelAlias}` : `omlx:${p.omlxModel ?? p.modelAlias}`)
169
- );
170
- for (const model of models) {
171
- if (!profiledAliases.has(`${backendId}:${model.id}`)) {
172
- managedItems.push({ model, backendId });
173
- }
174
- }
175
- }
176
-
177
- // Show what we found
178
- if (profiles.length > 0) {
179
- console.log(pc.bold("\nSaved profiles"));
180
- for (const profile of profiles) {
181
- const backend = backendFor(profile.backend);
182
- const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
183
- const running = await isProfileRunning(profile);
184
- const c = colorMap[profile.backend] ?? pc.magenta;
185
- console.log(` ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)}`);
186
- }
187
- }
188
- if (newModels.length > 0) {
189
- console.log(pc.bold("\nNew models"));
190
- for (const model of newModels.slice(0, 10)) {
191
- console.log(` ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")} · ${pc.dim(formatBytes(model.sizeBytes))}`);
192
- }
193
- if (newModels.length > 10) console.log(pc.dim(` ... and ${newModels.length - 10} more`));
194
- }
195
- for (const { backendId, models } of managedModels) {
196
- if (models.length > 0) {
197
- const be = BACKENDS[backendId];
198
- console.log(pc.bold(`\n${be.label} models`));
199
- for (const model of models.slice(0, 5)) {
200
- console.log(` ${pc.cyan(model.label)}`);
201
- }
202
- if (models.length > 5) console.log(pc.dim(` ... and ${models.length - 5} more`));
203
- }
204
- }
205
-
206
- // Pick what to do
207
- const action = await prompt.choice("What next?", [
208
- { value: "run", label: "Run a model", hint: "Start server and launch Pi" },
209
- ...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
210
- { value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
211
- ], "run");
212
-
213
- if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
214
- if (action === "manage") return await manageProfiles(prompt, profiles);
215
- if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
216
- } finally {
217
- prompt.close();
218
- }
159
+ return await modelCommandCenter({ profiles, ggufModels, managedModels });
219
160
  }
220
161
 
221
- // ── Explicit model/run commands ─────────────────────────────────────────────
162
+ // ── Model command center ────────────────────────────────────────────────────
222
163
 
223
164
  async function modelsCommand(argv) {
224
165
  await ensureDirs();
225
- if (process.stdin.isTTY) startInteractive("offgrid-ai models");
226
166
  const catalog = await loadModelCatalog();
227
167
 
228
168
  if (argv[0]) {
@@ -231,20 +171,28 @@ async function modelsCommand(argv) {
231
171
  return;
232
172
  }
233
173
 
234
- await printModelCatalog(catalog);
174
+ if (process.stdin.isTTY) startInteractive("offgrid-ai");
175
+ return await modelCommandCenter(catalog);
176
+ }
177
+
178
+ async function modelCommandCenter(catalog) {
179
+ const normalized = normalizeCatalog(catalog);
180
+ const items = modelCatalogItems(normalized);
181
+ await printModelCatalog(normalized, items);
235
182
  if (!process.stdin.isTTY) return;
236
183
 
237
- const items = modelCatalogItems(catalog);
238
184
  if (items.length === 0) return;
239
185
 
240
186
  const prompt = createPrompt();
241
187
  try {
242
- const action = await prompt.choice("Action", [
243
- { value: "inspect", label: "Inspect", hint: "View profile/model details" },
188
+ const action = await prompt.choice("What do you want to do?", [
189
+ { value: "inspect", label: "Inspect", hint: "View details" },
244
190
  { value: "setup", label: "Set up / sync", hint: "Create profile or sync Pi" },
245
191
  { value: "run", label: "Run", hint: "Start server and launch Pi" },
192
+ { value: "benchmark", label: "Benchmark", hint: "Coming soon: local benchmark project" },
246
193
  { value: "remove", label: "Remove", hint: "Delete a saved profile" },
247
- ], "inspect");
194
+ ], "run");
195
+ if (action === "benchmark") return await benchmarkFlow();
248
196
  const item = await chooseCatalogItem(prompt, items, action);
249
197
  if (!item) return;
250
198
  return await handleCatalogAction(prompt, action, item);
@@ -256,21 +204,9 @@ async function modelsCommand(argv) {
256
204
  async function runCommand(argv) {
257
205
  await ensureDirs();
258
206
  const { positional } = parseOptions(argv);
259
- if (positional[0]) {
260
- const profile = await readProfile(positional[0]);
261
- return await runProfile(profile);
262
- }
263
-
264
- const catalog = await loadModelCatalog();
265
- if (!process.stdin.isTTY) throw new Error("Run requires a profile id in non-interactive mode: offgrid-ai run <profile>");
266
- startInteractive("offgrid-ai run");
267
- await printModelCatalog(catalog);
268
- const prompt = createPrompt();
269
- try {
270
- return await pickAndRun(prompt, catalog.profiles, catalog.newModels, catalog.managedItems);
271
- } finally {
272
- prompt.close();
273
- }
207
+ if (!positional[0]) return await mainFlow();
208
+ const profile = await readProfile(positional[0]);
209
+ return await runProfile(profile);
274
210
  }
275
211
 
276
212
  async function loadModelCatalog() {
@@ -279,6 +215,12 @@ async function loadModelCatalog() {
279
215
  scanGgufModels(),
280
216
  scanManagedModels(),
281
217
  ]);
218
+ return normalizeCatalog({ profiles, ggufModels, managedModels });
219
+ }
220
+
221
+ function normalizeCatalog(catalog) {
222
+ if (catalog.newModels && catalog.managedItems) return catalog;
223
+ const { profiles, ggufModels, managedModels } = catalog;
282
224
  const profiledPaths = new Set(profiles.map((p) => p.modelPath).filter(Boolean));
283
225
  const newModels = ggufModels.filter((m) => !profiledPaths.has(m.path));
284
226
  const managedItems = [];
@@ -293,36 +235,54 @@ async function loadModelCatalog() {
293
235
  return { profiles, ggufModels, managedModels, newModels, managedItems };
294
236
  }
295
237
 
296
- async function printModelCatalog({ profiles, newModels, managedModels }) {
297
- if (profiles.length > 0) {
298
- console.log(pc.bold("\nSaved profiles"));
238
+ async function printModelCatalog({ profiles, newModels, managedItems }, items = modelCatalogItems({ profiles, newModels, managedItems })) {
239
+ const itemNumber = (predicate) => {
240
+ const index = items.findIndex(predicate);
241
+ return index === -1 ? " " : String(index + 1).padStart(2, " ");
242
+ };
243
+
244
+ console.log(pc.bold("\nSaved profiles"));
245
+ if (profiles.length === 0) {
246
+ console.log(pc.dim(" None yet."));
247
+ } else {
299
248
  for (const profile of profiles) {
300
249
  const backend = backendFor(profile.backend);
301
250
  const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
302
251
  const running = await isProfileRunning(profile);
303
252
  const piConfigured = await hasPiModel(profile);
304
253
  const c = colorMap[profile.backend] ?? pc.magenta;
305
- console.log(` ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)} ${piConfigured ? pc.green("· Pi synced") : pc.yellow("· Pi not synced")}`);
254
+ const num = itemNumber((item) => item.type === "profile" && item.profile.id === profile.id);
255
+ console.log(`${num}. ${running ? pc.green("●") : pc.dim("○")} ${pc.bold(profile.label)} ${c(`[${backend.label}]`)} · ${pc.cyan(profile.modelAlias)} ${piConfigured ? pc.green("· Pi synced") : pc.yellow("· Pi not synced")}`);
306
256
  }
307
- } else {
308
- console.log(pc.bold("\nSaved profiles"));
309
- console.log(pc.dim(" None yet."));
310
257
  }
311
258
 
312
- if (newModels.length > 0) {
313
- console.log(pc.bold("\nNew GGUF models"));
259
+ console.log("");
260
+ console.log(pc.bold("Downloaded models not set up yet"));
261
+ if (newModels.length === 0) {
262
+ console.log(pc.dim(" None. Every downloaded GGUF has a profile."));
263
+ } else {
314
264
  for (const model of newModels.slice(0, 20)) {
315
- console.log(` ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")} · ${pc.dim(formatBytes(model.sizeBytes))}`);
265
+ const caps = detectCapabilities(model.path, model.mmprojPath);
266
+ const num = itemNumber((item) => item.type === "new" && item.model.path === model.path);
267
+ console.log(`${num}. ${pc.cyan(model.label)} ${capabilityBadges(caps)} ${pc.dim(model.quant ?? "")}`);
268
+ console.log(` alias: ${pc.cyan(model.aliasSuggestion)}`);
269
+ console.log(` size: ${formatBytes(model.sizeBytes)}`);
316
270
  }
317
271
  if (newModels.length > 20) console.log(pc.dim(` ... and ${newModels.length - 20} more`));
318
272
  }
319
273
 
320
- for (const { backendId, models } of managedModels) {
321
- if (models.length === 0) continue;
274
+ for (const backendId of ["ollama", "omlx"]) {
275
+ const backendItems = managedItems.filter((item) => item.backendId === backendId);
276
+ if (backendItems.length === 0) continue;
322
277
  const be = BACKENDS[backendId];
323
- console.log(pc.bold(`\n${be.label} models`));
324
- for (const model of models.slice(0, 10)) console.log(` ${pc.cyan(model.label)}`);
325
- if (models.length > 10) console.log(pc.dim(` ... and ${models.length - 10} more`));
278
+ console.log("");
279
+ console.log(pc.bold(`${be.label} models`));
280
+ for (const { model } of backendItems.slice(0, 10)) {
281
+ const num = itemNumber((item) => item.type === "managed" && item.backendId === backendId && item.model.id === model.id);
282
+ console.log(`${num}. ${pc.cyan(model.label)} ${pc.dim(model.quant ?? "")}`);
283
+ console.log(` id: ${pc.cyan(model.id)}`);
284
+ }
285
+ if (backendItems.length > 10) console.log(pc.dim(` ... and ${backendItems.length - 10} more`));
326
286
  }
327
287
  }
328
288
 
@@ -335,17 +295,25 @@ function modelCatalogItems({ profiles, newModels, managedItems }) {
335
295
  }
336
296
 
337
297
  async function chooseCatalogItem(prompt, items, action) {
338
- const allowed = action === "remove" ? items.filter((item) => item.type === "profile") : items;
339
- if (allowed.length === 0) {
340
- console.log(pc.yellow(action === "remove" ? "No saved profiles to remove." : "No models available."));
298
+ if (action === "remove" && !items.some((item) => item.type === "profile")) {
299
+ console.log(pc.yellow("No saved profiles to remove."));
341
300
  return null;
342
301
  }
343
- const selected = await prompt.choice("Select", allowed.map((item, index) => ({
344
- value: String(index),
345
- label: item.label,
346
- hint: item.hint,
347
- })), "0");
348
- return allowed[Number(selected)];
302
+
303
+ const input = await prompt.text("Select a number", "");
304
+ if (!input) return null;
305
+ const index = Number(input) - 1;
306
+ if (!Number.isInteger(index) || index < 0 || index >= items.length) {
307
+ console.log(pc.yellow(`No item ${input}.`));
308
+ return null;
309
+ }
310
+
311
+ const item = items[index];
312
+ if (action === "remove" && item.type !== "profile") {
313
+ console.log(pc.yellow("Only saved profiles can be removed."));
314
+ return null;
315
+ }
316
+ return item;
349
317
  }
350
318
 
351
319
  async function handleCatalogAction(prompt, action, item) {
@@ -396,6 +364,7 @@ async function printProfileDetails(profile) {
396
364
  ["ID", pc.cyan(profile.id)],
397
365
  ["Label", pc.bold(profile.label)],
398
366
  ["Backend", backend.label],
367
+ ...(profile.capabilities ? [["Detected", capabilitySummary(profile.capabilities)]] : []),
399
368
  ["Endpoint", pc.green(profile.baseUrl)],
400
369
  ...(!isManaged ? [
401
370
  ["Model", profile.modelPath ?? "unknown"],
@@ -413,8 +382,10 @@ async function printProfileDetails(profile) {
413
382
  }
414
383
 
415
384
  function printGgufModelDetails(model) {
385
+ const caps = detectCapabilities(model.path, model.mmprojPath);
416
386
  console.log("\n" + renderSection("GGUF model", renderRows([
417
387
  ["Label", pc.bold(model.label)],
388
+ ["Detected", capabilitySummary(caps)],
418
389
  ["Model", model.path],
419
390
  ["MMProj", model.mmprojPath ?? "none"],
420
391
  ["Quant", model.quant ?? "unknown"],
@@ -431,6 +402,26 @@ function printManagedModelDetails(model, backend) {
431
402
  ])));
432
403
  }
433
404
 
405
+ function capabilitySummary(caps) {
406
+ const parts = [];
407
+ if (caps.architecture) parts.push(caps.architecture);
408
+ if (caps.quant) parts.push(caps.quant);
409
+ if (caps.mtp) parts.push("MTP");
410
+ if (caps.qat) parts.push("QAT/imatrix");
411
+ if (caps.thinking) parts.push("thinking");
412
+ if (caps.vision) parts.push("vision");
413
+ return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
414
+ }
415
+
416
+ function capabilityBadges(caps) {
417
+ const badges = [];
418
+ if (caps.mtp) badges.push(pc.blue("[MTP]"));
419
+ if (caps.qat) badges.push(pc.green("[QAT]"));
420
+ if (caps.thinking) badges.push(pc.magenta("[thinking]"));
421
+ if (caps.vision) badges.push(pc.cyan("[vision]"));
422
+ return badges.join(" ");
423
+ }
424
+
434
425
  function createManagedProfile(model, backendId) {
435
426
  return normalizeProfile({
436
427
  id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
@@ -442,91 +433,6 @@ function createManagedProfile(model, backendId) {
442
433
  });
443
434
  }
444
435
 
445
- // ── Pick and run ────────────────────────────────────────────────────────────
446
-
447
- async function pickAndRun(prompt, profiles, newModels, managedItems) {
448
- // If there's exactly one profile and it's already running, offer to connect or start fresh
449
- const choices = [];
450
-
451
- // Existing profiles
452
- for (const profile of profiles) {
453
- const running = await isProfileRunning(profile);
454
- const backend = backendFor(profile.backend);
455
- const colorMap = { "llama-cpp": pc.yellow, "llama-cpp-mtp": pc.blue, "ollama": pc.magenta, "omlx": pc.cyan };
456
- const c = colorMap[profile.backend] ?? pc.magenta;
457
- choices.push({
458
- value: `profile:${profile.id}`,
459
- label: `${running ? pc.green("● ") : ""}${profile.label}`,
460
- hint: `${c(backend.label)} · ${profile.modelAlias} · ${profile.baseUrl}`,
461
- });
462
- }
463
-
464
- // New GGUF models
465
- for (const model of newModels.slice(0, 20)) {
466
- choices.push({
467
- value: `new:${model.path}`,
468
- label: model.label,
469
- hint: `${model.quant ?? "GGUF"} · ${formatBytes(model.sizeBytes)}`,
470
- });
471
- }
472
-
473
- // Managed models
474
- for (const { model, backendId } of managedItems) {
475
- const be = BACKENDS[backendId];
476
- choices.push({
477
- value: `managed:${backendId}:${model.id}`,
478
- label: model.label,
479
- hint: `${be.label}`,
480
- });
481
- }
482
-
483
- if (choices.length === 0) {
484
- console.log(pc.yellow("No models available."));
485
- return;
486
- }
487
-
488
- const selected = await prompt.choice("Pick a model", choices, choices[0].value);
489
-
490
- if (selected.startsWith("profile:")) {
491
- const id = selected.slice("profile:".length);
492
- const profile = await readProfile(id);
493
- return await runProfile(profile);
494
- }
495
-
496
- if (selected.startsWith("new:")) {
497
- const modelPath = selected.slice("new:".length);
498
- const model = newModels.find((m) => m.path === modelPath);
499
- if (!model) throw new Error("Model not found.");
500
- const profile = await createProfileFromModel(model);
501
- const configured = await configureLocalProfile(prompt, profile);
502
- if (!configured) return;
503
- await saveProfile(configured);
504
- console.log(pc.green(`Saved profile: ${configured.label}`));
505
- await syncPiConfig(configured);
506
- return await runProfile(configured);
507
- }
508
-
509
- if (selected.startsWith("managed:")) {
510
- const managedSelection = selected.slice("managed:".length);
511
- const separator = managedSelection.indexOf(":");
512
- const backendId = separator === -1 ? managedSelection : managedSelection.slice(0, separator);
513
- const modelId = separator === -1 ? "" : managedSelection.slice(separator + 1);
514
- const model = managedItems.find((m) => m.model.id === modelId && m.backendId === backendId)?.model;
515
- if (!model) throw new Error("Model not found.");
516
- const profile = normalizeProfile({
517
- id: model.id.replace(/[^a-z0-9._-]+/gi, "-").toLowerCase(),
518
- label: model.label,
519
- backend: backendId,
520
- modelAlias: model.aliasSuggestion,
521
- ...(backendId === "ollama" ? { ollamaModel: model.id } : {}),
522
- ...(backendId === "omlx" ? { omlxModel: model.id } : {}),
523
- });
524
- await saveProfile(profile);
525
- await syncPiConfig(profile);
526
- return await runProfile(profile);
527
- }
528
- }
529
-
530
436
  async function runProfile(profile, options = {}) {
531
437
  const backend = backendFor(profile.backend);
532
438
  const withHarness = options.with ?? "pi";
@@ -608,56 +514,6 @@ async function runProfile(profile, options = {}) {
608
514
  }
609
515
  }
610
516
 
611
- // ── Manage profiles ─────────────────────────────────────────────────────────
612
-
613
- async function manageProfiles(prompt, profiles) {
614
- const choices = profiles.map((p) => ({
615
- value: p.id,
616
- label: p.label,
617
- hint: `${p.modelAlias} · ${p.baseUrl}`,
618
- }));
619
-
620
- const selected = await prompt.choice("Which profile?", choices, choices[0].value);
621
- const profile = await readProfile(selected);
622
- const backend = backendFor(profile.backend);
623
- const isManaged = backend.type === "managed-server";
624
- const piConfigured = await hasPiModel(profile);
625
-
626
- // Show profile details
627
- console.log("");
628
- console.log(renderSection("Profile", renderRows([
629
- ["ID", pc.cyan(profile.id)],
630
- ["Label", pc.bold(profile.label)],
631
- ["Backend", backend.label],
632
- ["Endpoint", pc.green(profile.baseUrl)],
633
- ...(!isManaged ? [
634
- ["Model", profile.modelPath ?? "unknown"],
635
- ["MMProj", profile.mmprojPath ?? "none"],
636
- ["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
637
- ] : []),
638
- ["Alias", pc.cyan(profile.modelAlias)],
639
- ["Pi", piConfigured ? pc.green("configured") : pc.yellow("not synced")],
640
- ])));
641
-
642
- if (!isManaged && profile.commandArgv) {
643
- console.log("");
644
- console.log(pc.bold("llama-server command"));
645
- console.log(pc.dim(buildPrettyCommand(profile)));
646
- }
647
-
648
- const action = await prompt.choice("Action", [
649
- { value: "sync", label: piConfigured ? `${pc.green("✓")} Pi config synced` : "Sync Pi config", hint: piConfigured ? "Already in ~/.pi/agent/models.json" : "Update ~/.pi/agent/models.json" },
650
- { value: "run", label: "Run", hint: "Start server + Pi" },
651
- ...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
652
- { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
653
- ], "sync");
654
-
655
- if (action === "sync") return await syncPiConfig(profile);
656
- if (action === "run") return await runProfile(profile);
657
- if (action === "server") return await runProfile(profile, { with: "server" });
658
- if (action === "remove") return await removeProfileInteractive(profile.id);
659
- }
660
-
661
517
  async function removeProfileInteractive(id) {
662
518
  const profile = await readProfile(id);
663
519
  if (!process.stdin.isTTY) {
@@ -1207,9 +1063,7 @@ function printHelp() {
1207
1063
  console.log(`${pc.bold("offgrid-ai")} — privacy-first local LLM runner
1208
1064
 
1209
1065
  Usage:
1210
- offgrid-ai Friendly shortcut: pick a model and run it
1211
- offgrid-ai models List, inspect, set up, sync, or remove models
1212
- offgrid-ai run Pick and run a model (or: offgrid-ai run <profile>)
1066
+ offgrid-ai Command center: inspect, set up, run, benchmark, or remove models
1213
1067
  offgrid-ai status Show running local models
1214
1068
  offgrid-ai stop Stop a running server (or: offgrid-ai stop <id>)
1215
1069
  offgrid-ai uninstall Remove offgrid-ai, clean up PATH, optionally keep profiles
@@ -8,10 +8,27 @@ const CACHE_CHOICES = [
8
8
  { value: "q4_0", label: "q4_0", hint: "lowest memory, quality/speed tradeoff" },
9
9
  ];
10
10
 
11
+ const GENERAL_DEFAULTS = {
12
+ topK: 20,
13
+ presencePenalty: 1.5,
14
+ repeatPenalty: 1.0,
15
+ };
16
+
17
+ const THINKING_DEFAULTS = {
18
+ topK: 64,
19
+ presencePenalty: 0,
20
+ repeatPenalty: 1.1,
21
+ chatTemplateKwargs: { enable_thinking: true },
22
+ };
23
+
11
24
  export async function configureLocalProfile(prompt, profile) {
25
+ let configured = profile;
26
+ const caps = profile.capabilities ?? {};
27
+
12
28
  console.log("");
13
29
  console.log(renderSection("Model setup", renderRows([
14
30
  ["Model", pc.bold(profile.label)],
31
+ ["Detected", detectionSummary(caps)],
15
32
  ["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
16
33
  ["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
17
34
  ["Sampling", samplingSummary(profile.flags)],
@@ -19,13 +36,36 @@ export async function configureLocalProfile(prompt, profile) {
19
36
  console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
20
37
  console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));
21
38
 
22
- const ctxSize = await prompt.number("Context window tokens", profile.flags.ctxSize, 1024, 1048576);
23
- const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, profile.flags.cacheTypeK);
24
- const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, profile.flags.cacheTypeV);
25
- const configured = applyRuntimeFlagOverrides(profile, { ctxSize, cacheTypeK, cacheTypeV });
39
+ if (caps.mtp) {
40
+ console.log(renderSection("Detected MTP", renderRows([
41
+ ["Backend", "llama.cpp MTP"],
42
+ ["Port", "8081"],
43
+ ["Flags", "--spec-type draft-mtp --spec-draft-n-max 2"],
44
+ ])));
45
+ const useMtp = await prompt.yesNo("Use MTP speculative decoding flags?", true);
46
+ configured = useMtp ? applyMtpDefaults(configured) : removeMtpDefaults(configured);
47
+ }
48
+
49
+ if (caps.thinking || caps.qat) {
50
+ console.log("");
51
+ console.log(renderSection(caps.qat ? "Detected QAT / imatrix-style model" : "Detected thinking model", renderRows([
52
+ ["Defaults", "thinking / loop-safe"],
53
+ ["Flags", "--top-k 64 --presence-penalty 0 --repeat-penalty 1.1"],
54
+ ["Template", "--chat-template-kwargs { enable_thinking: true }"],
55
+ ])));
56
+ const useThinking = await prompt.yesNo("Use these thinking/QAT-safe defaults?", true);
57
+ configured = useThinking ? applyThinkingDefaults(configured) : removeThinkingDefaults(configured);
58
+ }
59
+
60
+ const ctxSize = await prompt.number("Context window tokens", configured.flags.ctxSize, 1024, 1048576);
61
+ const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, configured.flags.cacheTypeK);
62
+ const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, configured.flags.cacheTypeV);
63
+ configured = applyRuntimeFlagOverrides(configured, { ctxSize, cacheTypeK, cacheTypeV });
26
64
 
27
65
  console.log("");
28
66
  console.log(renderSection("Defaults", renderRows([
67
+ ["Backend", configured.backend],
68
+ ["Endpoint", configured.baseUrl],
29
69
  ["Temperature", configured.flags.temperature],
30
70
  ["Top-p", configured.flags.topP],
31
71
  ["Top-k", configured.flags.topK],
@@ -41,21 +81,63 @@ export async function configureLocalProfile(prompt, profile) {
41
81
 
42
82
  export function applyRuntimeFlagOverrides(profile, overrides) {
43
83
  const flags = { ...profile.flags, ...overrides };
44
- return {
84
+ return applyProfileFlags(profile, flags);
85
+ }
86
+
87
+ function applyMtpDefaults(profile) {
88
+ const flags = { ...profile.flags, port: 8081 };
89
+ return applyProfileFlags({ ...profile, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp" }, flags, {
90
+ values: { "--spec-type": "draft-mtp", "--spec-draft-n-max": 2 },
91
+ });
92
+ }
93
+
94
+ function removeMtpDefaults(profile) {
95
+ const flags = { ...profile.flags, port: 8080 };
96
+ return applyProfileFlags({ ...profile, backend: "llama-cpp", providerId: "llama-cpp" }, flags, {
97
+ remove: ["--spec-type", "--spec-draft-n-max"],
98
+ });
99
+ }
100
+
101
+ function applyThinkingDefaults(profile) {
102
+ const flags = { ...profile.flags, ...THINKING_DEFAULTS };
103
+ return applyProfileFlags(profile, flags);
104
+ }
105
+
106
+ function removeThinkingDefaults(profile) {
107
+ const flags = { ...profile.flags, ...GENERAL_DEFAULTS };
108
+ delete flags.chatTemplateKwargs;
109
+ return applyProfileFlags(profile, flags, { remove: ["--chat-template-kwargs"] });
110
+ }
111
+
112
+ function applyProfileFlags(profile, flags, edits = {}) {
113
+ const next = {
45
114
  ...profile,
46
115
  flags,
47
116
  baseUrl: `http://${flags.host}:${flags.port}/v1`,
48
- commandArgv: updateArgv(profile.commandArgv ?? [], {
49
- "--ctx-size": flags.ctxSize,
50
- "--cache-type-k": flags.cacheTypeK,
51
- "--cache-type-v": flags.cacheTypeV,
52
- }),
117
+ harnesses: {
118
+ ...(profile.harnesses ?? {}),
119
+ pi: { ...(profile.harnesses?.pi ?? {}), enabled: true, model: `${profile.providerId ?? profile.backend}/${profile.modelAlias ?? profile.id}` },
120
+ },
53
121
  };
122
+ next.commandArgv = updateArgv(profile.commandArgv ?? [], {
123
+ "--host": flags.host,
124
+ "--port": flags.port,
125
+ "--ctx-size": flags.ctxSize,
126
+ "--cache-type-k": flags.cacheTypeK,
127
+ "--cache-type-v": flags.cacheTypeV,
128
+ "--top-k": flags.topK,
129
+ "--presence-penalty": flags.presencePenalty,
130
+ "--repeat-penalty": flags.repeatPenalty,
131
+ ...(flags.chatTemplateKwargs ? { "--chat-template-kwargs": JSON.stringify(flags.chatTemplateKwargs) } : {}),
132
+ }, edits);
133
+ return next;
54
134
  }
55
135
 
56
- function updateArgv(argv, values) {
57
- const next = [...argv];
58
- for (const [flag, value] of Object.entries(values)) {
136
+ function updateArgv(argv, values, edits = {}) {
137
+ let next = [...argv];
138
+ for (const flag of edits.remove ?? []) next = removeOption(next, flag);
139
+ for (const [flag, value] of Object.entries({ ...values, ...(edits.values ?? {}) })) {
140
+ if (value === undefined) continue;
59
141
  const index = next.indexOf(flag);
60
142
  if (index === -1) next.push(flag, String(value));
61
143
  else next[index + 1] = String(value);
@@ -63,6 +145,18 @@ function updateArgv(argv, values) {
63
145
  return next;
64
146
  }
65
147
 
148
+ function removeOption(argv, flag) {
149
+ const next = [];
150
+ for (let i = 0; i < argv.length; i++) {
151
+ if (argv[i] === flag) {
152
+ if (argv[i + 1] && !argv[i + 1].startsWith("--")) i += 1;
153
+ continue;
154
+ }
155
+ next.push(argv[i]);
156
+ }
157
+ return next;
158
+ }
159
+
66
160
  function renderMemoryEstimate(profile) {
67
161
  try {
68
162
  const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
@@ -77,6 +171,17 @@ function renderMemoryEstimate(profile) {
77
171
  }
78
172
  }
79
173
 
174
+ function detectionSummary(caps) {
175
+ const parts = [];
176
+ if (caps.architecture) parts.push(caps.architecture);
177
+ if (caps.quant) parts.push(caps.quant);
178
+ if (caps.mtp) parts.push("MTP");
179
+ if (caps.qat) parts.push("QAT/imatrix");
180
+ if (caps.thinking) parts.push("thinking");
181
+ if (caps.vision) parts.push("vision");
182
+ return parts.length > 0 ? parts.join(" · ") : "standard GGUF";
183
+ }
184
+
80
185
  function samplingSummary(flags) {
81
186
  return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
82
187
  }
package/src/profiles.mjs CHANGED
@@ -136,25 +136,41 @@ export function normalizeProfile(profile) {
136
136
 
137
137
  // ── Auto-create profile from a discovered model ────────────────────────────
138
138
 
139
- export async function createProfileFromModel(model, backendId = "llama-cpp") {
139
+ export async function createProfileFromModel(model, backendId) {
140
140
  const { detectCapabilities } = await import("./autodetect.mjs");
141
141
  const caps = detectCapabilities(model.path, model.mmprojPath);
142
+ const backend = backendId ?? (caps.mtp ? "llama-cpp-mtp" : "llama-cpp");
142
143
  const id = slugFromLabel(model.label);
143
144
  const { flags, argv } = computeFlags(caps, model.path, model.mmprojPath, null);
144
145
 
145
146
  return normalizeProfile({
146
147
  id,
147
148
  label: model.label,
148
- backend: backendId,
149
+ backend,
150
+ providerId: backend,
149
151
  modelAlias: model.aliasSuggestion,
150
152
  modelPath: model.path,
151
153
  mmprojPath: model.mmprojPath,
154
+ capabilities: summarizeCapabilities(caps),
152
155
  preset: null, // no presets — auto-detected
153
156
  flags,
154
157
  commandArgv: argv,
155
158
  });
156
159
  }
157
160
 
161
+ function summarizeCapabilities(caps) {
162
+ return {
163
+ architecture: caps.architecture,
164
+ thinking: caps.thinking,
165
+ vision: caps.vision,
166
+ mtp: caps.mtp,
167
+ qat: caps.qat,
168
+ quant: caps.quant,
169
+ metaCtx: caps.metaCtx,
170
+ ctxSize: caps.ctxSize,
171
+ };
172
+ }
173
+
158
174
  // ── State files (for running servers) ──────────────────────────────────────
159
175
 
160
176
  export async function readState(id) {