offgrid-ai 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -51,7 +51,7 @@ curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/
51
51
 
52
52
  1. **Auto-detect everything.** Scans for GGUF models in LM Studio, HuggingFace, and Ollama directories. Reads model metadata (quantization, context size, vision, thinking mode) directly from the GGUF file. No presets, no manual configuration.
53
53
 
54
- 2. **One command to run.** `offgrid-ai` → pick a model → it figures out the flags, starts llama-server, syncs Pi config, and launches Pi.
54
+ 2. **One command to run.** `offgrid-ai` → pick a model → confirm context/KV memory settings on first setup → it starts llama-server, syncs Pi config, and launches Pi.
55
55
 
56
56
  3. **One model at a time.** Laptops have limited RAM. One server, one model, no confusion.
57
57
 
@@ -77,7 +77,7 @@ When you run `offgrid-ai` for the first time on a fresh machine:
77
77
  - **oMLX** — Apple Silicon optimized
78
78
  4. **Models** — If no models found, tells you where to get them.
79
79
 
80
- Subsequent runs skip everything that's already installed.
80
+ Subsequent runs skip everything that's already installed. When a GGUF model is set up for the first time, offgrid-ai asks only for the memory-impacting choices: context window and KV cache precision. Sampling defaults are shown but not forced into a tuning wizard.
81
81
 
82
82
  ## Data directory
83
83
 
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.3.11",
3
+ "version": "0.3.13",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
7
7
  "bin": {
8
- "offgrid-ai": "./bin/offgrid-ai.mjs"
8
+ "offgrid-ai": "bin/offgrid-ai.mjs"
9
9
  },
10
10
  "files": [
11
11
  "bin/*.mjs",
package/src/cli.mjs CHANGED
@@ -12,6 +12,8 @@ import { estimateMemory } from "./estimate.mjs";
12
12
  import { pc, formatBytes, renderRows, renderSection, startInteractive, createPrompt, parseOptions } from "./ui.mjs";
13
13
  import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand, runUpdateCommand } from "./updates.mjs";
14
14
  import { removeInstallerPathEntries } from "./shell-path.mjs";
15
+ import { configureLocalProfile } from "./profile-setup.mjs";
16
+ import { buildPrettyCommand } from "./command.mjs";
15
17
 
16
18
  // ── Entry point ────────────────────────────────────────────────────────────
17
19
 
@@ -200,15 +202,21 @@ export async function mainFlow() {
200
202
  }
201
203
 
202
204
  // Pick what to do
203
- const action = await prompt.choice("What next?", [
204
- { value: "run", label: "Run a model", hint: "Start server and launch Pi" },
205
- ...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
206
- { value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
207
- ], "run");
208
-
209
- if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
210
- if (action === "manage") return await manageProfiles(prompt, profiles);
211
- if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
205
+ while (true) {
206
+ const action = await prompt.choice("What next?", [
207
+ { value: "run", label: "Run a model", hint: "Start server and launch Pi" },
208
+ ...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
209
+ { value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
210
+ ], "run");
211
+
212
+ if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
213
+ if (action === "manage") {
214
+ const result = await manageProfiles(prompt, profiles);
215
+ if (result === "back") continue;
216
+ return result;
217
+ }
218
+ if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
219
+ }
212
220
  } finally {
213
221
  prompt.close();
214
222
  }
@@ -270,10 +278,12 @@ async function pickAndRun(prompt, profiles, newModels, managedItems) {
270
278
  const model = newModels.find((m) => m.path === modelPath);
271
279
  if (!model) throw new Error("Model not found.");
272
280
  const profile = await createProfileFromModel(model);
273
- await saveProfile(profile);
274
- console.log(pc.green(`Auto-configured: ${profile.label}`));
275
- await syncPiConfig(profile);
276
- return await runProfile(profile);
281
+ const configured = await configureLocalProfile(prompt, profile);
282
+ if (!configured) return;
283
+ await saveProfile(configured);
284
+ console.log(pc.green(`Saved profile: ${configured.label}`));
285
+ await syncPiConfig(configured);
286
+ return await runProfile(configured);
277
287
  }
278
288
 
279
289
  if (selected.startsWith("managed:")) {
@@ -381,58 +391,63 @@ async function runProfile(profile, options = {}) {
381
391
  // ── Manage profiles ─────────────────────────────────────────────────────────
382
392
 
383
393
  async function manageProfiles(prompt, profiles) {
384
- const choices = profiles.map((p) => ({
385
- value: p.id,
386
- label: p.label,
387
- hint: `${p.modelAlias} · ${p.baseUrl}`,
388
- }));
389
- choices.push({ value: "__back", label: "← Back" });
394
+ while (true) {
395
+ const choices = profiles.map((p) => ({
396
+ value: p.id,
397
+ label: p.label,
398
+ hint: `${p.modelAlias} · ${p.baseUrl}`,
399
+ }));
400
+ choices.push({ value: "__back", label: "← Back" });
390
401
 
391
- const selected = await prompt.choice("Which profile?", choices, choices[0].value);
392
- if (selected === "__back") return;
402
+ const selected = await prompt.choice("Which profile?", choices, choices[0].value);
403
+ if (selected === "__back") return "back";
393
404
 
394
- const profile = await readProfile(selected);
395
- const backend = backendFor(profile.backend);
396
- const isManaged = backend.type === "managed-server";
405
+ const profile = await readProfile(selected);
406
+ const backend = backendFor(profile.backend);
407
+ const isManaged = backend.type === "managed-server";
408
+ const piConfigured = await hasPiModel(profile);
397
409
 
398
- // Show profile details
399
- console.log("");
400
- console.log(renderSection("Profile", renderRows([
401
- ["ID", pc.cyan(profile.id)],
402
- ["Label", pc.bold(profile.label)],
403
- ["Backend", backend.label],
404
- ["Endpoint", pc.green(profile.baseUrl)],
405
- ...(!isManaged ? [
406
- ["Model", profile.modelPath ?? "unknown"],
407
- ["MMProj", profile.mmprojPath ?? "none"],
408
- ["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
409
- ] : []),
410
- ["Alias", pc.cyan(profile.modelAlias)],
411
- ["Pi", (await hasPiModel(profile)) ? pc.green("configured") : pc.yellow("not synced")],
412
- ])));
413
-
414
- if (!isManaged && profile.commandArgv) {
410
+ // Show profile details
415
411
  console.log("");
416
- console.log(pc.bold("Auto-detected flags"));
417
- console.log(pc.dim(profile.commandArgv.join(" ")));
418
- }
419
-
420
- const action = await prompt.choice("Action", [
421
- { value: "sync", label: "Sync Pi config", hint: "Update ~/.pi/agent/models.json" },
422
- { value: "run", label: "Run", hint: "Start server + Pi" },
423
- ...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
424
- { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
425
- { value: "__back", label: "← Back" },
426
- ], "sync");
412
+ console.log(renderSection("Profile", renderRows([
413
+ ["ID", pc.cyan(profile.id)],
414
+ ["Label", pc.bold(profile.label)],
415
+ ["Backend", backend.label],
416
+ ["Endpoint", pc.green(profile.baseUrl)],
417
+ ...(!isManaged ? [
418
+ ["Model", profile.modelPath ?? "unknown"],
419
+ ["MMProj", profile.mmprojPath ?? "none"],
420
+ ["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
421
+ ] : []),
422
+ ["Alias", pc.cyan(profile.modelAlias)],
423
+ ["Pi", piConfigured ? pc.green("configured") : pc.yellow("not synced")],
424
+ ])));
425
+
426
+ if (!isManaged && profile.commandArgv) {
427
+ console.log("");
428
+ console.log(pc.bold("llama-server command"));
429
+ console.log(pc.dim(buildPrettyCommand(profile)));
430
+ }
427
431
 
428
- if (action === "sync") {
429
- await syncPiConfig(profile);
430
- } else if (action === "run") {
431
- return await runProfile(profile);
432
- } else if (action === "server") {
433
- return await runProfile(profile, { with: "server" });
434
- } else if (action === "remove") {
435
- await removeProfileInteractive(profile.id);
432
+ const action = await prompt.choice("Action", [
433
+ { value: "sync", label: piConfigured ? `${pc.green("✓")} Pi config synced` : "Sync Pi config", hint: piConfigured ? "Already in ~/.pi/agent/models.json" : "Update ~/.pi/agent/models.json" },
434
+ { value: "run", label: "Run", hint: "Start server + Pi" },
435
+ ...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
436
+ { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
437
+ { value: "__back", label: "← Back", hint: "Choose another profile" },
438
+ ], "sync");
439
+
440
+ if (action === "__back") continue;
441
+ if (action === "sync") {
442
+ await syncPiConfig(profile);
443
+ continue;
444
+ }
445
+ if (action === "run") return await runProfile(profile);
446
+ if (action === "server") return await runProfile(profile, { with: "server" });
447
+ if (action === "remove") {
448
+ await removeProfileInteractive(profile.id);
449
+ return;
450
+ }
436
451
  }
437
452
  }
438
453
 
@@ -0,0 +1,21 @@
1
/**
 * Render a profile's server invocation as a multi-line shell command.
 *
 * The binary goes on the first line and each argument on its own line; a
 * `--flag` immediately followed by a non-flag token is kept together with that
 * token as a `--flag value` pair. Every line except the last ends with a
 * backslash continuation, so a command with no arguments is just the binary.
 *
 * @param {{ commandArgv?: unknown[] }} profile - Profile whose `commandArgv` lists the server arguments.
 * @param {string} [binary="llama-server"] - Executable name or path printed on the first line.
 * @returns {string} The formatted command.
 */
export function buildPrettyCommand(profile, binary = "llama-server") {
  // Normalise to strings so numeric entries cannot crash the startsWith checks.
  const argv = (profile.commandArgv ?? []).map(String);
  const lines = [quoteShell(binary)];
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    const next = argv[i + 1];
    // Compare against undefined rather than truthiness: an empty string is still a value.
    const takesValue = arg.startsWith("--") && next !== undefined && !next.startsWith("--");
    if (takesValue) {
      lines.push(` ${quoteShell(arg)} ${quoteShell(next)}`);
      i += 1; // the value was consumed together with its flag
    } else {
      // Standalone tokens are quoted too, so positional arguments with spaces stay intact.
      lines.push(` ${quoteShell(arg)}`);
    }
  }
  // Joining with the continuation guarantees the final line never ends in a backslash.
  return lines.join(" \\\n");
}

/**
 * Quote a value for display in a POSIX shell command line.
 *
 * Values made only of letters, digits and `_ / @ % + = : , . -` are returned
 * unchanged. Anything else (including the empty string) is wrapped in single
 * quotes, with each embedded single quote rewritten as `'"'"'`.
 *
 * @param {unknown} value - Value to quote; converted with `String()` first.
 * @returns {string} The shell-safe word.
 */
export function quoteShell(value) {
  const text = String(value);
  if (/^[A-Za-z0-9_/@%+=:,.-]+$/u.test(text)) return text;
  return `'${text.replace(/'/gu, `'"'"'`)}'`;
}
@@ -0,0 +1,82 @@
1
+ import { estimateMemory } from "./estimate.mjs";
2
+ import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
3
+
4
// KV cache precision options offered during model setup, in display order.
// Each option's label is the same text as its value; only the hint differs.
const CACHE_CHOICES = [
  ["bf16", "default: stable, good quality"],
  ["f16", "stable fallback, similar memory to bf16"],
  ["q8_0", "lower memory, usually safe"],
  ["q4_0", "lowest memory, quality/speed tradeoff"],
].map(([value, hint]) => ({ value, label: value, hint }));
10
+
11
/**
 * Walk the user through the setup choices for a local model profile.
 *
 * Prints the profile's current context size, KV cache types and sampling
 * summary, then prompts for the context window and the K/V cache precision.
 * The chosen values are applied with `applyRuntimeFlagOverrides`, after which
 * the sampling defaults and a memory estimate are printed and the user is
 * asked to confirm.
 *
 * @param {object} prompt - Prompt helper exposing `number`, `choice` and `yesNo`.
 * @param {object} profile - Profile to configure; `label` and `flags` are read.
 * @returns {Promise<object|null>} The updated profile, or `null` when the user declines to save.
 */
export async function configureLocalProfile(prompt, profile) {
  console.log("");
  const current = profile.flags;
  const overviewRows = [
    ["Model", pc.bold(profile.label)],
    ["Context", `${current.ctxSize.toLocaleString()} tokens`],
    ["KV cache", `${current.cacheTypeK}/${current.cacheTypeV}`],
    ["Sampling", samplingSummary(current)],
  ];
  console.log(renderSection("Model setup", renderRows(overviewRows)));
  console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
  console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));

  // Only these three settings are asked for; each prompt defaults to the current value.
  const chosenCtx = await prompt.number("Context window tokens", current.ctxSize, 1024, 1048576);
  const chosenK = await prompt.choice("K cache precision", CACHE_CHOICES, current.cacheTypeK);
  const chosenV = await prompt.choice("V cache precision", CACHE_CHOICES, current.cacheTypeV);
  const configured = applyRuntimeFlagOverrides(profile, {
    ctxSize: chosenCtx,
    cacheTypeK: chosenK,
    cacheTypeV: chosenV,
  });

  // Sampling values are displayed only; there is no prompt for them here.
  const samplingRows = [
    ["Temperature", "temperature"],
    ["Top-p", "topP"],
    ["Top-k", "topK"],
    ["Min-p", "minP"],
    ["Presence penalty", "presencePenalty"],
    ["Repeat penalty", "repeatPenalty"],
  ].map(([title, key]) => [title, configured.flags[key]]);

  console.log("");
  console.log(renderSection("Defaults", renderRows(samplingRows)));

  console.log("\n" + renderMemoryEstimate(configured));
  const accepted = await prompt.yesNo("Save profile with these settings?", true);
  return accepted ? configured : null;
}
41
+
42
/**
 * Return a copy of `profile` with runtime flag overrides applied.
 *
 * The overrides are merged over `profile.flags`, and the `--ctx-size`,
 * `--cache-type-k` and `--cache-type-v` entries of `commandArgv` are set to
 * match. `baseUrl` is rebuilt from the merged host and port; when either is
 * missing, the profile's existing `baseUrl` is kept instead of producing an
 * address containing "undefined". The input profile is not mutated.
 *
 * @param {object} profile - Source profile (`flags`, `commandArgv` and `baseUrl` are read).
 * @param {object} overrides - Flag values to merge over `profile.flags`.
 * @returns {object} A new profile object.
 */
export function applyRuntimeFlagOverrides(profile, overrides) {
  const flags = { ...profile.flags, ...overrides };
  const hasEndpoint = flags.host != null && flags.port != null;
  return {
    ...profile,
    flags,
    baseUrl: hasEndpoint ? `http://${flags.host}:${flags.port}/v1` : profile.baseUrl,
    commandArgv: updateArgv(profile.commandArgv ?? [], {
      "--ctx-size": flags.ctxSize,
      "--cache-type-k": flags.cacheTypeK,
      "--cache-type-v": flags.cacheTypeV,
    }),
  };
}

/**
 * Set `flag value` pairs in a copy of an argv array.
 *
 * A flag that is absent is appended together with its value. A flag that is
 * present has the value after it replaced. If the token after the flag is
 * itself a flag, or the flag is the last token, the value is inserted so that
 * no neighbouring argument is overwritten.
 *
 * @param {string[]} argv - Existing arguments; left untouched.
 * @param {Record<string, unknown>} values - Map of flag name to the value it should have.
 * @returns {string[]} The updated copy.
 */
function updateArgv(argv, values) {
  // A dash followed by a letter looks like a flag; "-1" is still treated as a value.
  const looksLikeFlag = (token) => /^--?[A-Za-z]/u.test(String(token));
  const next = [...argv];
  for (const [flag, value] of Object.entries(values)) {
    const text = String(value);
    const index = next.indexOf(flag);
    if (index === -1) {
      next.push(flag, text);
    } else if (index + 1 < next.length && !looksLikeFlag(next[index + 1])) {
      next[index + 1] = text;
    } else {
      // The flag had no value of its own: insert one instead of clobbering what follows.
      next.splice(index + 1, 0, text);
    }
  }
  return next;
}
65
+
66
/**
 * Build the "Memory" section shown before the save confirmation.
 *
 * Calls `estimateMemory` with the profile's model path, mmproj path and flags,
 * then lists the estimated total, the model size, the KV cache size (or
 * "unknown" when the estimate has none) and the estimator's note if present.
 * Any error raised while estimating or rendering is swallowed on purpose and
 * replaced by an "unavailable" section.
 *
 * @param {object} profile - Profile providing `modelPath`, `mmprojPath` and `flags`.
 * @returns {*} Whatever `renderSection` returns for the "Memory" section.
 */
function renderMemoryEstimate(profile) {
  try {
    const estimate = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
    const rows = [
      ["Estimated total", pc.bold(`~${formatBytes(estimate.totalBytes)}`)],
      ["Model", formatBytes(estimate.modelBytes)],
    ];

    let kvDetail = "unknown";
    if (estimate.kvBytes) {
      const { ctxSize, cacheTypeK, cacheTypeV } = profile.flags;
      kvDetail = `~${formatBytes(estimate.kvBytes)} (${ctxSize.toLocaleString()} ctx, ${cacheTypeK}/${cacheTypeV})`;
    }
    rows.push(["KV cache", kvDetail]);

    if (estimate.note) {
      rows.push(["Note", pc.yellow(estimate.note)]);
    }
    return renderSection("Memory", renderRows(rows));
  } catch {
    // Best effort: a failed estimate must not block profile setup.
    return renderSection("Memory", pc.dim("Estimate unavailable for this model."));
  }
}
79
+
80
/**
 * Summarise the main sampling flags on one line.
 *
 * @param {{ temperature: unknown, topP: unknown, topK: unknown }} flags - Flags to summarise.
 * @returns {string} Text of the form `temp T, top-p P, top-k K`.
 */
function samplingSummary(flags) {
  const parts = [
    `temp ${flags.temperature}`,
    `top-p ${flags.topP}`,
    `top-k ${flags.topK}`,
  ];
  return parts.join(", ");
}