offgrid-ai 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -51,7 +51,7 @@ curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/
51
51
 
52
52
  1. **Auto-detect everything.** Scans for GGUF models in LM Studio, HuggingFace, and Ollama directories. Reads model metadata (quantization, context size, vision, thinking mode) directly from the GGUF file. No presets, no manual configuration.
53
53
 
54
- 2. **One command to run.** `offgrid-ai` → pick a model → it figures out the flags, starts llama-server, syncs Pi config, and launches Pi.
54
+ 2. **One command to run.** `offgrid-ai` → pick a model → confirm context/KV memory settings on first setup → it starts llama-server, syncs Pi config, and launches Pi.
55
55
 
56
56
  3. **One model at a time.** Laptops have limited RAM. One server, one model, no confusion.
57
57
 
@@ -77,7 +77,7 @@ When you run `offgrid-ai` for the first time on a fresh machine:
77
77
  - **oMLX** — Apple Silicon optimized
78
78
  4. **Models** — If no models found, tells you where to get them.
79
79
 
80
- Subsequent runs skip everything that's already installed.
80
+ Subsequent runs skip everything that's already installed. When a GGUF model is set up for the first time, offgrid-ai asks only about the choices that affect memory use: context window size and KV cache precision. Sampling defaults are displayed for reference; you are not walked through a tuning wizard for them.
81
81
 
82
82
  ## Data directory
83
83
 
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.3.11",
3
+ "version": "0.3.12",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
7
7
  "bin": {
8
- "offgrid-ai": "./bin/offgrid-ai.mjs"
8
+ "offgrid-ai": "bin/offgrid-ai.mjs"
9
9
  },
10
10
  "files": [
11
11
  "bin/*.mjs",
package/src/cli.mjs CHANGED
@@ -12,6 +12,7 @@ import { estimateMemory } from "./estimate.mjs";
12
12
  import { pc, formatBytes, renderRows, renderSection, startInteractive, createPrompt, parseOptions } from "./ui.mjs";
13
13
  import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand, runUpdateCommand } from "./updates.mjs";
14
14
  import { removeInstallerPathEntries } from "./shell-path.mjs";
15
+ import { configureLocalProfile } from "./profile-setup.mjs";
15
16
 
16
17
  // ── Entry point ────────────────────────────────────────────────────────────
17
18
 
@@ -270,10 +271,12 @@ async function pickAndRun(prompt, profiles, newModels, managedItems) {
270
271
  const model = newModels.find((m) => m.path === modelPath);
271
272
  if (!model) throw new Error("Model not found.");
272
273
  const profile = await createProfileFromModel(model);
273
- await saveProfile(profile);
274
- console.log(pc.green(`Auto-configured: ${profile.label}`));
275
- await syncPiConfig(profile);
276
- return await runProfile(profile);
274
+ const configured = await configureLocalProfile(prompt, profile);
275
+ if (!configured) return;
276
+ await saveProfile(configured);
277
+ console.log(pc.green(`Saved profile: ${configured.label}`));
278
+ await syncPiConfig(configured);
279
+ return await runProfile(configured);
277
280
  }
278
281
 
279
282
  if (selected.startsWith("managed:")) {
@@ -0,0 +1,82 @@
1
+ import { estimateMemory } from "./estimate.mjs";
2
+ import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
3
+
4
// Options offered for the K and V cache precision prompts. Each entry is
// expanded to { value, label, hint }, with the label identical to the value.
const CACHE_CHOICES = [
  ["bf16", "default: stable, good quality"],
  ["f16", "stable fallback, similar memory to bf16"],
  ["q8_0", "lower memory, usually safe"],
  ["q4_0", "lowest memory, quality/speed tradeoff"],
].map(([value, hint]) => ({ value, label: value, hint }));
10
+
11
/**
 * Walk the user through the first-time setup of a local model profile.
 *
 * Prints a summary of the profile, prompts for the context window and the
 * K/V cache precision, prints the sampling defaults and a memory estimate for
 * the chosen settings, and finally asks whether to keep them.
 *
 * @param {object} prompt - Prompt helper; its `number`, `choice` and `yesNo`
 *   methods are awaited here.
 * @param {object} profile - Profile to configure; `label` and `flags` are read.
 * @returns {Promise<object|null>} The profile with the chosen overrides
 *   applied, or `null` when the user declines to save.
 */
export async function configureLocalProfile(prompt, profile) {
  console.log("");
  const overviewRows = [
    ["Model", pc.bold(profile.label)],
    ["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
    ["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
    ["Sampling", samplingSummary(profile.flags)],
  ];
  console.log(renderSection("Model setup", renderRows(overviewRows)));
  console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
  console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));

  // Ask one question at a time, in a fixed order: context, then K, then V.
  const overrides = {};
  overrides.ctxSize = await prompt.number("Context window tokens", profile.flags.ctxSize, 1024, 1048576);
  overrides.cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, profile.flags.cacheTypeK);
  overrides.cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, profile.flags.cacheTypeV);
  const configured = applyRuntimeFlagOverrides(profile, overrides);

  // Sampling values are displayed only; nothing here prompts for them.
  const defaultsRows = [
    ["Temperature", "temperature"],
    ["Top-p", "topP"],
    ["Top-k", "topK"],
    ["Min-p", "minP"],
    ["Presence penalty", "presencePenalty"],
    ["Repeat penalty", "repeatPenalty"],
  ].map(([label, key]) => [label, configured.flags[key]]);

  console.log("");
  console.log(renderSection("Defaults", renderRows(defaultsRows)));

  console.log(`\n${renderMemoryEstimate(configured)}`);
  const accepted = await prompt.yesNo("Save profile with these settings?", true);
  return accepted ? configured : null;
}
41
+
42
/**
 * Return a copy of `profile` with runtime flag overrides applied.
 *
 * The overrides are merged over `profile.flags`, and the context-size and
 * K/V cache-type arguments in `commandArgv` are rewritten to match the merged
 * flags. The input profile is not mutated.
 *
 * @param {object} profile - Source profile; `flags`, `baseUrl` and
 *   `commandArgv` are read.
 * @param {object} overrides - Flag values that take precedence over
 *   `profile.flags` (for example `ctxSize`, `cacheTypeK`, `cacheTypeV`).
 * @returns {object} A new profile object with updated `flags`, `baseUrl` and
 *   `commandArgv`.
 */
export function applyRuntimeFlagOverrides(profile, overrides) {
  const flags = { ...profile.flags, ...overrides };
  // Rebuild the URL only when both parts are known. Otherwise keep the
  // profile's existing baseUrl instead of producing
  // "http://undefined:undefined/v1".
  const hasEndpoint = flags.host != null && flags.port != null;
  const baseUrl = hasEndpoint ? `http://${flags.host}:${flags.port}/v1` : profile.baseUrl;
  return {
    ...profile,
    flags,
    baseUrl,
    commandArgv: updateArgv(profile.commandArgv ?? [], {
      "--ctx-size": flags.ctxSize,
      "--cache-type-k": flags.cacheTypeK,
      "--cache-type-v": flags.cacheTypeV,
    }),
  };
}
55
+
56
/**
 * Return a copy of `argv` in which each flag in `values` carries the given
 * value.
 *
 * Both spellings of a flag are recognised: the two-token form
 * (`--flag value`) and the single-token form (`--flag=value`). Every
 * occurrence is rewritten in place, so a repeated flag cannot keep a stale
 * value. A flag that does not appear at all is appended in the two-token form.
 *
 * @param {string[]} argv - Existing argument vector; it is not mutated.
 * @param {Record<string, unknown>} values - Map of flag name to desired value;
 *   each value is converted with `String()`.
 * @returns {string[]} A new argument vector.
 */
function updateArgv(argv, values) {
  const next = [...argv];
  for (const [flag, value] of Object.entries(values)) {
    const text = String(value);
    const inlinePrefix = `${flag}=`;
    let found = false;
    for (let i = 0; i < next.length; i += 1) {
      const token = next[i];
      if (token === flag) {
        next[i + 1] = text;
        found = true;
        // Skip the value slot so a value that happens to look like the flag
        // is not treated as another occurrence.
        i += 1;
      } else if (typeof token === "string" && token.startsWith(inlinePrefix)) {
        next[i] = `${inlinePrefix}${text}`;
        found = true;
      }
    }
    if (!found) next.push(flag, text);
  }
  return next;
}
65
+
66
/**
 * Build the "Memory" section for a profile.
 *
 * Calls `estimateMemory` with the profile's model path, mmproj path and flags
 * and renders the total, model and KV cache figures, plus the estimator's
 * note when it has one. Any error raised while estimating or rendering is
 * deliberately treated as "no estimate" and a placeholder section is returned.
 *
 * @param {object} profile - Profile; `modelPath`, `mmprojPath` and `flags`
 *   are read.
 * @returns {string} The rendered section.
 */
function renderMemoryEstimate(profile) {
  try {
    const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
    const rows = [
      ["Estimated total", pc.bold(`~${formatBytes(est.totalBytes)}`)],
      ["Model", formatBytes(est.modelBytes)],
    ];

    if (est.kvBytes) {
      const { ctxSize, cacheTypeK, cacheTypeV } = profile.flags;
      rows.push(["KV cache", `~${formatBytes(est.kvBytes)} (${ctxSize.toLocaleString()} ctx, ${cacheTypeK}/${cacheTypeV})`]);
    } else {
      rows.push(["KV cache", "unknown"]);
    }

    if (est.note) {
      rows.push(["Note", pc.yellow(est.note)]);
    }

    return renderSection("Memory", renderRows(rows));
  } catch {
    // Best effort: the estimate is informational, so never fail setup over it.
    return renderSection("Memory", pc.dim("Estimate unavailable for this model."));
  }
}
79
+
80
/**
 * Format the main sampling flags as a one-line summary.
 *
 * @param {object} flags - Flags object; `temperature`, `topP` and `topK` are read.
 * @returns {string} Text of the form "temp T, top-p P, top-k K".
 */
function samplingSummary(flags) {
  const { temperature, topP, topK } = flags;
  const parts = [`temp ${temperature}`, `top-p ${topP}`, `top-k ${topK}`];
  return parts.join(", ");
}