npm - offgrid-ai - Versions diffs - 0.3.11 → 0.3.13 - Mend

offgrid-ai 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED

@@ -51,7 +51,7 @@ curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/
 1. **Auto-detect everything.** Scans for GGUF models in LM Studio, HuggingFace, and Ollama directories. Reads model metadata (quantization, context size, vision, thinking mode) directly from the GGUF file. No presets, no manual configuration.
-2. **One command to run.** `offgrid-ai` → pick a model → it figures out the flags, starts llama-server, syncs Pi config, and launches Pi.
+2. **One command to run.** `offgrid-ai` → pick a model → confirm context/KV memory settings on first setup → it starts llama-server, syncs Pi config, and launches Pi.
 3. **One model at a time.** Laptops have limited RAM. One server, one model, no confusion.
@@ -77,7 +77,7 @@ When you run `offgrid-ai` for the first time on a fresh machine:
    - **oMLX** — Apple Silicon optimized
 4. **Models** — If no models found, tells you where to get them.
-Subsequent runs skip everything that's already installed.
+Subsequent runs skip everything that's already installed. When a GGUF model is set up for the first time, offgrid-ai asks only for the memory-impacting choices: context window and KV cache precision. Sampling defaults are shown but not forced into a tuning wizard.
 ## Data directory

package/package.json CHANGED

@@ -1,11 +1,11 @@
 {
   "name": "offgrid-ai",
-  "version": "0.3.11",
+  "version": "0.3.13",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",
   "bin": {
-    "offgrid-ai": "./bin/offgrid-ai.mjs"
+    "offgrid-ai": "bin/offgrid-ai.mjs"
   },
   "files": [
     "bin/*.mjs",

package/src/cli.mjs CHANGED

@@ -12,6 +12,8 @@ import { estimateMemory } from "./estimate.mjs";
 import { pc, formatBytes, renderRows, renderSection, startInteractive, createPrompt, parseOptions } from "./ui.mjs";
 import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand, runUpdateCommand } from "./updates.mjs";
 import { removeInstallerPathEntries } from "./shell-path.mjs";
+import { configureLocalProfile } from "./profile-setup.mjs";
+import { buildPrettyCommand } from "./command.mjs";
 // ── Entry point ────────────────────────────────────────────────────────────
@@ -200,15 +202,21 @@ export async function mainFlow() {
     }
     // Pick what to do
-    const action = await prompt.choice("What next?", [
-      { value: "run", label: "Run a model", hint: "Start server and launch Pi" },
-      ...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
-      { value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
-    ], "run");
-    if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
-    if (action === "manage") return await manageProfiles(prompt, profiles);
-    if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
+    while (true) {
+      const action = await prompt.choice("What next?", [
+        { value: "run", label: "Run a model", hint: "Start server and launch Pi" },
+        ...(profiles.length > 0 ? [{ value: "manage", label: "Manage profiles", hint: "Sync, remove, or inspect" }] : []),
+        { value: "benchmark", label: "Benchmark", hint: "Run a benchmark prompt" },
+      ], "run");
+      if (action === "run") return await pickAndRun(prompt, profiles, newModels, managedItems);
+      if (action === "manage") {
+        const result = await manageProfiles(prompt, profiles);
+        if (result === "back") continue;
+        return result;
+      }
+      if (action === "benchmark") return await benchmarkFlow(prompt, profiles);
+    }
   } finally {
     prompt.close();
   }
@@ -270,10 +278,12 @@ async function pickAndRun(prompt, profiles, newModels, managedItems) {
     const model = newModels.find((m) => m.path === modelPath);
     if (!model) throw new Error("Model not found.");
     const profile = await createProfileFromModel(model);
-    await saveProfile(profile);
-    console.log(pc.green(`Auto-configured: ${profile.label}`));
-    await syncPiConfig(profile);
-    return await runProfile(profile);
+    const configured = await configureLocalProfile(prompt, profile);
+    if (!configured) return;
+    await saveProfile(configured);
+    console.log(pc.green(`Saved profile: ${configured.label}`));
+    await syncPiConfig(configured);
+    return await runProfile(configured);
   }
   if (selected.startsWith("managed:")) {
@@ -381,58 +391,63 @@ async function runProfile(profile, options = {}) {
 // ── Manage profiles ─────────────────────────────────────────────────────────
 async function manageProfiles(prompt, profiles) {
-  const choices = profiles.map((p) => ({
-    value: p.id,
-    label: p.label,
-    hint: `${p.modelAlias} · ${p.baseUrl}`,
-  }));
-  choices.push({ value: "__back", label: "← Back" });
+  while (true) {
+    const choices = profiles.map((p) => ({
+      value: p.id,
+      label: p.label,
+      hint: `${p.modelAlias} · ${p.baseUrl}`,
+    }));
+    choices.push({ value: "__back", label: "← Back" });
-  const selected = await prompt.choice("Which profile?", choices, choices[0].value);
-  if (selected === "__back") return;
+    const selected = await prompt.choice("Which profile?", choices, choices[0].value);
+    if (selected === "__back") return "back";
-  const profile = await readProfile(selected);
-  const backend = backendFor(profile.backend);
-  const isManaged = backend.type === "managed-server";
+    const profile = await readProfile(selected);
+    const backend = backendFor(profile.backend);
+    const isManaged = backend.type === "managed-server";
+    const piConfigured = await hasPiModel(profile);
-  // Show profile details
-  console.log("");
-  console.log(renderSection("Profile", renderRows([
-    ["ID", pc.cyan(profile.id)],
-    ["Label", pc.bold(profile.label)],
-    ["Backend", backend.label],
-    ["Endpoint", pc.green(profile.baseUrl)],
-    ...(!isManaged ? [
-      ["Model", profile.modelPath ?? "unknown"],
-      ["MMProj", profile.mmprojPath ?? "none"],
-      ["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
-    ] : []),
-    ["Alias", pc.cyan(profile.modelAlias)],
-    ["Pi", (await hasPiModel(profile)) ? pc.green("configured") : pc.yellow("not synced")],
-  ])));
-  if (!isManaged && profile.commandArgv) {
+    // Show profile details
     console.log("");
-    console.log(pc.bold("Auto-detected flags"));
-    console.log(pc.dim(profile.commandArgv.join(" ")));
-  }
-  const action = await prompt.choice("Action", [
-    { value: "sync", label: "Sync Pi config", hint: "Update ~/.pi/agent/models.json" },
-    { value: "run", label: "Run", hint: "Start server + Pi" },
-    ...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
-    { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
-    { value: "__back", label: "← Back" },
-  ], "sync");
+    console.log(renderSection("Profile", renderRows([
+      ["ID", pc.cyan(profile.id)],
+      ["Label", pc.bold(profile.label)],
+      ["Backend", backend.label],
+      ["Endpoint", pc.green(profile.baseUrl)],
+      ...(!isManaged ? [
+        ["Model", profile.modelPath ?? "unknown"],
+        ["MMProj", profile.mmprojPath ?? "none"],
+        ["Memory", existsSync(profile.modelPath) ? formatBytes(statSync(profile.modelPath).size) : "unknown"],
+      ] : []),
+      ["Alias", pc.cyan(profile.modelAlias)],
+      ["Pi", piConfigured ? pc.green("configured") : pc.yellow("not synced")],
+    ])));
+    if (!isManaged && profile.commandArgv) {
+      console.log("");
+      console.log(pc.bold("llama-server command"));
+      console.log(pc.dim(buildPrettyCommand(profile)));
+    }
-  if (action === "sync") {
-    await syncPiConfig(profile);
-  } else if (action === "run") {
-    return await runProfile(profile);
-  } else if (action === "server") {
-    return await runProfile(profile, { with: "server" });
-  } else if (action === "remove") {
-    await removeProfileInteractive(profile.id);
+    const action = await prompt.choice("Action", [
+      { value: "sync", label: piConfigured ? `${pc.green("✓")} Pi config synced` : "Sync Pi config", hint: piConfigured ? "Already in ~/.pi/agent/models.json" : "Update ~/.pi/agent/models.json" },
+      { value: "run", label: "Run", hint: "Start server + Pi" },
+      ...(isManaged ? [] : [{ value: "server", label: "Server only", hint: "Start server, no harness" }]),
+      { value: "remove", label: "Remove", hint: "Delete profile + Pi config" },
+      { value: "__back", label: "← Back", hint: "Choose another profile" },
+    ], "sync");
+    if (action === "__back") continue;
+    if (action === "sync") {
+      await syncPiConfig(profile);
+      continue;
+    }
+    if (action === "run") return await runProfile(profile);
+    if (action === "server") return await runProfile(profile, { with: "server" });
+    if (action === "remove") {
+      await removeProfileInteractive(profile.id);
+      return;
+    }
   }
 }

package/src/command.mjs ADDED

@@ -0,0 +1,21 @@
+export function buildPrettyCommand(profile, binary = "llama-server") {
+  const argv = profile.commandArgv ?? [];
+  const lines = [`${quoteShell(binary)} \\`];
+  for (let i = 0; i < argv.length; i++) {
+    const arg = argv[i];
+    const next = argv[i + 1];
+    const hasValue = arg.startsWith("--") && next && !next.startsWith("--");
+    if (hasValue) {
+      lines.push(`  ${arg} ${quoteShell(next)}${i + 2 < argv.length ? " \\" : ""}`);
+      i += 1;
+    } else {
+      lines.push(`  ${arg}${i + 1 < argv.length ? " \\" : ""}`);
+    }
+  }
+  return lines.join("\n");
+}
+export function quoteShell(value) {
+  const text = String(value);
+  return /^[A-Za-z0-9_/@%+=:,.-]+$/u.test(text) ? text : `'${text.replace(/'/gu, `'"'"'`)}'`;
+}

package/src/profile-setup.mjs ADDED

@@ -0,0 +1,82 @@
+import { estimateMemory } from "./estimate.mjs";
+import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
+const CACHE_CHOICES = [
+  { value: "bf16", label: "bf16", hint: "default: stable, good quality" },
+  { value: "f16", label: "f16", hint: "stable fallback, similar memory to bf16" },
+  { value: "q8_0", label: "q8_0", hint: "lower memory, usually safe" },
+  { value: "q4_0", label: "q4_0", hint: "lowest memory, quality/speed tradeoff" },
+];
+export async function configureLocalProfile(prompt, profile) {
+  console.log("");
+  console.log(renderSection("Model setup", renderRows([
+    ["Model", pc.bold(profile.label)],
+    ["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
+    ["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
+    ["Sampling", samplingSummary(profile.flags)],
+  ])));
+  console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
+  console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));
+  const ctxSize = await prompt.number("Context window tokens", profile.flags.ctxSize, 1024, 1048576);
+  const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, profile.flags.cacheTypeK);
+  const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, profile.flags.cacheTypeV);
+  const configured = applyRuntimeFlagOverrides(profile, { ctxSize, cacheTypeK, cacheTypeV });
+  console.log("");
+  console.log(renderSection("Defaults", renderRows([
+    ["Temperature", configured.flags.temperature],
+    ["Top-p", configured.flags.topP],
+    ["Top-k", configured.flags.topK],
+    ["Min-p", configured.flags.minP],
+    ["Presence penalty", configured.flags.presencePenalty],
+    ["Repeat penalty", configured.flags.repeatPenalty],
+  ])));
+  console.log("\n" + renderMemoryEstimate(configured));
+  if (!(await prompt.yesNo("Save profile with these settings?", true))) return null;
+  return configured;
+}
+export function applyRuntimeFlagOverrides(profile, overrides) {
+  const flags = { ...profile.flags, ...overrides };
+  return {
+    ...profile,
+    flags,
+    baseUrl: `http://${flags.host}:${flags.port}/v1`,
+    commandArgv: updateArgv(profile.commandArgv ?? [], {
+      "--ctx-size": flags.ctxSize,
+      "--cache-type-k": flags.cacheTypeK,
+      "--cache-type-v": flags.cacheTypeV,
+    }),
+  };
+}
+function updateArgv(argv, values) {
+  const next = [...argv];
+  for (const [flag, value] of Object.entries(values)) {
+    const index = next.indexOf(flag);
+    if (index === -1) next.push(flag, String(value));
+    else next[index + 1] = String(value);
+  }
+  return next;
+}
+function renderMemoryEstimate(profile) {
+  try {
+    const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
+    return renderSection("Memory", renderRows([
+      ["Estimated total", pc.bold(`~${formatBytes(est.totalBytes)}`)],
+      ["Model", formatBytes(est.modelBytes)],
+      ["KV cache", est.kvBytes ? `~${formatBytes(est.kvBytes)} (${profile.flags.ctxSize.toLocaleString()} ctx, ${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV})` : "unknown"],
+      ...(est.note ? [["Note", pc.yellow(est.note)]] : []),
+    ]));
+  } catch {
+    return renderSection("Memory", pc.dim("Estimate unavailable for this model."));
+  }
+}
+function samplingSummary(flags) {
+  return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
+}