npm - offgrid-ai - Versions diffs - 0.3.11 → 0.3.12 - Mend

offgrid-ai 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -51,7 +51,7 @@ curl -fsSL https://raw.githubusercontent.com/eeshansrivastava89/offgrid-ai/main/
 1. **Auto-detect everything.** Scans for GGUF models in LM Studio, HuggingFace, and Ollama directories. Reads model metadata (quantization, context size, vision, thinking mode) directly from the GGUF file. No presets, no manual configuration.
-2. **One command to run.** `offgrid-ai` → pick a model → it figures out the flags, starts llama-server, syncs Pi config, and launches Pi.
+2. **One command to run.** `offgrid-ai` → pick a model → confirm context/KV memory settings on first setup → it starts llama-server, syncs Pi config, and launches Pi.
 3. **One model at a time.** Laptops have limited RAM. One server, one model, no confusion.
@@ -77,7 +77,7 @@ When you run `offgrid-ai` for the first time on a fresh machine:
    - **oMLX** — Apple Silicon optimized
 4. **Models** — If no models found, tells you where to get them.
-Subsequent runs skip everything that's already installed.
+Subsequent runs skip everything that's already installed. When a GGUF model is set up for the first time, offgrid-ai asks only for the memory-impacting choices: context window and KV cache precision. Sampling defaults are shown but not forced into a tuning wizard.
 ## Data directory

package/package.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "name": "offgrid-ai",
-  "version": "0.3.11",
+  "version": "0.3.12",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",
   "bin": {
-    "offgrid-ai": "./bin/offgrid-ai.mjs"
+    "offgrid-ai": "bin/offgrid-ai.mjs"
   },
   "files": [
     "bin/*.mjs",

package/src/cli.mjs CHANGED Viewed

@@ -12,6 +12,7 @@ import { estimateMemory } from "./estimate.mjs";
 import { pc, formatBytes, renderRows, renderSection, startInteractive, createPrompt, parseOptions } from "./ui.mjs";
 import { checkForUpdate, currentPackageVersion, detectInvocation, updateCommand, runUpdateCommand } from "./updates.mjs";
 import { removeInstallerPathEntries } from "./shell-path.mjs";
+import { configureLocalProfile } from "./profile-setup.mjs";
 // ── Entry point ────────────────────────────────────────────────────────────
@@ -270,10 +271,12 @@ async function pickAndRun(prompt, profiles, newModels, managedItems) {
     const model = newModels.find((m) => m.path === modelPath);
     if (!model) throw new Error("Model not found.");
     const profile = await createProfileFromModel(model);
-    await saveProfile(profile);
-    console.log(pc.green(`Auto-configured: ${profile.label}`));
-    await syncPiConfig(profile);
-    return await runProfile(profile);
+    const configured = await configureLocalProfile(prompt, profile);
+    if (!configured) return;
+    await saveProfile(configured);
+    console.log(pc.green(`Saved profile: ${configured.label}`));
+    await syncPiConfig(configured);
+    return await runProfile(configured);
   }
   if (selected.startsWith("managed:")) {

package/src/profile-setup.mjs ADDED Viewed

@@ -0,0 +1,82 @@
+import { estimateMemory } from "./estimate.mjs";
+import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
+const CACHE_CHOICES = [
+  { value: "bf16", label: "bf16", hint: "default: stable, good quality" },
+  { value: "f16", label: "f16", hint: "stable fallback, similar memory to bf16" },
+  { value: "q8_0", label: "q8_0", hint: "lower memory, usually safe" },
+  { value: "q4_0", label: "q4_0", hint: "lowest memory, quality/speed tradeoff" },
+];
+export async function configureLocalProfile(prompt, profile) {
+  console.log("");
+  console.log(renderSection("Model setup", renderRows([
+    ["Model", pc.bold(profile.label)],
+    ["Context", `${profile.flags.ctxSize.toLocaleString()} tokens`],
+    ["KV cache", `${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV}`],
+    ["Sampling", samplingSummary(profile.flags)],
+  ])));
+  console.log(pc.dim("Larger context windows use more memory. KV cache precision controls memory used by attention history."));
+  console.log(pc.dim("Sampling defaults are shown for transparency; you can edit command.json later if needed.\n"));
+  const ctxSize = await prompt.number("Context window tokens", profile.flags.ctxSize, 1024, 1048576);
+  const cacheTypeK = await prompt.choice("K cache precision", CACHE_CHOICES, profile.flags.cacheTypeK);
+  const cacheTypeV = await prompt.choice("V cache precision", CACHE_CHOICES, profile.flags.cacheTypeV);
+  const configured = applyRuntimeFlagOverrides(profile, { ctxSize, cacheTypeK, cacheTypeV });
+  console.log("");
+  console.log(renderSection("Defaults", renderRows([
+    ["Temperature", configured.flags.temperature],
+    ["Top-p", configured.flags.topP],
+    ["Top-k", configured.flags.topK],
+    ["Min-p", configured.flags.minP],
+    ["Presence penalty", configured.flags.presencePenalty],
+    ["Repeat penalty", configured.flags.repeatPenalty],
+  ])));
+  console.log("\n" + renderMemoryEstimate(configured));
+  if (!(await prompt.yesNo("Save profile with these settings?", true))) return null;
+  return configured;
+}
+export function applyRuntimeFlagOverrides(profile, overrides) {
+  const flags = { ...profile.flags, ...overrides };
+  return {
+    ...profile,
+    flags,
+    baseUrl: `http://${flags.host}:${flags.port}/v1`,
+    commandArgv: updateArgv(profile.commandArgv ?? [], {
+      "--ctx-size": flags.ctxSize,
+      "--cache-type-k": flags.cacheTypeK,
+      "--cache-type-v": flags.cacheTypeV,
+    }),
+  };
+}
+function updateArgv(argv, values) {
+  const next = [...argv];
+  for (const [flag, value] of Object.entries(values)) {
+    const index = next.indexOf(flag);
+    if (index === -1) next.push(flag, String(value));
+    else next[index + 1] = String(value);
+  }
+  return next;
+}
+function renderMemoryEstimate(profile) {
+  try {
+    const est = estimateMemory(profile.modelPath, profile.mmprojPath, null, profile.flags);
+    return renderSection("Memory", renderRows([
+      ["Estimated total", pc.bold(`~${formatBytes(est.totalBytes)}`)],
+      ["Model", formatBytes(est.modelBytes)],
+      ["KV cache", est.kvBytes ? `~${formatBytes(est.kvBytes)} (${profile.flags.ctxSize.toLocaleString()} ctx, ${profile.flags.cacheTypeK}/${profile.flags.cacheTypeV})` : "unknown"],
+      ...(est.note ? [["Note", pc.yellow(est.note)]] : []),
+    ]));
+  } catch {
+    return renderSection("Memory", pc.dim("Estimate unavailable for this model."));
+  }
+}
+function samplingSummary(flags) {
+  return `temp ${flags.temperature}, top-p ${flags.topP}, top-k ${flags.topK}`;
+}