npm - offgrid-ai - Versions diffs - 0.8.14 → 0.9.0 - Mend

offgrid-ai 0.8.14 → 0.9.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +3 -1
package/package.json +3 -3
package/src/backends.mjs +34 -38
package/src/benchmark/finalize.mjs +198 -0
package/src/benchmark/flow.mjs +237 -0
package/src/benchmark/metrics.mjs +152 -0
package/src/benchmark/pi-runner.mjs +252 -0
package/src/benchmark/prepare.mjs +120 -0
package/src/benchmark/repo.mjs +77 -0
package/src/benchmark/shared.mjs +54 -0
package/src/benchmark/stream-renderer.mjs +274 -0
package/src/benchmark.mjs +10 -1222
package/src/cli.mjs +2 -2
package/src/commands/main.mjs +2 -2
package/src/commands/onboard.mjs +6 -2
package/src/config.mjs +8 -2
package/src/harness-pi.mjs +1 -1
package/src/managed.mjs +3 -3
package/src/model-catalog.mjs +2 -1
package/src/process.mjs +29 -21
package/src/runtime.mjs +11 -0
package/src/postinstall.mjs +0 -106

package/README.md CHANGED

@@ -41,12 +41,14 @@ This installs offgrid-ai and anything else it needs. Then open a new terminal wi
 offgrid-ai
 ```
-If you already have Node.js installed, you can also use:
+If you already have Node.js installed, you can also install with npm:
 ```bash
 npm install -g offgrid-ai@latest --prefer-online
 ```
+The curl installer is recommended for first-time setup because it also verifies the global npm bin directory is on your PATH. The npm package itself does not run install scripts or mutate shell config during `npm install`.
 ### 2. Pick a model
 The first time you run offgrid-ai, it looks for models already on your machine. If it does not find any, it tells you how to get one.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "offgrid-ai",
-  "version": "0.8.14",
+  "version": "0.9.0",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",
@@ -11,6 +11,7 @@
     "bin/*.mjs",
     "src/*.mjs",
     "src/commands/*.mjs",
+    "src/benchmark/*.mjs",
     "install.sh"
   ],
   "publishConfig": {
@@ -31,12 +32,11 @@
     "start": "node bin/offgrid-ai.mjs",
     "test": "node --test test/*.mjs",
     "test:integration": "OFFGRID_INTEGRATION=1 node --test test/integration/*.mjs",
-    "lint": "eslint src/*.mjs src/commands/*.mjs bin/*.mjs",
+    "lint": "eslint src/*.mjs src/commands/*.mjs src/benchmark/*.mjs bin/*.mjs",
     "check:privacy": "node scripts/privacy-gate.mjs",
     "release:check": "bash scripts/release-check.sh",
     "release:check:fast": "bash scripts/release-check.sh --skip-install --skip-manual",
     "prepack": "npm run check:privacy",
-    "postinstall": "node src/postinstall.mjs",
     "pretest": "npm run lint"
   },
   "dependencies": {

package/src/backends.mjs CHANGED

@@ -87,51 +87,47 @@ export function defaultFlagsForBackend(backendId) {
 // ── Ollama model discovery ──────────────────────────────────────────────
 async function scanOllamaModels() {
-  try {
-    const response = await fetch(`${BACKENDS.ollama.apiBaseUrl}/api/tags`, { signal: AbortSignal.timeout(3000) });
-    if (!response.ok) return [];
-    const body = await response.json();
-    if (!Array.isArray(body?.models)) return [];
-    return body.models
-      .filter((model) => isLocalOllamaModel(model))
-      .map((model) => ({
-        id: model.name,
-        label: ollamaLabel(model.name),
-        aliasSuggestion: model.name,
-        sizeBytes: model.size ?? 0,
-        quant: model.details?.quantization_level,
-        family: model.details?.family,
-        backend: "ollama",
-        source: "ollama",
-      })).sort((a, b) => a.label.localeCompare(b.label));
-  } catch {
-    return [];
+  const response = await fetch(`${BACKENDS.ollama.apiBaseUrl}/api/tags`, { signal: AbortSignal.timeout(3000) });
+  if (!response.ok) {
+    throw new Error(`Ollama /api/tags returned ${response.status} ${response.statusText}`);
   }
+  const body = await response.json();
+  if (!Array.isArray(body?.models)) return [];
+  return body.models
+    .filter((model) => isLocalOllamaModel(model))
+    .map((model) => ({
+      id: model.name,
+      label: ollamaLabel(model.name),
+      aliasSuggestion: model.name,
+      sizeBytes: model.size ?? 0,
+      quant: model.details?.quantization_level,
+      family: model.details?.family,
+      backend: "ollama",
+      source: "ollama",
+    })).sort((a, b) => a.label.localeCompare(b.label));
 }
 // ── oMLX model discovery ───────────────────────────────────────────────
 async function scanOmlxModels() {
-  try {
-    const response = await fetch(`${BACKENDS.omlx.defaultBaseUrl}/models`, { signal: AbortSignal.timeout(3000) });
-    if (!response.ok) return [];
-    const body = await response.json();
-    if (!Array.isArray(body?.data)) return [];
-    return body.data
-      .filter((model) => isChatOmlxModel(model))
-      .map((model) => ({
-        id: model.id,
-        label: omlxLabel(model.id),
-        aliasSuggestion: model.id,
-        sizeBytes: 0,
-        quant: null,
-        family: null,
-        backend: "omlx",
-        source: "omlx",
-      })).sort((a, b) => a.label.localeCompare(b.label));
-  } catch {
-    return [];
+  const response = await fetch(`${BACKENDS.omlx.defaultBaseUrl}/models`, { signal: AbortSignal.timeout(3000) });
+  if (!response.ok) {
+    throw new Error(`oMLX /models returned ${response.status} ${response.statusText}`);
   }
+  const body = await response.json();
+  if (!Array.isArray(body?.data)) return [];
+  return body.data
+    .filter((model) => isChatOmlxModel(model))
+    .map((model) => ({
+      id: model.id,
+      label: omlxLabel(model.id),
+      aliasSuggestion: model.id,
+      sizeBytes: 0,
+      quant: null,
+      family: null,
+      backend: "omlx",
+      source: "omlx",
+    })).sort((a, b) => a.label.localeCompare(b.label));
 }
 // ── Labels ──────────────────────────────────────────────────────────────

package/src/benchmark/finalize.mjs ADDED

@@ -0,0 +1,198 @@
+// ── Unload model from server memory after benchmark ────────────────────────────
+import { backendFor } from "../backends.mjs";
+import { apiRootUrl } from "../process.mjs";
+import { existsSync } from "node:fs";
+import { readFile, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import { pc, renderRows, renderSection } from "../ui.mjs";
+export async function unloadModelFromServer(profile) {
+  const backend = backendFor(profile.backend);
+  if (backend.id === "ollama") {
+    const apiBaseUrl = apiRootUrl(profile.baseUrl || backend.apiBaseUrl || "");
+    try {
+      await fetch(`${apiBaseUrl}/api/generate`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ model: profile.modelAlias, prompt: "", stream: false, keep_alive: 0 }),
+        signal: AbortSignal.timeout(10000),
+      });
+      return { unloaded: true, backend: backend.id };
+    } catch (err) {
+      return { unloaded: false, backend: backend.id, error: err.message };
+    }
+  }
+  if (backend.id === "llama-cpp" || backend.id === "llama-cpp-mtp") {
+    // llama.cpp unloads when the server process exits; no HTTP unload API exists.
+    // If offgrid-ai started the server, stopProfile already handled it.
+    return { unloaded: false, backend: backend.id, reason: "stop server to unload" };
+  }
+  if (backend.id === "omlx") {
+    // oMLX does not expose a model-unload endpoint. The model stays resident
+    // until the oMLX server process is stopped.
+    return { unloaded: false, backend: backend.id, reason: "no unload API available" };
+  }
+  return { unloaded: false, backend: backend.id, reason: "unsupported backend" };
+}
+export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
+  const metadataPath = join(runDirectory, "metadata.json");
+  const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
+  const now = new Date();
+  const timestamp = now.toISOString();
+  const kind = metadata.kind ?? "visual";
+  const isDs = kind === "data-science";
+  const requiredFile = isDs ? "analysis.ipynb" : "index.html";
+  const requiredPath = join(runDirectory, requiredFile);
+  const outputFiles = [];
+  for (const candidate of [requiredFile, isDs ? "summary.json" : "preview.png", isDs ? "chart-distribution.png" : "preview.webm", "preview.mp4"]) {
+    if (existsSync(join(runDirectory, candidate))) {
+      outputFiles.push(candidate);
+    }
+  }
+  const success = existsSync(requiredPath) && (await readFile(requiredPath, "utf8")).trim().length > 0;
+  const hasTurns = runResult.agentTurns > 0;
+  let failureReason = null;
+  if (runResult.error) {
+    failureReason = typeof runResult.error === "string" ? runResult.error : (runResult.error.message ?? "Unknown error");
+  } else if (!hasTurns) {
+    failureReason = "The model did not produce any response turns.";
+  } else if (!success) {
+    if (runResult.toolCalls === 0) {
+      failureReason = `The model finished without writing the required output file (${requiredFile}). It may have returned the response as chat text instead of using the write tool.`;
+    } else {
+      failureReason = `The required output file (${requiredFile}) was missing or empty after the run.`;
+    }
+  }
+  const failed = failureReason !== null;
+  metadata.status = failed ? "failed" : "completed";
+  metadata.updatedAt = timestamp;
+  if (failed) {
+    metadata.failedAt = timestamp;
+  } else {
+    metadata.completedAt = timestamp;
+  }
+  const totalTokens = runResult.promptTokens + runResult.completionTokens;
+  metadata.runner.tokenMetrics = {
+    reported: hasTurns,
+    promptTokens: runResult.promptTokens,
+    completionTokens: runResult.completionTokens,
+    totalTokens,
+  };
+  metadata.runner.speedMetrics = speedMetrics;
+  metadata.runner.metricSource = speedMetrics?.metricSource ?? null;
+  metadata.results = {
+    wallClockMs: runResult.wallClockMs,
+    agentTurns: runResult.agentTurns,
+    toolCalls: runResult.toolCalls,
+    toolResults: runResult.toolResults,
+    success,
+    outputFiles,
+    perTurn: runResult.perTurn,
+  };
+  if (failureReason) {
+    metadata.error = { message: failureReason, ...(typeof runResult.error === "object" && runResult.error?.stack ? { stack: runResult.error.stack } : {}) };
+  } else if (runResult.error) {
+    metadata.error = typeof runResult.error === "string"
+      ? { message: runResult.error }
+      : { message: runResult.error.message ?? "Unknown error", ...(runResult.error.stack ? { stack: runResult.error.stack } : {}) };
+  }
+  await writeFile(metadataPath, JSON.stringify(metadata, null, 2) + "\n", "utf8");
+  return metadata;
+}
+function formatMetric(value, formatter) {
+  if (value === null || value === undefined || !Number.isFinite(value)) return pc.dim("—");
+  return formatter(value);
+}
+function formatMs(ms) {
+  return formatMetric(ms, (n) => (n < 1000 ? `${Math.round(n)} ms` : `${(n / 1000).toFixed(1)} s`));
+}
+function formatNumber(n) {
+  return formatMetric(n, (v) => v.toLocaleString());
+}
+function formatTokPerSec(n) {
+  return formatMetric(n, (v) => `${v.toFixed(1)} tok/s`);
+}
+function formatPercent(n) {
+  return formatMetric(n, (v) => `${(v * 100).toFixed(0)} %`);
+}
+export function renderBenchmarkSummary(metadata) {
+  const { status, results, runner, error } = metadata;
+  const agentRows = [
+    ["Status", status === "completed" ? pc.green("completed") : pc.red(status ?? "failed")],
+    ["Duration", formatMs(results?.wallClockMs)],
+    ["Agent turns", formatNumber(results?.agentTurns)],
+    ["Input tokens", formatNumber(runner?.tokenMetrics?.promptTokens)],
+    ["Output tokens", formatNumber(runner?.tokenMetrics?.completionTokens)],
+    ["Total tokens", formatNumber(runner?.tokenMetrics?.totalTokens)],
+    ["Tool calls", formatNumber(results?.toolCalls)],
+    ["Tool results", formatNumber(results?.toolResults)],
+    ["Output files", (results?.outputFiles?.length ?? 0) > 0 ? results.outputFiles.join(", ") : pc.dim("—")],
+  ];
+  console.log("");
+  console.log(renderSection("Benchmark Result", renderRows(agentRows)));
+  if (status === "completed" && runner?.speedMetrics) {
+    const speed = runner.speedMetrics;
+    const speedRows = [
+      ["Prefill tok/s", formatTokPerSec(speed.prefillTokensPerSecond)],
+      ["Generation tok/s", formatTokPerSec(speed.generationTokensPerSecond)],
+      ["TTFT", formatMs(speed.ttftMs)],
+      ["Speculative decode", formatPercent(speed.speculativeDecodeAcceptance)],
+      ["KV cache tokens", formatNumber(speed.kvCacheTokens)],
+      ["Model load time", formatMs(speed.modelLoadMs)],
+      ["Metric source", speed.metricSource ?? pc.dim("—")],
+    ];
+    console.log(renderSection("Speed Metrics", renderRows(speedRows)));
+  } else if (error) {
+    const wrappedError = wrapText(error.message ?? "Unknown error");
+    console.log(renderSection("Error", pc.red(wrappedError)));
+    if (error.message?.includes("write tool") || error.message?.includes("required output file")) {
+      const tip = wrapText("Tip: This usually means the model returned the answer as chat text instead of writing the file. Try a model with stronger tool-use support, or run the prompt manually.", 64);
+      console.log(pc.dim("\n" + tip));
+    }
+  }
+}
+function wrapText(text, width = 64) {
+  if (!text) return "";
+  const words = text.split(/\s+/);
+  const lines = [];
+  let current = "";
+  for (const word of words) {
+    if ((current + " " + word).trim().length > width) {
+      if (current) lines.push(current.trim());
+      current = word;
+    } else {
+      current = current ? `${current} ${word}` : word;
+    }
+  }
+  if (current) lines.push(current.trim());
+  return lines.join("\n");
+}

package/src/benchmark/flow.mjs ADDED

@@ -0,0 +1,237 @@
+// ── Benchmark command flows ───────────────────────────────────────────────────
+import { join } from "node:path";
+import { ensureDirs } from "../config.mjs";
+import { backendFor } from "../backends.mjs";
+import { hasPi, hasPiModel, syncPiConfig } from "../harness-pi.mjs";
+import { serverReady, startServer, waitForReady, stopProfile } from "../process.mjs";
+import { loadProfiles } from "../profiles.mjs";
+import { pc, createPrompt } from "../ui.mjs";
+import { linkBenchmarkRepo } from "./repo.mjs";
+import { loadBenchmarks } from "./shared.mjs";
+import { prepareBenchmarkRun } from "./prepare.mjs";
+import { runBenchmarkInPi } from "./pi-runner.mjs";
+import { queryServerMetrics } from "./metrics.mjs";
+import { unloadModelFromServer } from "./finalize.mjs";
+import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
+function benchmarkModelSource(profile) {
+  if (!profile) return "cloud";
+  return profile.providerId === "llama-cpp-mtp" ? "llama-cpp-mtp" : profile.backend === "ollama" ? "ollama" : profile.backend === "omlx" ? "omlx" : "llama-cpp";
+}
+async function chooseBenchmarkAction(prompt, canRun) {
+  const choices = [
+    { value: "run", label: "Run Benchmark", hint: "Automated with Pi" },
+    { value: "prepare", label: "Prepare Benchmark (manual)", hint: "Copy prompt and run yourself" },
+  ];
+  return await prompt.choice("Action", canRun ? choices : choices.filter((c) => c.value === "prepare"), canRun ? "run" : "prepare");
+}
+async function ensureServerForBenchmark(profile) {
+  const backend = backendFor(profile.backend);
+  if (await serverReady(profile.baseUrl)) {
+    console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
+    return { started: false };
+  }
+  if (backend.type === "managed-server") {
+    throw new Error(`${backend.label} is not running at ${profile.baseUrl}. Start it and try again.`);
+  }
+  console.log(pc.dim(`Starting ${backend.label} for ${profile.label}...`));
+  const state = await startServer(profile);
+  await waitForReady(profile, state?.pid, state?.rawLogPath);
+  console.log(pc.green(`[ready] ${profile.baseUrl}/models`));
+  return { started: true, state };
+}
+export async function runPreparedBenchmark(profile, runDirectory, options = {}) {
+  const controller = new AbortController();
+  if (options.signal) {
+    options.signal.addEventListener("abort", () => controller.abort(), { once: true });
+  }
+  let serverStarted = false;
+  let metadata = null;
+  const onSigint = () => {
+    controller.abort();
+  };
+  process.on("SIGINT", onSigint);
+  try {
+    if (!(await hasPi())) {
+      console.log(pc.yellow("\nPi is not installed. Run prepared for manual execution."));
+      return metadata;
+    }
+    const serverState = await ensureServerForBenchmark(profile);
+    serverStarted = serverState.started;
+    if (!(await hasPiModel(profile))) {
+      await syncPiConfig(profile);
+    }
+    const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
+    let speedMetrics = null;
+    if (!runResult.error) {
+      try {
+        speedMetrics = await queryServerMetrics(profile);
+      } catch (err) {
+        runResult.error = { message: `Speed metrics query failed: ${err.message}` };
+      }
+    }
+    metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics);
+    renderBenchmarkSummary(metadata);
+  } catch (err) {
+    const failedResult = {
+      error: { message: err.message },
+      wallClockMs: null,
+      agentTurns: 0,
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: 0,
+      cacheRead: 0,
+      cacheWrite: 0,
+      toolCalls: 0,
+      toolResults: 0,
+      perTurn: [],
+    };
+    metadata = await finalizeBenchmarkRun(runDirectory, failedResult, null);
+    renderBenchmarkSummary(metadata);
+  } finally {
+    process.removeListener("SIGINT", onSigint);
+    if (serverStarted && !options.keepServer) {
+      const backend = backendFor(profile.backend);
+      if (backend.type !== "managed-server") {
+        const result = await stopProfile(profile);
+        console.log(result.stopped ? pc.green(`[stop] ${result.message}`) : pc.dim(`[stop] ${result.message}`));
+      }
+    }
+    const unloadResult = await unloadModelFromServer(profile);
+    if (!unloadResult.unloaded && unloadResult.error) {
+      console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
+    } else if (!unloadResult.unloaded && unloadResult.reason) {
+      console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
+    }
+  }
+  return metadata;
+}
+// ── Benchmark from a selected profile (from model picker) ────────────────
+export async function benchmarkForProfile(profile) {
+  await ensureDirs();
+  const prompt = createPrompt();
+  try {
+    const repoPath = await linkBenchmarkRepo(prompt);
+    if (!repoPath) return;
+    const kind = await prompt.choice("Benchmark category", [
+      { value: "visual", label: "Visual Benchmark", hint: "HTML/CSS/JS animation benchmarks" },
+      { value: "data-science", label: "Data Science", hint: "Analysis and charting benchmarks" },
+    ], "visual");
+    const benchDir = join(repoPath, "benchmarks");
+    const benchmarks = (await loadBenchmarks(benchDir)).filter((b) => b.kind === kind);
+    if (benchmarks.length === 0) {
+      console.log(pc.yellow(`No ${kind} benchmarks found in ${benchDir}`));
+      return;
+    }
+    const benchmarkId = await prompt.choice("Prompt", benchmarks.map((b) => ({
+      value: b.id, label: b.title, hint: b.description || b.id,
+    })), benchmarks[0].id);
+    const selectedBenchmark = benchmarks.find((b) => b.id === benchmarkId);
+    if (!selectedBenchmark) return;
+    const modelId = profile.modelAlias;
+    const modelSource = benchmarkModelSource(profile);
+    const backendLabel = backendFor(profile.backend).label;
+    const canRun = (await hasPi()) && modelSource !== "cloud";
+    const action = await chooseBenchmarkAction(prompt, canRun);
+    const runDirectory = await prepareBenchmarkRun({ repoPath, benchmark: selectedBenchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps: action === "prepare" });
+    if (action === "run") {
+      return await runPreparedBenchmark(profile, runDirectory);
+    }
+    return runDirectory;
+  } finally {
+    prompt.close();
+  }
+}
+// ── Standalone benchmark flow (offgrid-ai benchmark) ──────────────────────
+export async function benchmarkFlow() {
+  await ensureDirs();
+  const prompt = createPrompt();
+  try {
+    const repoPath = await linkBenchmarkRepo(prompt);
+    if (!repoPath) return;
+    const kind = await prompt.choice("Benchmark category", [
+      { value: "visual", label: "Visual Benchmark", hint: "HTML/CSS/JS animation benchmarks" },
+      { value: "data-science", label: "Data Science", hint: "Analysis and charting benchmarks" },
+    ], "visual");
+    const benchDir = join(repoPath, "benchmarks");
+    const benchmarks = (await loadBenchmarks(benchDir)).filter((b) => b.kind === kind);
+    if (benchmarks.length === 0) {
+      console.log(pc.yellow(`No ${kind} benchmarks found in ${benchDir}`));
+      return;
+    }
+    const benchmarkId = await prompt.choice("Prompt", benchmarks.map((b) => ({
+      value: b.id, label: b.title, hint: b.description || b.id,
+    })), benchmarks[0].id);
+    const selectedBenchmark = benchmarks.find((b) => b.id === benchmarkId);
+    if (!selectedBenchmark) return;
+    const profiles = await loadProfiles();
+    const source = await prompt.choice("Model source", [
+      { value: "profile", label: "Use existing profile", hint: "Pick a saved offgrid-ai profile" },
+      { value: "cloud", label: "Custom / cloud", hint: "Free-form model label for cloud runs" },
+    ], "profile");
+    let modelId, modelSource, backendLabel, profile;
+    if (source === "profile") {
+      if (profiles.length === 0) {
+        console.log(pc.yellow("No profiles yet. Run: offgrid-ai models"));
+        return;
+      }
+      const profileId = await prompt.choice("Profile", profiles.map((p) => ({
+        value: p.id, label: p.label, hint: `${backendFor(p.backend).label} · ${p.modelAlias}`,
+      })), profiles[0].id);
+      profile = profiles.find((p) => p.id === profileId);
+      if (!profile) return;
+      modelId = profile.modelAlias;
+      modelSource = benchmarkModelSource(profile);
+      backendLabel = backendFor(profile.backend).label;
+    } else {
+      backendLabel = await prompt.text("Backend label", "cloud");
+      modelId = await prompt.text("Model name", "");
+      if (!modelId) { console.log(pc.yellow("Model name is required.")); return; }
+      modelSource = "cloud";
+    }
+    const canRun = (await hasPi()) && modelSource !== "cloud" && profile != null;
+    const action = await chooseBenchmarkAction(prompt, canRun);
+    const runDirectory = await prepareBenchmarkRun({ repoPath, benchmark: selectedBenchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps: action === "prepare" });
+    if (action === "run" && profile) {
+      return await runPreparedBenchmark(profile, runDirectory);
+    }
+    return runDirectory;
+  } finally {
+    prompt.close();
+  }
+}