npm - offgrid-ai - Version diff - 0.9.4 → 0.9.6 - Mend

offgrid-ai 0.9.4 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/package.json +1 -1
package/src/benchmark/finalize.mjs +18 -8
package/src/benchmark/flow.mjs +26 -8
package/src/benchmark/metrics.mjs +8 -3
package/src/benchmark/pi-runner.mjs +5 -2
package/src/benchmark/stream-renderer.mjs +31 -2
package/src/commands/models.mjs +10 -5
package/src/model-presenters.mjs +10 -4
package/src/profile-setup.mjs +11 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "offgrid-ai",
-  "version": "0.9.4",
+  "version": "0.9.6",
   "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
   "author": "Eeshan Srivastava (https://eeshans.com)",
   "type": "module",

package/src/benchmark/finalize.mjs CHANGED

@@ -59,13 +59,7 @@ async function unloadOmlxModel(profile) {
       return { unloaded: true, backend: "omlx", modelId: targetId };
     }
-    let detail = "";
-    try {
-      const body = await response.json();
-      detail = body?.detail ?? body?.message ?? "";
-    } catch {
-      detail = await response.text().catch(() => "");
-    }
+    const detail = await responseErrorDetail(response);
     if (response.status === 400 && /not loaded/i.test(detail)) {
       return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
@@ -89,7 +83,18 @@ async function unloadOmlxModel(profile) {
   }
 }
-export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
+async function responseErrorDetail(response) {
+  const text = await response.text().catch(() => "");
+  if (!text) return "";
+  try {
+    const body = JSON.parse(text);
+    return body?.detail ?? body?.message ?? text;
+  } catch {
+    return text;
+  }
+}
+export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError = null) {
   const metadataPath = join(runDirectory, "metadata.json");
   const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
   const now = new Date();
@@ -144,6 +149,7 @@ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics
   metadata.runner.speedMetrics = speedMetrics;
   metadata.runner.metricSource = speedMetrics?.metricSource ?? null;
+  metadata.runner.speedMetricsError = speedMetricsError ?? null;
   metadata.results = {
     wallClockMs: runResult.wallClockMs,
@@ -226,6 +232,10 @@ export function renderBenchmarkSummary(metadata) {
       console.log(pc.dim("\n" + tip));
     }
   }
+  if (status === "completed" && !runner?.speedMetrics && runner?.speedMetricsError) {
+    console.log(pc.dim(`\nSpeed metrics unavailable: ${runner.speedMetricsError}`));
+  }
 }
 function wrapText(text, width = 64) {

package/src/benchmark/flow.mjs CHANGED

@@ -4,7 +4,7 @@ import { join } from "node:path";
 import { ensureDirs } from "../config.mjs";
 import { backendFor } from "../backends.mjs";
 import { hasPi, hasPiModel, syncPiConfig } from "../harness-pi.mjs";
-import { serverReady, startServer, waitForReady, stopProfile } from "../process.mjs";
+import { serverReady, startServer, waitForReady, stopProfile, modelAvailableOnServer } from "../process.mjs";
 import { loadProfiles } from "../profiles.mjs";
 import { pc, createPrompt } from "../ui.mjs";
 import { linkBenchmarkRepo } from "./repo.mjs";
@@ -28,9 +28,20 @@ async function chooseBenchmarkAction(prompt, canRun) {
   return await prompt.choice("Action", canRun ? choices : choices.filter((c) => c.value === "prepare"), canRun ? "run" : "prepare");
 }
+function managedModelId(profile) {
+  return profile.omlxModel ?? profile.ollamaModel ?? profile.modelAlias ?? profile.label;
+}
+async function ensureManagedModelAvailableForBenchmark(profile, backend) {
+  if (backend.type !== "managed-server") return;
+  if (await modelAvailableOnServer(profile)) return;
+  throw new Error(`${managedModelId(profile)} is not available on ${backend.label} at ${profile.baseUrl}.`);
+}
 async function ensureServerForBenchmark(profile) {
   const backend = backendFor(profile.backend);
   if (await serverReady(profile.baseUrl)) {
+    await ensureManagedModelAvailableForBenchmark(profile, backend);
     console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
     return { started: false };
   }
@@ -52,6 +63,7 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
     options.signal.addEventListener("abort", () => controller.abort(), { once: true });
   }
   let serverStarted = false;
+  let benchmarkStarted = false;
   let metadata = null;
   const onSigint = () => {
@@ -72,18 +84,22 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
       await syncPiConfig(profile);
     }
+    benchmarkStarted = true;
     const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
     let speedMetrics = null;
+    let speedMetricsError = null;
     if (!runResult.error) {
       try {
         speedMetrics = await queryServerMetrics(profile);
       } catch (err) {
-        runResult.error = { message: `Speed metrics query failed: ${err.message}` };
+        // Non-fatal: speed metrics are a supplementary measurement, not the
+        // benchmark itself. Don't poison the run result; surface it as a note.
+        speedMetricsError = err.message;
       }
     }
-    metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics);
+    metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError);
     renderBenchmarkSummary(metadata);
   } catch (err) {
     const failedResult = {
@@ -110,11 +126,13 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
         console.log(result.stopped ? pc.green(`[stop] ${result.message}`) : pc.dim(`[stop] ${result.message}`));
       }
     }
-    const unloadResult = await unloadModelFromServer(profile);
-    if (!unloadResult.unloaded && unloadResult.error) {
-      console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
-    } else if (!unloadResult.unloaded && unloadResult.reason) {
-      console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
+    if (benchmarkStarted) {
+      const unloadResult = await unloadModelFromServer(profile);
+      if (!unloadResult.unloaded && unloadResult.error) {
+        console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
+      } else if (!unloadResult.unloaded && unloadResult.reason) {
+        console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
+      }
     }
   }

package/src/benchmark/metrics.mjs CHANGED

@@ -4,6 +4,8 @@ import { backendFor } from "../backends.mjs";
 import { apiRootUrl } from "../process.mjs";
 const BENCH_SPEED_PROMPT = "Write a one-sentence summary of machine learning.";
+const SPEED_QUERY_TIMEOUT_MS = 120_000;
+const SPEED_QUERY_MAX_TOKENS = 64;
 export async function queryServerMetrics(profile) {
   const backend = backendFor(profile.backend);
@@ -26,13 +28,14 @@ async function queryLlamaCppMetrics(profile) {
     model: profile.modelAlias,
     messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
     stream: false,
+    max_tokens: SPEED_QUERY_MAX_TOKENS,
   };
   const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
     method: "POST",
     headers: { "Content-Type": "application/json" },
     body: JSON.stringify(body),
-    signal: AbortSignal.timeout(60000),
+    signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
   });
   if (!response.ok) {
@@ -66,13 +69,14 @@ async function queryOmlxMetrics(profile) {
     messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
     stream: true,
     stream_options: { include_usage: true },
+    max_tokens: SPEED_QUERY_MAX_TOKENS,
   };
   const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
     method: "POST",
     headers: { "Content-Type": "application/json" },
     body: JSON.stringify(body),
-    signal: AbortSignal.timeout(60000),
+    signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
   });
   if (!response.ok) {
@@ -117,6 +121,7 @@ async function queryOllamaMetrics(profile) {
     model: profile.modelAlias,
     prompt: BENCH_SPEED_PROMPT,
     stream: false,
+    options: { num_predict: SPEED_QUERY_MAX_TOKENS },
   };
   const apiBaseUrl = apiRootUrl(profile.baseUrl || backendFor(profile.backend).apiBaseUrl || "");
@@ -125,7 +130,7 @@ async function queryOllamaMetrics(profile) {
     method: "POST",
     headers: { "Content-Type": "application/json" },
     body: JSON.stringify(body),
-    signal: AbortSignal.timeout(60000),
+    signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
   });
   if (!response.ok) {

package/src/benchmark/pi-runner.mjs CHANGED

@@ -5,7 +5,7 @@ import { join } from "node:path";
 import { spawn } from "node:child_process";
 import {
   BENCH_COLORS, renderStreamEvent,
-  formatToolCall, printFinalLine,
+  formatToolCall, printFinalLine, stopExecTimer,
 } from "./stream-renderer.mjs";
 import { piModelString } from "./shared.mjs";
@@ -58,7 +58,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
     turnHadToolError: false,
     modelPrinted: false,
     activeTool: null,
-    status: { mode: "idle", toolName: null, bytes: 0, tokens: 0 },
+    execTimer: null,
+    status: { mode: "idle", toolName: null, bytes: 0, tokens: 0, execStartedAt: null },
   };
   function appendResponse(text) {
@@ -193,6 +194,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
   return new Promise((resolve) => {
     child.on("exit", async (code) => {
       if (signal) signal.removeEventListener("abort", abortListener);
+      stopExecTimer(renderState);
       if (streamBuffer.trim()) {
         processLine(streamBuffer);
       }
@@ -225,6 +227,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
     child.on("error", async (err) => {
       if (signal) signal.removeEventListener("abort", abortListener);
+      stopExecTimer(renderState);
       await streamHandle.close();
       await stderrHandle.close();
       runResult.error = { message: err.message };

package/src/benchmark/stream-renderer.mjs CHANGED

@@ -107,6 +107,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
       };
       resetStatus(state, "exec", parsed.toolName);
       printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, parsed.args ?? {}, state)));
+      startExecTimer(state);
       break;
     }
     case "tool_execution_update": {
@@ -114,11 +115,13 @@ export function renderStreamEvent(parsed, state, opts = {}) {
       if (text) {
         if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(text));
         if (state.activeTool) state.activeTool.outputText = text;
-        updateStatusFromDelta(state, text, "exec");
+        state.status.bytes += Buffer.byteLength(text, "utf8");
+        printExecStatus(state);
       }
       break;
     }
     case "tool_execution_end": {
+      stopExecTimer(state);
       const lines = formatToolEnd(parsed, state);
       if (parsed.isError) state.turnHadToolError = true;
       for (const line of lines) printFinalLine(line);
@@ -133,6 +136,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
       break;
     }
     case "turn_end": {
+      stopExecTimer(state);
       const usage = parsed.message?.usage;
       const tokenPart = usage ? ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens` : "";
       const marker = state.turnHadToolError ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
@@ -141,7 +145,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
       break;
     }
     case "agent_end":
-      clearStatusLine();
+      stopExecTimer(state);
       break;
     default:
       break;
@@ -172,6 +176,31 @@ export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
   printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} ${modeLabel}${label} · ${bytes} (~${tokens} tokens)`));
 }
+export function startExecTimer(state) {
+  stopExecTimer(state);
+  state.status.execStartedAt = Date.now();
+  state.status.bytes = 0;
+  if (!process.stdout.isTTY) return;
+  printExecStatus(state);
+  state.execTimer = setInterval(() => printExecStatus(state), 1000);
+}
+export function stopExecTimer(state) {
+  if (state.execTimer) {
+    clearInterval(state.execTimer);
+    state.execTimer = null;
+  }
+  clearStatusLine();
+}
+export function printExecStatus(state) {
+  if (!process.stdout.isTTY) return;
+  const elapsed = state.status.execStartedAt ? Math.floor((Date.now() - state.status.execStartedAt) / 1000) : 0;
+  const tool = state.status.toolName ?? "tool";
+  const bytes = formatBytes(state.status.bytes);
+  printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} running ${tool}… ${elapsed}s · ${bytes}`));
+}
 export function formatToolStart(toolName, args, state) {
   if (toolName === "read") return `→ read ${displayPath(args.path, state)}`;
   if (toolName === "write") {

package/src/commands/models.mjs CHANGED

@@ -1,7 +1,7 @@
 import { ensureDirs } from "../config.mjs";
 import { backendFor, BACKENDS } from "../backends.mjs";
 import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
-import { isProfileRunning, isProfileServerUp, stopProfile } from "../process.mjs";
+import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
 import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
 import { configureLocalProfile } from "../profile-setup.mjs";
 import { pc, startInteractive, createPrompt } from "../ui.mjs";
@@ -41,14 +41,18 @@ export async function modelCommandCenter(initialCatalog) {
   const runningProfilesNow = [];
   const serverUpIds = new Set();
+  const modelMissingIds = new Set();
   for (const profile of normalized.profiles) {
     if (await isProfileRunning(profile)) {
       runningProfilesNow.push(profile);
       continue;
     }
-    if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) serverUpIds.add(profile.id);
+    if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) {
+      if (await modelAvailableOnServer(profile)) serverUpIds.add(profile.id);
+      else modelMissingIds.add(profile.id);
+    }
   }
-  printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds);
+  printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds, modelMissingIds);
   await printBenchmarkLine();
   const nameWidth = modelNameWidth(allItems);
@@ -57,6 +61,7 @@ export async function modelCommandCenter(initialCatalog) {
     if (item.type === "profile") {
       if (item.fileMissing) return "missing";
       if (runningProfilesNow.some((profile) => profile.id === item.profile.id)) return "running";
+      if (modelMissingIds.has(item.profile.id)) return "missing";
       if (serverUpIds.has(item.profile.id)) return "serverup";
       return "ready";
     }
@@ -72,8 +77,8 @@ export async function modelCommandCenter(initialCatalog) {
     const bucket = grouped.get(group);
     if (!bucket || bucket.length === 0) continue;
     for (const item of bucket) {
-      const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth });
-      choices.push({ value: opt.value, label: opt.label });
+      const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth });
+      choices.push({ value: opt.value, label: opt.label, hint: opt.hint });
     }
   }

package/src/model-presenters.mjs CHANGED

@@ -80,12 +80,17 @@ function optionLabel({ status, source, name, ctx, size, nameWidth }) {
   return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
 }
-export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth }) {
+export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
   if (item.type === "profile") {
     const backend = backendFor(item.profile.backend);
     const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
     const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
-    const status = item.fileMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
+    const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
+    const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
+    const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
+    const hint = drafterMissing ? "MTP drafter missing — reconfigure"
+      : modelMissing ? `${backend.label} model no longer available`
+      : undefined;
     return {
       value: itemKey(item),
       label: optionLabel({
@@ -96,6 +101,7 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
         ctx: optionCtxLabel(item),
         size: optionSizeLabel(item),
       }),
+      ...(hint ? { hint: pc.red(hint) } : {}),
     };
   }
   if (item.type === "new") {
@@ -125,10 +131,10 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
   };
 }
-export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set()) {
+export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
   const profiles = normalized.profiles;
   const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
-  const isMissing = (p) => isProfileFileMissing(p);
+  const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
   const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
   const runningCount = runningProfilesNow.length;
   const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;

package/src/profile-setup.mjs CHANGED

@@ -1,3 +1,4 @@
+import { existsSync } from "node:fs";
 import { execFile } from "node:child_process";
 import { promisify } from "node:util";
 import { estimateMemory } from "./estimate.mjs";
@@ -36,6 +37,10 @@ export async function configureLocalProfile(prompt, profile) {
   // so that re-setup can pick up MTP availability, vision changes, etc.
   const freshCaps = detectCapabilities(profile.modelPath, profile.mmprojPath);
   let drafterPath = profile.drafterPath ?? null;
+  if (drafterPath && !existsSync(drafterPath)) {
+    // Stored drafter is no longer on disk — drop it and re-scan for a fresh one.
+    drafterPath = null;
+  }
   if (!drafterPath) {
     const { drafters } = await scanGgufModels();
     const drafter = matchDrafter(profile.modelPath, drafters);
@@ -47,6 +52,12 @@ export async function configureLocalProfile(prompt, profile) {
   if (hasMtp && configured.backend !== "llama-cpp-mtp") {
     configured = { ...configured, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp", drafterPath, capabilities: { ...configured.capabilities, mtp: true } };
   }
+  // If the profile was MTP but the drafter is now gone (and the model isn't
+  // natively MTP), switch back to plain llama.cpp so the server can start.
+  if (!hasMtp && configured.backend === "llama-cpp-mtp") {
+    console.log(pc.yellow("MTP drafter no longer found — switching to llama.cpp without speculative decoding."));
+    configured = removeMtpDefaults(configured);
+  }
   if (drafterPath && !configured.drafterPath) {
     configured = { ...configured, drafterPath };
   }