offgrid-ai 0.9.4 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.9.4",
3
+ "version": "0.9.6",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -59,13 +59,7 @@ async function unloadOmlxModel(profile) {
59
59
  return { unloaded: true, backend: "omlx", modelId: targetId };
60
60
  }
61
61
 
62
- let detail = "";
63
- try {
64
- const body = await response.json();
65
- detail = body?.detail ?? body?.message ?? "";
66
- } catch {
67
- detail = await response.text().catch(() => "");
68
- }
62
+ const detail = await responseErrorDetail(response);
69
63
 
70
64
  if (response.status === 400 && /not loaded/i.test(detail)) {
71
65
  return { unloaded: true, backend: "omlx", modelId: targetId, reason: "model was not loaded" };
@@ -89,7 +83,18 @@ async function unloadOmlxModel(profile) {
89
83
  }
90
84
  }
91
85
 
92
- export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
86
+ async function responseErrorDetail(response) {
87
+ const text = await response.text().catch(() => "");
88
+ if (!text) return "";
89
+ try {
90
+ const body = JSON.parse(text);
91
+ return body?.detail ?? body?.message ?? text;
92
+ } catch {
93
+ return text;
94
+ }
95
+ }
96
+
97
+ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError = null) {
93
98
  const metadataPath = join(runDirectory, "metadata.json");
94
99
  const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
95
100
  const now = new Date();
@@ -144,6 +149,7 @@ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics
144
149
 
145
150
  metadata.runner.speedMetrics = speedMetrics;
146
151
  metadata.runner.metricSource = speedMetrics?.metricSource ?? null;
152
+ metadata.runner.speedMetricsError = speedMetricsError ?? null;
147
153
 
148
154
  metadata.results = {
149
155
  wallClockMs: runResult.wallClockMs,
@@ -226,6 +232,10 @@ export function renderBenchmarkSummary(metadata) {
226
232
  console.log(pc.dim("\n" + tip));
227
233
  }
228
234
  }
235
+
236
+ if (status === "completed" && !runner?.speedMetrics && runner?.speedMetricsError) {
237
+ console.log(pc.dim(`\nSpeed metrics unavailable: ${runner.speedMetricsError}`));
238
+ }
229
239
  }
230
240
 
231
241
  function wrapText(text, width = 64) {
@@ -4,7 +4,7 @@ import { join } from "node:path";
4
4
  import { ensureDirs } from "../config.mjs";
5
5
  import { backendFor } from "../backends.mjs";
6
6
  import { hasPi, hasPiModel, syncPiConfig } from "../harness-pi.mjs";
7
- import { serverReady, startServer, waitForReady, stopProfile } from "../process.mjs";
7
+ import { serverReady, startServer, waitForReady, stopProfile, modelAvailableOnServer } from "../process.mjs";
8
8
  import { loadProfiles } from "../profiles.mjs";
9
9
  import { pc, createPrompt } from "../ui.mjs";
10
10
  import { linkBenchmarkRepo } from "./repo.mjs";
@@ -28,9 +28,20 @@ async function chooseBenchmarkAction(prompt, canRun) {
28
28
  return await prompt.choice("Action", canRun ? choices : choices.filter((c) => c.value === "prepare"), canRun ? "run" : "prepare");
29
29
  }
30
30
 
31
+ function managedModelId(profile) {
32
+ return profile.omlxModel ?? profile.ollamaModel ?? profile.modelAlias ?? profile.label;
33
+ }
34
+
35
+ async function ensureManagedModelAvailableForBenchmark(profile, backend) {
36
+ if (backend.type !== "managed-server") return;
37
+ if (await modelAvailableOnServer(profile)) return;
38
+ throw new Error(`${managedModelId(profile)} is not available on ${backend.label} at ${profile.baseUrl}.`);
39
+ }
40
+
31
41
  async function ensureServerForBenchmark(profile) {
32
42
  const backend = backendFor(profile.backend);
33
43
  if (await serverReady(profile.baseUrl)) {
44
+ await ensureManagedModelAvailableForBenchmark(profile, backend);
34
45
  console.log(pc.green(`[ready] ${backend.label} at ${profile.baseUrl}`));
35
46
  return { started: false };
36
47
  }
@@ -52,6 +63,7 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
52
63
  options.signal.addEventListener("abort", () => controller.abort(), { once: true });
53
64
  }
54
65
  let serverStarted = false;
66
+ let benchmarkStarted = false;
55
67
  let metadata = null;
56
68
 
57
69
  const onSigint = () => {
@@ -72,18 +84,22 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
72
84
  await syncPiConfig(profile);
73
85
  }
74
86
 
87
+ benchmarkStarted = true;
75
88
  const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
76
89
 
77
90
  let speedMetrics = null;
91
+ let speedMetricsError = null;
78
92
  if (!runResult.error) {
79
93
  try {
80
94
  speedMetrics = await queryServerMetrics(profile);
81
95
  } catch (err) {
82
- runResult.error = { message: `Speed metrics query failed: ${err.message}` };
96
+ // Non-fatal: speed metrics are a supplementary measurement, not the
97
+ // benchmark itself. Don't poison the run result; surface it as a note.
98
+ speedMetricsError = err.message;
83
99
  }
84
100
  }
85
101
 
86
- metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics);
102
+ metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError);
87
103
  renderBenchmarkSummary(metadata);
88
104
  } catch (err) {
89
105
  const failedResult = {
@@ -110,11 +126,13 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
110
126
  console.log(result.stopped ? pc.green(`[stop] ${result.message}`) : pc.dim(`[stop] ${result.message}`));
111
127
  }
112
128
  }
113
- const unloadResult = await unloadModelFromServer(profile);
114
- if (!unloadResult.unloaded && unloadResult.error) {
115
- console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
116
- } else if (!unloadResult.unloaded && unloadResult.reason) {
117
- console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
129
+ if (benchmarkStarted) {
130
+ const unloadResult = await unloadModelFromServer(profile);
131
+ if (!unloadResult.unloaded && unloadResult.error) {
132
+ console.log(pc.yellow(`[unload] ${unloadResult.backend}: ${unloadResult.error}`));
133
+ } else if (!unloadResult.unloaded && unloadResult.reason) {
134
+ console.log(pc.dim(`[unload] ${unloadResult.backend}: ${unloadResult.reason}`));
135
+ }
118
136
  }
119
137
  }
120
138
 
@@ -4,6 +4,8 @@ import { backendFor } from "../backends.mjs";
4
4
  import { apiRootUrl } from "../process.mjs";
5
5
 
6
6
  const BENCH_SPEED_PROMPT = "Write a one-sentence summary of machine learning.";
7
+ const SPEED_QUERY_TIMEOUT_MS = 120_000;
8
+ const SPEED_QUERY_MAX_TOKENS = 64;
7
9
 
8
10
  export async function queryServerMetrics(profile) {
9
11
  const backend = backendFor(profile.backend);
@@ -26,13 +28,14 @@ async function queryLlamaCppMetrics(profile) {
26
28
  model: profile.modelAlias,
27
29
  messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
28
30
  stream: false,
31
+ max_tokens: SPEED_QUERY_MAX_TOKENS,
29
32
  };
30
33
 
31
34
  const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
32
35
  method: "POST",
33
36
  headers: { "Content-Type": "application/json" },
34
37
  body: JSON.stringify(body),
35
- signal: AbortSignal.timeout(60000),
38
+ signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
36
39
  });
37
40
 
38
41
  if (!response.ok) {
@@ -66,13 +69,14 @@ async function queryOmlxMetrics(profile) {
66
69
  messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
67
70
  stream: true,
68
71
  stream_options: { include_usage: true },
72
+ max_tokens: SPEED_QUERY_MAX_TOKENS,
69
73
  };
70
74
 
71
75
  const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
72
76
  method: "POST",
73
77
  headers: { "Content-Type": "application/json" },
74
78
  body: JSON.stringify(body),
75
- signal: AbortSignal.timeout(60000),
79
+ signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
76
80
  });
77
81
 
78
82
  if (!response.ok) {
@@ -117,6 +121,7 @@ async function queryOllamaMetrics(profile) {
117
121
  model: profile.modelAlias,
118
122
  prompt: BENCH_SPEED_PROMPT,
119
123
  stream: false,
124
+ options: { num_predict: SPEED_QUERY_MAX_TOKENS },
120
125
  };
121
126
 
122
127
  const apiBaseUrl = apiRootUrl(profile.baseUrl || backendFor(profile.backend).apiBaseUrl || "");
@@ -125,7 +130,7 @@ async function queryOllamaMetrics(profile) {
125
130
  method: "POST",
126
131
  headers: { "Content-Type": "application/json" },
127
132
  body: JSON.stringify(body),
128
- signal: AbortSignal.timeout(60000),
133
+ signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
129
134
  });
130
135
 
131
136
  if (!response.ok) {
@@ -5,7 +5,7 @@ import { join } from "node:path";
5
5
  import { spawn } from "node:child_process";
6
6
  import {
7
7
  BENCH_COLORS, renderStreamEvent,
8
- formatToolCall, printFinalLine,
8
+ formatToolCall, printFinalLine, stopExecTimer,
9
9
  } from "./stream-renderer.mjs";
10
10
  import { piModelString } from "./shared.mjs";
11
11
 
@@ -58,7 +58,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
58
58
  turnHadToolError: false,
59
59
  modelPrinted: false,
60
60
  activeTool: null,
61
- status: { mode: "idle", toolName: null, bytes: 0, tokens: 0 },
61
+ execTimer: null,
62
+ status: { mode: "idle", toolName: null, bytes: 0, tokens: 0, execStartedAt: null },
62
63
  };
63
64
 
64
65
  function appendResponse(text) {
@@ -193,6 +194,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
193
194
  return new Promise((resolve) => {
194
195
  child.on("exit", async (code) => {
195
196
  if (signal) signal.removeEventListener("abort", abortListener);
197
+ stopExecTimer(renderState);
196
198
  if (streamBuffer.trim()) {
197
199
  processLine(streamBuffer);
198
200
  }
@@ -225,6 +227,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
225
227
 
226
228
  child.on("error", async (err) => {
227
229
  if (signal) signal.removeEventListener("abort", abortListener);
230
+ stopExecTimer(renderState);
228
231
  await streamHandle.close();
229
232
  await stderrHandle.close();
230
233
  runResult.error = { message: err.message };
@@ -107,6 +107,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
107
107
  };
108
108
  resetStatus(state, "exec", parsed.toolName);
109
109
  printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, parsed.args ?? {}, state)));
110
+ startExecTimer(state);
110
111
  break;
111
112
  }
112
113
  case "tool_execution_update": {
@@ -114,11 +115,13 @@ export function renderStreamEvent(parsed, state, opts = {}) {
114
115
  if (text) {
115
116
  if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(text));
116
117
  if (state.activeTool) state.activeTool.outputText = text;
117
- updateStatusFromDelta(state, text, "exec");
118
+ state.status.bytes += Buffer.byteLength(text, "utf8");
119
+ printExecStatus(state);
118
120
  }
119
121
  break;
120
122
  }
121
123
  case "tool_execution_end": {
124
+ stopExecTimer(state);
122
125
  const lines = formatToolEnd(parsed, state);
123
126
  if (parsed.isError) state.turnHadToolError = true;
124
127
  for (const line of lines) printFinalLine(line);
@@ -133,6 +136,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
133
136
  break;
134
137
  }
135
138
  case "turn_end": {
139
+ stopExecTimer(state);
136
140
  const usage = parsed.message?.usage;
137
141
  const tokenPart = usage ? ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens` : "";
138
142
  const marker = state.turnHadToolError ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
@@ -141,7 +145,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
141
145
  break;
142
146
  }
143
147
  case "agent_end":
144
- clearStatusLine();
148
+ stopExecTimer(state);
145
149
  break;
146
150
  default:
147
151
  break;
@@ -172,6 +176,31 @@ export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
172
176
  printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} ${modeLabel}${label} · ${bytes} (~${tokens} tokens)`));
173
177
  }
174
178
 
179
+ export function startExecTimer(state) {
180
+ stopExecTimer(state);
181
+ state.status.execStartedAt = Date.now();
182
+ state.status.bytes = 0;
183
+ if (!process.stdout.isTTY) return;
184
+ printExecStatus(state);
185
+ state.execTimer = setInterval(() => printExecStatus(state), 1000);
186
+ }
187
+
188
+ export function stopExecTimer(state) {
189
+ if (state.execTimer) {
190
+ clearInterval(state.execTimer);
191
+ state.execTimer = null;
192
+ }
193
+ clearStatusLine();
194
+ }
195
+
196
+ export function printExecStatus(state) {
197
+ if (!process.stdout.isTTY) return;
198
+ const elapsed = state.status.execStartedAt ? Math.floor((Date.now() - state.status.execStartedAt) / 1000) : 0;
199
+ const tool = state.status.toolName ?? "tool";
200
+ const bytes = formatBytes(state.status.bytes);
201
+ printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} running ${tool}… ${elapsed}s · ${bytes}`));
202
+ }
203
+
175
204
  export function formatToolStart(toolName, args, state) {
176
205
  if (toolName === "read") return `→ read ${displayPath(args.path, state)}`;
177
206
  if (toolName === "write") {
@@ -1,7 +1,7 @@
1
1
  import { ensureDirs } from "../config.mjs";
2
2
  import { backendFor, BACKENDS } from "../backends.mjs";
3
3
  import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
4
- import { isProfileRunning, isProfileServerUp, stopProfile } from "../process.mjs";
4
+ import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
5
5
  import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
6
6
  import { configureLocalProfile } from "../profile-setup.mjs";
7
7
  import { pc, startInteractive, createPrompt } from "../ui.mjs";
@@ -41,14 +41,18 @@ export async function modelCommandCenter(initialCatalog) {
41
41
 
42
42
  const runningProfilesNow = [];
43
43
  const serverUpIds = new Set();
44
+ const modelMissingIds = new Set();
44
45
  for (const profile of normalized.profiles) {
45
46
  if (await isProfileRunning(profile)) {
46
47
  runningProfilesNow.push(profile);
47
48
  continue;
48
49
  }
49
- if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) serverUpIds.add(profile.id);
50
+ if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) {
51
+ if (await modelAvailableOnServer(profile)) serverUpIds.add(profile.id);
52
+ else modelMissingIds.add(profile.id);
53
+ }
50
54
  }
51
- printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds);
55
+ printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds, modelMissingIds);
52
56
  await printBenchmarkLine();
53
57
 
54
58
  const nameWidth = modelNameWidth(allItems);
@@ -57,6 +61,7 @@ export async function modelCommandCenter(initialCatalog) {
57
61
  if (item.type === "profile") {
58
62
  if (item.fileMissing) return "missing";
59
63
  if (runningProfilesNow.some((profile) => profile.id === item.profile.id)) return "running";
64
+ if (modelMissingIds.has(item.profile.id)) return "missing";
60
65
  if (serverUpIds.has(item.profile.id)) return "serverup";
61
66
  return "ready";
62
67
  }
@@ -72,8 +77,8 @@ export async function modelCommandCenter(initialCatalog) {
72
77
  const bucket = grouped.get(group);
73
78
  if (!bucket || bucket.length === 0) continue;
74
79
  for (const item of bucket) {
75
- const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth });
76
- choices.push({ value: opt.value, label: opt.label });
80
+ const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth });
81
+ choices.push({ value: opt.value, label: opt.label, hint: opt.hint });
77
82
  }
78
83
  }
79
84
 
@@ -80,12 +80,17 @@ function optionLabel({ status, source, name, ctx, size, nameWidth }) {
80
80
  return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
81
81
  }
82
82
 
83
- export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth }) {
83
+ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
84
84
  if (item.type === "profile") {
85
85
  const backend = backendFor(item.profile.backend);
86
86
  const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
87
87
  const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
88
- const status = item.fileMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
88
+ const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
89
+ const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
90
+ const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
91
+ const hint = drafterMissing ? "MTP drafter missing — reconfigure"
92
+ : modelMissing ? `${backend.label} model no longer available`
93
+ : undefined;
89
94
  return {
90
95
  value: itemKey(item),
91
96
  label: optionLabel({
@@ -96,6 +101,7 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
96
101
  ctx: optionCtxLabel(item),
97
102
  size: optionSizeLabel(item),
98
103
  }),
104
+ ...(hint ? { hint: pc.red(hint) } : {}),
99
105
  };
100
106
  }
101
107
  if (item.type === "new") {
@@ -125,10 +131,10 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
125
131
  };
126
132
  }
127
133
 
128
- export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set()) {
134
+ export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
129
135
  const profiles = normalized.profiles;
130
136
  const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
131
- const isMissing = (p) => isProfileFileMissing(p);
137
+ const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
132
138
  const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
133
139
  const runningCount = runningProfilesNow.length;
134
140
  const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
@@ -1,3 +1,4 @@
1
+ import { existsSync } from "node:fs";
1
2
  import { execFile } from "node:child_process";
2
3
  import { promisify } from "node:util";
3
4
  import { estimateMemory } from "./estimate.mjs";
@@ -36,6 +37,10 @@ export async function configureLocalProfile(prompt, profile) {
36
37
  // so that re-setup can pick up MTP availability, vision changes, etc.
37
38
  const freshCaps = detectCapabilities(profile.modelPath, profile.mmprojPath);
38
39
  let drafterPath = profile.drafterPath ?? null;
40
+ if (drafterPath && !existsSync(drafterPath)) {
41
+ // Stored drafter is no longer on disk — drop it and re-scan for a fresh one.
42
+ drafterPath = null;
43
+ }
39
44
  if (!drafterPath) {
40
45
  const { drafters } = await scanGgufModels();
41
46
  const drafter = matchDrafter(profile.modelPath, drafters);
@@ -47,6 +52,12 @@ export async function configureLocalProfile(prompt, profile) {
47
52
  if (hasMtp && configured.backend !== "llama-cpp-mtp") {
48
53
  configured = { ...configured, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp", drafterPath, capabilities: { ...configured.capabilities, mtp: true } };
49
54
  }
55
+ // If the profile was MTP but the drafter is now gone (and the model isn't
56
+ // natively MTP), switch back to plain llama.cpp so the server can start.
57
+ if (!hasMtp && configured.backend === "llama-cpp-mtp") {
58
+ console.log(pc.yellow("MTP drafter no longer found — switching to llama.cpp without speculative decoding."));
59
+ configured = removeMtpDefaults(configured);
60
+ }
50
61
  if (drafterPath && !configured.drafterPath) {
51
62
  configured = { ...configured, drafterPath };
52
63
  }