offgrid-ai 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "offgrid-ai",
3
- "version": "0.9.5",
3
+ "version": "0.9.6",
4
4
  "description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
5
5
  "author": "Eeshan Srivastava (https://eeshans.com)",
6
6
  "type": "module",
@@ -94,7 +94,7 @@ async function responseErrorDetail(response) {
94
94
  }
95
95
  }
96
96
 
97
- export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
97
+ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError = null) {
98
98
  const metadataPath = join(runDirectory, "metadata.json");
99
99
  const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
100
100
  const now = new Date();
@@ -149,6 +149,7 @@ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics
149
149
 
150
150
  metadata.runner.speedMetrics = speedMetrics;
151
151
  metadata.runner.metricSource = speedMetrics?.metricSource ?? null;
152
+ metadata.runner.speedMetricsError = speedMetricsError ?? null;
152
153
 
153
154
  metadata.results = {
154
155
  wallClockMs: runResult.wallClockMs,
@@ -231,6 +232,10 @@ export function renderBenchmarkSummary(metadata) {
231
232
  console.log(pc.dim("\n" + tip));
232
233
  }
233
234
  }
235
+
236
+ if (status === "completed" && !runner?.speedMetrics && runner?.speedMetricsError) {
237
+ console.log(pc.dim(`\nSpeed metrics unavailable: ${runner.speedMetricsError}`));
238
+ }
234
239
  }
235
240
 
236
241
  function wrapText(text, width = 64) {
@@ -88,15 +88,18 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
88
88
  const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
89
89
 
90
90
  let speedMetrics = null;
91
+ let speedMetricsError = null;
91
92
  if (!runResult.error) {
92
93
  try {
93
94
  speedMetrics = await queryServerMetrics(profile);
94
95
  } catch (err) {
95
- runResult.error = { message: `Speed metrics query failed: ${err.message}` };
96
+ // Non-fatal: speed metrics are a supplementary measurement, not the
97
+ // benchmark itself. Don't poison the run result; surface it as a note.
98
+ speedMetricsError = err.message;
96
99
  }
97
100
  }
98
101
 
99
- metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics);
102
+ metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError);
100
103
  renderBenchmarkSummary(metadata);
101
104
  } catch (err) {
102
105
  const failedResult = {
@@ -4,6 +4,8 @@ import { backendFor } from "../backends.mjs";
4
4
  import { apiRootUrl } from "../process.mjs";
5
5
 
6
6
  const BENCH_SPEED_PROMPT = "Write a one-sentence summary of machine learning.";
7
+ const SPEED_QUERY_TIMEOUT_MS = 120_000;
8
+ const SPEED_QUERY_MAX_TOKENS = 64;
7
9
 
8
10
  export async function queryServerMetrics(profile) {
9
11
  const backend = backendFor(profile.backend);
@@ -26,13 +28,14 @@ async function queryLlamaCppMetrics(profile) {
26
28
  model: profile.modelAlias,
27
29
  messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
28
30
  stream: false,
31
+ max_tokens: SPEED_QUERY_MAX_TOKENS,
29
32
  };
30
33
 
31
34
  const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
32
35
  method: "POST",
33
36
  headers: { "Content-Type": "application/json" },
34
37
  body: JSON.stringify(body),
35
- signal: AbortSignal.timeout(60000),
38
+ signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
36
39
  });
37
40
 
38
41
  if (!response.ok) {
@@ -66,13 +69,14 @@ async function queryOmlxMetrics(profile) {
66
69
  messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
67
70
  stream: true,
68
71
  stream_options: { include_usage: true },
72
+ max_tokens: SPEED_QUERY_MAX_TOKENS,
69
73
  };
70
74
 
71
75
  const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
72
76
  method: "POST",
73
77
  headers: { "Content-Type": "application/json" },
74
78
  body: JSON.stringify(body),
75
- signal: AbortSignal.timeout(60000),
79
+ signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
76
80
  });
77
81
 
78
82
  if (!response.ok) {
@@ -117,6 +121,7 @@ async function queryOllamaMetrics(profile) {
117
121
  model: profile.modelAlias,
118
122
  prompt: BENCH_SPEED_PROMPT,
119
123
  stream: false,
124
+ options: { num_predict: SPEED_QUERY_MAX_TOKENS },
120
125
  };
121
126
 
122
127
  const apiBaseUrl = apiRootUrl(profile.baseUrl || backendFor(profile.backend).apiBaseUrl || "");
@@ -125,7 +130,7 @@ async function queryOllamaMetrics(profile) {
125
130
  method: "POST",
126
131
  headers: { "Content-Type": "application/json" },
127
132
  body: JSON.stringify(body),
128
- signal: AbortSignal.timeout(60000),
133
+ signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
129
134
  });
130
135
 
131
136
  if (!response.ok) {
@@ -5,7 +5,7 @@ import { join } from "node:path";
5
5
  import { spawn } from "node:child_process";
6
6
  import {
7
7
  BENCH_COLORS, renderStreamEvent,
8
- formatToolCall, printFinalLine,
8
+ formatToolCall, printFinalLine, stopExecTimer,
9
9
  } from "./stream-renderer.mjs";
10
10
  import { piModelString } from "./shared.mjs";
11
11
 
@@ -58,7 +58,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
58
58
  turnHadToolError: false,
59
59
  modelPrinted: false,
60
60
  activeTool: null,
61
- status: { mode: "idle", toolName: null, bytes: 0, tokens: 0 },
61
+ execTimer: null,
62
+ status: { mode: "idle", toolName: null, bytes: 0, tokens: 0, execStartedAt: null },
62
63
  };
63
64
 
64
65
  function appendResponse(text) {
@@ -193,6 +194,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
193
194
  return new Promise((resolve) => {
194
195
  child.on("exit", async (code) => {
195
196
  if (signal) signal.removeEventListener("abort", abortListener);
197
+ stopExecTimer(renderState);
196
198
  if (streamBuffer.trim()) {
197
199
  processLine(streamBuffer);
198
200
  }
@@ -225,6 +227,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
225
227
 
226
228
  child.on("error", async (err) => {
227
229
  if (signal) signal.removeEventListener("abort", abortListener);
230
+ stopExecTimer(renderState);
228
231
  await streamHandle.close();
229
232
  await stderrHandle.close();
230
233
  runResult.error = { message: err.message };
@@ -107,6 +107,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
107
107
  };
108
108
  resetStatus(state, "exec", parsed.toolName);
109
109
  printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, parsed.args ?? {}, state)));
110
+ startExecTimer(state);
110
111
  break;
111
112
  }
112
113
  case "tool_execution_update": {
@@ -114,11 +115,13 @@ export function renderStreamEvent(parsed, state, opts = {}) {
114
115
  if (text) {
115
116
  if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(text));
116
117
  if (state.activeTool) state.activeTool.outputText = text;
117
- updateStatusFromDelta(state, text, "exec");
118
+ state.status.bytes += Buffer.byteLength(text, "utf8");
119
+ printExecStatus(state);
118
120
  }
119
121
  break;
120
122
  }
121
123
  case "tool_execution_end": {
124
+ stopExecTimer(state);
122
125
  const lines = formatToolEnd(parsed, state);
123
126
  if (parsed.isError) state.turnHadToolError = true;
124
127
  for (const line of lines) printFinalLine(line);
@@ -133,6 +136,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
133
136
  break;
134
137
  }
135
138
  case "turn_end": {
139
+ stopExecTimer(state);
136
140
  const usage = parsed.message?.usage;
137
141
  const tokenPart = usage ? ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens` : "";
138
142
  const marker = state.turnHadToolError ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
@@ -141,7 +145,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
141
145
  break;
142
146
  }
143
147
  case "agent_end":
144
- clearStatusLine();
148
+ stopExecTimer(state);
145
149
  break;
146
150
  default:
147
151
  break;
@@ -172,6 +176,31 @@ export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
172
176
  printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} ${modeLabel}${label} · ${bytes} (~${tokens} tokens)`));
173
177
  }
174
178
 
179
+ export function startExecTimer(state) {
180
+ stopExecTimer(state);
181
+ state.status.execStartedAt = Date.now();
182
+ state.status.bytes = 0;
183
+ if (!process.stdout.isTTY) return;
184
+ printExecStatus(state);
185
+ state.execTimer = setInterval(() => printExecStatus(state), 1000);
186
+ }
187
+
188
+ export function stopExecTimer(state) {
189
+ if (state.execTimer) {
190
+ clearInterval(state.execTimer);
191
+ state.execTimer = null;
192
+ }
193
+ clearStatusLine();
194
+ }
195
+
196
+ export function printExecStatus(state) {
197
+ if (!process.stdout.isTTY) return;
198
+ const elapsed = state.status.execStartedAt ? Math.floor((Date.now() - state.status.execStartedAt) / 1000) : 0;
199
+ const tool = state.status.toolName ?? "tool";
200
+ const bytes = formatBytes(state.status.bytes);
201
+ printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} running ${tool}… ${elapsed}s · ${bytes}`));
202
+ }
203
+
175
204
  export function formatToolStart(toolName, args, state) {
176
205
  if (toolName === "read") return `→ read ${displayPath(args.path, state)}`;
177
206
  if (toolName === "write") {
@@ -1,7 +1,7 @@
1
1
  import { ensureDirs } from "../config.mjs";
2
2
  import { backendFor, BACKENDS } from "../backends.mjs";
3
3
  import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
4
- import { isProfileRunning, isProfileServerUp, stopProfile } from "../process.mjs";
4
+ import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
5
5
  import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
6
6
  import { configureLocalProfile } from "../profile-setup.mjs";
7
7
  import { pc, startInteractive, createPrompt } from "../ui.mjs";
@@ -41,14 +41,18 @@ export async function modelCommandCenter(initialCatalog) {
41
41
 
42
42
  const runningProfilesNow = [];
43
43
  const serverUpIds = new Set();
44
+ const modelMissingIds = new Set();
44
45
  for (const profile of normalized.profiles) {
45
46
  if (await isProfileRunning(profile)) {
46
47
  runningProfilesNow.push(profile);
47
48
  continue;
48
49
  }
49
- if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) serverUpIds.add(profile.id);
50
+ if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) {
51
+ if (await modelAvailableOnServer(profile)) serverUpIds.add(profile.id);
52
+ else modelMissingIds.add(profile.id);
53
+ }
50
54
  }
51
- printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds);
55
+ printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds, modelMissingIds);
52
56
  await printBenchmarkLine();
53
57
 
54
58
  const nameWidth = modelNameWidth(allItems);
@@ -57,6 +61,7 @@ export async function modelCommandCenter(initialCatalog) {
57
61
  if (item.type === "profile") {
58
62
  if (item.fileMissing) return "missing";
59
63
  if (runningProfilesNow.some((profile) => profile.id === item.profile.id)) return "running";
64
+ if (modelMissingIds.has(item.profile.id)) return "missing";
60
65
  if (serverUpIds.has(item.profile.id)) return "serverup";
61
66
  return "ready";
62
67
  }
@@ -72,8 +77,8 @@ export async function modelCommandCenter(initialCatalog) {
72
77
  const bucket = grouped.get(group);
73
78
  if (!bucket || bucket.length === 0) continue;
74
79
  for (const item of bucket) {
75
- const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth });
76
- choices.push({ value: opt.value, label: opt.label });
80
+ const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth });
81
+ choices.push({ value: opt.value, label: opt.label, hint: opt.hint });
77
82
  }
78
83
  }
79
84
 
@@ -80,12 +80,17 @@ function optionLabel({ status, source, name, ctx, size, nameWidth }) {
80
80
  return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
81
81
  }
82
82
 
83
- export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth }) {
83
+ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
84
84
  if (item.type === "profile") {
85
85
  const backend = backendFor(item.profile.backend);
86
86
  const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
87
87
  const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
88
- const status = item.fileMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
88
+ const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
89
+ const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
90
+ const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
91
+ const hint = drafterMissing ? "MTP drafter missing — reconfigure"
92
+ : modelMissing ? `${backend.label} model no longer available`
93
+ : undefined;
89
94
  return {
90
95
  value: itemKey(item),
91
96
  label: optionLabel({
@@ -96,6 +101,7 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
96
101
  ctx: optionCtxLabel(item),
97
102
  size: optionSizeLabel(item),
98
103
  }),
104
+ ...(hint ? { hint: pc.red(hint) } : {}),
99
105
  };
100
106
  }
101
107
  if (item.type === "new") {
@@ -125,10 +131,10 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
125
131
  };
126
132
  }
127
133
 
128
- export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set()) {
134
+ export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
129
135
  const profiles = normalized.profiles;
130
136
  const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
131
- const isMissing = (p) => isProfileFileMissing(p);
137
+ const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
132
138
  const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
133
139
  const runningCount = runningProfilesNow.length;
134
140
  const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
@@ -1,3 +1,4 @@
1
+ import { existsSync } from "node:fs";
1
2
  import { execFile } from "node:child_process";
2
3
  import { promisify } from "node:util";
3
4
  import { estimateMemory } from "./estimate.mjs";
@@ -36,6 +37,10 @@ export async function configureLocalProfile(prompt, profile) {
36
37
  // so that re-setup can pick up MTP availability, vision changes, etc.
37
38
  const freshCaps = detectCapabilities(profile.modelPath, profile.mmprojPath);
38
39
  let drafterPath = profile.drafterPath ?? null;
40
+ if (drafterPath && !existsSync(drafterPath)) {
41
+ // Stored drafter is no longer on disk — drop it and re-scan for a fresh one.
42
+ drafterPath = null;
43
+ }
39
44
  if (!drafterPath) {
40
45
  const { drafters } = await scanGgufModels();
41
46
  const drafter = matchDrafter(profile.modelPath, drafters);
@@ -47,6 +52,12 @@ export async function configureLocalProfile(prompt, profile) {
47
52
  if (hasMtp && configured.backend !== "llama-cpp-mtp") {
48
53
  configured = { ...configured, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp", drafterPath, capabilities: { ...configured.capabilities, mtp: true } };
49
54
  }
55
+ // If the profile was MTP but the drafter is now gone (and the model isn't
56
+ // natively MTP), switch back to plain llama.cpp so the server can start.
57
+ if (!hasMtp && configured.backend === "llama-cpp-mtp") {
58
+ console.log(pc.yellow("MTP drafter no longer found — switching to llama.cpp without speculative decoding."));
59
+ configured = removeMtpDefaults(configured);
60
+ }
50
61
  if (drafterPath && !configured.drafterPath) {
51
62
  configured = { ...configured, drafterPath };
52
63
  }