offgrid-ai 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/benchmark/finalize.mjs +6 -1
- package/src/benchmark/flow.mjs +5 -2
- package/src/benchmark/metrics.mjs +8 -3
- package/src/benchmark/pi-runner.mjs +5 -2
- package/src/benchmark/stream-renderer.mjs +31 -2
- package/src/commands/models.mjs +10 -5
- package/src/model-presenters.mjs +10 -4
- package/src/profile-setup.mjs +11 -0
package/package.json
CHANGED
|
package/src/benchmark/finalize.mjs
CHANGED
|
@@ -94,7 +94,7 @@ async function responseErrorDetail(response) {
|
|
|
94
94
|
}
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics) {
|
|
97
|
+
export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError = null) {
|
|
98
98
|
const metadataPath = join(runDirectory, "metadata.json");
|
|
99
99
|
const metadata = JSON.parse(await readFile(metadataPath, "utf8"));
|
|
100
100
|
const now = new Date();
|
|
@@ -149,6 +149,7 @@ export async function finalizeBenchmarkRun(runDirectory, runResult, speedMetrics
|
|
|
149
149
|
|
|
150
150
|
metadata.runner.speedMetrics = speedMetrics;
|
|
151
151
|
metadata.runner.metricSource = speedMetrics?.metricSource ?? null;
|
|
152
|
+
metadata.runner.speedMetricsError = speedMetricsError ?? null;
|
|
152
153
|
|
|
153
154
|
metadata.results = {
|
|
154
155
|
wallClockMs: runResult.wallClockMs,
|
|
@@ -231,6 +232,10 @@ export function renderBenchmarkSummary(metadata) {
|
|
|
231
232
|
console.log(pc.dim("\n" + tip));
|
|
232
233
|
}
|
|
233
234
|
}
|
|
235
|
+
|
|
236
|
+
if (status === "completed" && !runner?.speedMetrics && runner?.speedMetricsError) {
|
|
237
|
+
console.log(pc.dim(`\nSpeed metrics unavailable: ${runner.speedMetricsError}`));
|
|
238
|
+
}
|
|
234
239
|
}
|
|
235
240
|
|
|
236
241
|
function wrapText(text, width = 64) {
|
package/src/benchmark/flow.mjs
CHANGED
|
@@ -88,15 +88,18 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
|
|
|
88
88
|
const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
|
|
89
89
|
|
|
90
90
|
let speedMetrics = null;
|
|
91
|
+
let speedMetricsError = null;
|
|
91
92
|
if (!runResult.error) {
|
|
92
93
|
try {
|
|
93
94
|
speedMetrics = await queryServerMetrics(profile);
|
|
94
95
|
} catch (err) {
|
|
95
|
-
|
|
96
|
+
// Non-fatal: speed metrics are a supplementary measurement, not the
|
|
97
|
+
// benchmark itself. Don't poison the run result; surface it as a note.
|
|
98
|
+
speedMetricsError = err.message;
|
|
96
99
|
}
|
|
97
100
|
}
|
|
98
101
|
|
|
99
|
-
metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics);
|
|
102
|
+
metadata = await finalizeBenchmarkRun(runDirectory, runResult, speedMetrics, speedMetricsError);
|
|
100
103
|
renderBenchmarkSummary(metadata);
|
|
101
104
|
} catch (err) {
|
|
102
105
|
const failedResult = {
|
|
package/src/benchmark/metrics.mjs
CHANGED
|
@@ -4,6 +4,8 @@ import { backendFor } from "../backends.mjs";
|
|
|
4
4
|
import { apiRootUrl } from "../process.mjs";
|
|
5
5
|
|
|
6
6
|
const BENCH_SPEED_PROMPT = "Write a one-sentence summary of machine learning.";
|
|
7
|
+
const SPEED_QUERY_TIMEOUT_MS = 120_000;
|
|
8
|
+
const SPEED_QUERY_MAX_TOKENS = 64;
|
|
7
9
|
|
|
8
10
|
export async function queryServerMetrics(profile) {
|
|
9
11
|
const backend = backendFor(profile.backend);
|
|
@@ -26,13 +28,14 @@ async function queryLlamaCppMetrics(profile) {
|
|
|
26
28
|
model: profile.modelAlias,
|
|
27
29
|
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
28
30
|
stream: false,
|
|
31
|
+
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
29
32
|
};
|
|
30
33
|
|
|
31
34
|
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
32
35
|
method: "POST",
|
|
33
36
|
headers: { "Content-Type": "application/json" },
|
|
34
37
|
body: JSON.stringify(body),
|
|
35
|
-
signal: AbortSignal.timeout(
|
|
38
|
+
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
36
39
|
});
|
|
37
40
|
|
|
38
41
|
if (!response.ok) {
|
|
@@ -66,13 +69,14 @@ async function queryOmlxMetrics(profile) {
|
|
|
66
69
|
messages: [{ role: "user", content: BENCH_SPEED_PROMPT }],
|
|
67
70
|
stream: true,
|
|
68
71
|
stream_options: { include_usage: true },
|
|
72
|
+
max_tokens: SPEED_QUERY_MAX_TOKENS,
|
|
69
73
|
};
|
|
70
74
|
|
|
71
75
|
const response = await fetch(profile.baseUrl.replace(/\/$/u, "") + "/chat/completions", {
|
|
72
76
|
method: "POST",
|
|
73
77
|
headers: { "Content-Type": "application/json" },
|
|
74
78
|
body: JSON.stringify(body),
|
|
75
|
-
signal: AbortSignal.timeout(
|
|
79
|
+
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
76
80
|
});
|
|
77
81
|
|
|
78
82
|
if (!response.ok) {
|
|
@@ -117,6 +121,7 @@ async function queryOllamaMetrics(profile) {
|
|
|
117
121
|
model: profile.modelAlias,
|
|
118
122
|
prompt: BENCH_SPEED_PROMPT,
|
|
119
123
|
stream: false,
|
|
124
|
+
options: { num_predict: SPEED_QUERY_MAX_TOKENS },
|
|
120
125
|
};
|
|
121
126
|
|
|
122
127
|
const apiBaseUrl = apiRootUrl(profile.baseUrl || backendFor(profile.backend).apiBaseUrl || "");
|
|
@@ -125,7 +130,7 @@ async function queryOllamaMetrics(profile) {
|
|
|
125
130
|
method: "POST",
|
|
126
131
|
headers: { "Content-Type": "application/json" },
|
|
127
132
|
body: JSON.stringify(body),
|
|
128
|
-
signal: AbortSignal.timeout(
|
|
133
|
+
signal: AbortSignal.timeout(SPEED_QUERY_TIMEOUT_MS),
|
|
129
134
|
});
|
|
130
135
|
|
|
131
136
|
if (!response.ok) {
|
|
package/src/benchmark/pi-runner.mjs
CHANGED
|
@@ -5,7 +5,7 @@ import { join } from "node:path";
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
6
|
import {
|
|
7
7
|
BENCH_COLORS, renderStreamEvent,
|
|
8
|
-
formatToolCall, printFinalLine,
|
|
8
|
+
formatToolCall, printFinalLine, stopExecTimer,
|
|
9
9
|
} from "./stream-renderer.mjs";
|
|
10
10
|
import { piModelString } from "./shared.mjs";
|
|
11
11
|
|
|
@@ -58,7 +58,8 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
58
58
|
turnHadToolError: false,
|
|
59
59
|
modelPrinted: false,
|
|
60
60
|
activeTool: null,
|
|
61
|
-
|
|
61
|
+
execTimer: null,
|
|
62
|
+
status: { mode: "idle", toolName: null, bytes: 0, tokens: 0, execStartedAt: null },
|
|
62
63
|
};
|
|
63
64
|
|
|
64
65
|
function appendResponse(text) {
|
|
@@ -193,6 +194,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
193
194
|
return new Promise((resolve) => {
|
|
194
195
|
child.on("exit", async (code) => {
|
|
195
196
|
if (signal) signal.removeEventListener("abort", abortListener);
|
|
197
|
+
stopExecTimer(renderState);
|
|
196
198
|
if (streamBuffer.trim()) {
|
|
197
199
|
processLine(streamBuffer);
|
|
198
200
|
}
|
|
@@ -225,6 +227,7 @@ export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
|
|
|
225
227
|
|
|
226
228
|
child.on("error", async (err) => {
|
|
227
229
|
if (signal) signal.removeEventListener("abort", abortListener);
|
|
230
|
+
stopExecTimer(renderState);
|
|
228
231
|
await streamHandle.close();
|
|
229
232
|
await stderrHandle.close();
|
|
230
233
|
runResult.error = { message: err.message };
|
|
package/src/benchmark/stream-renderer.mjs
CHANGED
|
@@ -107,6 +107,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
107
107
|
};
|
|
108
108
|
resetStatus(state, "exec", parsed.toolName);
|
|
109
109
|
printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, parsed.args ?? {}, state)));
|
|
110
|
+
startExecTimer(state);
|
|
110
111
|
break;
|
|
111
112
|
}
|
|
112
113
|
case "tool_execution_update": {
|
|
@@ -114,11 +115,13 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
114
115
|
if (text) {
|
|
115
116
|
if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(text));
|
|
116
117
|
if (state.activeTool) state.activeTool.outputText = text;
|
|
117
|
-
|
|
118
|
+
state.status.bytes += Buffer.byteLength(text, "utf8");
|
|
119
|
+
printExecStatus(state);
|
|
118
120
|
}
|
|
119
121
|
break;
|
|
120
122
|
}
|
|
121
123
|
case "tool_execution_end": {
|
|
124
|
+
stopExecTimer(state);
|
|
122
125
|
const lines = formatToolEnd(parsed, state);
|
|
123
126
|
if (parsed.isError) state.turnHadToolError = true;
|
|
124
127
|
for (const line of lines) printFinalLine(line);
|
|
@@ -133,6 +136,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
133
136
|
break;
|
|
134
137
|
}
|
|
135
138
|
case "turn_end": {
|
|
139
|
+
stopExecTimer(state);
|
|
136
140
|
const usage = parsed.message?.usage;
|
|
137
141
|
const tokenPart = usage ? ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens` : "";
|
|
138
142
|
const marker = state.turnHadToolError ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
|
|
@@ -141,7 +145,7 @@ export function renderStreamEvent(parsed, state, opts = {}) {
|
|
|
141
145
|
break;
|
|
142
146
|
}
|
|
143
147
|
case "agent_end":
|
|
144
|
-
|
|
148
|
+
stopExecTimer(state);
|
|
145
149
|
break;
|
|
146
150
|
default:
|
|
147
151
|
break;
|
|
@@ -172,6 +176,31 @@ export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
|
|
|
172
176
|
printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} ${modeLabel}${label} · ${bytes} (~${tokens} tokens)`));
|
|
173
177
|
}
|
|
174
178
|
|
|
179
|
+
export function startExecTimer(state) {
|
|
180
|
+
stopExecTimer(state);
|
|
181
|
+
state.status.execStartedAt = Date.now();
|
|
182
|
+
state.status.bytes = 0;
|
|
183
|
+
if (!process.stdout.isTTY) return;
|
|
184
|
+
printExecStatus(state);
|
|
185
|
+
state.execTimer = setInterval(() => printExecStatus(state), 1000);
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
export function stopExecTimer(state) {
|
|
189
|
+
if (state.execTimer) {
|
|
190
|
+
clearInterval(state.execTimer);
|
|
191
|
+
state.execTimer = null;
|
|
192
|
+
}
|
|
193
|
+
clearStatusLine();
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
export function printExecStatus(state) {
|
|
197
|
+
if (!process.stdout.isTTY) return;
|
|
198
|
+
const elapsed = state.status.execStartedAt ? Math.floor((Date.now() - state.status.execStartedAt) / 1000) : 0;
|
|
199
|
+
const tool = state.status.toolName ?? "tool";
|
|
200
|
+
const bytes = formatBytes(state.status.bytes);
|
|
201
|
+
printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} running ${tool}… ${elapsed}s · ${bytes}`));
|
|
202
|
+
}
|
|
203
|
+
|
|
175
204
|
export function formatToolStart(toolName, args, state) {
|
|
176
205
|
if (toolName === "read") return `→ read ${displayPath(args.path, state)}`;
|
|
177
206
|
if (toolName === "write") {
|
package/src/commands/models.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { ensureDirs } from "../config.mjs";
|
|
2
2
|
import { backendFor, BACKENDS } from "../backends.mjs";
|
|
3
3
|
import { createProfileFromModel, readProfile, saveProfile, deleteProfile, profileJsonPath } from "../profiles.mjs";
|
|
4
|
-
import { isProfileRunning, isProfileServerUp, stopProfile } from "../process.mjs";
|
|
4
|
+
import { isProfileRunning, isProfileServerUp, modelAvailableOnServer, stopProfile } from "../process.mjs";
|
|
5
5
|
import { syncPiConfig, removeFromPiConfig } from "../harness-pi.mjs";
|
|
6
6
|
import { configureLocalProfile } from "../profile-setup.mjs";
|
|
7
7
|
import { pc, startInteractive, createPrompt } from "../ui.mjs";
|
|
@@ -41,14 +41,18 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
41
41
|
|
|
42
42
|
const runningProfilesNow = [];
|
|
43
43
|
const serverUpIds = new Set();
|
|
44
|
+
const modelMissingIds = new Set();
|
|
44
45
|
for (const profile of normalized.profiles) {
|
|
45
46
|
if (await isProfileRunning(profile)) {
|
|
46
47
|
runningProfilesNow.push(profile);
|
|
47
48
|
continue;
|
|
48
49
|
}
|
|
49
|
-
if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile))
|
|
50
|
+
if (backendFor(profile.backend).type === "managed-server" && await isProfileServerUp(profile)) {
|
|
51
|
+
if (await modelAvailableOnServer(profile)) serverUpIds.add(profile.id);
|
|
52
|
+
else modelMissingIds.add(profile.id);
|
|
53
|
+
}
|
|
50
54
|
}
|
|
51
|
-
printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds);
|
|
55
|
+
printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds, modelMissingIds);
|
|
52
56
|
await printBenchmarkLine();
|
|
53
57
|
|
|
54
58
|
const nameWidth = modelNameWidth(allItems);
|
|
@@ -57,6 +61,7 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
57
61
|
if (item.type === "profile") {
|
|
58
62
|
if (item.fileMissing) return "missing";
|
|
59
63
|
if (runningProfilesNow.some((profile) => profile.id === item.profile.id)) return "running";
|
|
64
|
+
if (modelMissingIds.has(item.profile.id)) return "missing";
|
|
60
65
|
if (serverUpIds.has(item.profile.id)) return "serverup";
|
|
61
66
|
return "ready";
|
|
62
67
|
}
|
|
@@ -72,8 +77,8 @@ export async function modelCommandCenter(initialCatalog) {
|
|
|
72
77
|
const bucket = grouped.get(group);
|
|
73
78
|
if (!bucket || bucket.length === 0) continue;
|
|
74
79
|
for (const item of bucket) {
|
|
75
|
-
const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth });
|
|
76
|
-
choices.push({ value: opt.value, label: opt.label });
|
|
80
|
+
const opt = modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth });
|
|
81
|
+
choices.push({ value: opt.value, label: opt.label, hint: opt.hint });
|
|
77
82
|
}
|
|
78
83
|
}
|
|
79
84
|
|
package/src/model-presenters.mjs
CHANGED
|
@@ -80,12 +80,17 @@ function optionLabel({ status, source, name, ctx, size, nameWidth }) {
|
|
|
80
80
|
return [status, source, pc.bold(optionPad(name, null, nameWidth)), ctx, pc.dim(size)].join(OPTION_SEPARATOR);
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
-
export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameWidth }) {
|
|
83
|
+
export function modelSelectOption(item, { runningProfilesNow, serverUpIds, modelMissingIds, nameWidth }) {
|
|
84
84
|
if (item.type === "profile") {
|
|
85
85
|
const backend = backendFor(item.profile.backend);
|
|
86
86
|
const running = runningProfilesNow.some((profile) => profile.id === item.profile.id);
|
|
87
87
|
const serverUp = !running && !item.fileMissing && serverUpIds?.has(item.profile.id);
|
|
88
|
-
const
|
|
88
|
+
const modelMissing = !item.fileMissing && modelMissingIds?.has(item.profile.id);
|
|
89
|
+
const status = item.fileMissing || modelMissing ? "missing" : running ? "running" : serverUp ? "serverup" : "ready";
|
|
90
|
+
const drafterMissing = Boolean(item.profile.drafterPath) && !existsSync(item.profile.drafterPath);
|
|
91
|
+
const hint = drafterMissing ? "MTP drafter missing — reconfigure"
|
|
92
|
+
: modelMissing ? `${backend.label} model no longer available`
|
|
93
|
+
: undefined;
|
|
89
94
|
return {
|
|
90
95
|
value: itemKey(item),
|
|
91
96
|
label: optionLabel({
|
|
@@ -96,6 +101,7 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
|
|
|
96
101
|
ctx: optionCtxLabel(item),
|
|
97
102
|
size: optionSizeLabel(item),
|
|
98
103
|
}),
|
|
104
|
+
...(hint ? { hint: pc.red(hint) } : {}),
|
|
99
105
|
};
|
|
100
106
|
}
|
|
101
107
|
if (item.type === "new") {
|
|
@@ -125,10 +131,10 @@ export function modelSelectOption(item, { runningProfilesNow, serverUpIds, nameW
|
|
|
125
131
|
};
|
|
126
132
|
}
|
|
127
133
|
|
|
128
|
-
export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set()) {
|
|
134
|
+
export function printWorkspaceHeader(normalized, runningProfilesNow, serverUpIds = new Set(), modelMissingIds = new Set()) {
|
|
129
135
|
const profiles = normalized.profiles;
|
|
130
136
|
const isRunning = (p) => runningProfilesNow.some((r) => r.id === p.id);
|
|
131
|
-
const isMissing = (p) => isProfileFileMissing(p);
|
|
137
|
+
const isMissing = (p) => isProfileFileMissing(p) || modelMissingIds.has(p.id);
|
|
132
138
|
const readyCount = profiles.filter((p) => !isMissing(p) && !isRunning(p) && !serverUpIds.has(p.id)).length;
|
|
133
139
|
const runningCount = runningProfilesNow.length;
|
|
134
140
|
const serverUpCount = profiles.filter((p) => !isMissing(p) && serverUpIds.has(p.id) && !isRunning(p)).length;
|
package/src/profile-setup.mjs
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
1
2
|
import { execFile } from "node:child_process";
|
|
2
3
|
import { promisify } from "node:util";
|
|
3
4
|
import { estimateMemory } from "./estimate.mjs";
|
|
@@ -36,6 +37,10 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
36
37
|
// so that re-setup can pick up MTP availability, vision changes, etc.
|
|
37
38
|
const freshCaps = detectCapabilities(profile.modelPath, profile.mmprojPath);
|
|
38
39
|
let drafterPath = profile.drafterPath ?? null;
|
|
40
|
+
if (drafterPath && !existsSync(drafterPath)) {
|
|
41
|
+
// Stored drafter is no longer on disk — drop it and re-scan for a fresh one.
|
|
42
|
+
drafterPath = null;
|
|
43
|
+
}
|
|
39
44
|
if (!drafterPath) {
|
|
40
45
|
const { drafters } = await scanGgufModels();
|
|
41
46
|
const drafter = matchDrafter(profile.modelPath, drafters);
|
|
@@ -47,6 +52,12 @@ export async function configureLocalProfile(prompt, profile) {
|
|
|
47
52
|
if (hasMtp && configured.backend !== "llama-cpp-mtp") {
|
|
48
53
|
configured = { ...configured, backend: "llama-cpp-mtp", providerId: "llama-cpp-mtp", drafterPath, capabilities: { ...configured.capabilities, mtp: true } };
|
|
49
54
|
}
|
|
55
|
+
// If the profile was MTP but the drafter is now gone (and the model isn't
|
|
56
|
+
// natively MTP), switch back to plain llama.cpp so the server can start.
|
|
57
|
+
if (!hasMtp && configured.backend === "llama-cpp-mtp") {
|
|
58
|
+
console.log(pc.yellow("MTP drafter no longer found — switching to llama.cpp without speculative decoding."));
|
|
59
|
+
configured = removeMtpDefaults(configured);
|
|
60
|
+
}
|
|
50
61
|
if (drafterPath && !configured.drafterPath) {
|
|
51
62
|
configured = { ...configured, drafterPath };
|
|
52
63
|
}
|