offgrid-ai 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/src/autodetect.mjs +2 -1
- package/src/benchmark/flow.mjs +4 -14
- package/src/benchmark/prepare.mjs +0 -3
- package/src/benchmark/sdk-runner.mjs +380 -0
- package/src/benchmark.mjs +1 -1
- package/src/model-presenters.mjs +13 -7
- package/src/process.mjs +85 -13
- package/src/profiles.mjs +3 -8
- package/src/benchmark/pi-runner.mjs +0 -257
- package/src/benchmark/stream-renderer.mjs +0 -302
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "offgrid-ai",
|
|
3
|
-
"version": "0.12.2",
|
|
3
|
+
"version": "0.14.0",
|
|
4
4
|
"description": "Privacy-first CLI for running local LLMs — discover, configure, run, benchmark",
|
|
5
5
|
"author": "Eeshan Srivastava (https://eeshans.com)",
|
|
6
6
|
"type": "module",
|
|
@@ -43,6 +43,9 @@
|
|
|
43
43
|
},
|
|
44
44
|
"dependencies": {
|
|
45
45
|
"@clack/prompts": "^1.4.0",
|
|
46
|
+
"@earendil-works/pi-agent-core": "^0.80.3",
|
|
47
|
+
"@earendil-works/pi-ai": "^0.80.3",
|
|
48
|
+
"@earendil-works/pi-coding-agent": "^0.80.3",
|
|
46
49
|
"picocolors": "^1.1.0"
|
|
47
50
|
},
|
|
48
51
|
"keywords": [
|
package/src/autodetect.mjs
CHANGED
|
@@ -50,7 +50,7 @@ export function detectCapabilities(modelPath, mmprojPath) {
|
|
|
50
50
|
|
|
51
51
|
// ── Compute llama-server flags from capabilities ───────────────────────────
|
|
52
52
|
|
|
53
|
-
export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath) {
|
|
53
|
+
export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath, flagOverrides = {}) {
|
|
54
54
|
const { thinking, mtp, quant } = capabilities;
|
|
55
55
|
const isLowMem = quant && /[Qq]4[_0]/i.test(quant);
|
|
56
56
|
|
|
@@ -69,6 +69,7 @@ export function computeFlags(capabilities, modelPath, mmprojPath, draftModelPath
|
|
|
69
69
|
repeatPenalty: thinking ? 1.1 : 1.0,
|
|
70
70
|
parallel: 1,
|
|
71
71
|
batchSize: 512,
|
|
72
|
+
...flagOverrides,
|
|
72
73
|
};
|
|
73
74
|
|
|
74
75
|
// Thinking mode
|
package/src/benchmark/flow.mjs
CHANGED
|
@@ -3,14 +3,13 @@
|
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
import { ensureDirs } from "../config.mjs";
|
|
5
5
|
import { backendFor } from "../backends.mjs";
|
|
6
|
-
import { hasPi, hasPiModel, syncPiConfig } from "../harness-pi.mjs";
|
|
7
6
|
import { serverReady, startServer, waitForReady, stopProfile, modelAvailableOnServer, unloadModelFromServer } from "../process.mjs";
|
|
8
7
|
import { loadProfiles } from "../profiles.mjs";
|
|
9
8
|
import { pc, createPrompt } from "../ui.mjs";
|
|
10
9
|
import { linkBenchmarkRepo } from "./repo.mjs";
|
|
11
10
|
import { loadBenchmarks } from "./shared.mjs";
|
|
12
11
|
import { prepareBenchmarkRun } from "./prepare.mjs";
|
|
13
|
-
import { runBenchmarkInPi } from "./pi-runner.mjs";
|
|
12
|
+
import { runBenchmarkInPi } from "./sdk-runner.mjs";
|
|
14
13
|
import { queryServerMetrics } from "./metrics.mjs";
|
|
15
14
|
import { finalizeBenchmarkRun, renderBenchmarkSummary } from "./finalize.mjs";
|
|
16
15
|
|
|
@@ -63,7 +62,7 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
|
|
|
63
62
|
}
|
|
64
63
|
let serverStarted = false;
|
|
65
64
|
let benchmarkStarted = false;
|
|
66
|
-
let metadata
|
|
65
|
+
let metadata;
|
|
67
66
|
|
|
68
67
|
const onSigint = () => {
|
|
69
68
|
controller.abort();
|
|
@@ -71,18 +70,9 @@ export async function runPreparedBenchmark(profile, runDirectory, options = {})
|
|
|
71
70
|
process.on("SIGINT", onSigint);
|
|
72
71
|
|
|
73
72
|
try {
|
|
74
|
-
if (!(await hasPi())) {
|
|
75
|
-
console.log(pc.yellow("\nPi is not installed. Run prepared for manual execution."));
|
|
76
|
-
return metadata;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
73
|
const serverState = await ensureServerForBenchmark(profile);
|
|
80
74
|
serverStarted = serverState.started;
|
|
81
75
|
|
|
82
|
-
if (!(await hasPiModel(profile))) {
|
|
83
|
-
await syncPiConfig(profile);
|
|
84
|
-
}
|
|
85
|
-
|
|
86
76
|
benchmarkStarted = true;
|
|
87
77
|
const runResult = await runBenchmarkInPi(profile, runDirectory, { signal: controller.signal });
|
|
88
78
|
|
|
@@ -168,7 +158,7 @@ export async function benchmarkForProfile(profile) {
|
|
|
168
158
|
const modelSource = benchmarkModelSource(profile);
|
|
169
159
|
const backendLabel = backendFor(profile.backend).label;
|
|
170
160
|
|
|
171
|
-
const canRun =
|
|
161
|
+
const canRun = modelSource !== "cloud";
|
|
172
162
|
const action = await chooseBenchmarkAction(prompt, canRun);
|
|
173
163
|
|
|
174
164
|
const runDirectory = await prepareBenchmarkRun({ repoPath, benchmark: selectedBenchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps: action === "prepare" });
|
|
@@ -238,7 +228,7 @@ export async function benchmarkFlow() {
|
|
|
238
228
|
modelSource = "cloud";
|
|
239
229
|
}
|
|
240
230
|
|
|
241
|
-
const canRun =
|
|
231
|
+
const canRun = modelSource !== "cloud" && profile != null;
|
|
242
232
|
const action = await chooseBenchmarkAction(prompt, canRun);
|
|
243
233
|
|
|
244
234
|
const runDirectory = await prepareBenchmarkRun({ repoPath, benchmark: selectedBenchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps: action === "prepare" });
|
|
@@ -46,9 +46,6 @@ export async function prepareBenchmarkRun({ repoPath, benchmark, kind, modelId,
|
|
|
46
46
|
const baseAssets = {
|
|
47
47
|
metadata: "metadata.json",
|
|
48
48
|
prompt: "prompt.md",
|
|
49
|
-
rawResponse: "response.raw.txt",
|
|
50
|
-
stream: "stream.ndjson",
|
|
51
|
-
stderr: "stderr.log",
|
|
52
49
|
};
|
|
53
50
|
const metadata = {
|
|
54
51
|
schemaVersion: 1,
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
// ── Run benchmark via Pi SDK (no subprocess, no NDJSON parsing) ────────────────
|
|
2
|
+
|
|
3
|
+
import { readFile } from "node:fs/promises";
|
|
4
|
+
import { join, relative, basename } from "node:path";
|
|
5
|
+
import { Agent } from "@earendil-works/pi-agent-core";
|
|
6
|
+
import { streamSimple } from "@earendil-works/pi-ai/compat";
|
|
7
|
+
import { createCodingTools } from "@earendil-works/pi-coding-agent";
|
|
8
|
+
import { pc } from "../ui.mjs";
|
|
9
|
+
import { piApiModelId } from "../harness-pi.mjs";
|
|
10
|
+
|
|
11
|
+
// Colour roles used by the benchmark renderer. Each value is a formatter taken
// from `pc` (imported from ../ui.mjs) and is called with the text to colourise.
const C = {
  // Streamed model output
  thinking: pc.magenta,
  text: pc.green,
  tool: pc.yellow,

  // Run status and diagnostics
  info: pc.cyan,
  success: pc.green,
  warning: pc.yellow,
  error: pc.red,
  dim: pc.dim,
};
|
|
21
|
+
|
|
22
|
+
/**
 * Run a prepared benchmark in-process through the Pi agent SDK.
 *
 * Reads `prompt.md` from `runDirectory`, sends it to an `Agent` configured from
 * `profile`, renders progress to the terminal while the agent runs, and
 * accumulates per-turn and whole-run metrics.
 *
 * @param {object} profile - Model profile; passed to `buildModel` and `piApiModelId`.
 * @param {string} runDirectory - Prepared run directory (contains `prompt.md`);
 *   also handed to `createCodingTools` and used to shorten displayed paths.
 * @param {{ signal?: AbortSignal }} [options] - Optional abort signal; aborting
 *   it cancels the agent and records "Cancelled by user".
 * @returns {Promise<object>} The run result: model string, exitCode, wallClockMs,
 *   turn/token/tool counters, `perTurn` entries, `rawResponseLines`, and `error`
 *   (`null` or `{ message }`).
 * @throws If `prompt.md` cannot be read (errors from the agent run itself are
 *   captured into `runResult.error` instead of being thrown).
 */
export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
  const model = buildModel(profile);
  const tools = createCodingTools(runDirectory);
  const systemPrompt = buildSystemPrompt(runDirectory);
  const promptText = await readFile(join(runDirectory, "prompt.md"), "utf8");

  const runResult = {
    model: `${profile.providerId}/${piApiModelId(profile)}`,
    // NOTE(review): exitCode is never changed below, so it stays 0 even when
    // `error` is set — confirm downstream consumers key off `error`, not exitCode.
    exitCode: 0,
    wallClockMs: null,
    agentTurns: 0,
    promptTokens: 0,
    completionTokens: 0,
    totalTokens: 0,
    cacheRead: 0,
    cacheWrite: 0,
    toolCalls: 0,
    toolResults: 0,
    perTurn: [],
    rawResponseLines: [],
    error: null,
  };

  const runStartMs = Date.now();
  let currentTurnStartMs = null;
  let lastTurnEndMs = null;
  let turnToolCalls = 0;
  let responseBuffer = "";
  const verbose = Boolean(process.env.OFFGRID_BENCHMARK_VERBOSE);
  // Arguments seen at tool_execution_start, keyed by call id, so the matching
  // tool_execution_end can render a summary that mentions them.
  const toolArgsByCallId = new Map();

  // ── Status line state ────────────────────────────────────────────────────
  let statusBytes = 0;
  let streamedText = false;
  let execTimer = null;
  let execStartedAt = null;

  // The status line is a single in-place terminal line; it is only drawn on a TTY.
  function clearStatusLine() {
    if (process.stdout.isTTY) process.stdout.write("\r\x1b[K");
  }

  function printStatusLine(text) {
    if (process.stdout.isTTY) process.stdout.write(`\r\x1b[K${text}`);
  }

  function stopExecTimer() {
    if (execTimer) {
      clearInterval(execTimer);
      execTimer = null;
    }
    clearStatusLine();
  }

  function startExecTimer(toolName) {
    stopExecTimer();
    execStartedAt = Date.now();
    if (!process.stdout.isTTY) return;
    const update = () => {
      const elapsed = Math.floor((Date.now() - execStartedAt) / 1000);
      printStatusLine(C.dim(`running ${toolName}… ${elapsed}s`));
    };
    update();
    execTimer = setInterval(update, 1000);
  }

  // Move any buffered assistant output into the result.
  function flushResponseBuffer() {
    if (!responseBuffer) return;
    runResult.rawResponseLines.push(responseBuffer);
    responseBuffer = "";
  }

  const agent = new Agent({
    initialState: {
      systemPrompt,
      model,
      thinkingLevel: profile.reasoning ? "low" : "off",
      tools,
    },
    // Every request is sent with the placeholder API key "none".
    streamFn: async (mdl, ctx, opts) =>
      streamSimple(mdl, ctx, { ...opts, apiKey: "none" }),
  });

  // ── Event handler: render + collect metrics ──────────────────────────────

  agent.subscribe((event) => {
    // A rendering failure is reported but must not take the run down.
    try {
      handleEvent(event);
    } catch (err) {
      console.error(C.error(`[renderer error] ${err.message}`));
    }
  });

  function handleEvent(event) {
    switch (event.type) {
      case "turn_start": {
        stopExecTimer();
        runResult.agentTurns += 1;
        // Turns are timed back-to-back: a turn starts where the previous one ended.
        currentTurnStartMs = lastTurnEndMs ?? runStartMs;
        turnToolCalls = 0;
        console.log("");
        console.log(C.info(`Turn ${runResult.agentTurns}`));
        break;
      }

      case "message_update": {
        const evt = event.assistantMessageEvent;
        if (!evt) break;
        // Compare sub-event names with underscores stripped.
        const sub = String(evt.type ?? "").replace(/_/gu, "");
        if (sub === "thinkingstart") {
          statusBytes = 0;
        } else if (sub === "thinkingdelta") {
          statusBytes += Buffer.byteLength(evt.delta || "", "utf8");
          // Rough display-only estimate: one token per four bytes.
          const tokens = Math.max(1, Math.ceil(statusBytes / 4));
          printStatusLine(C.dim(`thinking… ${formatBytes(statusBytes)} (~${formatTokens(tokens)} tokens)`));
          if (verbose) process.stdout.write(C.thinking(evt.delta || ""));
        } else if (sub === "textstart") {
          clearStatusLine();
          statusBytes = 0;
        } else if (sub === "textdelta") {
          process.stdout.write(evt.delta || "");
          responseBuffer += evt.delta || "";
          streamedText = true;
        } else if (sub === "toolcallstart") {
          clearStatusLine();
        }
        break;
      }

      case "message_end": {
        if (streamedText) {
          // Terminate the line of streamed text.
          console.log("");
          streamedText = false;
        }
        if (event.message?.role === "assistant") {
          for (const item of event.message.content ?? []) {
            if (item.type === "toolCall") {
              runResult.toolCalls += 1;
              turnToolCalls += 1;
              responseBuffer += `\n[toolCall] ${item.name}\n`;
            }
          }
          flushResponseBuffer();
        }
        break;
      }

      case "tool_execution_start": {
        clearStatusLine();
        toolArgsByCallId.set(event.toolCallId, event.args);
        console.log(C.tool(formatToolStart(event.toolName, event.args, runDirectory)));
        startExecTimer(event.toolName);
        break;
      }

      case "tool_execution_end": {
        stopExecTimer();
        const { toolName, result, isError, toolCallId } = event;
        const args = toolArgsByCallId.get(toolCallId) ?? {};
        // The call is finished; drop its arguments so the map does not grow
        // for the lifetime of the run.
        toolArgsByCallId.delete(toolCallId);
        const marker = isError ? C.error("✗") : C.success("✓");
        console.log(`${marker} ${toolSummary(toolName, result, isError, args, runDirectory)}`);
        runResult.toolResults += 1;
        break;
      }

      case "turn_end": {
        stopExecTimer();
        clearStatusLine();
        const msg = event.message;
        const isFailure = msg?.role === "assistant" && (msg.stopReason === "error" || msg.stopReason === "aborted");
        // Usage from failed or aborted turns is not counted.
        const usage = !isFailure ? msg?.usage : null;
        if (usage) {
          runResult.promptTokens += usage.input ?? 0;
          runResult.completionTokens += usage.output ?? 0;
          runResult.cacheRead += usage.cacheRead ?? 0;
          runResult.cacheWrite += usage.cacheWrite ?? 0;
        }
        const turnEndMs = Date.now();
        const wallClockMs = currentTurnStartMs !== null ? turnEndMs - currentTurnStartMs : null;
        runResult.perTurn.push({
          turn: runResult.agentTurns,
          inputTokens: usage?.input ?? 0,
          outputTokens: usage?.output ?? 0,
          cacheRead: usage?.cacheRead ?? 0,
          cacheWrite: usage?.cacheWrite ?? 0,
          wallClockMs,
          toolCalls: turnToolCalls,
        });
        lastTurnEndMs = turnEndMs;
        const tokStr = usage ? ` · ${formatTokens(usage.output ?? 0)} tokens` : "";
        console.log(C.success(`✓ turn ${runResult.agentTurns}${tokStr}`));
        break;
      }

      case "agent_end": {
        flushResponseBuffer();
        break;
      }
    }
  }

  // ── Wire abort signal ────────────────────────────────────────────────────

  let cancelled = false;
  const abortListener = () => {
    cancelled = true;
    agent.abort();
  };
  if (signal?.aborted) {
    // An already-aborted signal never fires "abort" again, so honour it here
    // instead of starting a run nobody can cancel.
    cancelled = true;
  } else if (signal) {
    signal.addEventListener("abort", abortListener, { once: true });
  }

  // ── Run ───────────────────────────────────────────────────────────────────

  try {
    console.log(C.info("Pi benchmark started"));
    console.log(C.dim(` Model ${model.provider}/${model.id}`));
    if (!cancelled) await agent.prompt(promptText);
  } catch (err) {
    if (!cancelled) {
      // Thrown values are not guaranteed to be Error instances.
      runResult.error = { message: err?.message ?? String(err) };
    }
  } finally {
    if (signal) signal.removeEventListener("abort", abortListener);
    // If the run ended mid-tool (throw or abort) no tool_execution_end arrives;
    // clear the interval here so it cannot keep ticking after we return.
    stopExecTimer();
  }

  if (cancelled) {
    runResult.error = { message: "Cancelled by user" };
  }

  if (!runResult.error && agent.state.errorMessage) {
    runResult.error = { message: agent.state.errorMessage };
  }

  runResult.wallClockMs = Date.now() - runStartMs;
  runResult.totalTokens = runResult.promptTokens + runResult.completionTokens;

  console.log(C.info("Pi benchmark finished"));
  return runResult;
}
|
|
256
|
+
|
|
257
|
+
// ── Model construction ──────────────────────────────────────────────────────
|
|
258
|
+
|
|
259
|
+
/**
 * Build the model descriptor handed to the agent from a stored profile.
 *
 * When the profile does not set `reasoning` / `compat` explicitly, both are
 * inferred from whether the profile's identifying strings mention qwen or gemma 4.
 *
 * @param {object} profile - Model profile (id, label, providerId, baseUrl, ...).
 * @returns {object} Descriptor using the "openai-completions" API with zero cost.
 */
function buildModel(profile) {
  const identifiers = [profile.id, profile.label, profile.modelAlias, profile.modelPath, profile.omlxModel];
  const haystack = identifiers.filter(Boolean).join(" ").toLowerCase();
  const isKnownThinkingFamily = /qwen|gemma-4|gemma 4/i.test(haystack);

  // An explicit profile setting always wins over the name-based guess.
  let reasoning = isKnownThinkingFamily;
  if (profile.reasoning !== undefined) {
    reasoning = Boolean(profile.reasoning);
  }

  let compatOverrides = null;
  if (profile.compat) {
    compatOverrides = profile.compat;
  } else if (isKnownThinkingFamily) {
    compatOverrides = { thinkingFormat: "qwen-chat-template" };
  }

  // Image input is advertised when a projector file or the vision capability is present.
  const acceptsImages = profile.mmprojPath || profile.capabilities?.vision;

  const baseCompat = {
    supportsDeveloperRole: false,
    supportsReasoningEffort: false,
    maxTokensField: "max_tokens",
  };

  return {
    id: piApiModelId(profile),
    name: profile.label,
    api: "openai-completions",
    provider: profile.providerId,
    baseUrl: profile.baseUrl,
    reasoning,
    input: acceptsImages ? ["text", "image"] : ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: profile.flags?.ctxSize ?? 32768,
    maxTokens: 16384,
    compat: { ...baseCompat, ...(compatOverrides ?? {}) },
  };
}
|
|
290
|
+
|
|
291
|
+
// ── System prompt ───────────────────────────────────────────────────────────
|
|
292
|
+
|
|
293
|
+
/**
 * Compose the system prompt for the benchmark agent.
 *
 * @param {string} cwd - Working directory to report; backslashes are shown as "/".
 * @returns {string} Prompt text ending with today's local date (YYYY-MM-DD) and `cwd`.
 */
function buildSystemPrompt(cwd) {
  const today = new Date();
  const twoDigits = (value) => String(value).padStart(2, "0");
  const date = [
    today.getFullYear(),
    twoDigits(today.getMonth() + 1), // getMonth() is zero-based
    twoDigits(today.getDate()),
  ].join("-");
  const workingDirectory = cwd.replace(/\\/gu, "/");

  const promptLines = [
    "You are an expert coding assistant. You help users by reading files, executing commands, editing code, and writing new files.",
    "",
    "Available tools:",
    "- read: Read file contents (supports text and images)",
    "- bash: Execute shell commands",
    "- edit: Apply targeted text replacements to files",
    "- write: Write content to files (creates or overwrites)",
    "",
    "Guidelines:",
    "- Be concise in your responses",
    "- Show file paths clearly when working with files",
    "- Use the write tool to create files — do not return file contents as chat text",
    "- Use bash to run commands and verify your work",
    "",
    `Current date: ${date}`,
    `Current working directory: ${workingDirectory}`,
  ];
  return promptLines.join("\n");
}
|
|
313
|
+
|
|
314
|
+
// ── Rendering helpers ───────────────────────────────────────────────────────
|
|
315
|
+
|
|
316
|
+
/**
 * Format the one-line "→ …" announcement shown when a tool call starts.
 *
 * @param {string} toolName - Tool being invoked ("read", "write", "edit", "bash", or other).
 * @param {object|null|undefined} args - Tool arguments; a missing value is treated as `{}`
 *   so a call without arguments still gets a line instead of throwing.
 * @param {string} cwd - Directory that displayed paths are shortened against.
 * @returns {string} Display line for the tool start.
 */
function formatToolStart(toolName, args, cwd) {
  const params = args ?? {};
  switch (toolName) {
    case "read":
      return `→ read ${relPath(params.path, cwd)}`;
    case "write": {
      // Show the payload size only when there is content to measure.
      const size = params.content ? ` · ${formatBytes(Buffer.byteLength(String(params.content), "utf8"))}` : "";
      return `→ write ${relPath(params.path, cwd)}${size}`;
    }
    case "edit": {
      const count = Array.isArray(params.edits) ? params.edits.length : 0;
      const detail = count > 0 ? ` · ${count} replacement${count === 1 ? "" : "s"}` : "";
      return `→ edit ${relPath(params.path, cwd)}${detail}`;
    }
    case "bash":
      return `→ run ${truncateOneLine(params.command ?? "")}`;
    default:
      return `→ ${toolName}`;
  }
}
|
|
329
|
+
|
|
330
|
+
/**
 * Format the one-line summary shown after a tool call finishes.
 *
 * @param {string} toolName - Tool that ran.
 * @param {object} result - Tool result; its text is extracted with `toolResultText`.
 * @param {boolean} isError - Whether the tool reported a failure.
 * @param {object|null|undefined} args - Arguments of the call; a missing value is treated as `{}`.
 * @param {string} cwd - Directory that displayed paths are shortened against.
 * @returns {string} Summary text (without the ✓/✗ marker).
 */
function toolSummary(toolName, result, isError, args, cwd) {
  const params = args ?? {};
  const text = toolResultText(result);
  if (isError) return `${toolName} failed · ${firstLine(text)}`;

  switch (toolName) {
    case "write": {
      // Pull the byte count out of the tool's own success message when it has one.
      const m = String(text).match(/Successfully wrote\s+([0-9,]+)\s+bytes/iu);
      const size = m ? ` · ${formatBytes(Number(m[1].replace(/,/gu, "")))}` : "";
      return `wrote ${relPath(params.path, cwd)}${size}`;
    }
    case "read": {
      const size = text ? ` · ${formatBytes(Buffer.byteLength(text, "utf8"))}` : "";
      return `read ${relPath(params.path, cwd)}${size}`;
    }
    case "edit":
      return `edited ${relPath(params.path, cwd)}`;
    case "bash":
      // firstLine() substitutes "no details" for blank input and so is never
      // falsy; test the text itself so a silent command reads "command completed".
      return String(text).trim() ? firstLine(text) : "command completed";
    default:
      return `${toolName}${text ? ` · ${firstLine(text)}` : ""}`;
  }
}
|
|
343
|
+
|
|
344
|
+
/**
 * Extract the text of a tool result.
 *
 * @param {object|null|undefined} result - Tool result whose `content` is a string
 *   or an array of parts that may carry a `text` field.
 * @returns {string} The string content, or the non-empty `text` parts joined by
 *   newlines; "" when `content` is neither a string nor an array.
 */
function toolResultText(result) {
  const payload = result?.content;

  if (Array.isArray(payload)) {
    const pieces = [];
    for (const part of payload) {
      const piece = part?.text ?? "";
      if (piece) pieces.push(piece);
    }
    return pieces.join("\n");
  }

  return typeof payload === "string" ? payload : "";
}
|
|
350
|
+
|
|
351
|
+
/**
 * Return the first non-blank line of `text`, trimmed.
 *
 * @param {*} text - Value to scan; null/undefined are treated as "".
 * @returns {string} The first line with visible content, or "no details" when there is none.
 */
function firstLine(text) {
  const candidates = String(text ?? "").split(/\r?\n/u);
  for (const candidate of candidates) {
    const trimmed = candidate.trim();
    if (trimmed) return trimmed;
  }
  return "no details";
}
|
|
354
|
+
|
|
355
|
+
/**
 * Shorten a path for display.
 *
 * @param {*} path - Path reported by a tool; falsy values render as "unknown".
 * @param {string} cwd - Directory the path is made relative to.
 * @returns {string} The path relative to `cwd` when it lies inside `cwd`,
 *   otherwise just its base name (or the path itself if it has none).
 */
function relPath(path, cwd) {
  if (!path) return "unknown";
  const target = String(path);
  const rel = relative(cwd, target);
  // Only a leading ".." *segment* means the target is outside cwd; a bare
  // startsWith("..") would also reject in-directory names such as "..cache/x".
  const escapesCwd = /^\.\.(?:[\\/]|$)/u.test(rel);
  if (rel && rel !== "." && !escapesCwd) return rel;
  return basename(target) || target;
}
|
|
361
|
+
|
|
362
|
+
/**
 * Collapse a value onto a single line and cap its length for display.
 *
 * @param {*} value - Value to render; null/undefined become "".
 * @param {number} [max=80] - Length above which the text is cut.
 * @returns {string} Whitespace-normalised text; when longer than `max`, the
 *   first `max - 1` characters (at least one) followed by "…".
 */
function truncateOneLine(value, max = 80) {
  const flattened = String(value ?? "").replace(/\s+/gu, " ").trim();
  if (flattened.length <= max) return flattened;

  const keep = Math.max(1, max - 1);
  return `${flattened.slice(0, keep)}…`;
}
|
|
366
|
+
|
|
367
|
+
/**
 * Format a byte count with a binary unit (B, KB, MB, GB, TB).
 *
 * @param {number} bytes - Byte count.
 * @returns {string} e.g. "512 B" or "1.50 KB"; "unknown" for non-finite input.
 */
function formatBytes(bytes) {
  if (!Number.isFinite(bytes)) return "unknown";
  const units = ["B", "KB", "MB", "GB", "TB"];
  // Whole numbers for plain bytes, two decimals for every larger unit.
  const render = (value, unitIndex) => value.toFixed(unitIndex === 0 ? 0 : 2);

  let size = bytes;
  let unit = 0;
  // Decide on the *rendered* value: 1023.999 KB would print as "1024.00 KB",
  // so it has to move up a unit just like an exact 1024 does.
  while (unit < units.length - 1 && Number(render(size, unit)) >= 1024) {
    size /= 1024;
    unit += 1;
  }
  return `${render(size, unit)} ${units[unit]}`;
}
|
|
375
|
+
|
|
376
|
+
/**
 * Format a token count compactly: plain below one thousand, "Nk" for
 * thousands, "N.NM" for millions.
 *
 * @param {number} n - Token count.
 * @returns {string} e.g. "999", "2k", "1.3M".
 */
function formatTokens(n) {
  const rounded = Math.round(n);
  // Written as a negated >= so NaN falls through to String(NaN), as before.
  if (!(rounded >= 1_000)) return String(rounded);

  // Pick the unit from the rounded figure so values just under a boundary
  // (e.g. 999,500) roll over to "1.0M" instead of printing "1000k".
  const thousands = Math.round(n / 1_000);
  if (thousands < 1_000) return `${thousands}k`;
  return `${(n / 1_000_000).toFixed(1)}M`;
}
|
package/src/benchmark.mjs
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
export { slugModelId, createRunId, buildToolPrompt, loadBenchmarks, piModelString } from "./benchmark/shared.mjs";
|
|
5
5
|
export { findBenchmarkRepo, linkBenchmarkRepo } from "./benchmark/repo.mjs";
|
|
6
6
|
export { prepareBenchmarkRun } from "./benchmark/prepare.mjs";
|
|
7
|
-
export { runBenchmarkInPi } from "./benchmark/pi-runner.mjs";
|
|
7
|
+
export { runBenchmarkInPi } from "./benchmark/sdk-runner.mjs";
|
|
8
8
|
export { queryServerMetrics } from "./benchmark/metrics.mjs";
|
|
9
9
|
// unloadModelFromServer now lives in src/process.mjs (managed-server counterpart to stopProfile).
|
|
10
10
|
export { unloadModelFromServer } from "./process.mjs";
|
package/src/model-presenters.mjs
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import { existsSync, statSync } from "node:fs";
|
|
2
|
-
import { basename, dirname } from "node:path";
|
|
2
|
+
import { basename, dirname, join } from "node:path";
|
|
3
3
|
import { backendFor } from "./backends.mjs";
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { buildPrettyCommand } from "./command.mjs";
|
|
4
|
+
import { computeServerCommand, buildStartScript, isProfileRunning } from "./process.mjs";
|
|
5
|
+
import { profileDir } from "./profiles.mjs";
|
|
7
6
|
import { pc, formatBytes, renderRows, renderSection } from "./ui.mjs";
|
|
8
7
|
import { capabilitySummary, ggufDetailParts, isProfileFileMissing, profileDetailParts } from "./model-summary.mjs";
|
|
9
8
|
import { itemKey } from "./model-catalog.mjs";
|
|
@@ -260,9 +259,16 @@ export async function printProfileDetails(profile) {
|
|
|
260
259
|
|
|
261
260
|
if (fileMissing) console.log("\n" + pc.red("⚠ This model's file is no longer on disk. Remove this setup or move the file back."));
|
|
262
261
|
|
|
263
|
-
if (!isManaged
|
|
264
|
-
const
|
|
265
|
-
|
|
262
|
+
if (!isManaged) {
|
|
263
|
+
const command = await computeServerCommand(profile);
|
|
264
|
+
if (command) {
|
|
265
|
+
const script = buildStartScript(profile, command);
|
|
266
|
+
const scriptPath = join(profileDir(profile.id), "start.sh");
|
|
267
|
+
console.log("\n" + renderSection("Server command", renderRows([
|
|
268
|
+
["Run manually", pc.cyan(`bash ${scriptPath}`)],
|
|
269
|
+
["Command", pc.dim(script)],
|
|
270
|
+
]), { columns: 120 }));
|
|
271
|
+
}
|
|
266
272
|
}
|
|
267
273
|
}
|
|
268
274
|
|
package/src/process.mjs
CHANGED
|
@@ -1,14 +1,85 @@
|
|
|
1
1
|
import { execFile, spawn } from "node:child_process";
|
|
2
2
|
import { promisify } from "node:util";
|
|
3
3
|
import { closeSync, openSync } from "node:fs";
|
|
4
|
-
import { readFile, writeFile } from "node:fs/promises";
|
|
4
|
+
import { readFile, writeFile, chmod } from "node:fs/promises";
|
|
5
5
|
import { basename, join } from "node:path";
|
|
6
|
+
import { quoteShell } from "./command.mjs";
|
|
6
7
|
import { LOG_DIR } from "./config.mjs";
|
|
7
|
-
import { writeState, readState,
|
|
8
|
+
import { writeState, readState, profileDir } from "./profiles.mjs";
|
|
8
9
|
import { backendFor, backendBinaryFor } from "./backends.mjs";
|
|
9
10
|
|
|
10
11
|
const execFileAsync = promisify(execFile);
|
|
11
12
|
|
|
13
|
+
// ── Compute server command from profile config ─────────────────────────────
|
|
14
|
+
// Single source of truth: derives the full command (binary + args + env) from
|
|
15
|
+
// the profile's stored configuration. Used for both launching and display so
|
|
16
|
+
// they always match. No stored commandArgv — the command is always fresh.
|
|
17
|
+
|
|
18
|
+
/**
 * Derive the launch command for a profile's server from its stored configuration.
 *
 * @param {object} profile - Profile with `backend`, `modelPath`, and optional
 *   `flags`, `capabilities`, `mmprojPath`, `drafterPath`.
 * @returns {Promise<{binary: string, argv: string[], extraEnv: object, backend: object}|null>}
 *   The command parts, or `null` when the backend type is "managed-server".
 * @throws {Error} When no server binary is found for the backend.
 */
export async function computeServerCommand(profile) {
  const backend = backendFor(profile.backend);
  if (backend.type === "managed-server") return null;

  const binary = await backendBinaryFor(profile.backend);
  if (!binary) {
    throw new Error("Server binary not found. Run offgrid-ai interactively to install.");
  }

  if (profile.backend === "mlx-vlm") {
    // The flag module is loaded lazily, only for the backend that needs it.
    const { computeMlxVlmFlags } = await import("./mlx-flags.mjs");
    const mlx = computeMlxVlmFlags(profile.modelPath, {
      port: profile.flags?.port,
      ctxSize: profile.flags?.ctxSize,
      thinkingEnabled: profile.capabilities?.thinking ?? true,
    });
    return {
      binary,
      argv: mlx.args,
      extraEnv: { APC_ENABLED: "1", MLX_VLM_MAX_TOKENS: "16384" },
      backend,
    };
  }

  // llama-cpp / llama-cpp-mtp: the profile's saved flags are passed as overrides.
  const { computeFlags } = await import("./autodetect.mjs");
  const llama = computeFlags(
    profile.capabilities ?? {},
    profile.modelPath,
    profile.mmprojPath,
    profile.drafterPath,
    profile.flags ?? {},
  );
  return { binary, argv: llama.argv, extraEnv: {}, backend };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Build the text of a runnable start.sh for a profile.
 *
 * The script exports every entry of `extraEnv`, then `exec`s the binary with
 * one flag (plus its value, if any) per continuation line.
 *
 * @param {object} profile - Profile; `label` and `backend` are used for the header comments.
 * @param {{binary: string, argv?: Array<*>, extraEnv?: object}} command - Command parts,
 *   as returned by `computeServerCommand`.
 * @returns {string} Script text ending with a newline.
 */
export function buildStartScript(profile, command) {
  const { binary, argv = [], extraEnv = {} } = command;
  const backend = backendFor(profile.backend);

  // Header values land in `#` comments; a line break inside one would turn
  // the rest of the value into an executable script line.
  const commentSafe = (value) => String(value).replace(/[\r\n]+/gu, " ");
  const lines = [
    "#!/bin/bash",
    `# Generated by offgrid-ai — do not edit`,
    `# Profile: ${commentSafe(profile.label)}`,
    `# Backend: ${commentSafe(backend.label)}`,
  ];
  for (const [key, value] of Object.entries(extraEnv)) {
    // Quote the value the same way as arguments, so `$`, backticks and
    // quotes in it are not interpreted by the shell.
    lines.push(`export ${key}=${quoteShell(String(value))}`);
  }
  lines.push(""); // blank line before exec

  const tokens = argv.map(String);
  // Simple flags are emitted verbatim; everything else (paths, values,
  // positional arguments) is shell-quoted.
  const renderToken = (token) => (/^--?[A-Za-z][\w-]*$/u.test(token) ? token : quoteShell(token));

  const segments = [`exec ${quoteShell(binary)}`];
  for (let i = 0; i < tokens.length; i += 1) {
    const token = tokens[i];
    const next = tokens[i + 1];
    // `next !== undefined` (not truthiness) so an empty-string value stays paired with its flag.
    const takesValue = token.startsWith("--") && next !== undefined && !next.startsWith("--");
    if (takesValue) {
      segments.push(` ${renderToken(token)} ${quoteShell(next)}`);
      i += 1;
    } else {
      segments.push(` ${renderToken(token)}`);
    }
  }
  // Joining with the continuation marker guarantees the last line never ends in a dangling backslash.
  lines.push(segments.join(" \\\n"));
  return lines.join("\n") + "\n";
}
|
|
82
|
+
|
|
12
83
|
// ── Start server ───────────────────────────────────────────────────────────
|
|
13
84
|
|
|
14
85
|
export async function startServer(profile) {
|
|
@@ -20,23 +91,24 @@ export async function startServer(profile) {
|
|
|
20
91
|
}
|
|
21
92
|
|
|
22
93
|
async function startLocalServer(profile) {
|
|
23
|
-
const
|
|
24
|
-
if (!
|
|
25
|
-
|
|
26
|
-
}
|
|
94
|
+
const command = await computeServerCommand(profile);
|
|
95
|
+
if (!command) throw new Error("No server command for this backend.");
|
|
96
|
+
|
|
97
|
+
const { binary, argv, extraEnv } = command;
|
|
27
98
|
|
|
28
99
|
const timestamp = timestampForFile();
|
|
29
100
|
const rawLogPath = join(LOG_DIR, `${profile.id}-${timestamp}.raw.log`);
|
|
30
101
|
const friendlyLogPath = join(LOG_DIR, `${profile.id}-${timestamp}.friendly.log`);
|
|
31
|
-
const commandArgv = await readCommandArgv(profile);
|
|
32
102
|
|
|
33
|
-
|
|
34
|
-
|
|
103
|
+
// Write start.sh so the user can run the model manually
|
|
104
|
+
const scriptPath = join(profileDir(profile.id), "start.sh");
|
|
105
|
+
await writeFile(scriptPath, buildStartScript(profile, command), "utf8");
|
|
106
|
+
await chmod(scriptPath, 0o755);
|
|
107
|
+
|
|
108
|
+
await writeFile(rawLogPath, `[offgrid-ai] ${new Date().toISOString()}\n[binary] ${binary}\n[argv]\n${argv.join(" ")}\n`, "utf8");
|
|
109
|
+
await writeFile(friendlyLogPath, `[launch] starting ${backendFor(profile.backend).label} for ${profile.label}\n`, "utf8");
|
|
35
110
|
|
|
36
|
-
|
|
37
|
-
const argv = [...commandArgv];
|
|
38
|
-
// mlx-vlm requires APC_ENABLED=1 (86x TTFT improvement; fixes Metal cache clearing).
|
|
39
|
-
const env = profile.backend === "mlx-vlm" ? { ...process.env, APC_ENABLED: "1" } : process.env;
|
|
111
|
+
const env = { ...process.env, ...extraEnv };
|
|
40
112
|
|
|
41
113
|
const rawFd = openSync(rawLogPath, "a");
|
|
42
114
|
let child;
|
package/src/profiles.mjs
CHANGED
|
@@ -69,14 +69,9 @@ export async function saveProfile(profile, options = {}) {
|
|
|
69
69
|
};
|
|
70
70
|
await writeJson(profileJsonPath(id), saved);
|
|
71
71
|
|
|
72
|
-
//
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
const cmdPath = commandJsonPath(id);
|
|
76
|
-
if (options.writeCommand || !existsSync(cmdPath)) {
|
|
77
|
-
await writeJson(cmdPath, { argv: saved.commandArgv ?? [] });
|
|
78
|
-
}
|
|
79
|
-
}
|
|
72
|
+
// Note: command.json is no longer written — the server command is computed
|
|
73
|
+
// fresh from the profile config at launch time (see computeServerCommand in
|
|
74
|
+
// process.mjs). commandArgv is kept in the profile for backwards compat.
|
|
80
75
|
|
|
81
76
|
if (!existsSync(notesPath(id))) {
|
|
82
77
|
await writeFile(notesPath(id), `# ${saved.label}\n\nNotes for this model profile.\n`, "utf8");
|
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
// ── Run benchmark in Pi (non-interactive JSON mode) ───────────────────────────
|
|
2
|
-
|
|
3
|
-
import { writeFile } from "node:fs/promises";
|
|
4
|
-
import { join } from "node:path";
|
|
5
|
-
import { spawn } from "node:child_process";
|
|
6
|
-
import {
|
|
7
|
-
BENCH_COLORS, renderStreamEvent,
|
|
8
|
-
formatToolCall, printFinalLine, stopExecTimer,
|
|
9
|
-
} from "./stream-renderer.mjs";
|
|
10
|
-
import { piModelString } from "./shared.mjs";
|
|
11
|
-
|
|
12
|
-
/**
 * Run the benchmark prompt through the `pi` CLI in non-interactive JSON mode.
 *
 * Spawns `pi --model <model> --mode json -p @prompt.md` with `runDirectory` as
 * its working directory, renders every NDJSON event read from stdout, and
 * accumulates turn / token / tool statistics. Three artifacts are written into
 * `runDirectory`: `stream.ndjson` (raw event lines), `stderr.log`, and
 * `response.raw.txt` (concatenated assistant output and tool markers).
 *
 * Failures of the child process are reported through `result.error` rather
 * than by rejecting; only a failure to open the log files or to call `spawn`
 * itself is thrown.
 *
 * @param {object} profile - Model profile; handed to `piModelString` to build the `--model` value.
 * @param {string} runDirectory - Working directory for Pi and destination of the artifacts.
 * @param {{ signal?: AbortSignal }} [options] - Aborting the signal sends SIGTERM to Pi.
 * @returns {Promise<object>} Run summary (`exitCode`, `wallClockMs`, token counters,
 *   `perTurn`, `rawResponseLines`, `error`). `error` is `null` on success, otherwise `{ message }`.
 */
export async function runBenchmarkInPi(profile, runDirectory, { signal } = {}) {
  const model = piModelString(profile);
  const args = ["--model", model, "--mode", "json", "-p", "@prompt.md"];

  const runResult = {
    model,
    exitCode: null,
    wallClockMs: null,
    agentTurns: 0,
    promptTokens: 0,
    completionTokens: 0,
    // NOTE(review): totalTokens is never updated in this function — confirm callers derive it.
    totalTokens: 0,
    cacheRead: 0,
    cacheWrite: 0,
    toolCalls: 0,
    toolResults: 0,
    perTurn: [],
    rawResponseLines: [],
    error: null,
  };

  const streamPath = join(runDirectory, "stream.ndjson");
  const stderrPath = join(runDirectory, "stderr.log");
  const responsePath = join(runDirectory, "response.raw.txt");

  // Open the log files BEFORE spawning: awaiting between spawn() and the
  // listener registration would let an early 'error' event (e.g. `pi` not on
  // PATH) fire with no listener attached, which crashes the process.
  const streamLog = createSerialLogWriter(await openFileHandle(streamPath, "w"));
  let stderrLog;
  let child;
  try {
    stderrLog = createSerialLogWriter(await openFileHandle(stderrPath, "w"));
    child = spawn("pi", args, {
      cwd: runDirectory,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (err) {
    await streamLog.close();
    if (stderrLog) await stderrLog.close();
    throw err;
  }

  let streamBuffer = "";
  let responseBuffer = "";
  let currentTurnStartMs = null;
  let lastTurnEndMs = null;
  let runStartMs = null;
  let firstEventMs = null;
  let lastEventMs = null;
  let cancelled = false;
  let settled = false;
  // Tool calls seen while a turn is open; attached to the turn record in endTurn().
  let turnOpen = false;
  let pendingTurnToolCalls = 0;

  const verbose = Boolean(process.env.OFFGRID_BENCHMARK_VERBOSE);
  const renderState = {
    cwd: runDirectory,
    turn: 0,
    turnHadToolError: false,
    modelPrinted: false,
    activeTool: null,
    execTimer: null,
    status: { mode: "idle", toolName: null, bytes: 0, tokens: 0, execStartedAt: null },
  };

  function appendResponse(text) {
    responseBuffer += text;
  }

  function flushResponse() {
    if (responseBuffer) {
      runResult.rawResponseLines.push(responseBuffer);
      responseBuffer = "";
    }
  }

  function updateTimeBounds(timestamp) {
    if (!timestamp) return;
    if (firstEventMs === null) firstEventMs = timestamp;
    lastEventMs = timestamp;
  }

  function beginTurn() {
    runResult.agentTurns += 1;
    currentTurnStartMs = lastTurnEndMs ?? runStartMs ?? null;
    turnOpen = true;
    pendingTurnToolCalls = 0;
  }

  function endTurn(usage, timestamp) {
    const turnEndMs = timestamp ?? null;
    const wallClockMs =
      currentTurnStartMs !== null && turnEndMs !== null ? turnEndMs - currentTurnStartMs : null;
    runResult.perTurn.push({
      turn: runResult.agentTurns,
      inputTokens: usage?.input ?? 0,
      outputTokens: usage?.output ?? 0,
      cacheRead: usage?.cacheRead ?? 0,
      cacheWrite: usage?.cacheWrite ?? 0,
      wallClockMs,
      toolCalls: pendingTurnToolCalls,
    });
    if (turnEndMs !== null) lastTurnEndMs = turnEndMs;
    currentTurnStartMs = null;
    turnOpen = false;
    pendingTurnToolCalls = 0;
  }

  function recordToolCall(item) {
    runResult.toolCalls += 1;
    appendResponse(`\n${formatToolCall(item)}\n`);
    if (turnOpen) {
      // The turn record does not exist yet (it is pushed at turn_end), so
      // count here instead of crediting the previous turn's record.
      pendingTurnToolCalls += 1;
      return;
    }
    const lastTurn = runResult.perTurn[runResult.perTurn.length - 1];
    if (lastTurn) lastTurn.toolCalls += 1;
  }

  function processLine(line) {
    if (!line.trim()) return;
    streamLog.write(`${line}\n`);
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch (err) {
      console.log(BENCH_COLORS.error(`[parse error] ${err.message}`));
      return;
    }
    // Valid JSON that is not an object (e.g. `null`, `42`) carries no event.
    if (parsed === null || typeof parsed !== "object") return;

    const timestamp = extractTimestamp(parsed);
    updateTimeBounds(timestamp);

    renderStreamEvent(parsed, renderState, { verbose });

    if (parsed.type === "session" || parsed.type === "agent_start") {
      if (timestamp && runStartMs === null) runStartMs = timestamp;
    }

    if (parsed.type === "turn_start") {
      beginTurn();
    }

    if (parsed.type === "turn_end") {
      // Record the turn even when usage is missing so perTurn stays aligned
      // with agentTurns; absent counters are stored as 0.
      const usage = parsed.message?.usage;
      runResult.promptTokens += usage?.input ?? 0;
      runResult.completionTokens += usage?.output ?? 0;
      runResult.cacheRead += usage?.cacheRead ?? 0;
      runResult.cacheWrite += usage?.cacheWrite ?? 0;
      endTurn(usage, timestamp);
    }

    if (parsed.type === "message_update" && parsed.assistantMessageEvent) {
      const evt = parsed.assistantMessageEvent;
      const subtype = String(evt.type ?? "").replace(/_/gu, "");
      if (subtype === "thinkingdelta" || subtype === "textdelta") {
        appendResponse(evt.delta || "");
      }
    }

    if (parsed.type === "message_end" && parsed.message?.role === "assistant") {
      flushResponse();
      const content = parsed.message.content ?? [];
      for (const item of content) {
        if (item?.type === "toolCall") recordToolCall(item);
      }
    }

    if (parsed.type === "toolResult") {
      runResult.toolResults += 1;
      const status = parsed.isError ? "error" : "ok";
      appendResponse(`\n[toolResult] ${parsed.toolName} (${status})\n`);
    }

    if (parsed.type === "agent_end") {
      flushResponse();
    }
  }

  child.stdout.setEncoding("utf8");
  child.stdout.on("data", (chunk) => {
    if (settled) return;
    streamBuffer += chunk;
    const lines = streamBuffer.split("\n");
    streamBuffer = lines.pop();
    for (const line of lines) {
      processLine(line);
    }
  });

  child.stderr.setEncoding("utf8");
  child.stderr.on("data", (chunk) => {
    if (settled) return;
    stderrLog.write(chunk);
  });

  return new Promise((resolve) => {
    const abortListener = () => {
      if (cancelled || settled) return;
      cancelled = true;
      console.log(BENCH_COLORS.error("\n\n[Cancelled by user]"));
      child.kill("SIGTERM");
    };

    // Runs at most once, whichever of 'error' / 'close' arrives first.
    const finalize = async ({ code = null, killSignal = null, failure = null }) => {
      if (settled) return;
      settled = true;
      if (signal) signal.removeEventListener("abort", abortListener);
      try {
        stopExecTimer(renderState);
        if (streamBuffer.trim()) processLine(streamBuffer);
        streamBuffer = "";
        flushResponse();

        // Artifact persistence is best-effort: report problems, keep the result.
        const logError = (await streamLog.close()) ?? (await stderrLog.close());
        if (logError) console.log(BENCH_COLORS.error(`[log error] ${logError.message}`));
        try {
          await writeFile(responsePath, runResult.rawResponseLines.join(""), "utf8");
        } catch (err) {
          console.log(BENCH_COLORS.error(`[log error] ${err.message}`));
        }

        if (failure) {
          runResult.error = { message: failure.message };
          return;
        }

        runResult.exitCode = code ?? 0;
        if (firstEventMs !== null && lastEventMs !== null) {
          runResult.wallClockMs = lastEventMs - firstEventMs;
        }

        if (cancelled) {
          runResult.error = { message: "Cancelled by user" };
          return;
        }

        printFinalLine(BENCH_COLORS.info("Pi benchmark finished"));

        if (code === null && killSignal) {
          // Killed from outside (not by our abort handler): not a clean run.
          runResult.error = { message: `Pi was terminated by signal ${killSignal}` };
        } else if (runResult.exitCode !== 0) {
          runResult.error = { message: `Pi exited with code ${runResult.exitCode}` };
        }
      } catch (err) {
        if (!runResult.error) runResult.error = { message: err.message };
      } finally {
        resolve(runResult);
      }
    };

    child.once("error", (err) => {
      void finalize({ failure: err });
    });
    // 'close' (unlike 'exit') fires only after stdout/stderr have been fully
    // drained, so no trailing event lines are lost.
    child.once("close", (code, killSignal) => {
      void finalize({ code, killSignal });
    });

    if (signal) {
      if (signal.aborted) abortListener();
      else signal.addEventListener("abort", abortListener, { once: true });
    }
  });
}

// Serialises writes to a FileHandle: each write starts only after the previous
// one finished, and close() waits for the queue. Neither method rejects; the
// first failure is remembered and returned from close() (null when none).
function createSerialLogWriter(handle) {
  let tail = Promise.resolve();
  let firstError = null;
  const remember = (err) => {
    if (firstError === null) firstError = err;
  };
  return {
    write(text) {
      tail = tail.then(() => handle.write(text)).then(() => undefined, remember);
    },
    async close() {
      await tail;
      try {
        await handle.close();
      } catch (err) {
        remember(err);
      }
      return firstError;
    },
  };
}
|
|
238
|
-
|
|
239
|
-
/**
 * Pull an epoch-millisecond timestamp out of a Pi stream event.
 *
 * Candidate fields are tried in order: `message.timestamp`, `timestamp`,
 * `assistantMessageEvent.partial.timestamp`, `message.createdAt`, `createdAt`,
 * `created_at`. Finite numbers are returned as-is; strings are parsed with
 * `Date.parse`. A candidate that is missing, non-finite or unparseable is
 * skipped so a later valid field can still be used.
 *
 * @param {object|null|undefined} event - Parsed stream event.
 * @returns {number|null} Milliseconds since the epoch, or `null` when no usable field exists.
 */
function extractTimestamp(event) {
  const candidates = [
    event?.message?.timestamp,
    event?.timestamp,
    event?.assistantMessageEvent?.partial?.timestamp,
    event?.message?.createdAt,
    event?.createdAt,
    event?.created_at,
  ];
  for (const candidate of candidates) {
    if (typeof candidate === "number") {
      // NaN / Infinity would poison wall-clock arithmetic downstream.
      if (Number.isFinite(candidate)) return candidate;
    } else if (typeof candidate === "string") {
      const parsed = Date.parse(candidate);
      if (Number.isFinite(parsed)) return parsed;
    }
  }
  return null;
}
|
|
253
|
-
|
|
254
|
-
/**
 * Open `path` with the given flags via `node:fs/promises`.
 * The module is loaded with a dynamic import at call time.
 *
 * @param {string} path - File to open.
 * @param {string} flags - File system flags (this file passes "w").
 * @returns {Promise<import("node:fs/promises").FileHandle>} The opened handle.
 */
async function openFileHandle(path, flags) {
  const fsPromises = await import("node:fs/promises");
  return fsPromises.open(path, flags);
}
|
|
@@ -1,302 +0,0 @@
|
|
|
1
|
-
// ── Semantic stream renderer for Pi benchmark output ─────────────────────────
|
|
2
|
-
|
|
3
|
-
import { basename, isAbsolute, relative, resolve, sep } from "node:path";
|
|
4
|
-
import { pc } from "../ui.mjs";
|
|
5
|
-
|
|
6
|
-
// Maps each semantic role used by the benchmark renderer to one of the
// formatter functions exposed on `pc`.
export const BENCH_COLORS = Object.fromEntries(
  [
    ["thinking", "magenta"],
    ["text", "green"],
    ["tool", "yellow"],
    ["success", "green"],
    ["warning", "yellow"],
    ["toolOutput", "dim"],
    ["error", "red"],
    ["info", "cyan"],
    ["dim", "dim"],
  ].map(([role, colorName]) => [role, pc[colorName]]),
);
|
|
17
|
-
|
|
18
|
-
/**
 * Build the one-line `[toolCall]` marker for a tool call.
 *
 * @param {{ name: string, arguments?: object }} toolCall - Tool call content item.
 * @returns {string} `[toolCall] <name>`, followed by ` → <target>` when the
 *   arguments carry a non-empty `path`, `file_path` or `filename` (first match wins).
 */
export function formatToolCall(toolCall) {
  const { path, file_path: filePath, filename } = toolCall.arguments ?? {};
  const target = path || filePath || filename || "";
  const head = `[toolCall] ${toolCall.name}`;
  return target ? `${head} → ${target}` : head;
}
|
|
23
|
-
|
|
24
|
-
/**
 * Format a token count compactly: plain integer below 1k, whole thousands
 * with a `k` suffix below 1M, and one-decimal millions with an `M` suffix.
 *
 * Values that round up to the next unit are promoted to it, so 999_999
 * renders as "1.0M" rather than "1000k" and 999.6 as "1k" rather than "1000".
 *
 * @param {number} n - Token count.
 * @returns {string} Human-readable count.
 */
export function formatTokens(n) {
  const asMillions = (value) => `${(value / 1_000_000).toFixed(1)}M`;

  if (n >= 1_000_000) return asMillions(n);

  if (n >= 1_000) {
    const thousands = Math.round(n / 1_000);
    // 999_500..999_999 round to 1000 thousands: show that as millions.
    return thousands >= 1_000 ? asMillions(n) : `${thousands}k`;
  }

  const whole = Math.round(n);
  return whole >= 1_000 ? "1k" : String(whole);
}
|
|
29
|
-
|
|
30
|
-
/**
 * Rough token estimate for streamed text: one token per four bytes,
 * rounded up, and never less than 1.
 *
 * @param {number} bytes - Byte count of the text seen so far.
 * @returns {number} Estimated token count (minimum 1).
 */
export function estimatedTokensFromBytes(bytes) {
  const estimate = Math.ceil(bytes / 4);
  return Math.max(1, estimate);
}
|
|
33
|
-
|
|
34
|
-
/**
 * Erase the transient status line: writes a carriage return followed by the
 * ANSI "erase to end of line" sequence. Does nothing when stdout is not a TTY.
 */
export function clearStatusLine() {
  if (!process.stdout.isTTY) return;
  process.stdout.write("\r\x1b[K");
}
|
|
39
|
-
|
|
40
|
-
/**
 * Overwrite the transient status line with `text` (no trailing newline).
 * Does nothing when stdout is not a TTY.
 *
 * @param {string} text - Status text to show.
 */
export function printStatusLine(text) {
  if (!process.stdout.isTTY) return;
  const frame = `\r\x1b[K${text}`;
  process.stdout.write(frame);
}
|
|
45
|
-
|
|
46
|
-
/**
 * Print a permanent output line: the transient status line is cleared first,
 * then `text` is logged with `console.log`.
 *
 * @param {string} text - Line to print.
 */
export function printFinalLine(text) {
  clearStatusLine();
  console.log(text);
}
|
|
50
|
-
|
|
51
|
-
/**
 * Render one parsed Pi stream event to the terminal and update `state`.
 *
 * Permanent lines go through `printFinalLine`; streaming deltas only refresh
 * the transient status line unless `opts.verbose` is set, in which case the
 * raw delta text is also echoed to stdout. Unknown event types are ignored.
 *
 * @param {object} parsed - Parsed event; dispatch is on `parsed.type`.
 * @param {object} state - Mutable render state (`turn`, `turnHadToolError`,
 *   `modelPrinted`, `activeTool`, `execTimer`, `status`, `cwd`).
 * @param {{ verbose?: boolean }} [opts] - Rendering options.
 */
export function renderStreamEvent(parsed, state, opts = {}) {
  const verbose = Boolean(opts.verbose);

  // Echo a delta (verbose only) in the colour of its mode, then refresh the status line.
  const streamDelta = (delta, mode) => {
    if (verbose) process.stdout.write(BENCH_COLORS[mode](delta || ""));
    updateStatusFromDelta(state, delta, mode);
  };

  switch (parsed.type) {
    case "session": {
      printFinalLine(BENCH_COLORS.info("Pi benchmark started"));
      if (parsed.id) printFinalLine(BENCH_COLORS.dim(` Session ${parsed.id}`));
      return;
    }

    case "turn_start": {
      state.turn += 1;
      state.turnHadToolError = false;
      resetStatus(state, "thinking");
      printFinalLine("");
      printFinalLine(BENCH_COLORS.info(`Turn ${state.turn}`));
      return;
    }

    case "message_start": {
      const msg = parsed.message;
      const shouldAnnounce =
        !state.modelPrinted && msg?.role === "assistant" && msg.provider && msg.model;
      if (shouldAnnounce) {
        state.modelPrinted = true;
        printFinalLine(BENCH_COLORS.dim(` Model ${msg.provider}/${msg.model}`));
      }
      return;
    }

    case "message_update": {
      const evt = parsed.assistantMessageEvent;
      if (!evt) return;
      // Underscores are stripped, so "text_delta" and "textdelta" are treated alike.
      const subtype = String(evt.type ?? "").replace(/_/gu, "");
      const matched = /^(thinking|text|toolcall)(start|delta)$/u.exec(subtype);
      if (matched === null) return;
      const mode = matched[1] === "toolcall" ? "tool" : matched[1];
      if (matched[2] === "start") {
        resetStatus(state, mode);
      } else {
        streamDelta(evt.delta, mode);
      }
      return;
    }

    case "tool_execution_start": {
      const toolArgs = parsed.args ?? {};
      state.activeTool = {
        name: parsed.toolName,
        args: toolArgs,
        outputText: "",
      };
      resetStatus(state, "exec", parsed.toolName);
      printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, parsed.args ?? {}, state)));
      startExecTimer(state);
      return;
    }

    case "tool_execution_update": {
      const text = toolResultText(parsed.partialResult ?? parsed.result ?? parsed);
      if (!text) return;
      if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(text));
      if (state.activeTool) state.activeTool.outputText = text;
      state.status.bytes += Buffer.byteLength(text, "utf8");
      printExecStatus(state);
      return;
    }

    case "tool_execution_end": {
      stopExecTimer(state);
      const summaryLines = formatToolEnd(parsed, state);
      if (parsed.isError) state.turnHadToolError = true;
      summaryLines.forEach((line) => printFinalLine(line));
      state.activeTool = null;
      resetStatus(state, "idle");
      return;
    }

    case "toolResult": {
      if (parsed.isError) state.turnHadToolError = true;
      const badge = parsed.isError ? BENCH_COLORS.error("✗") : BENCH_COLORS.success("✓");
      printFinalLine(`${badge} ${parsed.toolName ?? "tool"}`);
      return;
    }

    case "turn_end": {
      stopExecTimer(state);
      const usage = parsed.message?.usage;
      let tokenPart = "";
      if (usage) {
        tokenPart = ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens`;
      }
      const hadIssue = state.turnHadToolError;
      const marker = hadIssue ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
      const suffix = hadIssue ? " · tool issue" : "";
      printFinalLine(`${marker} turn ${state.turn}${tokenPart}${suffix}`);
      return;
    }

    case "agent_end": {
      stopExecTimer(state);
      return;
    }

    // "agent_start", "message_end" and anything unrecognised render nothing.
    default:
      return;
  }
}
|
|
154
|
-
|
|
155
|
-
/**
 * Switch the status tracker to a new mode and zero its byte/token counters.
 * Other fields of `state.status` (such as `execStartedAt`) are left untouched.
 *
 * @param {object} state - Render state owning the `status` object.
 * @param {string} mode - New status mode.
 * @param {string|null} [toolName=null] - Tool associated with the mode, if any.
 */
export function resetStatus(state, mode, toolName = null) {
  Object.assign(state.status, { mode, toolName, bytes: 0, tokens: 0 });
}
|
|
161
|
-
|
|
162
|
-
/**
 * Account for a streamed delta and redraw the transient status line.
 *
 * Adds the delta's UTF-8 byte length to `state.status.bytes`, re-estimates
 * `state.status.tokens` from the byte total, and prints a dimmed summary.
 * An empty or missing delta is ignored entirely.
 *
 * @param {object} state - Render state (`turn`, `status`).
 * @param {string} delta - Newly streamed text.
 * @param {string} [mode=state.status.mode] - Mode to record for this delta.
 */
export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
  if (!delta) return;

  const { status } = state;
  status.mode = mode;
  status.bytes += Buffer.byteLength(delta, "utf8");
  status.tokens = estimatedTokensFromBytes(status.bytes);

  const modeLabels = {
    thinking: "thinking…",
    text: "drafting response…",
    tool: "preparing tool…",
    exec: "running tool…",
  };
  const modeLabel = modeLabels[status.mode] ?? "working…";
  const toolSuffix = status.toolName ? ` · ${status.toolName}` : "";
  const sizeText = formatBytes(status.bytes);
  const tokenText = formatTokens(status.tokens);

  printStatusLine(
    BENCH_COLORS.dim(`Turn ${state.turn} ${modeLabel}${toolSuffix} · ${sizeText} (~${tokenText} tokens)`),
  );
}
|
|
178
|
-
|
|
179
|
-
/**
 * Begin timing a tool execution. Any previous timer is stopped, the start
 * time is recorded and the byte counter zeroed. On a TTY the status line is
 * drawn immediately and then refreshed once per second.
 *
 * @param {object} state - Render state (`status`, `execTimer`).
 */
export function startExecTimer(state) {
  stopExecTimer(state);
  Object.assign(state.status, { execStartedAt: Date.now(), bytes: 0 });

  if (process.stdout.isTTY) {
    printExecStatus(state);
    state.execTimer = setInterval(() => printExecStatus(state), 1000);
  }
}
|
|
187
|
-
|
|
188
|
-
/**
 * Stop the once-per-second exec status refresh (if running) and clear the
 * transient status line. Safe to call when no timer is active.
 *
 * @param {object} state - Render state holding `execTimer`.
 */
export function stopExecTimer(state) {
  const { execTimer } = state;
  if (execTimer) {
    clearInterval(execTimer);
    state.execTimer = null;
  }
  clearStatusLine();
}
|
|
195
|
-
|
|
196
|
-
/**
 * Draw the "running <tool>…" status line with whole elapsed seconds and the
 * bytes of tool output seen so far. Does nothing when stdout is not a TTY.
 *
 * @param {object} state - Render state (`turn`, `status`).
 */
export function printExecStatus(state) {
  if (!process.stdout.isTTY) return;

  const { execStartedAt, toolName, bytes } = state.status;
  const elapsed = execStartedAt ? Math.floor((Date.now() - execStartedAt) / 1000) : 0;
  const tool = toolName ?? "tool";
  const sizeText = formatBytes(bytes);

  printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} running ${tool}… ${elapsed}s · ${sizeText}`));
}
|
|
203
|
-
|
|
204
|
-
/**
 * Describe a tool invocation that is about to run.
 *
 * `read`, `write`, `edit` and `bash` get dedicated summaries (path, content
 * size, replacement count, command); any other tool is shown with its
 * arguments compacted onto one line.
 *
 * @param {string} toolName - Name of the tool.
 * @param {object} args - Tool arguments.
 * @param {object} state - Render state; `state.cwd` is used to shorten paths.
 * @returns {string} Line starting with "→ ".
 */
export function formatToolStart(toolName, args, state) {
  switch (toolName) {
    case "read":
      return `→ read ${displayPath(args.path, state)}`;

    case "write": {
      const sizeNote = args.content
        ? ` · ${formatBytes(Buffer.byteLength(String(args.content), "utf8"))}`
        : "";
      return `→ write ${displayPath(args.path, state)}${sizeNote}`;
    }

    case "edit": {
      const count = Array.isArray(args.edits) ? args.edits.length : 0;
      const plural = count === 1 ? "" : "s";
      const countNote = count > 0 ? ` · ${count} replacement${plural}` : "";
      return `→ edit ${displayPath(args.path, state)}${countNote}`;
    }

    case "bash":
      return `→ run ${truncateOneLine(args.command ?? "")}`;

    default:
      return `→ ${toolName}${compactArgs(args)}`;
  }
}
|
|
218
|
-
|
|
219
|
-
/**
 * Summarise a finished tool execution as one or more display lines.
 *
 * Tool name, arguments and output fall back to `state.activeTool` when the
 * end event does not carry them. Errors yield a single "failed" line with
 * the most relevant output line.
 *
 * @param {object} parsed - `tool_execution_end` event.
 * @param {object} state - Render state (`activeTool`, `cwd`).
 * @returns {string[]} Lines to print.
 */
export function formatToolEnd(parsed, state) {
  const active = state.activeTool;
  const toolName = parsed.toolName ?? active?.name ?? "tool";
  const args = parsed.args ?? active?.args ?? {};
  const text = toolResultText(parsed.result) || active?.outputText || "";

  if (parsed.isError) {
    return [`${BENCH_COLORS.error("✗")} ${toolName} failed · ${firstUsefulLine(text)}`];
  }

  const marker = BENCH_COLORS.success("✓");
  switch (toolName) {
    case "write":
      return [`${marker} wrote ${displayPath(args.path, state)}${parsedWriteSize(text)}`];

    case "read": {
      const sizeNote = text ? ` · ${formatBytes(Buffer.byteLength(text, "utf8"))}` : "";
      return [`${marker} read ${displayPath(args.path, state)}${sizeNote}`];
    }

    case "edit":
      return [`${marker} edited ${displayPath(args.path, state)}`];

    case "bash":
      return formatBashResult(marker, text);

    default: {
      const summary = firstUsefulLine(text);
      const detail = summary ? ` · ${summary}` : "";
      return [`${marker} ${toolName}${detail}`];
    }
  }
}
|
|
237
|
-
|
|
238
|
-
/**
 * Summarise bash output using at most its two most meaningful lines: the
 * first follows the marker, the second (if any) is dimmed beneath it.
 *
 * @param {string} marker - Already-coloured status marker.
 * @param {string} text - Raw command output.
 * @returns {string[]} One or two display lines.
 */
export function formatBashResult(marker, text) {
  const [headline, followUp] = meaningfulLines(text);
  if (headline === undefined) return [`${marker} command completed`];

  const output = [`${marker} ${headline}`];
  if (followUp !== undefined) output.push(BENCH_COLORS.dim(` ${followUp}`));
  return output;
}
|
|
243
|
-
|
|
244
|
-
/**
 * Extract the byte count from a "Successfully wrote N bytes" message
 * (thousands separators allowed) and format it as a " · <size>" suffix.
 *
 * @param {string} text - Tool result text.
 * @returns {string} Size suffix, or "" when the message is absent or unparseable.
 */
export function parsedWriteSize(text) {
  const found = /Successfully wrote\s+([0-9,]+)\s+bytes/iu.exec(String(text));
  if (found === null) return "";

  const byteCount = Number(found[1].replace(/,/gu, ""));
  if (!Number.isFinite(byteCount)) return "";
  return ` · ${formatBytes(byteCount)}`;
}
|
|
250
|
-
|
|
251
|
-
/**
 * Flatten a tool result's `content` into plain text.
 *
 * A string `content` is returned unchanged. An array contributes the
 * non-empty string `text` of each item, joined with newlines. Anything else
 * yields "".
 *
 * @param {{ content?: string | Array<{ text?: string }> }} [result] - Tool result.
 * @returns {string} Extracted text.
 */
export function toolResultText(result) {
  const content = result?.content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = [];
  for (const item of content) {
    const text = item?.text;
    if (typeof text === "string" && text) pieces.push(text);
  }
  return pieces.join("\n");
}
|
|
260
|
-
|
|
261
|
-
/**
 * Return the most relevant line of `text` as ranked by `meaningfulLines`,
 * or "no details" when there is none.
 *
 * @param {string} text - Tool output.
 * @returns {string} Single summary line.
 */
export function firstUsefulLine(text) {
  const [first] = meaningfulLines(text);
  return first ?? "no details";
}
|
|
264
|
-
|
|
265
|
-
/**
 * Split text into trimmed, non-empty lines, dropping lines made only of
 * carets. If any line looks like an error report it is moved to the front,
 * and identical copies of it are removed from the rest.
 *
 * @param {*} text - Output to scan; null/undefined are treated as "".
 * @returns {string[]} Lines, most relevant first.
 */
export function meaningfulLines(text) {
  const kept = [];
  for (const raw of String(text ?? "").split(/\r?\n/u)) {
    const line = raw.trim();
    if (line && !/^\^+$/u.test(line)) kept.push(line);
  }

  const errorPattern =
    /(?:error|exception|failed|not found|command exited with code|validation failed)/iu;
  const errorLine = kept.find((line) => errorPattern.test(line));
  if (!errorLine) return kept;

  return [errorLine, ...kept.filter((line) => line !== errorLine)];
}
|
|
275
|
-
|
|
276
|
-
/**
 * Shorten a tool path for display.
 *
 * With `state.cwd` set, the path is shown relative to that directory when it
 * lies inside it; relative inputs are interpreted relative to `state.cwd`
 * (not the process working directory), so "src/a.js" stays "src/a.js".
 * Paths outside `state.cwd` collapse to their basename. Without `state.cwd`
 * the path is returned as given unless it climbs upwards with "..".
 *
 * @param {*} value - Path argument from a tool call; falsy values yield "unknown".
 * @param {{ cwd?: string }} state - Render state providing the base directory.
 * @returns {string} Display form of the path.
 */
export function displayPath(value, state) {
  if (!value) return "unknown";

  const path = String(value);
  const anchored = Boolean(state.cwd);
  const rel = anchored ? relative(state.cwd, resolve(state.cwd, path)) : path;

  // Only a leading ".." path SEGMENT means "outside"; a name such as
  // "..cache" is an ordinary entry. On Windows a path on another drive comes
  // back absolute from relative(), which is also outside.
  const climbsOut =
    rel === ".." ||
    rel.startsWith(`..${sep}`) ||
    rel.startsWith("../") ||
    (anchored && isAbsolute(rel));

  if (rel && rel !== "." && !climbsOut) return rel;
  return basename(path) || path;
}
|
|
283
|
-
|
|
284
|
-
/**
 * Render tool arguments as a single truncated " · key=value …" suffix.
 *
 * Entries whose value is undefined, null or "" are skipped. Object and array
 * values are serialised as JSON so they stay readable instead of collapsing
 * to "[object Object]"; values JSON cannot represent fall back to `String()`.
 *
 * @param {object|null|undefined} args - Tool arguments.
 * @returns {string} Suffix text, or "" when there is nothing to show.
 */
export function compactArgs(args) {
  const renderValue = (value) => {
    if (typeof value !== "object") return String(value);
    try {
      // JSON.stringify returns undefined for some inputs (e.g. toJSON() => undefined).
      return JSON.stringify(value) ?? String(value);
    } catch {
      // Circular structures or BigInt members: fall back to the plain string form.
      return String(value);
    }
  };

  const entries = Object.entries(args ?? {}).filter(
    ([, value]) => value !== undefined && value !== null && value !== "",
  );
  if (entries.length === 0) return "";

  const joined = entries.map(([key, value]) => `${key}=${renderValue(value)}`).join(" ");
  return ` · ${truncateOneLine(joined)}`;
}
|
|
289
|
-
|
|
290
|
-
/**
 * Collapse all whitespace runs in `value` to single spaces, trim it, and cut
 * it to at most `max` characters, ending with "…" when something was removed.
 *
 * @param {*} value - Text to flatten; null/undefined are treated as "".
 * @param {number} [max] - Maximum length. Defaults to the terminal width
 *   (100 when unknown, capped at 140) minus 12, but never below 60.
 * @returns {string} Single-line, possibly truncated text.
 */
export function truncateOneLine(value, max = Math.max(60, Math.min(process.stdout.columns ?? 100, 140) - 12)) {
  const flattened = String(value ?? "").replace(/\s+/gu, " ").trim();
  if (!(flattened.length > max)) return flattened;

  // Always keep at least one character in front of the ellipsis.
  const keep = Math.max(1, max - 1);
  return `${flattened.slice(0, keep)}…`;
}
|
|
294
|
-
|
|
295
|
-
/**
 * Format a byte count with binary units (B, KB, MB, GB, TB).
 *
 * Bytes are shown without decimals, larger units with two. A value that would
 * round up to 1024 of its unit is promoted to the next unit, so 1_048_575
 * renders as "1.00 MB" rather than "1024.00 KB".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size, or "unknown" when `bytes` is not a finite number.
 */
export function formatBytes(bytes) {
  if (!Number.isFinite(bytes)) return "unknown";

  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  const digitsFor = (unitIndex) => (unitIndex === 0 ? 0 : 2);

  let size = bytes;
  let unit = 0;
  while (size >= 1024 && unit < lastUnit) {
    size /= 1024;
    unit += 1;
  }

  // toFixed() may round e.g. 1023.999 up to "1024.00": step to the next unit.
  if (unit < lastUnit && Number(size.toFixed(digitsFor(unit))) >= 1024) {
    size /= 1024;
    unit += 1;
  }

  return `${size.toFixed(digitsFor(unit))} ${units[unit]}`;
}
|