offgrid-ai 0.8.15 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -25
- package/package.json +3 -2
- package/src/autodetect.mjs +6 -3
- package/src/backends.mjs +36 -45
- package/src/benchmark/finalize.mjs +198 -0
- package/src/benchmark/flow.mjs +237 -0
- package/src/benchmark/metrics.mjs +152 -0
- package/src/benchmark/pi-runner.mjs +252 -0
- package/src/benchmark/prepare.mjs +121 -0
- package/src/benchmark/repo.mjs +77 -0
- package/src/benchmark/shared.mjs +54 -0
- package/src/benchmark/stream-renderer.mjs +274 -0
- package/src/benchmark.mjs +10 -1330
- package/src/cli.mjs +2 -2
- package/src/commands/main.mjs +2 -2
- package/src/commands/onboard.mjs +6 -2
- package/src/config.mjs +8 -2
- package/src/harness-pi.mjs +1 -1
- package/src/managed.mjs +3 -3
- package/src/model-catalog.mjs +2 -1
- package/src/model-name.mjs +220 -0
- package/src/process.mjs +29 -21
- package/src/runtime.mjs +11 -0
- package/src/scan.mjs +9 -20
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
// ── Create a benchmark run directory ────────────────────────────────────────
|
|
2
|
+
|
|
3
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { pc, renderRows, renderSection } from "../ui.mjs";
|
|
6
|
+
import { slugModelId, createRunId, buildToolPrompt } from "./shared.mjs";
|
|
7
|
+
import { parseModelName } from "../model-name.mjs";
|
|
8
|
+
|
|
9
|
+
/**
 * Convert a harness id into a human-readable label.
 *
 * The id "pi" maps to "Pi". Any other id has runs of "-" / "_" replaced by a
 * single space and the first character of every word upper-cased.
 *
 * @param {unknown} id - Harness identifier (coerced with String()).
 * @returns {string} Display label.
 */
function harnessDisplayName(id) {
  if (id === "pi") {
    return "Pi";
  }
  const spaced = String(id).replace(/[-_]+/gu, " ");
  const upperCaseInitial = (letter) => letter.toUpperCase();
  return spaced.replace(/\b\w/gu, upperCaseInitial);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Pick the label of the tool a profile is expected to be run with.
 *
 * Harnesses whose config has `enabled === false` are ignored. Among the rest,
 * "pi" wins if present, otherwise the first listed harness is used; with no
 * usable harness at all the result falls back to "pi".
 *
 * @param {object|null|undefined} profile - Profile with an optional `harnesses` map.
 * @returns {string} Display name, or "your tool" when no profile is given.
 */
function intendedRunnerForProfile(profile) {
  if (!profile) return "your tool";

  const enabled = [];
  for (const entry of Object.entries(profile.harnesses ?? {})) {
    if (entry[1]?.enabled !== false) enabled.push(entry);
  }

  const piEntry = enabled.find(([key]) => key === "pi");
  const chosen = piEntry ?? enabled[0] ?? ["pi"];
  return harnessDisplayName(chosen[0]);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Print the manual follow-up instructions shown after a run slot is prepared.
 *
 * @param {object} options
 * @param {string} options.repoPath - Benchmark gallery checkout.
 * @param {string} options.runDirectory - Directory created for this run.
 * @param {object|null|undefined} options.profile - Profile to launch; when absent a generic hint is printed.
 * @param {string} options.modelId - Model identifier used in the generic hint.
 * @param {string} options.runnerLabel - Display name of the tool the prompt is pasted into.
 */
function printBenchmarkNextSteps({ repoPath, runDirectory, profile, modelId, runnerLabel }) {
  let runnerCommand;
  if (profile) {
    runnerCommand = `offgrid-ai run ${profile.id}`;
  } else {
    runnerCommand = `Open ${runnerLabel} for ${modelId}`;
  }

  const output = [
    "",
    pc.bold("Next steps"),
    ` 1. Open the gallery. If it is not running: ${pc.cyan(`cd ${repoPath} && npm run dev`)}`,
    ` 2. ${pc.cyan(`cd ${runDirectory}`)}`,
    ` 3. ${pc.cyan(runnerCommand)}, then copy this run's prompt from the gallery and paste it into ${runnerLabel}`,
  ];
  for (const line of output) {
    console.log(line);
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Create the on-disk slot for one benchmark run and seed it with metadata.
 *
 * Creates `<repoPath>/runs/<benchmark.id>/<model slug>/<run id>/`, writes
 * `metadata.json` (status "prepared", zeroed metrics) and `prompt.md`, prints a
 * summary, and optionally prints the manual next steps.
 *
 * @param {object} options
 * @param {string} options.repoPath - Benchmark gallery checkout.
 * @param {{id: string, title: string, description: string, prompt: string}} options.benchmark
 * @param {string} options.kind - "data-science" selects the notebook asset set; anything else the HTML one.
 * @param {string} options.modelId
 * @param {string} [options.modelSource] - e.g. "ollama", "omlx", "cloud".
 * @param {string} [options.backendLabel]
 * @param {object} [options.profile]
 * @param {boolean} [options.showNextSteps=true]
 * @returns {Promise<string>} The created run directory.
 */
export async function prepareBenchmarkRun({ repoPath, benchmark, kind, modelId, modelSource, backendLabel, profile, showNextSteps = true }) {
  const toolPrompt = buildToolPrompt(benchmark);
  const now = new Date();
  const runId = createRunId(now);
  const modelSlug = slugModelId(modelId);
  const runnerLabel = intendedRunnerForProfile(profile);

  // runs/<benchmark>/<model>/<run>, joined one segment at a time.
  const runDirectory = ["runs", benchmark.id, modelSlug, runId].reduce(
    (directory, segment) => join(directory, segment),
    repoPath,
  );
  await mkdir(runDirectory, { recursive: true });

  const timestamp = now.toISOString();
  // Only ollama and omlx have dedicated name parsers; everything else is treated as a local GGUF.
  const nameSource = ["ollama", "omlx"].includes(modelSource) ? modelSource : "local-gguf";
  const usesPi = profile?.harnesses?.pi || runnerLabel === "Pi";

  const sharedAssets = {
    metadata: "metadata.json",
    prompt: "prompt.md",
    rawResponse: "response.raw.txt",
    stream: "stream.ndjson",
    stderr: "stderr.log",
  };
  let assets;
  if (kind === "data-science") {
    assets = {
      ...sharedAssets,
      ds: {
        notebook: "analysis.ipynb",
        summary: "summary.json",
        chartDistribution: "chart-distribution.png",
        chartTreatmentEffect: "chart-treatment-effect.png",
        chartCompletionRates: "chart-completion-rates.png",
      },
    };
  } else {
    assets = { ...sharedAssets, html: "index.html", preview: "preview.png", video: "preview.webm" };
  }

  // Key order is kept stable because it determines the layout of metadata.json.
  const runner = {
    mode: modelSource === "cloud" ? "manual" : "external",
    intendedRunner: profile ? runnerLabel : undefined,
    ...(usesPi ? { tool: "pi" } : {}),
    ...(modelSource ? { modelSource } : {}),
    ...(backendLabel ? { backendLabel } : {}),
    ...(profile?.baseUrl ? { baseUrl: profile.baseUrl } : {}),
    model: modelId,
    retries: 0,
    tokenMetrics: {
      reported: false,
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
    },
    speedMetrics: {
      prefillTokensPerSecond: null,
      generationTokensPerSecond: null,
      ttftMs: null,
      modelLoadMs: null,
      speculativeDecodeAcceptance: null,
      kvCacheTokens: null,
    },
    metricSource: null,
  };

  const metadata = {
    schemaVersion: 1,
    kind,
    runId,
    benchmark: {
      id: benchmark.id,
      title: benchmark.title,
      description: benchmark.description,
      prompt: benchmark.prompt,
    },
    model: {
      id: modelId,
      slug: modelSlug,
      displayName: parseModelName(modelId, nameSource).display,
    },
    status: "prepared",
    createdAt: timestamp,
    updatedAt: timestamp,
    preparedAt: timestamp,
    runDirectory,
    assets,
    runner,
    results: {
      wallClockMs: null,
      agentTurns: 0,
      toolCalls: 0,
      toolResults: 0,
      success: false,
      outputFiles: [],
      perTurn: [],
    },
  };

  await writeFile(join(runDirectory, "metadata.json"), JSON.stringify(metadata, null, 2) + "\n", "utf8");
  await writeFile(join(runDirectory, "prompt.md"), toolPrompt + "\n", "utf8");

  const summaryRows = [
    ["Directory", pc.cyan(runDirectory)],
    ["Benchmark", benchmark.title],
    ["Kind", kind],
    ["Model", pc.bold(modelId)],
    ["Source", backendLabel || modelSource],
  ];
  console.log("");
  console.log(pc.green("✓ Run slot prepared"));
  console.log(renderSection("Run", renderRows(summaryRows)));

  if (showNextSteps) {
    printBenchmarkNextSteps({ repoPath, runDirectory, profile, modelId, runnerLabel });
  }

  return runDirectory;
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// ── Benchmark repo linking ────────────────────────────────────────────────────
|
|
2
|
+
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
4
|
+
import { join, resolve } from "node:path";
|
|
5
|
+
import { homedir } from "node:os";
|
|
6
|
+
import { execFile } from "node:child_process";
|
|
7
|
+
import { promisify } from "node:util";
|
|
8
|
+
import { loadConfig, saveConfig } from "../config.mjs";
|
|
9
|
+
import { pc } from "../ui.mjs";
|
|
10
|
+
|
|
11
|
+
const execFileAsync = promisify(execFile);
|
|
12
|
+
|
|
13
|
+
const BENCHMARK_REPO = "https://github.com/eeshansrivastava89/local-llm-visual-benchmark.git";
|
|
14
|
+
|
|
15
|
+
/**
 * Return the benchmark repo path stored in the config, if it still looks valid.
 *
 * A path counts as valid when it contains a `benchmarks` entry.
 *
 * @returns {Promise<string|null>} The configured path, or null when unset or stale.
 */
export async function findBenchmarkRepo() {
  const { benchmarkRepoPath } = await loadConfig();
  if (!benchmarkRepoPath) return null;
  return existsSync(join(benchmarkRepoPath, "benchmarks")) ? benchmarkRepoPath : null;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Locate the benchmark gallery repo, linking it on first use.
 *
 * Resolution order:
 *   1. the path already stored in the config (via findBenchmarkRepo);
 *   2. well-known locations under the home directory;
 *   3. an interactive choice between cloning from GitHub and entering a path.
 *
 * Whenever a usable path is found it is saved as `benchmarkRepoPath` in the config.
 *
 * @param {{choice: Function, text: Function}} prompt - Interactive prompt helper.
 * @returns {Promise<string|null>} The repo path, or null when linking failed or was cancelled.
 */
export async function linkBenchmarkRepo(prompt) {
  const existing = await findBenchmarkRepo();
  if (existing) return existing;

  const repoName = "local-llm-visual-benchmark";
  const home = homedir();
  const hasBenchmarks = (dir) => existsSync(join(dir, "benchmarks"));
  // Persist the chosen path; shared by every successful branch below.
  const rememberRepoPath = async (repoPath) => {
    const config = await loadConfig();
    config.benchmarkRepoPath = repoPath;
    await saveConfig(config);
    return repoPath;
  };

  const defaultCloneDir = join(home, "dev", repoName);
  const candidates = [defaultCloneDir, join(home, "projects", repoName), join(home, repoName)];
  const discovered = candidates.find(hasBenchmarks);
  if (discovered) return rememberRepoPath(discovered);

  console.log(pc.dim("\nThe benchmark gallery needs to be linked to offgrid-ai."));
  console.log(pc.dim("This is the local-llm-visual-benchmark repo that stores prompts and run results.\n"));

  const choice = await prompt.choice("Link benchmark gallery", [
    { value: "clone", label: "Clone from GitHub", hint: "git clone into ~/dev" },
    { value: "manual", label: "Enter path manually", hint: "If you already have it cloned" },
  ], "clone");

  if (choice === "clone") {
    console.log(pc.dim(`\nCloning ${BENCHMARK_REPO}...`));
    try {
      // execFile always pipes the child's stdio, so no stdio option is passed.
      await execFileAsync("git", ["clone", BENCHMARK_REPO, defaultCloneDir]);
      await rememberRepoPath(defaultCloneDir);
      console.log(pc.green(`✓ Cloned to ${defaultCloneDir}`));
      return defaultCloneDir;
    } catch (err) {
      console.log(pc.red(`Clone failed: ${err.message}`));
      return null;
    }
  }

  const entered = String((await prompt.text("Path to local-llm-visual-benchmark", "")) ?? "").trim();
  if (!entered) return null;

  // Expand only a bare "~" or a "~/" prefix. A blanket /^~/ replacement would
  // turn "~other/repo" into "<home>other/repo", and String.replace would also
  // interpret "$" sequences inside the home directory path.
  const isHomeRelative = entered === "~" || entered.startsWith("~/") || entered.startsWith("~\\");
  const resolved = resolve(isHomeRelative ? join(home, entered.slice(1)) : entered);

  if (!hasBenchmarks(resolved)) {
    console.log(pc.red(`No benchmarks/ directory found at ${resolved}`));
    return null;
  }
  await rememberRepoPath(resolved);
  console.log(pc.green(`✓ Linked to ${resolved}`));
  return resolved;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// ── Shared utilities (matches local-llm-visual-benchmark) ──────────────────
|
|
2
|
+
|
|
3
|
+
import { createHash } from "node:crypto";
|
|
4
|
+
import { readdir, readFile } from "node:fs/promises";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
|
|
7
|
+
/**
 * Build a filesystem-friendly slug for a model id.
 *
 * The id is NFKD-normalized, stripped of combining marks and lower-cased; every
 * run of characters outside [a-z0-9] becomes a single "-". If that leaves the
 * folded id unchanged (and within `maxLength`), the slug is returned as-is.
 * Otherwise the slug is shortened and suffixed with the first 10 hex digits of
 * the id's SHA-256 so that distinct ids stay distinct.
 *
 * @param {string} modelId
 * @param {number} [maxLength=80] - Upper bound for the un-hashed slug.
 * @returns {string}
 */
export function slugModelId(modelId, maxLength = 80) {
  const digest = createHash("sha256").update(modelId).digest("hex").slice(0, 10);

  const folded = modelId
    .normalize("NFKD")
    .replace(/[\u0300-\u036f]/gu, "")
    .toLowerCase();
  const cleaned = folded
    .replace(/[^a-z0-9]+/gu, "-")
    .replace(/^-+|-+$/gu, "")
    .replace(/-{2,}/gu, "-");

  const alreadySafe = cleaned.length > 0 && cleaned.length <= maxLength && cleaned === folded;
  if (alreadySafe) {
    return cleaned;
  }

  // Reserve 11 characters for "-" plus the 10-character digest.
  const room = Math.max(1, maxLength - 11);
  const stem = cleaned.slice(0, room).replace(/^-+|-+$/gu, "") || "model";
  return `${stem}-${digest}`;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Derive a run id from a timestamp: the ISO-8601 string with every ":" and "."
 * replaced by "-".
 *
 * @param {Date} [date=new Date()]
 * @returns {string} e.g. "1970-01-01T00-00-00-000Z"
 */
export function createRunId(date = new Date()) {
  const iso = date.toISOString();
  return iso.replace(/[:.]/gu, "-");
}
|
|
20
|
+
|
|
21
|
+
/**
 * Return the prompt text handed to the tool for a benchmark.
 * Currently this is the benchmark's own prompt, unchanged.
 *
 * @param {{prompt: string}} benchmark
 * @returns {string}
 */
export function buildToolPrompt(benchmark) {
  const { prompt } = benchmark;
  return prompt;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Parse one benchmark markdown file into a benchmark record.
 *
 * Accepts LF or CRLF line endings and an optional leading byte-order mark, so
 * files saved by Windows editors keep their front matter out of the prompt.
 *
 * @param {string} filename - File name including the ".md" extension.
 * @param {string} raw - File contents.
 * @returns {{id: string, title: string, description: string, prompt: string, kind: string}}
 */
function parseBenchmarkFile(filename, raw) {
  const text = raw.replace(/^\uFEFF/u, "");
  const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
  const frontmatter = match ? match[1] : "";
  const prompt = (match ? match[2] : text).trim();

  // The file name is the default id and title; front matter may override either.
  let id = filename.replace(/\.md$/u, "");
  let title = id;
  let description = "";
  for (const line of frontmatter.split(/\r?\n/u)) {
    const kv = line.match(/^(\w+):\s*(.+)$/);
    if (!kv) continue;
    const key = kv[1];
    const value = kv[2].trim();
    if (key === "id") id = value;
    else if (key === "title") title = value;
    else if (key === "description") description = value;
  }

  const kind = id === "ab-test-analysis" ? "data-science" : "visual";
  return { id, title, description, prompt, kind };
}

/**
 * Load every `*.md` benchmark definition from a directory.
 *
 * Each file may start with a `---` delimited front-matter block containing
 * `id`, `title` and `description` lines; the remainder is the prompt. Results
 * are ordered by file name.
 *
 * @param {string} benchDir - Directory containing benchmark markdown files.
 * @returns {Promise<Array<{id: string, title: string, description: string, prompt: string, kind: string}>>}
 * @throws If the directory or one of its markdown files cannot be read.
 */
export async function loadBenchmarks(benchDir) {
  const entries = await readdir(benchDir);
  const markdownFiles = entries.filter((name) => name.endsWith(".md")).sort();
  // Files are independent, so read them concurrently; map() keeps the sorted order.
  return Promise.all(
    markdownFiles.map(async (filename) => {
      const raw = await readFile(join(benchDir, filename), "utf8");
      return parseBenchmarkFile(filename, raw);
    }),
  );
}
|
|
51
|
+
|
|
52
|
+
/**
 * Resolve the model string passed to Pi for a profile.
 *
 * Uses the Pi harness's explicit `model` when set (not null/undefined);
 * otherwise falls back to "<providerId>/<modelAlias>".
 *
 * @param {object} profile
 * @returns {string}
 */
export function piModelString(profile) {
  const configured = profile.harnesses?.pi?.model;
  if (configured !== undefined && configured !== null) {
    return configured;
  }
  return `${profile.providerId}/${profile.modelAlias}`;
}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
// ── Semantic stream renderer for Pi benchmark output ─────────────────────────
|
|
2
|
+
|
|
3
|
+
import { relative, basename } from "node:path";
|
|
4
|
+
import { pc } from "../ui.mjs";
|
|
5
|
+
|
|
6
|
+
export const BENCH_COLORS = {
|
|
7
|
+
thinking: pc.magenta,
|
|
8
|
+
text: pc.green,
|
|
9
|
+
tool: pc.yellow,
|
|
10
|
+
success: pc.green,
|
|
11
|
+
warning: pc.yellow,
|
|
12
|
+
toolOutput: pc.dim,
|
|
13
|
+
error: pc.red,
|
|
14
|
+
info: pc.cyan,
|
|
15
|
+
dim: pc.dim,
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
/**
 * One-line description of a tool call: "[toolCall] <name>", followed by
 * " → <path>" when the arguments carry a `path`, `file_path` or `filename`.
 *
 * @param {{name: string, arguments?: object}} toolCall
 * @returns {string}
 */
export function formatToolCall(toolCall) {
  const args = toolCall.arguments;
  const target = args?.path || args?.file_path || args?.filename || "";
  if (!target) {
    return `[toolCall] ${toolCall.name}`;
  }
  return `[toolCall] ${toolCall.name} → ${target}`;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Format a token count compactly: "950", "12k", "1.3M".
 *
 * Counts below 1,000 are rounded to an integer, thousands are rounded to whole
 * "k", and millions keep one decimal.
 *
 * @param {number} n - Token count.
 * @returns {string}
 */
export function formatTokens(n) {
  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000) {
    const thousands = Math.round(n / 1_000);
    // 999,500..999,999 rounds up to 1000 thousands; show that as "1.0M", not "1000k".
    if (thousands >= 1_000) return `${(n / 1_000_000).toFixed(1)}M`;
    return `${thousands}k`;
  }
  return String(Math.round(n));
}
|
|
29
|
+
|
|
30
|
+
/**
 * Rough token estimate from a byte count: one token per 4 bytes, rounded up,
 * and never less than 1.
 *
 * @param {number} bytes
 * @returns {number}
 */
export function estimatedTokensFromBytes(bytes) {
  const estimate = Math.ceil(bytes / 4);
  return estimate < 1 ? 1 : estimate;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Erase the current terminal line (carriage return + ANSI "erase to end of line").
 * Does nothing when stdout is not a TTY.
 */
export function clearStatusLine() {
  if (!process.stdout.isTTY) return;
  process.stdout.write("\r\x1b[K");
}
|
|
39
|
+
|
|
40
|
+
/**
 * Overwrite the current terminal line with `text`, without a trailing newline.
 * Does nothing when stdout is not a TTY.
 *
 * @param {string} text
 */
export function printStatusLine(text) {
  if (!process.stdout.isTTY) return;
  process.stdout.write(`\r\x1b[K${text}`);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Print a permanent line: clear any transient status line first, then log `text`.
 *
 * @param {string} text
 */
export function printFinalLine(text) {
  clearStatusLine(); // wipe the in-place status line before emitting a real line
  console.log(text);
}
|
|
50
|
+
|
|
51
|
+
/**
 * Render one parsed stream event to the terminal and update `state`.
 *
 * Handled event types: session, turn_start, message_start, message_update,
 * tool_execution_start / _update / _end, toolResult, turn_end and agent_end.
 * Every other type (including agent_start and message_end) is ignored.
 *
 * @param {object} parsed - Parsed event; `parsed.type` selects the handler.
 * @param {object} state - Mutable render state (turn, turnHadToolError, modelPrinted, activeTool, status).
 * @param {{verbose?: boolean}} [opts] - With `verbose`, raw deltas and tool output are echoed to stdout.
 * @returns {void}
 */
export function renderStreamEvent(parsed, state, opts = {}) {
  const verbose = Boolean(opts.verbose);
  const eventType = parsed.type;

  if (eventType === "session") {
    printFinalLine(BENCH_COLORS.info("Pi benchmark started"));
    if (parsed.id) printFinalLine(BENCH_COLORS.dim(` Session ${parsed.id}`));
    return;
  }

  if (eventType === "turn_start") {
    state.turn += 1;
    state.turnHadToolError = false;
    resetStatus(state, "thinking");
    printFinalLine("");
    printFinalLine(BENCH_COLORS.info(`Turn ${state.turn}`));
    return;
  }

  if (eventType === "message_start") {
    const message = parsed.message;
    // Announce the model once, on the first assistant message that names it.
    if (!state.modelPrinted && message?.role === "assistant" && message.provider && message.model) {
      state.modelPrinted = true;
      printFinalLine(BENCH_COLORS.dim(` Model ${message.provider}/${message.model}`));
    }
    return;
  }

  if (eventType === "message_update") {
    const update = parsed.assistantMessageEvent;
    if (!update) return;
    // Underscores are stripped, so "thinking_delta" and "thinkingdelta" are treated alike.
    const subtype = String(update.type ?? "").replace(/_/gu, "");
    const parts = /^(thinking|text|toolcall)(start|delta)$/u.exec(subtype);
    if (!parts) return;
    const [, family, step] = parts;
    const mode = family === "toolcall" ? "tool" : family;
    if (step === "start") {
      resetStatus(state, mode);
      return;
    }
    if (verbose) process.stdout.write(BENCH_COLORS[mode](update.delta || ""));
    updateStatusFromDelta(state, update.delta, mode);
    return;
  }

  if (eventType === "tool_execution_start") {
    const args = parsed.args ?? {};
    state.activeTool = { name: parsed.toolName, args, outputText: "" };
    resetStatus(state, "exec", parsed.toolName);
    printFinalLine(BENCH_COLORS.tool(formatToolStart(parsed.toolName, args, state)));
    return;
  }

  if (eventType === "tool_execution_update") {
    const output = toolResultText(parsed.partialResult ?? parsed.result ?? parsed);
    if (!output) return;
    if (verbose) process.stdout.write(BENCH_COLORS.toolOutput(output));
    if (state.activeTool) state.activeTool.outputText = output;
    updateStatusFromDelta(state, output, "exec");
    return;
  }

  if (eventType === "tool_execution_end") {
    // Format first: formatToolEnd reads state.activeTool, which is cleared below.
    const summary = formatToolEnd(parsed, state);
    if (parsed.isError) state.turnHadToolError = true;
    summary.forEach((line) => printFinalLine(line));
    state.activeTool = null;
    resetStatus(state, "idle");
    return;
  }

  if (eventType === "toolResult") {
    if (parsed.isError) state.turnHadToolError = true;
    const status = parsed.isError ? BENCH_COLORS.error("✗") : BENCH_COLORS.success("✓");
    printFinalLine(`${status} ${parsed.toolName ?? "tool"}`);
    return;
  }

  if (eventType === "turn_end") {
    const usage = parsed.message?.usage;
    const tokenPart = usage ? ` · ${formatTokens(usage.output ?? usage.totalTokens ?? 0)} tokens` : "";
    const hadIssue = state.turnHadToolError;
    const marker = hadIssue ? BENCH_COLORS.warning("⚠") : BENCH_COLORS.success("✓");
    const suffix = hadIssue ? " · tool issue" : "";
    printFinalLine(`${marker} turn ${state.turn}${tokenPart}${suffix}`);
    return;
  }

  if (eventType === "agent_end") {
    clearStatusLine();
    printFinalLine(BENCH_COLORS.info("Pi benchmark finished"));
  }
}
|
|
151
|
+
|
|
152
|
+
/**
 * Reset the live status tracker for a new phase: set the mode and tool name and
 * zero the byte and token counters.
 *
 * @param {{status: object}} state - Render state; `state.status` is mutated in place.
 * @param {string} mode - New status mode (e.g. "thinking", "text", "tool", "exec", "idle").
 * @param {string|null} [toolName=null] - Tool being executed, if any.
 */
export function resetStatus(state, mode, toolName = null) {
  const { status } = state;
  status.mode = mode;
  status.toolName = toolName;
  status.bytes = 0;
  status.tokens = 0;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Account for a streamed chunk and refresh the in-place status line.
 *
 * Adds the chunk's UTF-8 byte length to the running total, re-derives the token
 * estimate from it and redraws "Turn N <activity> · <bytes> (~<tokens> tokens)".
 * An empty or missing delta is ignored.
 *
 * @param {object} state - Render state; `state.status` is mutated.
 * @param {string} delta - Newly received text.
 * @param {string} [mode=state.status.mode] - Status mode to switch to.
 */
export function updateStatusFromDelta(state, delta, mode = state.status.mode) {
  if (!delta) return;

  const { status } = state;
  status.mode = mode;
  status.bytes += Buffer.byteLength(delta, "utf8");
  status.tokens = estimatedTokensFromBytes(status.bytes);

  const toolSuffix = status.toolName ? ` · ${status.toolName}` : "";
  const activityByMode = {
    thinking: "thinking…",
    text: "drafting response…",
    tool: "preparing tool…",
    exec: "running tool…",
  };
  const activity = activityByMode[status.mode] ?? "working…";
  const sizeText = formatBytes(status.bytes);
  const tokenText = formatTokens(status.tokens);

  printStatusLine(BENCH_COLORS.dim(`Turn ${state.turn} ${activity}${toolSuffix} · ${sizeText} (~${tokenText} tokens)`));
}
|
|
175
|
+
|
|
176
|
+
/**
 * Describe a tool invocation that is about to run.
 *
 * read, write, edit and bash get dedicated summaries (path, payload size,
 * replacement count, command); any other tool is shown with compacted arguments.
 *
 * @param {string} toolName
 * @param {object} args - Tool arguments.
 * @param {object} state - Render state, used to shorten paths.
 * @returns {string}
 */
export function formatToolStart(toolName, args, state) {
  switch (toolName) {
    case "read":
      return `→ read ${displayPath(args.path, state)}`;
    case "write": {
      let sizeNote = "";
      if (args.content) {
        sizeNote = ` · ${formatBytes(Buffer.byteLength(String(args.content), "utf8"))}`;
      }
      return `→ write ${displayPath(args.path, state)}${sizeNote}`;
    }
    case "edit": {
      const editCount = Array.isArray(args.edits) ? args.edits.length : 0;
      let countNote = "";
      if (editCount > 0) {
        countNote = ` · ${editCount} replacement${editCount === 1 ? "" : "s"}`;
      }
      return `→ edit ${displayPath(args.path, state)}${countNote}`;
    }
    case "bash":
      return `→ run ${truncateOneLine(args.command ?? "")}`;
    default:
      return `→ ${toolName}${compactArgs(args)}`;
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Build the line(s) summarising a finished tool execution.
 *
 * Tool name, arguments and output fall back to what was recorded in
 * `state.activeTool` when the end event does not carry them.
 *
 * @param {object} parsed - The tool_execution_end event.
 * @param {object} state - Render state.
 * @returns {string[]} One or more lines to print.
 */
export function formatToolEnd(parsed, state) {
  const active = state.activeTool;
  const toolName = parsed.toolName ?? active?.name ?? "tool";
  const args = parsed.args ?? active?.args ?? {};
  const output = toolResultText(parsed.result) || active?.outputText || "";
  const failed = parsed.isError;
  const marker = failed ? BENCH_COLORS.error("✗") : BENCH_COLORS.success("✓");

  if (failed) {
    return [`${marker} ${toolName} failed · ${firstUsefulLine(output)}`];
  }

  switch (toolName) {
    case "write":
      return [`${marker} wrote ${displayPath(args.path, state)}${parsedWriteSize(output)}`];
    case "read": {
      const sizeNote = output ? ` · ${formatBytes(Buffer.byteLength(output, "utf8"))}` : "";
      return [`${marker} read ${displayPath(args.path, state)}${sizeNote}`];
    }
    case "edit":
      return [`${marker} edited ${displayPath(args.path, state)}`];
    case "bash":
      return formatBashResult(marker, output);
    default: {
      const summary = firstUsefulLine(output);
      return [`${marker} ${toolName}${summary ? ` · ${summary}` : ""}`];
    }
  }
}
|
|
209
|
+
|
|
210
|
+
/**
 * Summarise shell output in at most two lines: the most relevant line after the
 * marker, and the next one dimmed. With no meaningful output the result is
 * "<marker> command completed".
 *
 * @param {string} marker - Pre-coloured success marker.
 * @param {string} text - Command output.
 * @returns {string[]}
 */
export function formatBashResult(marker, text) {
  const [headline, detail] = meaningfulLines(text);
  if (headline === undefined) {
    return [`${marker} command completed`];
  }
  const rendered = [`${marker} ${headline}`];
  if (detail !== undefined) {
    rendered.push(BENCH_COLORS.dim(` ${detail}`));
  }
  return rendered;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Extract the byte count from a "Successfully wrote N bytes" message and return
 * it as a " · <size>" suffix, or "" when the message does not match.
 *
 * @param {unknown} text - Tool output (coerced with String()).
 * @returns {string}
 */
export function parsedWriteSize(text) {
  const found = /Successfully wrote\s+([0-9,]+)\s+bytes/iu.exec(String(text));
  if (found === null) return "";

  // The count may contain thousands separators, e.g. "12,345".
  const byteCount = Number(found[1].replace(/,/gu, ""));
  if (!Number.isFinite(byteCount)) return "";
  return ` · ${formatBytes(byteCount)}`;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Flatten a tool result's `content` into plain text.
 *
 * String content is returned as-is. Array content is reduced to its non-empty
 * string `text` fields joined with newlines. Anything else yields "".
 *
 * @param {{content?: string|Array<{text?: string}>}|null|undefined} result
 * @returns {string}
 */
export function toolResultText(result) {
  const content = result?.content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = [];
  for (const item of content) {
    const text = item?.text;
    if (typeof text === "string" && text) pieces.push(text);
  }
  return pieces.join("\n");
}
|
|
232
|
+
|
|
233
|
+
/**
 * Return the most relevant line of some output, or "no details" when there is none.
 *
 * @param {unknown} text
 * @returns {string}
 */
export function firstUsefulLine(text) {
  const [first] = meaningfulLines(text);
  return first ?? "no details";
}
|
|
236
|
+
|
|
237
|
+
/**
 * Split output into trimmed, non-empty lines, dropping lines that consist only
 * of "^" characters. If any line looks like an error, the first such line is
 * moved to the front and every other occurrence of that exact line is removed.
 *
 * @param {unknown} text - Output text; null/undefined are treated as empty.
 * @returns {string[]}
 */
export function meaningfulLines(text) {
  const kept = [];
  for (const rawLine of String(text ?? "").split(/\r?\n/u)) {
    const line = rawLine.trim();
    if (line && !/^\^+$/u.test(line)) kept.push(line);
  }

  const looksLikeError = /(?:error|exception|failed|not found|command exited with code|validation failed)/iu;
  const errorLine = kept.find((line) => looksLikeError.test(line));
  if (!errorLine) return kept;

  const others = kept.filter((line) => line !== errorLine);
  return [errorLine, ...others];
}
|
|
247
|
+
|
|
248
|
+
/**
 * Shorten a path for display.
 *
 * Paths inside `state.cwd` are shown relative to it. Paths outside it (or equal
 * to it) are reduced to their base name. A missing value yields "unknown".
 *
 * @param {unknown} value - Path to display.
 * @param {{cwd?: string}} state - Render state; `cwd` is the reference directory.
 * @returns {string}
 */
export function displayPath(value, state) {
  if (!value) return "unknown";

  const fullPath = String(value);
  let shown = fullPath;
  if (state.cwd) {
    shown = relative(state.cwd, fullPath);
  }

  const leavesCwd = shown.startsWith("..");
  if (shown && shown !== "." && !leavesCwd) {
    return shown;
  }
  return basename(fullPath) || fullPath;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Render tool arguments as a compact " · key=value key=value" suffix.
 * Entries whose value is undefined, null or "" are omitted; with nothing left
 * the result is "".
 *
 * @param {object|null|undefined} args
 * @returns {string}
 */
export function compactArgs(args) {
  const pairs = [];
  for (const [key, value] of Object.entries(args ?? {})) {
    if (value === undefined || value === null || value === "") continue;
    pairs.push(`${key}=${String(value)}`);
  }
  if (pairs.length === 0) return "";
  return ` · ${truncateOneLine(pairs.join(" "))}`;
}
|
|
261
|
+
|
|
262
|
+
/**
 * Collapse all whitespace runs to single spaces, trim, and cut the result to at
 * most `max` characters, ending in "…" when shortened.
 *
 * @param {unknown} value - Text to shorten; null/undefined are treated as empty.
 * @param {number} [max] - Character budget. Defaults to the terminal width
 *   (capped at 140, assumed 100 when unknown) minus 12, but never below 60.
 * @returns {string}
 */
export function truncateOneLine(value, max = Math.max(60, Math.min(process.stdout.columns ?? 100, 140) - 12)) {
  const collapsed = String(value ?? "").replace(/\s+/gu, " ").trim();
  const overflows = collapsed.length > max;
  if (!overflows) {
    return collapsed;
  }
  // Keep one character of the budget for the ellipsis.
  const keep = Math.max(1, max - 1);
  return `${collapsed.slice(0, keep)}…`;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Format a byte count with binary (1024-based) units from B to TB.
 * Plain bytes are shown without decimals, larger units with two.
 *
 * @param {number} bytes
 * @returns {string} e.g. "512 B", "1.50 KB"; "unknown" for non-finite input.
 */
export function formatBytes(bytes) {
  if (!Number.isFinite(bytes)) return "unknown";

  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  let scaled = bytes;
  let index = 0;
  for (; scaled >= 1024 && index < lastUnit; index += 1) {
    scaled /= 1024;
  }

  const decimals = index === 0 ? 0 : 2;
  return `${scaled.toFixed(decimals)} ${units[index]}`;
}
|