@ted-galago/wave-cli 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +226 -2
- package/dist/index.cjs +2992 -131
- package/dist/index.js +2989 -128
- package/package.json +2 -1
- package/scripts/benchmark-cli.mjs +626 -0
- package/scripts/verify-dev-api.mjs +341 -26
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ted-galago/wave-cli",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"wave": "dist/index.js"
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"dev": "tsx src/index.ts",
|
|
15
15
|
"test": "vitest run",
|
|
16
16
|
"verify:dev": "node scripts/verify-dev-api.mjs",
|
|
17
|
+
"benchmark:cli": "node scripts/benchmark-cli.mjs",
|
|
17
18
|
"postinstall": "node scripts/postinstall-local-bin.mjs"
|
|
18
19
|
},
|
|
19
20
|
"engines": {
|
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { resolve } from "node:path";
|
|
4
|
+
import { spawnSync } from "node:child_process";
|
|
5
|
+
|
|
6
|
+
/**
 * Baseline option values for a benchmark run. parseArgs copies these into the
 * options object and printUsage interpolates several of them into the help text.
 */
const DEFAULTS = {
  // Measured iterations per scenario.
  runs: 10,
  // Unmeasured iterations executed before the measured ones.
  warmup: 2,
  // Search term used by the search scenario.
  query: "notes",
  // Per-command timeout, in milliseconds.
  timeoutMs: 15000,
  // Wave CLI entry script, resolved against the current working directory.
  waveEntry: "dist/index.js",
  // When true, per-run details are printed.
  verbose: false,
  // When true, the Wave tool is left out of the run.
  skipWave: false,
  // When true, the Obsidian tool is left out of the run.
  skipObsidian: false,
  // Explicit Obsidian binary; an empty string means "auto-detect".
  obsidianBin: ""
};
|
|
17
|
+
|
|
18
|
+
/**
 * Prints the command-line help text to stdout, interpolating the current
 * DEFAULTS so the documented defaults cannot drift from the real ones.
 */
function printUsage() {
  const helpLines = [
    "Wave vs Obsidian CLI benchmark",
    "",
    "Usage:",
    " node scripts/benchmark-cli.mjs [options]",
    "",
    "Options:",
    ` --runs <n> Measured runs per scenario (default: ${DEFAULTS.runs})`,
    ` --warmup <n> Warmup runs per scenario (default: ${DEFAULTS.warmup})`,
    ` --query <text> Search query used in search scenarios (default: ${DEFAULTS.query})`,
    ` --timeout-ms <n> Timeout per command in ms (default: ${DEFAULTS.timeoutMs})`,
    " --obsidian-bin <path> Override Obsidian binary path",
    ` --wave-entry <path> Override Wave CLI entry script (default: ${DEFAULTS.waveEntry})`,
    " --json-out <path> Write full benchmark output as JSON",
    " --verbose Print per-run details",
    " --skip-wave Skip Wave CLI benchmarks",
    " --skip-obsidian Skip Obsidian CLI benchmarks",
    " --help Show this help",
    "",
    "Examples:",
    " node scripts/benchmark-cli.mjs",
    ' node scripts/benchmark-cli.mjs --runs 20 --warmup 3 --query "meeting notes"',
    " node scripts/benchmark-cli.mjs --obsidian-bin /Applications/Obsidian.app/Contents/MacOS/obsidian-cli",
    // The trailing empty entry reproduces the final newline of the help text.
    ""
  ];

  console.log(helpLines.join("\n"));
}
|
|
21
|
+
|
|
22
|
+
/**
 * Converts the raw argument list into an options object seeded from DEFAULTS.
 *
 * Boolean switches take no value. Every other `--option` consumes the next
 * token as its value, and that token must not itself start with `--`.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {object} DEFAULTS plus `jsonOut`, with any overrides applied;
 *   `help` is only present when `--help` was given.
 * @throws {Error} When an option has no value, is unknown, or a bare
 *   positional argument is encountered.
 */
function parseArgs(argv) {
  const options = { ...DEFAULTS, jsonOut: "" };

  // Switches that simply flip one option to true.
  const switches = new Map([
    ["--help", "help"],
    ["--verbose", "verbose"],
    ["--skip-wave", "skipWave"],
    ["--skip-obsidian", "skipObsidian"]
  ]);

  // Options that consume the following token as their value.
  const valueSetters = new Map([
    ["--runs", (text) => { options.runs = toPositiveInt(text, "--runs"); }],
    ["--warmup", (text) => { options.warmup = toNonNegativeInt(text, "--warmup"); }],
    ["--query", (text) => { options.query = text; }],
    ["--timeout-ms", (text) => { options.timeoutMs = toPositiveInt(text, "--timeout-ms"); }],
    ["--obsidian-bin", (text) => { options.obsidianBin = text; }],
    ["--wave-entry", (text) => { options.waveEntry = text; }],
    ["--json-out", (text) => { options.jsonOut = text; }]
  ]);

  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;

    const switchTarget = switches.get(token);
    if (switchTarget !== undefined) {
      options[switchTarget] = true;
      continue;
    }

    if (!token.startsWith("--")) {
      throw new Error(`Unexpected argument: ${token}`);
    }

    // The missing-value check deliberately runs before the unknown-option
    // check, so `--bogus` with no value reports the missing value.
    const operand = argv[cursor];
    if (operand === undefined || operand.startsWith("--")) {
      throw new Error(`Missing value for ${token}`);
    }

    const applyValue = valueSetters.get(token);
    if (applyValue === undefined) {
      throw new Error(`Unknown option: ${token}`);
    }

    applyValue(operand);
    cursor += 1;
  }

  return options;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Parses an option value that must be an integer greater than zero.
 *
 * @param {string} raw - Text supplied on the command line.
 * @param {string} name - Option name used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is not an integer or is not above zero.
 */
function toPositiveInt(raw, name) {
  const parsed = Number(raw);
  const isAcceptable = Number.isInteger(parsed) && parsed > 0;
  if (isAcceptable) {
    return parsed;
  }
  throw new Error(`${name} must be a positive integer.`);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Parses an option value that must be an integer greater than or equal to zero.
 *
 * Blank input is rejected explicitly: `Number("")` and `Number("  ")` both
 * evaluate to 0, which would otherwise let an empty value pass as a valid 0.
 *
 * @param {string} raw - Text supplied on the command line.
 * @param {string} name - Option name used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is blank, not an integer, or negative.
 */
function toNonNegativeInt(raw, name) {
  const text = String(raw).trim();
  const num = text === "" ? Number.NaN : Number(text);
  if (!Number.isInteger(num) || num < 0) {
    throw new Error(`${name} must be a non-negative integer.`);
  }
  return num;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Reads a dotenv-style file into a plain object.
 *
 * Blank lines, `#` comment lines and lines that are not `KEY=value`
 * assignments are ignored. An optional leading `export ` and whitespace around
 * `=` are tolerated, and one pair of matching surrounding quotes (single or
 * double) is removed from the value so that `TOKEN="abc"` yields `abc`.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Record<string, string>} Parsed variables; empty when the file
 *   does not exist.
 */
function loadDotEnv(filePath) {
  if (!existsSync(filePath)) return {};

  const raw = readFileSync(filePath, "utf8");
  const env = {};

  for (const line of raw.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;

    const match = trimmed.match(/^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)$/);
    if (!match) continue;

    const [, key, rawValue] = match;
    // Strip quotes only when the same quote character opens and closes the value.
    const quoted = rawValue.match(/^(["'])(.*)\1$/);
    env[key] = quoted ? quoted[2] : rawValue;
  }

  return env;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Looks a command up on the user's PATH via the shell builtin `command -v`.
 *
 * A zsh login shell is tried first. When that shell cannot be spawned at all
 * (for example on hosts where /bin/zsh is not installed) the lookup falls back
 * to /bin/sh instead of reporting the command as missing.
 *
 * @param {string} command - Command name to look up.
 * @returns {string} Trimmed output of `command -v`, or "" when the command is
 *   not found or no shell could be started.
 */
function commandExists(command) {
  const script = `command -v ${escapeShellWord(command)}`;
  const shells = [
    ["/bin/zsh", ["-lc", script]],
    ["/bin/sh", ["-c", script]]
  ];

  for (const [shell, shellArgs] of shells) {
    const probe = spawnSync(shell, shellArgs, { encoding: "utf8" });

    // `error` is set when the shell itself failed to start; try the next one.
    if (probe.error) continue;

    if (probe.status !== 0) return "";
    return probe.stdout.trim();
  }

  return "";
}
|
|
121
|
+
|
|
122
|
+
/**
 * Wraps a value in single quotes for use as one POSIX shell word.
 * Each embedded single quote is rewritten as `'\''` (close quote, escaped
 * quote, reopen quote).
 *
 * @param {*} value - Value to quote; coerced with String().
 * @returns {string} The single-quoted word.
 */
function escapeShellWord(value) {
  const pieces = String(value).split("'");
  const body = pieces.join("'\\''");
  return "'" + body + "'";
}
|
|
125
|
+
|
|
126
|
+
/**
 * Decides which Obsidian CLI binary to benchmark.
 *
 * Precedence: the explicit path (returned as given, without an existence
 * check), then an `obsidian` command found on PATH, then the macOS app-bundle
 * location.
 *
 * @param {string} explicitPath - Value of `--obsidian-bin`; "" when not given.
 * @returns {{path: string, source: string}} Chosen path and where it came
 *   from; `path` is "" and `source` is "not-found" when nothing matched.
 */
function resolveObsidianBinary(explicitPath) {
  if (explicitPath) {
    return { path: explicitPath, source: "--obsidian-bin" };
  }

  const bundledCli = "/Applications/Obsidian.app/Contents/MacOS/obsidian-cli";

  // Probes are lazy so the bundle check only runs when the PATH lookup fails.
  const probes = [
    { source: "PATH", locate: () => commandExists("obsidian") },
    { source: "app-bundle", locate: () => (existsSync(bundledCli) ? bundledCli : "") }
  ];

  for (const { source, locate } of probes) {
    const path = locate();
    if (path) {
      return { path, source };
    }
  }

  return { path: "", source: "not-found" };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Returns the current time as an ISO 8601 string.
 *
 * @returns {string} Result of `Date.prototype.toISOString()` for "now".
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
|
|
147
|
+
|
|
148
|
+
/**
 * Runs a command synchronously and measures its wall-clock duration.
 *
 * @param {string} binary - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} env - Environment for the child process.
 * @param {number} timeoutMs - Timeout passed to spawnSync, in milliseconds.
 * @returns {object} `command` (binary and args joined by spaces), `args`,
 *   `status`, `signal`, `durationMs`, `stdout`, `stderr` (both default to "")
 *   and `timedOut` (true when the spawn error code is ETIMEDOUT).
 */
function runTimed(binary, args, env, timeoutMs) {
  const spawnOptions = {
    env,
    cwd: process.cwd(),
    encoding: "utf8",
    timeout: timeoutMs,
    // 10 MiB cap on captured output.
    maxBuffer: 10 * 1024 * 1024
  };

  const tick = process.hrtime.bigint();
  const child = spawnSync(binary, args, spawnOptions);
  const elapsedNs = process.hrtime.bigint() - tick;

  const hitTimeout = child.error?.code === "ETIMEDOUT";

  return {
    command: [binary, ...args].join(" "),
    args,
    status: child.status,
    signal: child.signal,
    // hrtime reports nanoseconds; convert to fractional milliseconds.
    durationMs: Number(elapsedNs) / 1_000_000,
    stdout: child.stdout ?? "",
    stderr: child.stderr ?? "",
    timedOut: hitTimeout
  };
}
|
|
172
|
+
|
|
173
|
+
/**
 * Parses a command's stdout as JSON.
 *
 * @param {string} stdout - Captured standard output.
 * @returns {*} The parsed value, or null when stdout is empty/blank or is not
 *   valid JSON.
 */
function parseWaveEnvelope(stdout) {
  const text = (stdout || "").trim();
  if (!text) return null;
  try {
    return JSON.parse(text);
  } catch {
    // Non-JSON output is an expected outcome here, not an error.
    return null;
  }
}

/**
 * Judges a single run against the scenario's expectation.
 *
 * For the "wave" tool in any mode other than "exit_only", both the exit status
 * and the JSON envelope's `ok` flag are checked. In "exit_only" mode, and for
 * every other tool, only the exit status counts. Any expectation other than
 * "success" is treated as "the command should fail".
 *
 * @param {string} toolId - "wave" selects the envelope-aware rules.
 * @param {string} expectation - "success" or anything else (expected failure).
 * @param {string} mode - "exit_only" to ignore the envelope for wave runs.
 * @param {object} result - Run result with `status`, `stdout`, `stderr` and
 *   `timedOut`.
 * @returns {{pass: boolean, reason: string, waveJson: boolean, waveOk: boolean}}
 *   `waveJson`/`waveOk` are always false for timeouts and non-wave tools.
 */
function evaluateRun(toolId, expectation, mode, result) {
  if (result.timedOut) {
    return { pass: false, reason: "timeout", waveJson: false, waveOk: false };
  }

  const expectSuccess = expectation === "success";
  const exitedCleanly = result.status === 0;
  const exitReason = `exit_${result.status ?? "null"}`;

  if (toolId !== "wave") {
    if (!expectSuccess) {
      const pass = !exitedCleanly;
      return {
        pass,
        reason: pass ? "expected_failure" : "unexpected_success",
        waveJson: false,
        waveOk: false
      };
    }

    let reason = "ok";
    if (!exitedCleanly) {
      // Recognise the "app is not running" message so it gets its own reason.
      const combined = `${result.stdout || ""}\n${result.stderr || ""}`.toLowerCase();
      reason = combined.includes("unable to find obsidian") ? "obsidian_not_running" : exitReason;
    }
    return { pass: exitedCleanly, reason, waveJson: false, waveOk: false };
  }

  const envelope = parseWaveEnvelope(result.stdout);
  const waveJson = Boolean(envelope);
  const waveOk = envelope?.ok === true;

  if (mode === "exit_only") {
    if (expectSuccess) {
      return {
        pass: exitedCleanly,
        reason: exitedCleanly ? "ok" : exitReason,
        waveJson,
        waveOk
      };
    }

    const pass = !exitedCleanly;
    return {
      pass,
      reason: pass ? "expected_failure" : "unexpected_success",
      waveJson,
      waveOk
    };
  }

  if (expectSuccess) {
    const pass = exitedCleanly && waveOk;

    let reason;
    if (pass) {
      reason = "ok";
    } else if (!envelope) {
      reason = "non_json_output";
    } else if (!waveOk) {
      reason = `wave_not_ok:${envelope?.error?.code ?? "unknown"}`;
    } else {
      // The envelope says ok but the process exited non-zero: report the exit
      // status rather than a misleading "wave_not_ok:unknown".
      reason = exitReason;
    }

    return { pass, reason, waveJson, waveOk };
  }

  // An expected failure must both exit non-zero and carry an `ok: false` envelope.
  const pass = !exitedCleanly && envelope?.ok === false;
  return {
    pass,
    reason: pass ? "expected_failure" : "unexpected_success_or_format",
    waveJson,
    waveOk
  };
}
|
|
261
|
+
|
|
262
|
+
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * @param {number[]} values - Samples; the input array is not modified.
 * @param {number} p - Fraction such as 0.5 or 0.95.
 * @returns {number|null} The element at rank ceil(n * p) of the ascending
 *   copy (clamped to the valid index range), or null for an empty list.
 */
function percentile(values, p) {
  const count = values.length;
  if (count === 0) {
    return null;
  }

  const ascending = Array.from(values);
  ascending.sort((left, right) => left - right);

  const rank = Math.ceil(count * p) - 1;
  const lastIndex = count - 1;

  let position = rank;
  if (rank < 0) {
    position = 0;
  } else if (rank > lastIndex) {
    position = lastIndex;
  }

  return ascending[position];
}
|
|
268
|
+
|
|
269
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - Samples to average.
 * @returns {number|null} The average, or null for an empty list.
 */
function mean(values) {
  if (values.length === 0) {
    return null;
  }

  let total = 0;
  for (const sample of values) {
    total += sample;
  }
  return total / values.length;
}
|
|
273
|
+
|
|
274
|
+
/**
 * Formats a duration in milliseconds with one decimal place for display.
 *
 * Anything that is not a usable number (null, undefined, NaN, non-number)
 * renders as "n/a" instead of throwing from `toFixed`.
 *
 * @param {number|null|undefined} value - Duration in milliseconds.
 * @returns {string} For example "12.3ms", or "n/a".
 */
function formatMs(value) {
  if (typeof value !== "number" || Number.isNaN(value)) return "n/a";
  return `${value.toFixed(1)}ms`;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Right-pads a value with spaces up to a minimum width.
 *
 * @param {*} value - Cell content; coerced with String().
 * @param {number} width - Minimum width in characters.
 * @returns {string} The text, unchanged when it is already at least `width`
 *   characters long.
 */
function pad(value, width) {
  return String(value).padEnd(width, " ");
}
|
|
284
|
+
|
|
285
|
+
/**
 * Prints the benchmark summary as a fixed-width text table on stdout.
 * Each column is as wide as its longest cell, header included. Nothing is
 * printed when there are no rows.
 *
 * @param {object[]} rows - Summary rows as produced by summarizeRuns.
 */
function printTable(rows) {
  if (rows.length === 0) return;

  const headers = ["Scenario", "Tool", "Pass", "Avg", "P50", "P95", "Min", "Max", "Runs", "JSON"];

  // One array of display cells per summary row, in header order.
  const matrix = rows.map((row) => [
    row.scenario,
    row.tool,
    `${row.passRate.toFixed(0)}%`,
    formatMs(row.avgMs),
    formatMs(row.p50Ms),
    formatMs(row.p95Ms),
    formatMs(row.minMs),
    formatMs(row.maxMs),
    String(row.runs),
    row.jsonRate
  ]);

  const widths = headers.map((title, column) =>
    matrix.reduce((widest, cells) => Math.max(widest, String(cells[column]).length), title.length)
  );

  const renderLine = (cells) => cells.map((cell, column) => pad(cell, widths[column])).join(" ");

  console.log("\nBenchmark Summary");
  console.log(renderLine(headers));
  console.log(widths.map((width) => "-".repeat(width)).join(" "));

  for (const cells of matrix) {
    console.log(renderLine(cells));
  }
}
|
|
332
|
+
|
|
333
|
+
/**
 * Builds the list of benchmark scenarios.
 *
 * Each scenario carries an id, label, category, the expected outcome and the
 * per-tool argument lists. Scenarios may also pin their own `runs`/`warmup`
 * counts and per-tool evaluation `modes`.
 *
 * @param {string} query - Search term used by the search scenario.
 * @returns {object[]} Scenarios in execution order.
 */
function buildScenarios(query) {
  // Both help scenarios run the same commands and only check the exit status.
  const helpScenario = (id, label, overrides) => ({
    id,
    label,
    category: "mechanics",
    expectation: "success",
    ...overrides,
    modes: { wave: "exit_only", obsidian: "exit_only" },
    commands: { wave: ["--help"], obsidian: ["help"] }
  });

  // A single unwarmed run.
  const coldHelp = helpScenario("cold_help", "Cold help", { runs: 1, warmup: 0 });
  const warmHelp = helpScenario("warm_help", "Warm help", {});

  const listRead = {
    id: "list_read",
    label: "List/read",
    category: "domain",
    expectation: "success",
    commands: {
      wave: ["tasks", "list", "--page", "1", "--per", "10"],
      obsidian: ["files", "total"]
    }
  };

  const search = {
    id: "search",
    label: "Search",
    category: "domain",
    expectation: "success",
    commands: {
      wave: ["find", query, "--limit", "10"],
      obsidian: ["search", `query=${query}`]
    }
  };

  // Deliberately malformed invocations; both tools are expected to fail.
  const invalidInput = {
    id: "invalid_input",
    label: "Invalid input",
    category: "error",
    expectation: "failure",
    commands: {
      wave: ["tasks", "show"],
      obsidian: ["__wave_benchmark_invalid_command__"]
    }
  };

  return [coldHelp, warmHelp, listRead, search, invalidInput];
}
|
|
397
|
+
|
|
398
|
+
/**
 * Aggregates the measured runs of one scenario/tool pair into a summary row.
 *
 * @param {object} scenario - Scenario providing `label`, `id` and `category`.
 * @param {string} tool - Tool id; the JSON rate is only computed for "wave".
 * @param {object[]} runs - Evaluated runs with `durationMs`, `pass` and
 *   `waveJson`.
 * @returns {object} Run count, pass rate (percent), latency statistics in
 *   milliseconds (null when there are no runs) and `jsonRate` ("n/a" for
 *   tools other than wave).
 */
function summarizeRuns(scenario, tool, runs) {
  const total = runs.length;

  const latencies = [];
  let passed = 0;
  let jsonRuns = 0;
  for (const run of runs) {
    latencies.push(run.durationMs);
    if (run.pass) passed += 1;
    if (run.waveJson) jsonRuns += 1;
  }

  const hasSamples = latencies.length > 0;

  let jsonRate = "n/a";
  if (tool === "wave") {
    // Math.max guards the division when there are no runs.
    jsonRate = `${Math.round((jsonRuns / Math.max(1, total)) * 100)}%`;
  }

  return {
    scenario: scenario.label,
    scenarioId: scenario.id,
    category: scenario.category,
    tool,
    runs: total,
    passRate: total === 0 ? 0 : (passed / total) * 100,
    avgMs: mean(latencies),
    p50Ms: percentile(latencies, 0.5),
    p95Ms: percentile(latencies, 0.95),
    minMs: hasSamples ? Math.min(...latencies) : null,
    maxMs: hasSamples ? Math.max(...latencies) : null,
    jsonRate
  };
}
|
|
418
|
+
|
|
419
|
+
/**
 * Prints the preflight report: which tools were located and whether each is
 * ready to be benchmarked.
 *
 * @param {object} preflight - Report with `timestamp`, `waveEntry`,
 *   `waveReady`, `waveMissing`, `obsidianPath`, `obsidianSource` and
 *   `obsidianReady`.
 */
function printPreflight(preflight) {
  const yesNo = (flag) => (flag ? "yes" : "no");

  const report = [
    "Preflight",
    ` Timestamp: ${preflight.timestamp}`,
    ` Wave entry: ${preflight.waveEntry}`,
    ` Wave ready: ${yesNo(preflight.waveReady)}`
  ];

  // Missing variables are listed only when there are any.
  if (preflight.waveMissing.length > 0) {
    report.push(` Wave missing env: ${preflight.waveMissing.join(", ")}`);
  }

  report.push(
    ` Obsidian binary: ${preflight.obsidianPath || "not found"}`,
    ` Obsidian source: ${preflight.obsidianSource}`,
    ` Obsidian ready: ${yesNo(preflight.obsidianReady)}`
  );

  for (const line of report) {
    console.log(line);
  }
}
|
|
431
|
+
|
|
432
|
+
/**
 * Prints the fixed notes about each tool's prerequisites, followed by a line
 * for every tool that the preflight report marks as not ready.
 *
 * @param {object} preflight - Report providing `obsidianReady` and `waveReady`.
 */
function printOperationalNotes(preflight) {
  const notes = [
    "\nOperational Notes",
    " Wave CLI: requires API base URL, token, and organization ID; outputs strict JSON envelope.",
    " Obsidian CLI: requires Obsidian desktop CLI registration and usually a running app instance."
  ];

  if (!preflight.obsidianReady) {
    notes.push(" Obsidian benchmarks were skipped because no binary was found.");
  }

  if (!preflight.waveReady) {
    notes.push(" Wave benchmarks were skipped because required env/build prerequisites were missing.");
  }

  notes.forEach((note) => {
    console.log(note);
  });
}
|
|
443
|
+
|
|
444
|
+
/**
 * Prints the most frequent failure reason for every scenario/tool pair that
 * had at least one failing run. Nothing is printed when every run passed.
 *
 * @param {object[]} allRunResults - Evaluated runs with `pass`,
 *   `scenarioLabel`, `tool` and `reason`.
 */
function printFailureReasons(allRunResults) {
  // "<scenario> | <tool>" -> (reason -> number of occurrences)
  const tallies = new Map();

  for (const run of allRunResults) {
    if (run.pass) continue;

    const groupKey = `${run.scenarioLabel} | ${run.tool}`;
    let counts = tallies.get(groupKey);
    if (counts === undefined) {
      counts = new Map();
      tallies.set(groupKey, counts);
    }
    counts.set(run.reason, (counts.get(run.reason) || 0) + 1);
  }

  if (tallies.size === 0) return;

  console.log("\nTop Failure Reasons");
  tallies.forEach((counts, groupKey) => {
    // The strict ">" keeps the first-seen reason when counts tie.
    let topReason;
    let topCount = -Infinity;
    for (const [reason, count] of counts) {
      if (count > topCount) {
        topReason = reason;
        topCount = count;
      }
    }
    console.log(` ${groupKey}: ${topReason} (${topCount})`);
  });
}
|
|
463
|
+
|
|
464
|
+
/**
 * Runs the whole benchmark: parses options, performs the preflight checks,
 * executes every scenario for every available tool, prints the reports,
 * optionally writes a JSON report, and sets the process exit code.
 *
 * The exit code is 1 when no tool is available or when any scenario/tool pair
 * has a pass rate below 100%; otherwise it is 0.
 */
function main() {
  const options = parseArgs(process.argv.slice(2));
  if (options.help) {
    printUsage();
    return;
  }

  const cwd = process.cwd();
  const dotEnvPath = resolve(cwd, ".env");
  const dotEnv = loadDotEnv(dotEnvPath);
  const baseEnv = {
    ...process.env,
    ...dotEnv,
    // Like WAVE_AGENT_NAME, honour a value from .env before generating a run
    // id; previously a .env-provided id was overwritten by the generated one.
    WAVE_AGENT_RUN_ID: process.env.WAVE_AGENT_RUN_ID || dotEnv.WAVE_AGENT_RUN_ID || `benchmark-${Date.now()}`,
    WAVE_AGENT_NAME: process.env.WAVE_AGENT_NAME || dotEnv.WAVE_AGENT_NAME || "cli-benchmark"
  };

  // Wave is benchmarkable only when the entry script exists and all required
  // variables are present.
  const waveEntryAbs = resolve(cwd, options.waveEntry);
  const waveMissing = ["WAVE_API_BASE_URL", "WAVE_API_TOKEN", "WAVE_ORGANIZATION_ID"].filter((key) => !baseEnv[key]);
  const waveReady = existsSync(waveEntryAbs) && waveMissing.length === 0;

  const obsidian = resolveObsidianBinary(options.obsidianBin);
  const obsidianReady = Boolean(obsidian.path);

  const preflight = {
    timestamp: nowIso(),
    waveEntry: waveEntryAbs,
    waveReady,
    waveMissing,
    obsidianReady,
    obsidianPath: obsidian.path,
    obsidianSource: obsidian.source,
    options
  };

  printPreflight(preflight);

  const tools = [];
  if (!options.skipWave && waveReady) {
    tools.push({
      id: "wave",
      label: "Wave",
      // Wave is run through the current Node executable.
      binary: process.execPath,
      baseArgs: [waveEntryAbs],
      env: baseEnv
    });
  }

  if (!options.skipObsidian && obsidianReady) {
    tools.push({
      id: "obsidian",
      label: "Obsidian",
      binary: obsidian.path,
      baseArgs: [],
      // Obsidian gets the unmodified process environment (no .env values).
      env: process.env
    });
  }

  if (tools.length === 0) {
    console.log("\nNo tools available to benchmark. Check preflight status or remove skip flags.");
    process.exitCode = 1;
    return;
  }

  const scenarios = buildScenarios(options.query);
  const allRunResults = [];
  const summaryRows = [];

  for (const scenario of scenarios) {
    // A scenario may pin its own counts; otherwise the CLI options apply.
    const measuredRuns = scenario.runs ?? options.runs;
    const warmupRuns = scenario.warmup ?? options.warmup;

    for (const tool of tools) {
      const argsForTool = scenario.commands[tool.id];
      if (!argsForTool) continue;

      // The evaluation mode depends only on scenario and tool, not on the run.
      const mode = scenario.modes?.[tool.id] ?? "strict_json";

      console.log(`\n[${scenario.id}] ${tool.label} (warmup=${warmupRuns}, runs=${measuredRuns})`);

      // Warmup runs are executed but never recorded.
      for (let i = 0; i < warmupRuns; i += 1) {
        const warmup = runTimed(tool.binary, [...tool.baseArgs, ...argsForTool], tool.env, options.timeoutMs);
        if (options.verbose) {
          console.log(` warmup ${i + 1}/${warmupRuns} ${warmup.status === 0 ? "ok" : "fail"} ${warmup.durationMs.toFixed(1)}ms`);
        }
      }

      const runRows = [];
      for (let i = 0; i < measuredRuns; i += 1) {
        const result = runTimed(tool.binary, [...tool.baseArgs, ...argsForTool], tool.env, options.timeoutMs);
        const evalResult = evaluateRun(tool.id, scenario.expectation, mode, result);
        const merged = {
          ...result,
          ...evalResult,
          scenarioId: scenario.id,
          scenarioLabel: scenario.label,
          tool: tool.id,
          run: i + 1,
          measuredRuns,
          warmupRuns
        };

        if (options.verbose) {
          console.log(` run ${i + 1}/${measuredRuns} ${merged.pass ? "ok" : "fail"} ${merged.durationMs.toFixed(1)}ms (${merged.reason})`);
        }

        runRows.push(merged);
        allRunResults.push(merged);
      }

      const summary = summarizeRuns(scenario, tool.id, runRows);
      summaryRows.push(summary);
      console.log(` => pass ${summary.passRate.toFixed(0)}% | avg ${formatMs(summary.avgMs)} | p95 ${formatMs(summary.p95Ms)}`);
    }
  }

  printTable(summaryRows);
  printFailureReasons(allRunResults);
  printOperationalNotes(preflight);

  // Compare average latency per scenario, only where both tools were measured.
  const winners = [];
  for (const scenario of scenarios) {
    const wave = summaryRows.find((row) => row.scenarioId === scenario.id && row.tool === "wave");
    const obsidianRow = summaryRows.find((row) => row.scenarioId === scenario.id && row.tool === "obsidian");
    if (!wave || !obsidianRow) continue;
    if (wave.avgMs === null || obsidianRow.avgMs === null) continue;

    const winner = wave.avgMs < obsidianRow.avgMs ? "Wave" : "Obsidian";
    const delta = Math.abs(wave.avgMs - obsidianRow.avgMs);
    winners.push({ scenario: scenario.label, winner, deltaMs: delta });
  }

  if (winners.length > 0) {
    console.log("\nSpeed Winner By Scenario");
    for (const winner of winners) {
      console.log(` ${winner.scenario}: ${winner.winner} (${winner.deltaMs.toFixed(1)}ms faster on average)`);
    }
  }

  if (options.jsonOut) {
    const outPath = resolve(cwd, options.jsonOut);
    const payload = {
      generatedAt: nowIso(),
      cwd,
      options,
      preflight,
      summary: summaryRows,
      runs: allRunResults
    };
    writeFileSync(outPath, JSON.stringify(payload, null, 2));
    console.log(`\nWrote JSON report: ${outPath}`);
  }

  const failures = summaryRows.filter((row) => row.passRate < 100).length;
  process.exitCode = failures > 0 ? 1 : 0;
}
|
|
619
|
+
|
|
620
|
+
// Script entry point: run the benchmark and turn any thrown error (for
// example an invalid command-line option) into a one-line message on stderr
// and exit status 1.
try {
  main();
} catch (failure) {
  let detail;
  if (failure instanceof Error) {
    detail = failure.message;
  } else {
    detail = String(failure);
  }
  console.error(`benchmark-cli failed: ${detail}`);
  process.exit(1);
}
|