@omni-oss/task-bench 0.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +328 -0
- package/dist/bench/index.d.ts +82 -0
- package/dist/bench/index.d.ts.map +1 -0
- package/dist/bench/install.d.ts +5 -0
- package/dist/bench/install.d.ts.map +1 -0
- package/dist/bench/report.d.ts +9 -0
- package/dist/bench/report.d.ts.map +1 -0
- package/dist/bench/stats.d.ts +12 -0
- package/dist/bench/stats.d.ts.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/config.d.ts +91 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/generate/index.d.ts +19 -0
- package/dist/generate/index.d.ts.map +1 -0
- package/dist/generate/templates.d.ts +20 -0
- package/dist/generate/templates.d.ts.map +1 -0
- package/dist/graph.d.ts +21 -0
- package/dist/graph.d.ts.map +1 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.mjs +2 -0
- package/dist/src-D3XyMXAu.mjs +1167 -0
- package/dist/suite/index.d.ts +49 -0
- package/dist/suite/index.d.ts.map +1 -0
- package/dist/suite/preset.d.ts +93 -0
- package/dist/suite/preset.d.ts.map +1 -0
- package/dist/suite/report.d.ts +7 -0
- package/dist/suite/report.d.ts.map +1 -0
- package/dist/task-bench-cli.mjs +107 -0
- package/dist/tools/index.d.ts +18 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/moon.d.ts +10 -0
- package/dist/tools/moon.d.ts.map +1 -0
- package/dist/tools/nx.d.ts +7 -0
- package/dist/tools/nx.d.ts.map +1 -0
- package/dist/tools/omni.d.ts +7 -0
- package/dist/tools/omni.d.ts.map +1 -0
- package/dist/tools/turbo.d.ts +5 -0
- package/dist/tools/turbo.d.ts.map +1 -0
- package/dist/tools/types.d.ts +77 -0
- package/dist/tools/types.d.ts.map +1 -0
- package/package.json +41 -0
- package/project.omni.yaml +33 -0
- package/src/bench/index.ts +323 -0
- package/src/bench/install.ts +12 -0
- package/src/bench/report.ts +142 -0
- package/src/bench/stats.spec.ts +35 -0
- package/src/bench/stats.ts +38 -0
- package/src/cli/index.ts +410 -0
- package/src/config.ts +138 -0
- package/src/generate/index.ts +215 -0
- package/src/generate/templates.ts +87 -0
- package/src/graph.spec.ts +119 -0
- package/src/graph.ts +120 -0
- package/src/index.ts +31 -0
- package/src/suite/index.ts +113 -0
- package/src/suite/preset.spec.ts +95 -0
- package/src/suite/preset.ts +253 -0
- package/src/suite/report.ts +135 -0
- package/src/tools/adapters.spec.ts +95 -0
- package/src/tools/config.spec.ts +73 -0
- package/src/tools/index.ts +76 -0
- package/src/tools/moon.ts +106 -0
- package/src/tools/nx.ts +106 -0
- package/src/tools/omni.ts +96 -0
- package/src/tools/turbo.ts +78 -0
- package/src/tools/types.ts +116 -0
- package/tsconfig.json +4 -0
- package/tsconfig.project.json +6 -0
- package/tsconfig.types.json +4 -0
- package/vite.config.ts +29 -0
- package/vitest.config.unit.ts +13 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
import { readFileSync, rmSync, writeFileSync } from "node:fs";
import { readFile } from "node:fs/promises";
import { cpus, tmpdir } from "node:os";
import { join } from "node:path";
import { performance } from "node:perf_hooks";
import { execa } from "execa";
import { HarnessConfigSchema, type Tool } from "../config";
import { buildGraph, taskNames } from "../graph";
import { getAdapter, resolveToolVersions, type ToolContext } from "../tools";
import { computeStats, type Stats } from "./stats";
|
|
11
|
+
|
|
12
|
+
/** Outcome of a single timed invocation of a task runner. */
export interface RunSample {
    /** Elapsed wall-clock time of the invocation, in milliseconds. */
    durationMs: number;
    /** Exit code of the child process. */
    exitCode: number;
    /** Captured standard output of the child process. */
    stdout: string;
    /** Captured standard error of the child process. */
    stderr: string;
    /** Number of tasks that actually executed (0 == a full cache hit). */
    executed: number;
    /** Whether the invocation counts as successful. */
    ok: boolean;
}
|
|
21
|
+
|
|
22
|
+
/** Aggregated outcome of all runs of one scenario (cold or warm) for one tool. */
export interface ScenarioResult {
    /** How many samples were collected. */
    runs: number;
    /** How many of those samples were not `ok`. */
    failures: number;
    /** Timing statistics over the sample durations. */
    stats: Stats;
    /** Median number of tasks that actually executed across the runs. */
    executedMedian: number;
}
|
|
29
|
+
|
|
30
|
+
/** Everything measured for a single tool during one benchmark. */
export interface ToolResult {
    /** The tool these measurements belong to. */
    tool: Tool;
    /** The task that was run. */
    task: string;
    /** Size of the executed task graph (tasks run on a cold, uncached run). */
    taskGraphSize: number;
    /** Results of the cold scenario. */
    cold: ScenarioResult;
    /** Results of the warm scenario. */
    warm: ScenarioResult;
    /** Error message, set when benchmarking this tool threw. */
    error?: string;
}
|
|
39
|
+
|
|
40
|
+
/** Complete result of one benchmark invocation across all tools. */
export interface BenchmarkResult {
    /** Root directory of the benchmarked workspace. */
    rootDir: string;
    /** The task that was run by every tool. */
    task: string;
    /** Project count taken from the harness configuration. */
    projects: number;
    /** Tasks-per-project value taken from the harness configuration. */
    tasksPerProject: number;
    /** Concurrency level handed to every tool. */
    concurrency: number;
    /** Whether persistent daemons were allowed. */
    daemon: boolean;
    /** Resolved version of each benchmarked tool (detected for omni). */
    versions: Record<string, string | null>;
    /** ISO-8601 timestamp of when the result object was produced. */
    generatedAt: string;
    /** One entry per benchmarked tool. */
    tools: ToolResult[];
}
|
|
52
|
+
|
|
53
|
+
/**
 * Progress events reported through `RunBenchmarkOptions.onEvent`,
 * discriminated by `kind`.
 */
export type BenchEvent =
    // Benchmarking of a tool is about to begin.
    | { kind: "tool-start"; tool: Tool }
    // One measured run finished; `run` counts from 1 up to `total`.
    | {
          kind: "scenario";
          tool: Tool;
          scenario: "cold" | "warm";
          run: number;
          total: number;
          sample: RunSample;
      }
    // Benchmarking the tool threw; `error` carries the message.
    | { kind: "tool-error"; tool: Tool; error: string }
    // A measured run exited with a non-zero exit code.
    | {
          kind: "tool-unsuccessful";
          tool: Tool;
          sample: RunSample;
      };
|
|
69
|
+
|
|
70
|
+
/** Optional overrides accepted by `runBenchmark`; every field may be omitted. */
export interface RunBenchmarkOptions {
    /** Tools to benchmark. */
    tools?: Tool[] | undefined;
    /** Task to run. */
    task?: string | undefined;
    /** Number of measured cold runs per tool. */
    coldRuns?: number | undefined;
    /** Number of measured warm runs per tool. */
    warmRuns?: number | undefined;
    /** Concurrency level handed to every tool. */
    concurrency?: number | undefined;
    /** Allow each tool's persistent daemon (default true). */
    daemon?: boolean | undefined;
    /** Callback receiving progress events. */
    onEvent?: ((event: BenchEvent) => void) | undefined;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Median of a list of numbers.
 *
 * Returns 0 for an empty list. For an even count the two middle values are
 * averaged. The input array is left untouched (a copy is sorted).
 */
function median(values: number[]): number {
    const count = values.length;
    if (count === 0) {
        return 0;
    }

    const ordered = Array.from(values);
    ordered.sort((x, y) => x - y);

    const upper = Math.floor(count / 2);
    if (count % 2 !== 0) {
        return ordered[upper] ?? 0;
    }
    const lowerMiddle = ordered[upper - 1] ?? 0;
    const upperMiddle = ordered[upper] ?? 0;
    return (lowerMiddle + upperMiddle) / 2;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Count the newline characters in the file at `path`.
 *
 * Each executed task appends exactly one newline-terminated line, so the
 * newline count equals the number of executed tasks. An empty or unreadable
 * file yields 0.
 */
function countLines(path: string): number {
    let contents: string;
    try {
        contents = readFileSync(path, "utf8");
    } catch {
        return 0;
    }

    let newlines = 0;
    let at = contents.indexOf("\n");
    while (at !== -1) {
        newlines += 1;
        at = contents.indexOf("\n", at + 1);
    }
    return newlines;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Run `file` with `args` inside `rootDir` once and time it.
 *
 * The execution log is truncated beforehand and its path is handed to the
 * child via `TASK_BENCH_EXEC_LOG`; afterwards its line count is reported as
 * the number of executed tasks. A non-zero exit does not reject (`reject:
 * false`) but is reflected in `exitCode` / `ok` of the returned sample.
 *
 * @param file Executable to spawn.
 * @param args Arguments for the executable.
 * @param rootDir Working directory of the child process.
 * @param execLog Path of the execution-counter log file.
 * @param extraEnv Additional environment variables; these override the
 *   fixed ones set here.
 */
async function timeRun(
    file: string,
    args: string[],
    rootDir: string,
    execLog: string,
    extraEnv: Record<string, string>,
): Promise<RunSample> {
    // Non-string output is normalised to an empty string.
    const asText = (value: unknown): string =>
        typeof value === "string" ? value : "";

    // Reset the execution marker so we can count real executions for this run.
    writeFileSync(execLog, "");

    const startedAt = performance.now();
    const child = await execa(file, args, {
        cwd: rootDir,
        reject: false,
        env: {
            FORCE_COLOR: "0",
            TURBO_TELEMETRY_DISABLED: "1",
            DO_NOT_TRACK: "1",
            NX_TUI: "false",
            TASK_BENCH_EXEC_LOG: execLog,
            ...extraEnv,
        },
    });
    const elapsed = performance.now() - startedAt;

    const code = child.exitCode;
    return {
        durationMs: elapsed,
        exitCode: code ?? -1,
        stdout: asText(child.stdout),
        stderr: asText(child.stderr),
        executed: countLines(execLog),
        ok: code === 0,
    };
}
|
|
140
|
+
|
|
141
|
+
/**
 * Collapse the samples of one scenario into a `ScenarioResult`: run count,
 * number of non-ok samples, duration statistics and the median number of
 * executed tasks.
 */
function scenarioFromSamples(samples: RunSample[]): ScenarioResult {
    const durations: number[] = [];
    const executedCounts: number[] = [];
    let failed = 0;

    for (const sample of samples) {
        durations.push(sample.durationMs);
        executedCounts.push(sample.executed);
        if (!sample.ok) {
            failed += 1;
        }
    }

    return {
        runs: samples.length,
        failures: failed,
        stats: computeStats(durations),
        executedMedian: median(executedCounts),
    };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Benchmark the enabled tools against an already-generated workspace.
 *
 * For each tool we measure two scenarios:
 * - cold: caches + outputs wiped before every run (discovery + full exec + cache write)
 * - warm: caches primed, so ideally every task is a cache hit
 *   (isolates discovery + cache-restore overhead)
 *
 * A tool-agnostic execution counter (see the generated task runner) records how
 * many tasks actually ran, so warm-run cache effectiveness is *verified* rather
 * than assumed. Concurrency is pinned identically across all runners.
 *
 * The counter is backed by a temporary log file in the OS temp directory; it is
 * removed again (best-effort) once all tools have been benchmarked.
 *
 * @param rootDir Workspace root; must contain `bench.config.json`.
 * @param options Optional overrides. Defaults: tools from the config, the last
 *   task name of the config (or `"t0"`), 3 cold runs, 5 warm runs, concurrency
 *   equal to the CPU count (at least 1), daemons enabled.
 * @returns The per-tool results plus the parameters the benchmark ran with.
 *   An error thrown while measuring a tool is recorded on that tool's result
 *   (`error`) and reported as a `tool-error` event instead of being rethrown.
 * @throws If the config cannot be read, parsed or validated, or if version
 *   resolution / adapter lookup fails.
 */
export async function runBenchmark(
    rootDir: string,
    options: RunBenchmarkOptions = {},
): Promise<BenchmarkResult> {
    const raw = JSON.parse(
        await readFile(join(rootDir, "bench.config.json"), "utf8"),
    );
    const config = HarnessConfigSchema.parse(raw);
    const projects = buildGraph(config);
    const tasks = taskNames(config);

    const tools = options.tools ?? config.tools;
    const task = options.task ?? tasks[tasks.length - 1] ?? "t0";
    const coldRuns = options.coldRuns ?? 3;
    const warmRuns = options.warmRuns ?? 5;
    const concurrency = options.concurrency ?? Math.max(1, cpus().length);
    const daemon = options.daemon ?? true;
    const emit = options.onEvent ?? (() => {});
    const execLog = join(
        tmpdir(),
        `task-bench-exec-${process.pid}-${Date.now()}.log`,
    );

    const ctx: ToolContext = {
        rootDir,
        projectDirs: projects.map((p) => p.dir),
        concurrency,
        daemon,
    };

    // Resolve (and re-validate) the version of each tool actually used.
    const versionMap = await resolveToolVersions(config, rootDir, tools);
    const versions: Record<string, string | null> = {};
    for (const [tool, version] of versionMap) versions[tool] = version;

    const results: ToolResult[] = [];

    try {
        for (const tool of tools) {
            emit({ kind: "tool-start", tool });
            const adapter = getAdapter(tool);
            const invocation = adapter.run(task, ctx);
            const env = adapter.env(ctx);

            // One measured run: time it, report it, and flag a non-zero exit.
            const measure = async (
                scenario: "cold" | "warm",
                run: number,
                total: number,
            ): Promise<RunSample> => {
                const sample = await timeRun(
                    invocation.file,
                    invocation.args,
                    rootDir,
                    execLog,
                    env,
                );
                emit({ kind: "scenario", tool, scenario, run, total, sample });
                if (sample.exitCode !== 0) {
                    emit({ kind: "tool-unsuccessful", tool, sample });
                }
                return sample;
            };

            try {
                // In no-daemon mode make sure no stale daemon lingers first.
                if (!daemon && adapter.hasDaemon) {
                    await adapter.stopDaemon(ctx);
                }

                // Cold scenario: a fresh start each run. Caches + outputs are wiped,
                // and (in daemon mode) the daemon is torn down so cold includes its
                // startup cost. Warm runs below deliberately keep the daemon alive.
                const coldSamples: RunSample[] = [];
                for (let run = 1; run <= coldRuns; run++) {
                    await adapter.clearCaches(ctx);
                    if (daemon && adapter.hasDaemon) {
                        await adapter.stopDaemon(ctx);
                    }
                    coldSamples.push(await measure("cold", run, coldRuns));
                }

                // Warm scenario: prime once (unmeasured, also warms the daemon),
                // then measure while caches + daemon stay hot.
                await timeRun(
                    invocation.file,
                    invocation.args,
                    rootDir,
                    execLog,
                    env,
                );
                const warmSamples: RunSample[] = [];
                for (let run = 1; run <= warmRuns; run++) {
                    warmSamples.push(await measure("warm", run, warmRuns));
                }

                // The largest execution count seen in any run is taken as the
                // size of the task graph.
                const taskGraphSize = Math.max(
                    0,
                    ...coldSamples.map((s) => s.executed),
                    ...warmSamples.map((s) => s.executed),
                );

                results.push({
                    tool,
                    task,
                    taskGraphSize,
                    cold: scenarioFromSamples(coldSamples),
                    warm: scenarioFromSamples(warmSamples),
                });
            } catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                emit({ kind: "tool-error", tool, error: message });
                results.push({
                    tool,
                    task,
                    taskGraphSize: 0,
                    cold: scenarioFromSamples([]),
                    warm: scenarioFromSamples([]),
                    error: message,
                });
            } finally {
                // Always clean up the daemon so it does not leak between tools or
                // outlive the benchmark.
                await adapter.stopDaemon(ctx);
            }
        }
    } finally {
        // Remove the execution-counter log so repeated benchmarks do not pile
        // up files in the temp directory. Best-effort: a failed cleanup must
        // not mask the benchmark outcome.
        try {
            rmSync(execLog, { force: true });
        } catch {
            // ignore
        }
    }

    return {
        rootDir,
        task,
        projects: config.projects,
        tasksPerProject: config.tasksPerProject,
        concurrency,
        daemon,
        versions,
        generatedAt: new Date().toISOString(),
        tools: results,
    };
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { execa } from "execa";
|
|
2
|
+
|
|
3
|
+
/**
 * Install dependencies in a generated workspace with bun.
 *
 * @param dir Directory in which `bun install` is executed.
 * @param opts `quiet: true` discards the child's stdio; otherwise it is
 *   inherited from this process.
 */
export async function installWorkspace(
    dir: string,
    opts: { quiet?: boolean } = {},
): Promise<void> {
    const stdio = opts.quiet ? "ignore" : "inherit";
    await execa("bun", ["install"], { cwd: dir, stdio });
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import type { BenchmarkResult, ToolResult } from "./index";
|
|
2
|
+
import { formatMs, type Stats } from "./stats";
|
|
3
|
+
|
|
4
|
+
/**
 * Right-pad `value` with spaces up to `width` characters. Values that are
 * already at least that long are returned unchanged (never truncated).
 */
function pad(value: string, width: number): string {
    return value.padEnd(width, " ");
}
|
|
9
|
+
|
|
10
|
+
/**
 * Table cell for one scenario: "median ±stddev", followed by a warning marker
 * with the failure count when any run failed. Without samples the cell is a
 * dash.
 */
function statCell(stats: Stats, failures: number): string {
    if (stats.samples.length === 0) {
        return "—";
    }
    const parts = [`${formatMs(stats.median)} ±${formatMs(stats.stddev)}`];
    if (failures > 0) {
        parts.push(`⚠${failures}`);
    }
    return parts.join(" ");
}
|
|
15
|
+
|
|
16
|
+
/**
 * Percentage of the task graph that was NOT re-executed during warm runs
 * (based on the median executed count). Returns `null` when the task graph
 * size is not positive or no warm samples exist.
 */
function cacheHitPct(tool: ToolResult): number | null {
    const graphSize = tool.taskGraphSize;
    if (graphSize <= 0) {
        return null;
    }
    if (tool.warm.stats.samples.length === 0) {
        return null;
    }
    const cached = graphSize - tool.warm.executedMedian;
    return (cached / graphSize) * 100;
}
|
|
23
|
+
|
|
24
|
+
/** Table cell for the warm cache-hit rate: a whole-number percentage, or a dash when unknown. */
function cacheCell(tool: ToolResult): string {
    const pct = cacheHitPct(tool);
    return pct === null ? "—" : `${pct.toFixed(0)}%`;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Render a human-readable comparison table plus a short takeaway. The warm
 * column is the key metric: with all tasks cached it approximates each tool's
 * discovery + cache-restore overhead. The warm-cache-hit column verifies that
 * assumption held (should be 100%).
 *
 * Layout of the returned text: a summary line, the tool versions, the table,
 * one warning per tool whose warm runs were not fully cached, and — when at
 * least two tools qualify — a fastest-vs-slowest sentence for the warm and the
 * cold scenario.
 */
export function formatReport(result: BenchmarkResult): string {
    const { tools } = result;
    const out: string[] = [];

    // --- summary -----------------------------------------------------------
    const nodeCount = tools.reduce(
        (largest, t) => Math.max(largest, t.taskGraphSize),
        0,
    );
    const daemonState = result.daemon ? "on" : "off";
    out.push("");
    out.push(
        `task-bench: ${result.projects} projects × ${result.tasksPerProject} tasks (${nodeCount} task-graph nodes), running "${result.task}" at concurrency ${result.concurrency} (daemons ${daemonState})`,
    );
    out.push(formatVersions(result));
    out.push("");

    // --- table -------------------------------------------------------------
    const headers = [
        "tool",
        "cold (median)",
        "warm (median)",
        "warm cache-hit",
        "notes",
    ];
    const rows = tools.map((t) => {
        let notes = "";
        if (t.error) {
            // Only the first line of the error, capped at 40 characters.
            const [firstLine = ""] = t.error.split("\n");
            notes = `error: ${firstLine.slice(0, 40)}`;
        }
        return [
            t.tool,
            statCell(t.cold.stats, t.cold.failures),
            statCell(t.warm.stats, t.warm.failures),
            cacheCell(t),
            notes,
        ];
    });

    // Each column is as wide as its widest cell (header included).
    const widths = headers.map((header, col) => {
        let widest = header.length;
        for (const row of rows) {
            widest = Math.max(widest, (row[col] ?? "").length);
        }
        return widest;
    });
    const renderRow = (cells: string[]): string => {
        const padded = cells.map((cell, col) => pad(cell, widths[col] ?? 0));
        return `| ${padded.join(" | ")} |`;
    };

    out.push(renderRow(headers));
    out.push(`| ${widths.map((w) => "-".repeat(w)).join(" | ")} |`);
    rows.forEach((row) => {
        out.push(renderRow(row));
    });
    out.push("");

    // --- warnings ----------------------------------------------------------
    // Warn if any warm scenario was not a full cache hit — the warm numbers
    // are only meaningful as "overhead" when everything is cached.
    const notFullyCached = tools.filter((t) => {
        const pct = cacheHitPct(t);
        return pct !== null && pct < 99.5;
    });
    notFullyCached.forEach((t) => {
        out.push(
            `⚠ ${t.tool}: warm runs were not fully cached (${cacheCell(t)} hit, ${t.warm.executedMedian}/${t.taskGraphSize} tasks re-ran) — treat its warm number with caution.`,
        );
    });
    if (notFullyCached.length) out.push("");

    // --- takeaways ---------------------------------------------------------
    // Emits "<label>: A is fastest (x), N× faster than B (y)." for a list that
    // is already sorted fastest-first and holds at least two tools.
    const takeaway = (
        label: string,
        ordered: ToolResult[],
        medianOf: (t: ToolResult) => number,
        tail: string,
    ): void => {
        const quickest = ordered[0];
        const laggard = ordered[ordered.length - 1];
        if (ordered.length <= 1 || !quickest || !laggard) return;
        const ratio = medianOf(laggard) / medianOf(quickest);
        out.push(
            `${label}: ${quickest.tool} is fastest (${formatMs(medianOf(quickest))}), ${ratio.toFixed(2)}× faster than ${laggard.tool} (${formatMs(medianOf(laggard))}).${tail}`,
        );
    };

    // Fastest-warm takeaway (only among fully-cached, error-free tools).
    const warmOf = (t: ToolResult): number => t.warm.stats.median;
    const ranked = tools
        .filter((t) => {
            const pct = cacheHitPct(t);
            return !t.error && pct !== null && pct >= 99.5;
        })
        .sort((a, b) => warmOf(a) - warmOf(b));
    takeaway("Warm-cache overhead", ranked, warmOf, "\n");

    // Fastest-cold takeaway (discovery + full execution + cache write).
    const coldOf = (t: ToolResult): number => t.cold.stats.median;
    const coldRanked = tools
        .filter((t) => !t.error && t.cold.stats.samples.length > 0)
        .sort((a, b) => coldOf(a) - coldOf(b));
    takeaway("Cold-run overhead", coldRanked, coldOf, "");

    if (ranked.length > 1 || coldRanked.length > 1) out.push("");

    return out.join("\n");
}
|
|
135
|
+
|
|
136
|
+
/**
 * A one-line summary of the resolved tool versions used: each benchmarked
 * tool with its version in backticks, or "?" when no version is recorded.
 */
function formatVersions(result: BenchmarkResult): string {
    const labels: string[] = [];
    for (const { tool } of result.tools) {
        const version = result.versions[tool] ?? "?";
        labels.push(`\`${tool} ${version}\``);
    }
    return `versions: ${labels.join(", ")}`;
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { computeStats, formatMs } from "./stats";
|
|
3
|
+
|
|
4
|
+
// Unit tests for computeStats: empty input, a simple odd-length sample set,
// and the even-length median rule.
describe("computeStats", () => {
    it("handles the empty case", () => {
        const expected = { min: 0, max: 0, mean: 0, median: 0, stddev: 0 };
        expect(computeStats([])).toMatchObject(expected);
    });

    it("computes summary statistics", () => {
        const { min, max, mean, median, stddev } = computeStats([10, 20, 30]);
        expect(min).toBe(10);
        expect(max).toBe(30);
        expect(mean).toBe(20);
        expect(median).toBe(20);
        // Population standard deviation: sqrt(((-10)^2 + 0^2 + 10^2) / 3).
        expect(stddev).toBeCloseTo(Math.sqrt(200 / 3), 6);
    });

    it("averages the two middle values for an even count", () => {
        const { median } = computeStats([1, 2, 3, 4]);
        expect(median).toBe(2.5);
    });
});
|
|
29
|
+
|
|
30
|
+
// Unit tests for formatMs: millisecond vs. second rendering.
describe("formatMs", () => {
    it("uses ms below a second and seconds above", () => {
        const cases: Array<[number, string]> = [
            [250, "250ms"],
            [1500, "1.50s"],
        ];
        for (const [input, rendered] of cases) {
            expect(formatMs(input)).toBe(rendered);
        }
    });
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/** Summary statistics for a set of timing samples (all in milliseconds). */
export interface Stats {
    /** The samples the statistics were computed from. */
    samples: number[];
    /** Smallest sample (0 when there are no samples). */
    min: number;
    /** Largest sample (0 when there are no samples). */
    max: number;
    /** Arithmetic mean (0 when there are no samples). */
    mean: number;
    /** Median; the two middle values are averaged for an even count. */
    median: number;
    /** Population standard deviation (0 when there are no samples). */
    stddev: number;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Compute min, max, mean, median and population standard deviation of
 * `samples`.
 *
 * An empty input yields all-zero statistics. The input array is not
 * reordered; it is returned as-is in the `samples` field.
 */
export function computeStats(samples: number[]): Stats {
    const count = samples.length;
    if (count === 0) {
        return { samples, min: 0, max: 0, mean: 0, median: 0, stddev: 0 };
    }

    const ordered = Array.from(samples);
    ordered.sort((x, y) => x - y);

    let total = 0;
    for (const value of ordered) {
        total += value;
    }
    const mean = total / count;

    const upper = Math.floor(count / 2);
    let median: number;
    if (count % 2 === 0) {
        median = ((ordered[upper - 1] ?? 0) + (ordered[upper] ?? 0)) / 2;
    } else {
        median = ordered[upper] ?? 0;
    }

    // Population variance: mean squared deviation, divided by n (not n - 1).
    let squaredDeviation = 0;
    for (const value of ordered) {
        squaredDeviation += (value - mean) ** 2;
    }
    const variance = squaredDeviation / count;

    return {
        samples,
        min: ordered[0] ?? 0,
        max: ordered[count - 1] ?? 0,
        mean,
        median,
        stddev: Math.sqrt(variance),
    };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Format a duration given in milliseconds for display.
 *
 * Durations that round to less than a second are shown as whole milliseconds
 * ("250ms"); everything else is shown in seconds with two decimals ("1.50s").
 * The unit is chosen after rounding to whole milliseconds, so a value such as
 * 999.6 renders as "1.00s" rather than "1000ms".
 */
export function formatMs(ms: number): string {
    if (Math.round(ms) >= 1000) return `${(ms / 1000).toFixed(2)}s`;
    return `${ms.toFixed(0)}ms`;
}
|