@fuzdev/fuz_util 0.42.0 → 0.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +19 -12
- package/dist/async.d.ts +2 -2
- package/dist/async.d.ts.map +1 -1
- package/dist/async.js +2 -2
- package/dist/benchmark.d.ts +179 -0
- package/dist/benchmark.d.ts.map +1 -0
- package/dist/benchmark.js +400 -0
- package/dist/benchmark_baseline.d.ts +195 -0
- package/dist/benchmark_baseline.d.ts.map +1 -0
- package/dist/benchmark_baseline.js +415 -0
- package/dist/benchmark_format.d.ts +92 -0
- package/dist/benchmark_format.d.ts.map +1 -0
- package/dist/benchmark_format.js +327 -0
- package/dist/benchmark_stats.d.ts +112 -0
- package/dist/benchmark_stats.d.ts.map +1 -0
- package/dist/benchmark_stats.js +336 -0
- package/dist/benchmark_types.d.ts +174 -0
- package/dist/benchmark_types.d.ts.map +1 -0
- package/dist/benchmark_types.js +1 -0
- package/dist/library_json.d.ts +3 -3
- package/dist/library_json.d.ts.map +1 -1
- package/dist/library_json.js +1 -1
- package/dist/object.js +1 -1
- package/dist/stats.d.ts +126 -0
- package/dist/stats.d.ts.map +1 -0
- package/dist/stats.js +262 -0
- package/dist/time.d.ts +161 -0
- package/dist/time.d.ts.map +1 -0
- package/dist/time.js +260 -0
- package/dist/timings.d.ts +1 -7
- package/dist/timings.d.ts.map +1 -1
- package/dist/timings.js +16 -16
- package/package.json +21 -19
- package/src/lib/async.ts +3 -3
- package/src/lib/benchmark.ts +498 -0
- package/src/lib/benchmark_baseline.ts +573 -0
- package/src/lib/benchmark_format.ts +379 -0
- package/src/lib/benchmark_stats.ts +448 -0
- package/src/lib/benchmark_types.ts +197 -0
- package/src/lib/library_json.ts +3 -3
- package/src/lib/object.ts +1 -1
- package/src/lib/stats.ts +353 -0
- package/src/lib/time.ts +314 -0
- package/src/lib/timings.ts +17 -17
- package/src/lib/types.ts +2 -2
|
@@ -0,0 +1,573 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark baseline storage and comparison utilities.
|
|
3
|
+
* Save benchmark results to disk and compare against baselines for regression detection.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import {readFile, writeFile, mkdir, rm} from 'node:fs/promises';
|
|
7
|
+
import {join} from 'node:path';
|
|
8
|
+
import {z} from 'zod';
|
|
9
|
+
|
|
10
|
+
import {fs_exists} from './fs.js';
|
|
11
|
+
import type {BenchmarkResult} from './benchmark_types.js';
|
|
12
|
+
import {
|
|
13
|
+
benchmark_stats_compare,
|
|
14
|
+
type BenchmarkComparison,
|
|
15
|
+
type BenchmarkStatsComparable,
|
|
16
|
+
} from './benchmark_stats.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Schema version stored in every baseline file, for forward compatibility.
 * Increment it whenever the baseline schema changes.
 */
const BASELINE_VERSION = 1;
|
|
20
|
+
|
|
21
|
+
/**
 * Zod schema for the statistics of one benchmark task inside a baseline file.
 *
 * The shape mirrors what `results_to_entries` produces: the task name plus the
 * numeric fields copied from a result's `stats` object.
 */
export const BenchmarkBaselineEntry = z.object({
	/** Task name; baseline and current entries are matched on it. */
	name: z.string(),

	// Central tendency and spread (the `_ns` suffix suggests nanoseconds — confirm in benchmark_types).
	mean_ns: z.number(),
	median_ns: z.number(),
	std_dev_ns: z.number(),

	// Extremes.
	min_ns: z.number(),
	max_ns: z.number(),

	// Percentiles.
	p75_ns: z.number(),
	p90_ns: z.number(),
	p95_ns: z.number(),
	p99_ns: z.number(),

	// Throughput, and the sample count used when rebuilding confidence intervals.
	ops_per_second: z.number(),
	sample_size: z.number(),
});

/** Static type of a parsed `BenchmarkBaselineEntry`. */
export type BenchmarkBaselineEntry = z.infer<typeof BenchmarkBaselineEntry>;
|
|
39
|
+
|
|
40
|
+
/**
 * Zod schema for the whole baseline file as written by `benchmark_baseline_save`.
 */
export const BenchmarkBaseline = z.object({
	/** Schema version; files whose version differs from the current one are discarded on load. */
	version: z.number(),
	/** When the baseline was saved (written as `new Date().toISOString()`). */
	timestamp: z.string(),
	/** Commit hash at save time, or null when unknown. */
	git_commit: z.string().nullable(),
	/** Branch name at save time, or null when unknown. */
	git_branch: z.string().nullable(),
	/** `process.version` of the Node.js runtime that saved the baseline. */
	node_version: z.string(),
	/** One entry per benchmark task. */
	entries: z.array(BenchmarkBaselineEntry),
});

/** Static type of a parsed `BenchmarkBaseline`. */
export type BenchmarkBaseline = z.infer<typeof BenchmarkBaseline>;
|
|
52
|
+
|
|
53
|
+
/**
 * Options accepted by `benchmark_baseline_save`.
 */
export interface BenchmarkBaselineSaveOptions {
	/**
	 * Directory in which the baseline file is written.
	 * Defaults to '.gro/benchmarks'.
	 */
	path?: string;

	/**
	 * Git commit hash to record.
	 * Auto-detected via git when omitted.
	 */
	git_commit?: string | null;

	/**
	 * Git branch name to record.
	 * Auto-detected via git when omitted.
	 */
	git_branch?: string | null;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Options accepted by `benchmark_baseline_load`.
 */
export interface BenchmarkBaselineLoadOptions {
	/**
	 * Directory from which the baseline file is read.
	 * Defaults to '.gro/benchmarks'.
	 */
	path?: string;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Options accepted by `benchmark_baseline_compare`.
 * Inherits `path` from the load options because the baseline is loaded first.
 */
export interface BenchmarkBaselineCompareOptions extends BenchmarkBaselineLoadOptions {
	/**
	 * Smallest speedup ratio (baseline vs current) that counts as a regression.
	 * With 1.05, only slowdowns of 5% or more are flagged; smaller significant
	 * slowdowns are reported as unchanged.
	 * Default: 1.0 (every statistically significant slowdown is a regression)
	 */
	regression_threshold?: number;

	/**
	 * Baseline age in days beyond which the result is marked stale.
	 * Default: undefined (the baseline is never marked stale)
	 */
	staleness_warning_days?: number;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Outcome of `benchmark_baseline_compare`.
 *
 * When no baseline could be loaded, `baseline_found` is false, the baseline
 * metadata fields are null, and every current task is listed in `new_tasks`.
 */
export interface BenchmarkBaselineComparisonResult {
	/** True when a baseline was loaded. */
	baseline_found: boolean;
	/** Timestamp stored in the baseline, or null without a baseline. */
	baseline_timestamp: string | null;
	/** Git commit stored in the baseline, or null when absent. */
	baseline_commit: string | null;
	/** Fractional number of days since the baseline timestamp, or null without a baseline. */
	baseline_age_days: number | null;
	/** True when `staleness_warning_days` was given and the baseline is older than that. */
	baseline_stale: boolean;
	/** Every task present in both runs, in the order of the current results. */
	comparisons: Array<BenchmarkBaselineTaskComparison>;
	/** Significantly slower tasks that meet the regression threshold, largest effect size first. */
	regressions: Array<BenchmarkBaselineTaskComparison>;
	/** Significantly faster tasks, largest effect size first. */
	improvements: Array<BenchmarkBaselineTaskComparison>;
	/** Compared tasks that are neither a regression nor an improvement. */
	unchanged: Array<BenchmarkBaselineTaskComparison>;
	/** Names of tasks in the current run that the baseline lacks. */
	new_tasks: Array<string>;
	/** Names of tasks in the baseline that the current run lacks. */
	removed_tasks: Array<string>;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Baseline-versus-current comparison for one task.
 */
export interface BenchmarkBaselineTaskComparison {
	/** Task name shared by both entries. */
	name: string;
	/** Entry loaded from the baseline file. */
	baseline: BenchmarkBaselineEntry;
	/** Entry derived from the current results. */
	current: BenchmarkBaselineEntry;
	/** Output of `benchmark_stats_compare`, called with the baseline as `a` and the current run as `b`. */
	comparison: BenchmarkComparison;
}
|
|
127
|
+
|
|
128
|
+
/** Directory used for baselines when the caller passes no `path` option. */
const DEFAULT_BASELINE_PATH = '.gro/benchmarks';

/** Name of the baseline file inside the baseline directory. */
const BASELINE_FILENAME = 'baseline.json';
|
|
130
|
+
|
|
131
|
+
/** Z-score multiplier for a 95% confidence interval. */
const Z_95 = 1.96;

/**
 * Build the 95% confidence interval around a mean.
 *
 * The half-width is `Z_95` times the standard error (`std_dev / sqrt(sample_size)`).
 *
 * @param mean - Sample mean
 * @param std_dev - Sample standard deviation
 * @param sample_size - Number of samples; no guard is applied, so 0 yields a non-finite interval
 * @returns `[lower, upper]` bounds
 */
const calculate_confidence_interval = (
	mean: number,
	std_dev: number,
	sample_size: number,
): [number, number] => {
	const standard_error = std_dev / Math.sqrt(sample_size);
	const half_width = Z_95 * standard_error;
	return [mean - half_width, mean + half_width];
};
|
|
145
|
+
|
|
146
|
+
/**
 * Flatten benchmark results into baseline entries.
 *
 * Each entry keeps the task name and copies the numeric statistics stored in
 * the baseline from `stats`; any other fields on the result are dropped.
 */
const results_to_entries = (results: Array<BenchmarkResult>): Array<BenchmarkBaselineEntry> => {
	const entries: Array<BenchmarkBaselineEntry> = [];
	for (const {name, stats} of results) {
		entries.push({
			name,
			mean_ns: stats.mean_ns,
			median_ns: stats.median_ns,
			std_dev_ns: stats.std_dev_ns,
			min_ns: stats.min_ns,
			max_ns: stats.max_ns,
			p75_ns: stats.p75_ns,
			p90_ns: stats.p90_ns,
			p95_ns: stats.p95_ns,
			p99_ns: stats.p99_ns,
			ops_per_second: stats.ops_per_second,
			sample_size: stats.sample_size,
		});
	}
	return entries;
};
|
|
165
|
+
|
|
166
|
+
/**
 * Best-effort lookup of the current git commit hash and branch name.
 *
 * Runs `git rev-parse HEAD` and `git rev-parse --abbrev-ref HEAD` in parallel.
 * A command that fails or prints nothing yields null for its field; any other
 * failure yields null for both fields, so the returned promise does not reject.
 */
const get_git_info = async (): Promise<{commit: string | null; branch: string | null}> => {
	try {
		// Loaded lazily so the modules are only pulled in when git info is actually needed.
		const [{promisify}, {exec: exec_callback}] = await Promise.all([
			import('node:util'),
			import('node:child_process'),
		]);
		const exec = promisify(exec_callback);

		// Trimmed stdout of a command, or null when it fails or prints nothing.
		const read_output = (command: string): Promise<string | null> =>
			exec(command).then(
				(result) => result.stdout.trim() || null,
				() => null,
			);

		const [commit, branch] = await Promise.all([
			read_output('git rev-parse HEAD'),
			read_output('git rev-parse --abbrev-ref HEAD'),
		]);

		return {commit, branch};
	} catch {
		return {commit: null, branch: null};
	}
};
|
|
187
|
+
|
|
188
|
+
/**
 * Save benchmark results as the current baseline.
 *
 * Creates the target directory when needed and overwrites any existing
 * baseline file in it. Git commit and branch are auto-detected only for the
 * fields the caller left `undefined`; an explicit `null` is stored as `null`.
 *
 * @param results - Benchmark results to save
 * @param options - Save options
 *
 * @example
 * ```ts
 * const bench = new Benchmark();
 * bench.add('test', () => fn());
 * await bench.run();
 * await benchmark_baseline_save(bench.results());
 * ```
 */
export const benchmark_baseline_save = async (
	results: Array<BenchmarkResult>,
	options: BenchmarkBaselineSaveOptions = {},
): Promise<void> => {
	const base_path = options.path ?? DEFAULT_BASELINE_PATH;

	// Get git info if not provided
	let git_commit = options.git_commit;
	let git_branch = options.git_branch;
	if (git_commit === undefined || git_branch === undefined) {
		const git_info = await get_git_info();
		// Fill only the omitted fields: `??=` would also replace an explicit `null`
		// when the other field happened to be omitted.
		if (git_commit === undefined) git_commit = git_info.commit;
		if (git_branch === undefined) git_branch = git_info.branch;
	}

	const baseline: BenchmarkBaseline = {
		version: BASELINE_VERSION,
		timestamp: new Date().toISOString(),
		git_commit,
		git_branch,
		node_version: process.version,
		entries: results_to_entries(results),
	};

	await mkdir(base_path, {recursive: true});
	const filepath = join(base_path, BASELINE_FILENAME);
	await writeFile(filepath, JSON.stringify(baseline, null, '\t'), 'utf-8');
};
|
|
230
|
+
|
|
231
|
+
/**
 * Load the current baseline from disk.
 *
 * A baseline file that cannot be read, parsed, or validated, or whose version
 * differs from the current schema version, is deleted after logging a warning.
 *
 * @param options - Load options
 * @returns The baseline, or null if not found or invalid
 *
 * @example
 * ```ts
 * const baseline = await benchmark_baseline_load();
 * if (baseline) {
 * 	console.log(`Baseline from ${baseline.timestamp}`);
 * }
 * ```
 */
export const benchmark_baseline_load = async (
	options: BenchmarkBaselineLoadOptions = {},
): Promise<BenchmarkBaseline | null> => {
	const filepath = join(options.path ?? DEFAULT_BASELINE_PATH, BASELINE_FILENAME);

	const file_present = await fs_exists(filepath);
	if (!file_present) return null;

	try {
		const raw = await readFile(filepath, 'utf-8');
		const baseline = BenchmarkBaseline.parse(JSON.parse(raw));

		// Happy path: the stored schema version matches the one this module writes.
		if (baseline.version === BASELINE_VERSION) {
			return baseline;
		}

		// eslint-disable-next-line no-console
		console.warn(
			`Benchmark baseline version mismatch (got ${baseline.version}, expected ${BASELINE_VERSION}). Removing stale baseline: ${filepath}`,
		);
		await rm(filepath, {force: true});
		return null;
	} catch (err) {
		const detail = err instanceof Error ? err.message : err;
		// eslint-disable-next-line no-console
		console.warn(`Invalid or corrupted benchmark baseline file. Removing: ${filepath}`, detail);
		await rm(filepath, {force: true});
		return null;
	}
};
|
|
281
|
+
|
|
282
|
+
/**
 * Compare benchmark results against the stored baseline.
 *
 * Tasks present in both runs are compared with `benchmark_stats_compare`
 * (baseline as `a`, current as `b`) and sorted into regressions, improvements,
 * or unchanged. Tasks present on only one side are reported by name.
 *
 * @param results - Current benchmark results
 * @param options - Comparison options including regression threshold and staleness warning
 * @returns Comparison result with regressions, improvements, and unchanged tasks
 *
 * @example
 * ```ts
 * const bench = new Benchmark();
 * bench.add('test', () => fn());
 * await bench.run();
 *
 * const comparison = await benchmark_baseline_compare(bench.results(), {
 * 	regression_threshold: 1.05, // Only flag regressions 5% or more slower
 * 	staleness_warning_days: 7, // Warn if baseline is older than 7 days
 * });
 * if (comparison.regressions.length > 0) {
 * 	console.log('Performance regressions detected!');
 * 	for (const r of comparison.regressions) {
 * 		console.log(` ${r.name}: ${r.comparison.speedup_ratio.toFixed(2)}x slower`);
 * 	}
 * 	process.exit(1);
 * }
 * ```
 */
export const benchmark_baseline_compare = async (
	results: Array<BenchmarkResult>,
	options: BenchmarkBaselineCompareOptions = {},
): Promise<BenchmarkBaselineComparisonResult> => {
	const baseline = await benchmark_baseline_load(options);
	const regression_threshold = options.regression_threshold ?? 1.0;

	// Without a baseline there is nothing to compare: every task counts as new.
	if (!baseline) {
		return {
			baseline_found: false,
			baseline_timestamp: null,
			baseline_commit: null,
			baseline_age_days: null,
			baseline_stale: false,
			comparisons: [],
			regressions: [],
			improvements: [],
			unchanged: [],
			new_tasks: results.map((r) => r.name),
			removed_tasks: [],
		};
	}

	// Baseline age in fractional days, and whether it exceeds the optional staleness limit.
	const ms_per_day = 24 * 60 * 60 * 1000;
	const baseline_age_days = (Date.now() - new Date(baseline.timestamp).getTime()) / ms_per_day;
	const {staleness_warning_days} = options;
	const baseline_stale =
		staleness_warning_days !== undefined && baseline_age_days > staleness_warning_days;

	// Reduce an entry to the minimal stats needed by the comparison, rebuilding its 95% CI.
	const to_comparable = (
		entry: BenchmarkBaselineTaskComparison['baseline'],
	): BenchmarkStatsComparable => ({
		mean_ns: entry.mean_ns,
		std_dev_ns: entry.std_dev_ns,
		sample_size: entry.sample_size,
		confidence_interval_ns: calculate_confidence_interval(
			entry.mean_ns,
			entry.std_dev_ns,
			entry.sample_size,
		),
	});

	const current_entries = results_to_entries(results);
	const baseline_by_name = new Map(baseline.entries.map((entry) => [entry.name, entry]));
	const current_names = new Set(current_entries.map((entry) => entry.name));

	const comparisons: Array<BenchmarkBaselineTaskComparison> = [];
	const regressions: Array<BenchmarkBaselineTaskComparison> = [];
	const improvements: Array<BenchmarkBaselineTaskComparison> = [];
	const unchanged: Array<BenchmarkBaselineTaskComparison> = [];
	const new_tasks: Array<string> = [];

	for (const current of current_entries) {
		const baseline_entry = baseline_by_name.get(current.name);
		if (!baseline_entry) {
			new_tasks.push(current.name);
			continue;
		}

		const comparison = benchmark_stats_compare(
			to_comparable(baseline_entry),
			to_comparable(current),
		);
		const task_comparison: BenchmarkBaselineTaskComparison = {
			name: current.name,
			baseline: baseline_entry,
			current,
			comparison,
		};
		comparisons.push(task_comparison);

		// Pick the bucket. `comparison.faster` is 'a' (baseline) or 'b' (current).
		const meaningful = comparison.significant && comparison.effect_magnitude !== 'negligible';
		let bucket = unchanged;
		if (meaningful && comparison.faster === 'a') {
			// Baseline was faster: a regression only when the slowdown reaches the threshold.
			if (comparison.speedup_ratio >= regression_threshold) bucket = regressions;
		} else if (meaningful && comparison.faster === 'b') {
			// Current run is faster: an improvement.
			bucket = improvements;
		}
		bucket.push(task_comparison);
	}

	// Baseline tasks that no longer appear in the current run.
	const removed_tasks: Array<string> = baseline.entries
		.filter((entry) => !current_names.has(entry.name))
		.map((entry) => entry.name);

	// Largest effect size first.
	const by_effect_size_desc = (
		a: BenchmarkBaselineTaskComparison,
		b: BenchmarkBaselineTaskComparison,
	) => b.comparison.effect_size - a.comparison.effect_size;
	regressions.sort(by_effect_size_desc);
	improvements.sort(by_effect_size_desc);

	return {
		baseline_found: true,
		baseline_timestamp: baseline.timestamp,
		baseline_commit: baseline.git_commit,
		baseline_age_days,
		baseline_stale,
		comparisons,
		regressions,
		improvements,
		unchanged,
		new_tasks,
		removed_tasks,
	};
};
|
|
443
|
+
|
|
444
|
+
/**
 * Format a baseline comparison result as a human-readable string.
 *
 * Sections appear in this order: baseline header (timestamp, shortened commit,
 * age), regressions, improvements, unchanged tasks, new tasks, removed tasks,
 * and a one-line summary. Empty sections are omitted. When no task was
 * compared, the summary reads "no tasks compared" instead of an empty list.
 *
 * @param result - Comparison result from benchmark_baseline_compare
 * @returns Formatted string summary, or a hint to save a baseline when none was found
 */
export const benchmark_baseline_format = (result: BenchmarkBaselineComparisonResult): string => {
	if (!result.baseline_found) {
		return 'No baseline found. Call benchmark_baseline_save() to create one.';
	}

	const lines: Array<string> = [];

	// Header: where the baseline came from and how old it is.
	lines.push(`Comparing against baseline from ${result.baseline_timestamp}`);
	if (result.baseline_commit) {
		lines.push(`Baseline commit: ${result.baseline_commit.slice(0, 8)}`);
	}
	if (result.baseline_age_days !== null) {
		const age_str =
			result.baseline_age_days < 1
				? 'less than a day'
				: result.baseline_age_days < 2
					? '1 day'
					: `${Math.floor(result.baseline_age_days)} days`;
		lines.push(`Baseline age: ${age_str}${result.baseline_stale ? ' (STALE)' : ''}`);
	}
	lines.push('');

	// Regressions and improvements share one layout; only the title and direction word differ.
	const push_changes = (
		title: string,
		direction: string,
		items: BenchmarkBaselineComparisonResult['regressions'],
	): void => {
		if (items.length === 0) return;
		lines.push(`${title} (${items.length}):`);
		for (const r of items) {
			const ratio = r.comparison.speedup_ratio.toFixed(2);
			const p = r.comparison.p_value.toFixed(3);
			lines.push(` ${r.name}: ${ratio}x ${direction} (p=${p}, ${r.comparison.effect_magnitude})`);
		}
		lines.push('');
	};
	push_changes('Regressions', 'slower', result.regressions);
	push_changes('Improvements', 'faster', result.improvements);

	if (result.unchanged.length > 0) {
		lines.push(`Unchanged (${result.unchanged.length}):`);
		for (const r of result.unchanged) {
			lines.push(` ${r.name}`);
		}
		lines.push('');
	}

	if (result.new_tasks.length > 0) {
		lines.push(`New tasks (${result.new_tasks.length}): ${result.new_tasks.join(', ')}`);
	}

	if (result.removed_tasks.length > 0) {
		lines.push(
			`Removed tasks (${result.removed_tasks.length}): ${result.removed_tasks.join(', ')}`,
		);
	}

	// Summary line
	const total = result.comparisons.length;
	const summary_parts: Array<string> = [];
	if (result.regressions.length > 0) summary_parts.push(`${result.regressions.length} regressions`);
	if (result.improvements.length > 0)
		summary_parts.push(`${result.improvements.length} improvements`);
	if (result.unchanged.length > 0) summary_parts.push(`${result.unchanged.length} unchanged`);
	// With nothing compared (e.g. all tasks are new), avoid rendering "Summary:  (0 total)".
	const summary = summary_parts.length > 0 ? summary_parts.join(', ') : 'no tasks compared';

	lines.push('');
	lines.push(`Summary: ${summary} (${total} total)`);

	return lines.join('\n');
};
|
|
523
|
+
|
|
524
|
+
/**
 * Format a baseline comparison result as JSON for programmatic consumption.
 *
 * The output holds the baseline metadata, a `summary` of per-category counts,
 * flattened records for regressions and improvements, and plain name lists
 * for unchanged, new, and removed tasks.
 *
 * @param result - Comparison result from benchmark_baseline_compare
 * @param options - Formatting options; `pretty` indents the output with tabs
 * @returns JSON string
 */
export const benchmark_baseline_format_json = (
	result: BenchmarkBaselineComparisonResult,
	options: {pretty?: boolean} = {},
): string => {
	const {comparisons, regressions, improvements, unchanged, new_tasks, removed_tasks} = result;

	// Flatten one significant change into the record shape shared by regressions and improvements.
	const flatten_change = (
		task: BenchmarkBaselineComparisonResult['regressions'][number],
	) => ({
		name: task.name,
		speedup_ratio: task.comparison.speedup_ratio,
		effect_size: task.comparison.effect_size,
		effect_magnitude: task.comparison.effect_magnitude,
		p_value: task.comparison.p_value,
		baseline_mean_ns: task.baseline.mean_ns,
		current_mean_ns: task.current.mean_ns,
	});

	const output = {
		baseline_found: result.baseline_found,
		baseline_timestamp: result.baseline_timestamp,
		baseline_commit: result.baseline_commit,
		baseline_age_days: result.baseline_age_days,
		baseline_stale: result.baseline_stale,
		summary: {
			total: comparisons.length,
			regressions: regressions.length,
			improvements: improvements.length,
			unchanged: unchanged.length,
			new_tasks: new_tasks.length,
			removed_tasks: removed_tasks.length,
		},
		regressions: regressions.map((task) => flatten_change(task)),
		improvements: improvements.map((task) => flatten_change(task)),
		unchanged: unchanged.map((task) => task.name),
		new_tasks,
		removed_tasks,
	};

	// An undefined `space` argument produces the same compact output as omitting it.
	return JSON.stringify(output, null, options.pretty ? '\t' : undefined);
};
|