@fuzdev/fuz_util 0.54.0 → 0.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/args.d.ts +12 -12
- package/dist/args.js +11 -11
- package/dist/async.d.ts +12 -12
- package/dist/async.js +12 -12
- package/dist/benchmark.d.ts +24 -24
- package/dist/benchmark.js +26 -26
- package/dist/benchmark_baseline.d.ts +17 -11
- package/dist/benchmark_baseline.d.ts.map +1 -1
- package/dist/benchmark_baseline.js +26 -19
- package/dist/benchmark_format.d.ts +15 -15
- package/dist/benchmark_format.js +15 -15
- package/dist/benchmark_stats.d.ts +30 -10
- package/dist/benchmark_stats.d.ts.map +1 -1
- package/dist/benchmark_stats.js +48 -40
- package/dist/benchmark_types.d.ts +7 -7
- package/dist/bytes.d.ts +4 -4
- package/dist/bytes.js +4 -4
- package/dist/dag.d.ts +2 -2
- package/dist/dag.js +2 -2
- package/dist/deep_equal.d.ts +2 -2
- package/dist/deep_equal.js +2 -2
- package/dist/diff.d.ts +17 -17
- package/dist/diff.js +17 -17
- package/dist/dom.d.ts +4 -4
- package/dist/dom.js +4 -4
- package/dist/fetch.d.ts +1 -1
- package/dist/fetch.js +1 -1
- package/dist/git.d.ts +1 -1
- package/dist/git.js +1 -1
- package/dist/hash.d.ts +6 -6
- package/dist/hash.js +8 -8
- package/dist/hash_blake3.d.ts +1 -1
- package/dist/hash_blake3.js +1 -1
- package/dist/hex.d.ts +4 -4
- package/dist/hex.js +4 -4
- package/dist/json.d.ts +2 -2
- package/dist/json.js +2 -2
- package/dist/log.d.ts +12 -12
- package/dist/log.js +11 -11
- package/dist/map.d.ts +1 -1
- package/dist/map.js +1 -1
- package/dist/object.d.ts +1 -1
- package/dist/object.js +1 -1
- package/dist/package_json.d.ts +1 -1
- package/dist/package_json.js +1 -1
- package/dist/path.d.ts +5 -5
- package/dist/path.js +5 -5
- package/dist/process.d.ts +22 -22
- package/dist/process.js +22 -22
- package/dist/random.d.ts +2 -2
- package/dist/random.js +2 -2
- package/dist/result.d.ts +6 -6
- package/dist/result.js +6 -6
- package/dist/sort.d.ts +3 -3
- package/dist/sort.js +3 -3
- package/dist/source_json.d.ts +3 -3
- package/dist/source_json.js +3 -3
- package/dist/stats.d.ts +17 -17
- package/dist/stats.js +17 -17
- package/dist/string.d.ts +6 -6
- package/dist/string.js +6 -6
- package/dist/svelte_preprocess_helpers.d.ts +42 -42
- package/dist/svelte_preprocess_helpers.js +42 -42
- package/dist/testing.d.ts +44 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +59 -0
- package/dist/time.d.ts +19 -19
- package/dist/time.js +19 -19
- package/dist/zod.d.ts +16 -16
- package/dist/zod.d.ts.map +1 -1
- package/dist/zod.js +24 -24
- package/package.json +6 -6
- package/src/lib/args.ts +12 -12
- package/src/lib/async.ts +12 -12
- package/src/lib/benchmark.ts +28 -28
- package/src/lib/benchmark_baseline.ts +37 -20
- package/src/lib/benchmark_format.ts +15 -15
- package/src/lib/benchmark_stats.ts +66 -44
- package/src/lib/benchmark_types.ts +7 -7
- package/src/lib/bytes.ts +4 -4
- package/src/lib/dag.ts +2 -2
- package/src/lib/deep_equal.ts +2 -2
- package/src/lib/diff.ts +17 -17
- package/src/lib/dom.ts +4 -4
- package/src/lib/fetch.ts +1 -1
- package/src/lib/git.ts +1 -1
- package/src/lib/hash.ts +8 -8
- package/src/lib/hash_blake3.ts +1 -1
- package/src/lib/hex.ts +4 -4
- package/src/lib/json.ts +2 -2
- package/src/lib/log.ts +12 -12
- package/src/lib/map.ts +1 -1
- package/src/lib/object.ts +1 -1
- package/src/lib/package_json.ts +1 -1
- package/src/lib/path.ts +5 -5
- package/src/lib/process.ts +22 -22
- package/src/lib/random.ts +2 -2
- package/src/lib/result.ts +6 -6
- package/src/lib/sort.ts +3 -3
- package/src/lib/source_json.ts +3 -3
- package/src/lib/stats.ts +17 -17
- package/src/lib/string.ts +6 -6
- package/src/lib/svelte_preprocess_helpers.ts +42 -42
- package/src/lib/testing.ts +80 -0
- package/src/lib/time.ts +19 -19
- package/src/lib/zod.ts +24 -24
|
@@ -89,6 +89,12 @@ export interface BenchmarkBaselineCompareOptions extends BenchmarkBaselineLoadOp
|
|
|
89
89
|
* Default: undefined (no staleness warning)
|
|
90
90
|
*/
|
|
91
91
|
staleness_warning_days?: number;
|
|
92
|
+
/**
|
|
93
|
+
* Minimum percentage difference to consider meaningful, as a ratio.
|
|
94
|
+
* Passed through to `benchmark_stats_compare`. See `BenchmarkCompareOptions`.
|
|
95
|
+
* Default: 0.10 (10%)
|
|
96
|
+
*/
|
|
97
|
+
min_percent_difference?: number;
|
|
92
98
|
}
|
|
93
99
|
|
|
94
100
|
/**
|
|
@@ -155,8 +161,8 @@ const results_to_entries = (results: Array<BenchmarkResult>): Array<BenchmarkBas
|
|
|
155
161
|
/**
|
|
156
162
|
* Save benchmark results as the current baseline.
|
|
157
163
|
*
|
|
158
|
-
* @param results -
|
|
159
|
-
* @param options -
|
|
164
|
+
* @param results - benchmark results to save
|
|
165
|
+
* @param options - save options
|
|
160
166
|
*
|
|
161
167
|
* @example
|
|
162
168
|
* ```ts
|
|
@@ -198,8 +204,8 @@ export const benchmark_baseline_save = async (
|
|
|
198
204
|
/**
|
|
199
205
|
* Load the current baseline from disk.
|
|
200
206
|
*
|
|
201
|
-
* @param options -
|
|
202
|
-
* @returns
|
|
207
|
+
* @param options - load options
|
|
208
|
+
* @returns the baseline, or null if not found or invalid
|
|
203
209
|
*
|
|
204
210
|
* @example
|
|
205
211
|
* ```ts
|
|
@@ -249,9 +255,9 @@ export const benchmark_baseline_load = async (
|
|
|
249
255
|
/**
|
|
250
256
|
* Compare benchmark results against the stored baseline.
|
|
251
257
|
*
|
|
252
|
-
* @param results -
|
|
253
|
-
* @param options -
|
|
254
|
-
* @returns
|
|
258
|
+
* @param results - current benchmark results
|
|
259
|
+
* @param options - comparison options including regression threshold and staleness warning
|
|
260
|
+
* @returns comparison result with regressions, improvements, and unchanged tasks
|
|
255
261
|
*
|
|
256
262
|
* @example
|
|
257
263
|
* ```ts
|
|
@@ -344,7 +350,9 @@ export const benchmark_baseline_compare = async (
|
|
|
344
350
|
),
|
|
345
351
|
};
|
|
346
352
|
|
|
347
|
-
const comparison = benchmark_stats_compare(baseline_stats, current_stats
|
|
353
|
+
const comparison = benchmark_stats_compare(baseline_stats, current_stats, {
|
|
354
|
+
min_percent_difference: options.min_percent_difference,
|
|
355
|
+
});
|
|
348
356
|
|
|
349
357
|
const task_comparison: BenchmarkBaselineTaskComparison = {
|
|
350
358
|
name: current.name,
|
|
@@ -357,7 +365,8 @@ export const benchmark_baseline_compare = async (
|
|
|
357
365
|
|
|
358
366
|
// Categorize based on comparison result
|
|
359
367
|
// Note: comparison.faster is 'a' (baseline) or 'b' (current)
|
|
360
|
-
|
|
368
|
+
// significant implies percent_difference >= min_pct, which implies effect_magnitude !== 'negligible'
|
|
369
|
+
if (comparison.significant) {
|
|
361
370
|
if (comparison.faster === 'a') {
|
|
362
371
|
// Baseline was faster = potential regression
|
|
363
372
|
// Only count as regression if it exceeds the threshold
|
|
@@ -384,14 +393,14 @@ export const benchmark_baseline_compare = async (
|
|
|
384
393
|
}
|
|
385
394
|
}
|
|
386
395
|
|
|
387
|
-
// Sort regressions and improvements by
|
|
388
|
-
const
|
|
396
|
+
// Sort regressions and improvements by percentage difference (largest first)
|
|
397
|
+
const sort_by_percent_difference = (
|
|
389
398
|
a: BenchmarkBaselineTaskComparison,
|
|
390
399
|
b: BenchmarkBaselineTaskComparison,
|
|
391
|
-
) => b.comparison.
|
|
400
|
+
) => b.comparison.percent_difference - a.comparison.percent_difference;
|
|
392
401
|
|
|
393
|
-
regressions.sort(
|
|
394
|
-
improvements.sort(
|
|
402
|
+
regressions.sort(sort_by_percent_difference);
|
|
403
|
+
improvements.sort(sort_by_percent_difference);
|
|
395
404
|
|
|
396
405
|
return {
|
|
397
406
|
baseline_found: true,
|
|
@@ -411,8 +420,8 @@ export const benchmark_baseline_compare = async (
|
|
|
411
420
|
/**
|
|
412
421
|
* Format a baseline comparison result as a human-readable string.
|
|
413
422
|
*
|
|
414
|
-
* @param result -
|
|
415
|
-
* @returns
|
|
423
|
+
* @param result - comparison result from `benchmark_baseline_compare`
|
|
424
|
+
* @returns formatted string summary
|
|
416
425
|
*/
|
|
417
426
|
export const benchmark_baseline_format = (result: BenchmarkBaselineComparisonResult): string => {
|
|
418
427
|
if (!result.baseline_found) {
|
|
@@ -440,8 +449,11 @@ export const benchmark_baseline_format = (result: BenchmarkBaselineComparisonRes
|
|
|
440
449
|
lines.push(`Regressions (${result.regressions.length}):`);
|
|
441
450
|
for (const r of result.regressions) {
|
|
442
451
|
const ratio = r.comparison.speedup_ratio.toFixed(2);
|
|
452
|
+
const pct = (r.comparison.percent_difference * 100).toFixed(1);
|
|
443
453
|
const p = r.comparison.p_value.toFixed(3);
|
|
444
|
-
lines.push(
|
|
454
|
+
lines.push(
|
|
455
|
+
` ${r.name}: ${ratio}x slower (${pct}%, p=${p}, ${r.comparison.effect_magnitude})`,
|
|
456
|
+
);
|
|
445
457
|
}
|
|
446
458
|
lines.push('');
|
|
447
459
|
}
|
|
@@ -450,8 +462,11 @@ export const benchmark_baseline_format = (result: BenchmarkBaselineComparisonRes
|
|
|
450
462
|
lines.push(`Improvements (${result.improvements.length}):`);
|
|
451
463
|
for (const r of result.improvements) {
|
|
452
464
|
const ratio = r.comparison.speedup_ratio.toFixed(2);
|
|
465
|
+
const pct = (r.comparison.percent_difference * 100).toFixed(1);
|
|
453
466
|
const p = r.comparison.p_value.toFixed(3);
|
|
454
|
-
lines.push(
|
|
467
|
+
lines.push(
|
|
468
|
+
` ${r.name}: ${ratio}x faster (${pct}%, p=${p}, ${r.comparison.effect_magnitude})`,
|
|
469
|
+
);
|
|
455
470
|
}
|
|
456
471
|
lines.push('');
|
|
457
472
|
}
|
|
@@ -491,8 +506,8 @@ export const benchmark_baseline_format = (result: BenchmarkBaselineComparisonRes
|
|
|
491
506
|
/**
|
|
492
507
|
* Format a baseline comparison result as JSON for programmatic consumption.
|
|
493
508
|
*
|
|
494
|
-
* @param result -
|
|
495
|
-
* @param options -
|
|
509
|
+
* @param result - comparison result from `benchmark_baseline_compare`
|
|
510
|
+
* @param options - formatting options
|
|
496
511
|
* @returns JSON string
|
|
497
512
|
*/
|
|
498
513
|
export const benchmark_baseline_format_json = (
|
|
@@ -516,6 +531,7 @@ export const benchmark_baseline_format_json = (
|
|
|
516
531
|
regressions: result.regressions.map((r) => ({
|
|
517
532
|
name: r.name,
|
|
518
533
|
speedup_ratio: r.comparison.speedup_ratio,
|
|
534
|
+
percent_difference: r.comparison.percent_difference,
|
|
519
535
|
effect_size: r.comparison.effect_size,
|
|
520
536
|
effect_magnitude: r.comparison.effect_magnitude,
|
|
521
537
|
p_value: r.comparison.p_value,
|
|
@@ -525,6 +541,7 @@ export const benchmark_baseline_format_json = (
|
|
|
525
541
|
improvements: result.improvements.map((r) => ({
|
|
526
542
|
name: r.name,
|
|
527
543
|
speedup_ratio: r.comparison.speedup_ratio,
|
|
544
|
+
percent_difference: r.comparison.percent_difference,
|
|
528
545
|
effect_size: r.comparison.effect_size,
|
|
529
546
|
effect_magnitude: r.comparison.effect_magnitude,
|
|
530
547
|
p_value: r.comparison.p_value,
|
|
@@ -6,9 +6,9 @@ import {format_number} from './maths.js';
|
|
|
6
6
|
/**
|
|
7
7
|
* Format results as an ASCII table with percentiles, min/max, and relative performance.
|
|
8
8
|
* All times use the same unit for easy comparison.
|
|
9
|
-
* @param results -
|
|
10
|
-
* @param baseline -
|
|
11
|
-
* @returns
|
|
9
|
+
* @param results - array of benchmark results
|
|
10
|
+
* @param baseline - optional task name to use as baseline for comparison (defaults to fastest)
|
|
11
|
+
* @returns formatted table string with enhanced metrics
|
|
12
12
|
*
|
|
13
13
|
* @example
|
|
14
14
|
* ```ts
|
|
@@ -126,9 +126,9 @@ export const benchmark_format_table = (
|
|
|
126
126
|
/**
|
|
127
127
|
* Format results as a Markdown table with key metrics.
|
|
128
128
|
* All times use the same unit for easy comparison.
|
|
129
|
-
* @param results -
|
|
130
|
-
* @param baseline -
|
|
131
|
-
* @returns
|
|
129
|
+
* @param results - array of benchmark results
|
|
130
|
+
* @param baseline - optional task name to use as baseline for comparison (defaults to fastest)
|
|
131
|
+
* @returns formatted markdown table string
|
|
132
132
|
*
|
|
133
133
|
* @example
|
|
134
134
|
* ```ts
|
|
@@ -238,9 +238,9 @@ export const benchmark_format_markdown = (
|
|
|
238
238
|
|
|
239
239
|
/**
|
|
240
240
|
* Format results as grouped Markdown tables with headers between groups.
|
|
241
|
-
* @param results -
|
|
242
|
-
* @param groups -
|
|
243
|
-
* @returns
|
|
241
|
+
* @param results - array of benchmark results
|
|
242
|
+
* @param groups - array of group definitions
|
|
243
|
+
* @returns formatted markdown string with group headers and tables
|
|
244
244
|
*
|
|
245
245
|
* @example
|
|
246
246
|
* ```ts
|
|
@@ -301,8 +301,8 @@ export interface BenchmarkFormatJsonOptions {
|
|
|
301
301
|
|
|
302
302
|
/**
|
|
303
303
|
* Format results as JSON.
|
|
304
|
-
* @param results -
|
|
305
|
-
* @param options -
|
|
304
|
+
* @param results - array of benchmark results
|
|
305
|
+
* @param options - formatting options
|
|
306
306
|
* @returns JSON string
|
|
307
307
|
*
|
|
308
308
|
* @example
|
|
@@ -348,9 +348,9 @@ export const benchmark_format_json = (
|
|
|
348
348
|
|
|
349
349
|
/**
|
|
350
350
|
* Format results as a grouped table with visual separators between groups.
|
|
351
|
-
* @param results -
|
|
352
|
-
* @param groups -
|
|
353
|
-
* @returns
|
|
351
|
+
* @param results - array of benchmark results
|
|
352
|
+
* @param groups - array of group definitions
|
|
353
|
+
* @returns formatted table string with group separators
|
|
354
354
|
*
|
|
355
355
|
* @example
|
|
356
356
|
* ```ts
|
|
@@ -405,6 +405,6 @@ export const benchmark_format_table_grouped = (
|
|
|
405
405
|
|
|
406
406
|
/**
|
|
407
407
|
* Format a number with fixed decimal places and thousands separators.
|
|
408
|
-
* @see
|
|
408
|
+
* @see `format_number` in `maths.ts` for the underlying implementation.
|
|
409
409
|
*/
|
|
410
410
|
export const benchmark_format_number = format_number;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Benchmark-specific statistical analysis.
|
|
3
|
-
* Uses the general stats utilities from stats.ts for timing/performance analysis.
|
|
3
|
+
* Uses the general stats utilities from `stats.ts` for timing/performance analysis.
|
|
4
4
|
* All timing values are in nanoseconds.
|
|
5
5
|
*
|
|
6
6
|
* @module
|
|
@@ -44,13 +44,15 @@ export interface BenchmarkComparison {
|
|
|
44
44
|
faster: 'a' | 'b' | 'equal';
|
|
45
45
|
/** How much faster the winner is (e.g., 1.5 means 1.5x faster) */
|
|
46
46
|
speedup_ratio: number;
|
|
47
|
-
/** Whether the difference is statistically
|
|
47
|
+
/** Whether the difference is both statistically and practically significant */
|
|
48
48
|
significant: boolean;
|
|
49
49
|
/** P-value from Welch's t-test (lower = more confident the difference is real) */
|
|
50
50
|
p_value: number;
|
|
51
|
-
/**
|
|
51
|
+
/** Percentage difference between means as a ratio (0.05 = 5%, 1.0 = 100%) */
|
|
52
|
+
percent_difference: number;
|
|
53
|
+
/** Cohen's d effect size (informational — not used for classification) */
|
|
52
54
|
effect_size: number;
|
|
53
|
-
/** Interpretation of
|
|
55
|
+
/** Interpretation of practical significance based on percentage difference */
|
|
54
56
|
effect_magnitude: EffectMagnitude;
|
|
55
57
|
/** Whether the 95% confidence intervals overlap */
|
|
56
58
|
ci_overlap: boolean;
|
|
@@ -64,6 +66,20 @@ export interface BenchmarkComparison {
|
|
|
64
66
|
export interface BenchmarkCompareOptions {
|
|
65
67
|
/** Significance level for hypothesis testing (default: 0.05) */
|
|
66
68
|
alpha?: number;
|
|
69
|
+
/**
|
|
70
|
+
* Minimum percentage difference to consider practically meaningful, as a ratio.
|
|
71
|
+
* Below this threshold, differences are classified as 'negligible' and
|
|
72
|
+
* `significant` is forced to `false`, regardless of p-value.
|
|
73
|
+
* This prevents the t-test's oversensitivity at large sample sizes from
|
|
74
|
+
* flagging system-level noise (thermal throttle, OS scheduler, cache pressure)
|
|
75
|
+
* as meaningful differences.
|
|
76
|
+
*
|
|
77
|
+
* Effect magnitude thresholds scale from this value:
|
|
78
|
+
* negligible < min, small < min*3, medium < min*5, large >= min*5.
|
|
79
|
+
*
|
|
80
|
+
* Default: 0.10 (10%).
|
|
81
|
+
*/
|
|
82
|
+
min_percent_difference?: number;
|
|
67
83
|
}
|
|
68
84
|
|
|
69
85
|
/**
|
|
@@ -181,13 +197,17 @@ export class BenchmarkStats {
|
|
|
181
197
|
}
|
|
182
198
|
|
|
183
199
|
/**
|
|
184
|
-
* Compare two benchmark results for statistical significance.
|
|
185
|
-
* Uses
|
|
200
|
+
* Compare two benchmark results for practical and statistical significance.
|
|
201
|
+
* Uses percentage difference for effect magnitude classification, with Welch's
|
|
202
|
+
* t-test for statistical confidence. Cohen's d is computed as an informational
|
|
203
|
+
* metric but does not drive classification — its thresholds (0.2/0.5/0.8) are
|
|
204
|
+
* calibrated for social science and produce false positives in benchmarking
|
|
205
|
+
* where within-run variance is tight.
|
|
186
206
|
*
|
|
187
|
-
* @param a -
|
|
188
|
-
* @param b -
|
|
189
|
-
* @param options -
|
|
190
|
-
* @returns
|
|
207
|
+
* @param a - first benchmark stats (or any object with required properties)
|
|
208
|
+
* @param b - second benchmark stats (or any object with required properties)
|
|
209
|
+
* @param options - comparison options
|
|
210
|
+
* @returns comparison result with significance, effect size, and recommendation
|
|
191
211
|
*
|
|
192
212
|
* @example
|
|
193
213
|
* ```ts
|
|
@@ -203,6 +223,7 @@ export const benchmark_stats_compare = (
|
|
|
203
223
|
options?: BenchmarkCompareOptions,
|
|
204
224
|
): BenchmarkComparison => {
|
|
205
225
|
const alpha = options?.alpha ?? 0.05;
|
|
226
|
+
const min_pct = options?.min_percent_difference ?? 0.1;
|
|
206
227
|
|
|
207
228
|
// Handle edge cases
|
|
208
229
|
if (a.sample_size === 0 || b.sample_size === 0) {
|
|
@@ -211,6 +232,7 @@ export const benchmark_stats_compare = (
|
|
|
211
232
|
speedup_ratio: 1,
|
|
212
233
|
significant: false,
|
|
213
234
|
p_value: 1,
|
|
235
|
+
percent_difference: 0,
|
|
214
236
|
effect_size: 0,
|
|
215
237
|
effect_magnitude: 'negligible',
|
|
216
238
|
ci_overlap: true,
|
|
@@ -223,6 +245,9 @@ export const benchmark_stats_compare = (
|
|
|
223
245
|
const faster: 'a' | 'b' | 'equal' =
|
|
224
246
|
a.mean_ns < b.mean_ns ? 'a' : a.mean_ns > b.mean_ns ? 'b' : 'equal';
|
|
225
247
|
|
|
248
|
+
// Percentage difference relative to the faster mean (always >= 0)
|
|
249
|
+
const percent_difference = speedup_ratio - 1;
|
|
250
|
+
|
|
226
251
|
// Welch's t-test (handles unequal variances)
|
|
227
252
|
// Special case: if both have zero variance, t-test is undefined
|
|
228
253
|
let p_value: number;
|
|
@@ -242,38 +267,33 @@ export const benchmark_stats_compare = (
|
|
|
242
267
|
p_value = stats_t_distribution_p_value(Math.abs(t_statistic), degrees_of_freedom);
|
|
243
268
|
}
|
|
244
269
|
|
|
245
|
-
// Cohen's d effect size
|
|
270
|
+
// Cohen's d effect size (informational only — not used for classification)
|
|
246
271
|
const pooled_std_dev = Math.sqrt(
|
|
247
272
|
((a.sample_size - 1) * a.std_dev_ns ** 2 + (b.sample_size - 1) * b.std_dev_ns ** 2) /
|
|
248
273
|
(a.sample_size + b.sample_size - 2),
|
|
249
274
|
);
|
|
250
|
-
|
|
251
|
-
// When pooled_std_dev is 0 but means differ, effect is maximal (infinite)
|
|
252
|
-
// When means are equal, effect is 0
|
|
253
275
|
let effect_size: number;
|
|
254
|
-
let effect_magnitude: EffectMagnitude;
|
|
255
|
-
|
|
256
276
|
if (pooled_std_dev === 0) {
|
|
257
|
-
|
|
258
|
-
if (a.mean_ns === b.mean_ns) {
|
|
259
|
-
effect_size = 0;
|
|
260
|
-
effect_magnitude = 'negligible';
|
|
261
|
-
} else {
|
|
262
|
-
// Any difference is 100% reliable when there's no variance
|
|
263
|
-
effect_size = Infinity;
|
|
264
|
-
effect_magnitude = 'large';
|
|
265
|
-
}
|
|
277
|
+
effect_size = a.mean_ns === b.mean_ns ? 0 : Infinity;
|
|
266
278
|
} else {
|
|
267
279
|
effect_size = Math.abs(a.mean_ns - b.mean_ns) / pooled_std_dev;
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// Effect magnitude based on percentage difference, not Cohen's d.
|
|
283
|
+
// Cohen's d thresholds (0.2/0.5/0.8) are calibrated for social science, not benchmarking.
|
|
284
|
+
// Within-run variance is tight, so even small system noise (thermal throttle, OS scheduler)
|
|
285
|
+
// produces large Cohen's d. Percentage thresholds directly answer "is this difference
|
|
286
|
+
// meaningful in practice?" Thresholds scale with min_percent_difference so users can
|
|
287
|
+
// tune one knob for their system's noise floor.
|
|
288
|
+
let effect_magnitude: EffectMagnitude;
|
|
289
|
+
if (percent_difference < min_pct) {
|
|
290
|
+
effect_magnitude = 'negligible';
|
|
291
|
+
} else if (percent_difference < min_pct * 3) {
|
|
292
|
+
effect_magnitude = 'small';
|
|
293
|
+
} else if (percent_difference < min_pct * 5) {
|
|
294
|
+
effect_magnitude = 'medium';
|
|
295
|
+
} else {
|
|
296
|
+
effect_magnitude = 'large';
|
|
277
297
|
}
|
|
278
298
|
|
|
279
299
|
// Check confidence interval overlap
|
|
@@ -281,20 +301,21 @@ export const benchmark_stats_compare = (
|
|
|
281
301
|
a.confidence_interval_ns[0] <= b.confidence_interval_ns[1] &&
|
|
282
302
|
b.confidence_interval_ns[0] <= a.confidence_interval_ns[1];
|
|
283
303
|
|
|
284
|
-
//
|
|
285
|
-
|
|
304
|
+
// Significance requires both statistical significance (p < alpha)
|
|
305
|
+
// AND practical significance (percent_difference >= min_pct).
|
|
306
|
+
// With large n, the t-test finds p≈0 for any difference because
|
|
307
|
+
// SE = std_dev/sqrt(n) → 0. Gating on practical significance
|
|
308
|
+
// prevents system noise from being flagged as meaningful.
|
|
309
|
+
const significant = p_value < alpha && percent_difference >= min_pct;
|
|
286
310
|
|
|
287
311
|
// Generate recommendation
|
|
288
312
|
let recommendation: string;
|
|
289
|
-
if (
|
|
290
|
-
recommendation =
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
: `Difference not statistically significant (p=${p_value.toFixed(3)}), but effect size suggests ${effect_magnitude} practical difference`;
|
|
294
|
-
} else if (effect_magnitude === 'negligible') {
|
|
295
|
-
recommendation = `Statistically significant but negligible practical difference (${speedup_ratio.toFixed(2)}x)`;
|
|
313
|
+
if (percent_difference < min_pct) {
|
|
314
|
+
recommendation = 'No meaningful difference detected';
|
|
315
|
+
} else if (!significant) {
|
|
316
|
+
recommendation = `${(percent_difference * 100).toFixed(1)}% difference observed but not statistically significant (p=${p_value.toFixed(3)})`;
|
|
296
317
|
} else {
|
|
297
|
-
recommendation = `${faster === 'a' ? 'First' : 'Second'} is ${speedup_ratio.toFixed(2)}x faster with ${effect_magnitude} effect size (p=${p_value.toFixed(3)})`;
|
|
318
|
+
recommendation = `${faster === 'a' ? 'First' : 'Second'} is ${speedup_ratio.toFixed(2)}x faster with ${effect_magnitude} effect size (${(percent_difference * 100).toFixed(1)}%, p=${p_value.toFixed(3)})`;
|
|
298
319
|
}
|
|
299
320
|
|
|
300
321
|
// Adjust 'faster' to 'equal' if effect is negligible
|
|
@@ -305,6 +326,7 @@ export const benchmark_stats_compare = (
|
|
|
305
326
|
speedup_ratio,
|
|
306
327
|
significant,
|
|
307
328
|
p_value,
|
|
329
|
+
percent_difference,
|
|
308
330
|
effect_size,
|
|
309
331
|
effect_magnitude,
|
|
310
332
|
ci_overlap,
|
|
@@ -41,7 +41,7 @@ export interface BenchmarkConfig {
|
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
43
|
* Custom timer to use for measurements.
|
|
44
|
-
* Default: timer_default (auto-detects environment)
|
|
44
|
+
* Default: `timer_default` (auto-detects environment)
|
|
45
45
|
*/
|
|
46
46
|
timer?: Timer;
|
|
47
47
|
|
|
@@ -54,9 +54,9 @@ export interface BenchmarkConfig {
|
|
|
54
54
|
* after the timing capture. However, frequent GC calls will slow overall benchmark
|
|
55
55
|
* execution time.
|
|
56
56
|
*
|
|
57
|
-
* @param task_name -
|
|
58
|
-
* @param iteration -
|
|
59
|
-
* @param abort -
|
|
57
|
+
* @param task_name - name of the current task being benchmarked
|
|
58
|
+
* @param iteration - current iteration number (1-indexed)
|
|
59
|
+
* @param abort - call to stop the benchmark early for this task
|
|
60
60
|
*
|
|
61
61
|
* @example
|
|
62
62
|
* ```ts
|
|
@@ -90,9 +90,9 @@ export interface BenchmarkConfig {
|
|
|
90
90
|
* Callback invoked after each task completes.
|
|
91
91
|
* Useful for logging progress during long benchmark runs.
|
|
92
92
|
*
|
|
93
|
-
* @param result -
|
|
94
|
-
* @param index -
|
|
95
|
-
* @param total -
|
|
93
|
+
* @param result - the completed benchmark result
|
|
94
|
+
* @param index - zero-based index of the completed task
|
|
95
|
+
* @param total - total number of tasks to run
|
|
96
96
|
*
|
|
97
97
|
* @example
|
|
98
98
|
* ```ts
|
package/src/lib/bytes.ts
CHANGED
|
@@ -10,8 +10,8 @@ const encoder = new TextEncoder();
|
|
|
10
10
|
* Converts string or binary data to a `Uint8Array`.
|
|
11
11
|
* Strings are UTF-8 encoded. `Uint8Array` inputs are returned as-is.
|
|
12
12
|
*
|
|
13
|
-
* @param data -
|
|
14
|
-
* @returns `Uint8Array` view of the data
|
|
13
|
+
* @param data - string or `BufferSource` to convert
|
|
14
|
+
* @returns `Uint8Array` view of the data
|
|
15
15
|
*/
|
|
16
16
|
export const to_bytes = (data: BufferSource | string): Uint8Array => {
|
|
17
17
|
if (typeof data === 'string') return encoder.encode(data);
|
|
@@ -23,8 +23,8 @@ export const to_bytes = (data: BufferSource | string): Uint8Array => {
|
|
|
23
23
|
/**
|
|
24
24
|
* Formats a byte count as a human-readable string.
|
|
25
25
|
*
|
|
26
|
-
* @param n - byte count
|
|
27
|
-
* @returns formatted string like `'1.2 KB'` or `'3.4 MB'
|
|
26
|
+
* @param n - byte count
|
|
27
|
+
* @returns formatted string like `'1.2 KB'` or `'3.4 MB'`
|
|
28
28
|
*/
|
|
29
29
|
export const format_bytes = (n: number): string => {
|
|
30
30
|
if (n < 1024) return n + ' B';
|
package/src/lib/dag.ts
CHANGED
|
@@ -80,8 +80,8 @@ export interface DagResult {
|
|
|
80
80
|
* eligible to start. Failure cascading and stop-on-failure are handled
|
|
81
81
|
* per the options.
|
|
82
82
|
*
|
|
83
|
-
* @param options - DAG execution options
|
|
84
|
-
* @returns
|
|
83
|
+
* @param options - DAG execution options
|
|
84
|
+
* @returns aggregated result with per-node details
|
|
85
85
|
*/
|
|
86
86
|
export const run_dag = async <T extends DagNode>(options: DagOptions<T>): Promise<DagResult> => {
|
|
87
87
|
const {
|
package/src/lib/deep_equal.ts
CHANGED
|
@@ -10,8 +10,8 @@
|
|
|
10
10
|
* - Promises always return false (cannot be meaningfully compared)
|
|
11
11
|
* - Maps/Sets compare by reference for object keys/values
|
|
12
12
|
*
|
|
13
|
-
* @param a first value to compare
|
|
14
|
-
* @param b second value to compare
|
|
13
|
+
* @param a - first value to compare
|
|
14
|
+
* @param b - second value to compare
|
|
15
15
|
* @returns true if deeply equal, false otherwise
|
|
16
16
|
*/
|
|
17
17
|
export const deep_equal = (a: unknown, b: unknown): boolean => {
|
package/src/lib/diff.ts
CHANGED
|
@@ -15,9 +15,9 @@ export interface DiffLine {
|
|
|
15
15
|
/**
|
|
16
16
|
* Generate a line-based diff between two strings using LCS algorithm.
|
|
17
17
|
*
|
|
18
|
-
* @param a -
|
|
19
|
-
* @param b -
|
|
20
|
-
* @returns
|
|
18
|
+
* @param a - the original/current content
|
|
19
|
+
* @param b - the new/desired content
|
|
20
|
+
* @returns array of diff lines with type annotations
|
|
21
21
|
*/
|
|
22
22
|
export const diff_lines = (a: string, b: string): Array<DiffLine> => {
|
|
23
23
|
const a_lines = a.split('\n');
|
|
@@ -94,9 +94,9 @@ const compute_lcs = (a: Array<string>, b: Array<string>): Array<string> => {
|
|
|
94
94
|
/**
|
|
95
95
|
* Filter diff to only include lines within N lines of context around changes.
|
|
96
96
|
*
|
|
97
|
-
* @param diff -
|
|
98
|
-
* @param context_lines -
|
|
99
|
-
* @returns
|
|
97
|
+
* @param diff - the full diff lines
|
|
98
|
+
* @param context_lines - number of context lines to show around changes (default: 3)
|
|
99
|
+
* @returns filtered diff with ellipsis markers for skipped regions
|
|
100
100
|
*/
|
|
101
101
|
export const filter_diff_context = (diff: Array<DiffLine>, context_lines = 3): Array<DiffLine> => {
|
|
102
102
|
if (diff.length === 0) return [];
|
|
@@ -163,11 +163,11 @@ export interface FormatDiffOptions {
|
|
|
163
163
|
/**
|
|
164
164
|
* Format a diff for display.
|
|
165
165
|
*
|
|
166
|
-
* @param diff -
|
|
167
|
-
* @param current_path -
|
|
168
|
-
* @param desired_path -
|
|
169
|
-
* @param options -
|
|
170
|
-
* @returns
|
|
166
|
+
* @param diff - the diff lines to format
|
|
167
|
+
* @param current_path - path label for "current" content
|
|
168
|
+
* @param desired_path - path label for "desired" content
|
|
169
|
+
* @param options - formatting options
|
|
170
|
+
* @returns formatted diff string
|
|
171
171
|
*/
|
|
172
172
|
export const format_diff = (
|
|
173
173
|
diff: Array<DiffLine>,
|
|
@@ -208,14 +208,14 @@ export const format_diff = (
|
|
|
208
208
|
/**
|
|
209
209
|
* Generate a formatted diff between two strings.
|
|
210
210
|
*
|
|
211
|
-
* Combines diff_lines
|
|
211
|
+
* Combines `diff_lines`, `filter_diff_context`, and `format_diff` for convenience.
|
|
212
212
|
* Returns null if content is binary.
|
|
213
213
|
*
|
|
214
|
-
* @param current -
|
|
215
|
-
* @param desired -
|
|
216
|
-
* @param path -
|
|
217
|
-
* @param options -
|
|
218
|
-
* @returns
|
|
214
|
+
* @param current - current content
|
|
215
|
+
* @param desired - desired content
|
|
216
|
+
* @param path - file path for labels
|
|
217
|
+
* @param options - formatting options
|
|
218
|
+
* @returns formatted diff string, or null if binary
|
|
219
219
|
*/
|
|
220
220
|
export const generate_diff = (
|
|
221
221
|
current: string,
|
package/src/lib/dom.ts
CHANGED
|
@@ -60,11 +60,11 @@ export const is_interactive = (el: any): boolean => {
|
|
|
60
60
|
|
|
61
61
|
/**
|
|
62
62
|
* Stops an event from bubbling and doing default behavior.
|
|
63
|
-
* @param event
|
|
64
|
-
* @param immediate defaults to `true` to use `stopImmediatePropagation` over `stopPropagation`
|
|
65
|
-
* @param preventDefault defaults to `true`
|
|
63
|
+
* @param event - the event to swallow
|
|
64
|
+
* @param immediate - defaults to `true` to use `stopImmediatePropagation` over `stopPropagation`
|
|
65
|
+
* @param preventDefault - defaults to `true`
|
|
66
|
+
* @returns the swallowed event
|
|
66
67
|
* @mutates event - calls preventDefault(), stopPropagation(), or stopImmediatePropagation()
|
|
67
|
-
* @returns
|
|
68
68
|
*/
|
|
69
69
|
export const swallow = <
|
|
70
70
|
T extends Pick<Event, 'preventDefault' | 'stopPropagation' | 'stopImmediatePropagation'>,
|
package/src/lib/fetch.ts
CHANGED
|
@@ -34,7 +34,7 @@ export interface FetchValueOptions<TValue, TParams = undefined> {
|
|
|
34
34
|
* (you can still provide headers directly)
|
|
35
35
|
*
|
|
36
36
|
* Unlike `fetch`, this throws on ratelimits (status code 429)
|
|
37
|
-
* to halt whatever is
|
|
37
|
+
* to halt whatever is happening in its tracks to avoid accidental abuse,
|
|
38
38
|
* but returns a `Result` in all other cases.
|
|
39
39
|
* Handling ratelimit headers with more sophistication gets tricky because behavior
|
|
40
40
|
* differs across services.
|
package/src/lib/git.ts
CHANGED
|
@@ -122,7 +122,7 @@ export interface GitWorkspaceStatus {
|
|
|
122
122
|
* Note: This implementation treats submodules the same as regular files.
|
|
123
123
|
* Submodule-specific status codes (lowercase m, ?) are interpreted as changes.
|
|
124
124
|
*
|
|
125
|
-
* @param stdout -
|
|
125
|
+
* @param stdout - the raw output from `git status --porcelain -z`
|
|
126
126
|
* @returns status object with flags for unstaged changes, staged changes, and untracked files
|
|
127
127
|
*/
|
|
128
128
|
export const git_parse_workspace_status = (stdout: string | null): GitWorkspaceStatus => {
|
package/src/lib/hash.ts
CHANGED
|
@@ -15,9 +15,9 @@ const encoder = new TextEncoder();
|
|
|
15
15
|
/**
|
|
16
16
|
* Computes a hash using Web Crypto API.
|
|
17
17
|
*
|
|
18
|
-
* @param algorithm - Web Crypto algorithm name (e.g. `'SHA-256'`)
|
|
18
|
+
* @param algorithm - Web Crypto algorithm name (e.g. `'SHA-256'`)
|
|
19
19
|
* @param data - String or binary data to hash. Strings are UTF-8 encoded.
|
|
20
|
-
* @returns hexadecimal hash string
|
|
20
|
+
* @returns hexadecimal hash string
|
|
21
21
|
*/
|
|
22
22
|
const hash_webcrypto = async (algorithm: string, data: BufferSource | string): Promise<string> => {
|
|
23
23
|
const buffer = typeof data === 'string' ? encoder.encode(data) : data;
|
|
@@ -29,7 +29,7 @@ const hash_webcrypto = async (algorithm: string, data: BufferSource | string): P
|
|
|
29
29
|
* Computes a SHA-1 hash using Web Crypto API.
|
|
30
30
|
*
|
|
31
31
|
* @param data - String or binary data to hash. Strings are UTF-8 encoded.
|
|
32
|
-
* @returns 40-character hexadecimal hash string
|
|
32
|
+
* @returns 40-character hexadecimal hash string
|
|
33
33
|
*/
|
|
34
34
|
export const hash_sha1 = (data: BufferSource | string): Promise<string> =>
|
|
35
35
|
hash_webcrypto('SHA-1', data);
|
|
@@ -38,7 +38,7 @@ export const hash_sha1 = (data: BufferSource | string): Promise<string> =>
|
|
|
38
38
|
* Computes a SHA-256 hash using Web Crypto API.
|
|
39
39
|
*
|
|
40
40
|
* @param data - String or binary data to hash. Strings are UTF-8 encoded.
|
|
41
|
-
* @returns 64-character hexadecimal hash string
|
|
41
|
+
* @returns 64-character hexadecimal hash string
|
|
42
42
|
*/
|
|
43
43
|
export const hash_sha256 = (data: BufferSource | string): Promise<string> =>
|
|
44
44
|
hash_webcrypto('SHA-256', data);
|
|
@@ -47,7 +47,7 @@ export const hash_sha256 = (data: BufferSource | string): Promise<string> =>
|
|
|
47
47
|
* Computes a SHA-384 hash using Web Crypto API.
|
|
48
48
|
*
|
|
49
49
|
* @param data - String or binary data to hash. Strings are UTF-8 encoded.
|
|
50
|
-
* @returns 96-character hexadecimal hash string
|
|
50
|
+
* @returns 96-character hexadecimal hash string
|
|
51
51
|
*/
|
|
52
52
|
export const hash_sha384 = (data: BufferSource | string): Promise<string> =>
|
|
53
53
|
hash_webcrypto('SHA-384', data);
|
|
@@ -56,7 +56,7 @@ export const hash_sha384 = (data: BufferSource | string): Promise<string> =>
|
|
|
56
56
|
* Computes a SHA-512 hash using Web Crypto API.
|
|
57
57
|
*
|
|
58
58
|
* @param data - String or binary data to hash. Strings are UTF-8 encoded.
|
|
59
|
-
* @returns 128-character hexadecimal hash string
|
|
59
|
+
* @returns 128-character hexadecimal hash string
|
|
60
60
|
*/
|
|
61
61
|
export const hash_sha512 = (data: BufferSource | string): Promise<string> =>
|
|
62
62
|
hash_webcrypto('SHA-512', data);
|
|
@@ -68,8 +68,8 @@ export const hash_sha512 = (data: BufferSource | string): Promise<string> =>
|
|
|
68
68
|
* Note: Strings use UTF-16 code units, buffers use raw bytes.
|
|
69
69
|
* For non-ASCII, `hash_insecure(str) !== hash_insecure(encoder.encode(str))`.
|
|
70
70
|
*
|
|
71
|
-
* @param data -
|
|
72
|
-
* @returns 8-character hex-encoded unsigned 32-bit hash
|
|
71
|
+
* @param data - string or binary data to hash
|
|
72
|
+
* @returns 8-character hex-encoded unsigned 32-bit hash
|
|
73
73
|
*/
|
|
74
74
|
export const hash_insecure = (data: BufferSource | string): string => {
|
|
75
75
|
let hash = 5381; // DJB2 initial value, chosen empirically for good distribution
|