@fuzdev/fuz_util 0.54.0 → 0.56.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/args.d.ts +12 -12
- package/dist/args.js +11 -11
- package/dist/async.d.ts +12 -12
- package/dist/async.js +12 -12
- package/dist/benchmark.d.ts +24 -24
- package/dist/benchmark.js +26 -26
- package/dist/benchmark_baseline.d.ts +17 -11
- package/dist/benchmark_baseline.d.ts.map +1 -1
- package/dist/benchmark_baseline.js +26 -19
- package/dist/benchmark_format.d.ts +15 -15
- package/dist/benchmark_format.js +15 -15
- package/dist/benchmark_stats.d.ts +30 -10
- package/dist/benchmark_stats.d.ts.map +1 -1
- package/dist/benchmark_stats.js +48 -40
- package/dist/benchmark_types.d.ts +7 -7
- package/dist/bytes.d.ts +4 -4
- package/dist/bytes.js +4 -4
- package/dist/dag.d.ts +2 -2
- package/dist/dag.js +2 -2
- package/dist/deep_equal.d.ts +2 -2
- package/dist/deep_equal.js +2 -2
- package/dist/diff.d.ts +17 -17
- package/dist/diff.js +17 -17
- package/dist/dom.d.ts +4 -4
- package/dist/dom.js +4 -4
- package/dist/fetch.d.ts +1 -1
- package/dist/fetch.js +1 -1
- package/dist/git.d.ts +1 -1
- package/dist/git.js +1 -1
- package/dist/hash.d.ts +6 -6
- package/dist/hash.js +8 -8
- package/dist/hash_blake3.d.ts +1 -1
- package/dist/hash_blake3.js +1 -1
- package/dist/hex.d.ts +4 -4
- package/dist/hex.js +4 -4
- package/dist/json.d.ts +2 -2
- package/dist/json.js +2 -2
- package/dist/log.d.ts +12 -12
- package/dist/log.js +11 -11
- package/dist/map.d.ts +1 -1
- package/dist/map.js +1 -1
- package/dist/object.d.ts +1 -1
- package/dist/object.js +1 -1
- package/dist/package_json.d.ts +1 -1
- package/dist/package_json.js +1 -1
- package/dist/path.d.ts +5 -5
- package/dist/path.js +5 -5
- package/dist/process.d.ts +22 -22
- package/dist/process.js +22 -22
- package/dist/random.d.ts +2 -2
- package/dist/random.js +2 -2
- package/dist/result.d.ts +6 -6
- package/dist/result.js +6 -6
- package/dist/sort.d.ts +3 -3
- package/dist/sort.js +3 -3
- package/dist/source_json.d.ts +3 -3
- package/dist/source_json.js +3 -3
- package/dist/stats.d.ts +17 -17
- package/dist/stats.js +17 -17
- package/dist/string.d.ts +6 -6
- package/dist/string.js +6 -6
- package/dist/svelte_preprocess_helpers.d.ts +42 -42
- package/dist/svelte_preprocess_helpers.js +42 -42
- package/dist/testing.d.ts +44 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +59 -0
- package/dist/time.d.ts +19 -19
- package/dist/time.js +19 -19
- package/dist/zod.d.ts +16 -16
- package/dist/zod.d.ts.map +1 -1
- package/dist/zod.js +24 -24
- package/package.json +6 -6
- package/src/lib/args.ts +12 -12
- package/src/lib/async.ts +12 -12
- package/src/lib/benchmark.ts +28 -28
- package/src/lib/benchmark_baseline.ts +37 -20
- package/src/lib/benchmark_format.ts +15 -15
- package/src/lib/benchmark_stats.ts +66 -44
- package/src/lib/benchmark_types.ts +7 -7
- package/src/lib/bytes.ts +4 -4
- package/src/lib/dag.ts +2 -2
- package/src/lib/deep_equal.ts +2 -2
- package/src/lib/diff.ts +17 -17
- package/src/lib/dom.ts +4 -4
- package/src/lib/fetch.ts +1 -1
- package/src/lib/git.ts +1 -1
- package/src/lib/hash.ts +8 -8
- package/src/lib/hash_blake3.ts +1 -1
- package/src/lib/hex.ts +4 -4
- package/src/lib/json.ts +2 -2
- package/src/lib/log.ts +12 -12
- package/src/lib/map.ts +1 -1
- package/src/lib/object.ts +1 -1
- package/src/lib/package_json.ts +1 -1
- package/src/lib/path.ts +5 -5
- package/src/lib/process.ts +22 -22
- package/src/lib/random.ts +2 -2
- package/src/lib/result.ts +6 -6
- package/src/lib/sort.ts +3 -3
- package/src/lib/source_json.ts +3 -3
- package/src/lib/stats.ts +17 -17
- package/src/lib/string.ts +6 -6
- package/src/lib/svelte_preprocess_helpers.ts +42 -42
- package/src/lib/testing.ts +80 -0
- package/src/lib/time.ts +19 -19
- package/src/lib/zod.ts +24 -24
|
@@ -65,8 +65,8 @@ const results_to_entries = (results) => {
|
|
|
65
65
|
/**
|
|
66
66
|
* Save benchmark results as the current baseline.
|
|
67
67
|
*
|
|
68
|
-
* @param results -
|
|
69
|
-
* @param options -
|
|
68
|
+
* @param results - benchmark results to save
|
|
69
|
+
* @param options - save options
|
|
70
70
|
*
|
|
71
71
|
* @example
|
|
72
72
|
* ```ts
|
|
@@ -101,8 +101,8 @@ export const benchmark_baseline_save = async (results, options = {}) => {
|
|
|
101
101
|
/**
|
|
102
102
|
* Load the current baseline from disk.
|
|
103
103
|
*
|
|
104
|
-
* @param options -
|
|
105
|
-
* @returns
|
|
104
|
+
* @param options - load options
|
|
105
|
+
* @returns the baseline, or null if not found or invalid
|
|
106
106
|
*
|
|
107
107
|
* @example
|
|
108
108
|
* ```ts
|
|
@@ -141,9 +141,9 @@ export const benchmark_baseline_load = async (options = {}) => {
|
|
|
141
141
|
/**
|
|
142
142
|
* Compare benchmark results against the stored baseline.
|
|
143
143
|
*
|
|
144
|
-
* @param results -
|
|
145
|
-
* @param options -
|
|
146
|
-
* @returns
|
|
144
|
+
* @param results - current benchmark results
|
|
145
|
+
* @param options - comparison options including regression threshold and staleness warning
|
|
146
|
+
* @returns comparison result with regressions, improvements, and unchanged tasks
|
|
147
147
|
*
|
|
148
148
|
* @example
|
|
149
149
|
* ```ts
|
|
@@ -217,7 +217,9 @@ export const benchmark_baseline_compare = async (results, options = {}) => {
|
|
|
217
217
|
sample_size: current.sample_size,
|
|
218
218
|
confidence_interval_ns: stats_confidence_interval_from_summary(current.mean_ns, current.std_dev_ns, current.sample_size),
|
|
219
219
|
};
|
|
220
|
-
const comparison = benchmark_stats_compare(baseline_stats, current_stats
|
|
220
|
+
const comparison = benchmark_stats_compare(baseline_stats, current_stats, {
|
|
221
|
+
min_percent_difference: options.min_percent_difference,
|
|
222
|
+
});
|
|
221
223
|
const task_comparison = {
|
|
222
224
|
name: current.name,
|
|
223
225
|
baseline: baseline_entry,
|
|
@@ -227,7 +229,8 @@ export const benchmark_baseline_compare = async (results, options = {}) => {
|
|
|
227
229
|
comparisons.push(task_comparison);
|
|
228
230
|
// Categorize based on comparison result
|
|
229
231
|
// Note: comparison.faster is 'a' (baseline) or 'b' (current)
|
|
230
|
-
|
|
232
|
+
// significant implies percent_difference >= min_pct, which implies effect_magnitude !== 'negligible'
|
|
233
|
+
if (comparison.significant) {
|
|
231
234
|
if (comparison.faster === 'a') {
|
|
232
235
|
// Baseline was faster = potential regression
|
|
233
236
|
// Only count as regression if it exceeds the threshold
|
|
@@ -256,10 +259,10 @@ export const benchmark_baseline_compare = async (results, options = {}) => {
|
|
|
256
259
|
removed_tasks.push(baseline_entry.name);
|
|
257
260
|
}
|
|
258
261
|
}
|
|
259
|
-
// Sort regressions and improvements by
|
|
260
|
-
const
|
|
261
|
-
regressions.sort(
|
|
262
|
-
improvements.sort(
|
|
262
|
+
// Sort regressions and improvements by percentage difference (largest first)
|
|
263
|
+
const sort_by_percent_difference = (a, b) => b.comparison.percent_difference - a.comparison.percent_difference;
|
|
264
|
+
regressions.sort(sort_by_percent_difference);
|
|
265
|
+
improvements.sort(sort_by_percent_difference);
|
|
263
266
|
return {
|
|
264
267
|
baseline_found: true,
|
|
265
268
|
baseline_timestamp: baseline.timestamp,
|
|
@@ -277,8 +280,8 @@ export const benchmark_baseline_compare = async (results, options = {}) => {
|
|
|
277
280
|
/**
|
|
278
281
|
* Format a baseline comparison result as a human-readable string.
|
|
279
282
|
*
|
|
280
|
-
* @param result -
|
|
281
|
-
* @returns
|
|
283
|
+
* @param result - comparison result from `benchmark_baseline_compare`
|
|
284
|
+
* @returns formatted string summary
|
|
282
285
|
*/
|
|
283
286
|
export const benchmark_baseline_format = (result) => {
|
|
284
287
|
if (!result.baseline_found) {
|
|
@@ -302,8 +305,9 @@ export const benchmark_baseline_format = (result) => {
|
|
|
302
305
|
lines.push(`Regressions (${result.regressions.length}):`);
|
|
303
306
|
for (const r of result.regressions) {
|
|
304
307
|
const ratio = r.comparison.speedup_ratio.toFixed(2);
|
|
308
|
+
const pct = (r.comparison.percent_difference * 100).toFixed(1);
|
|
305
309
|
const p = r.comparison.p_value.toFixed(3);
|
|
306
|
-
lines.push(` ${r.name}: ${ratio}x slower (p=${p}, ${r.comparison.effect_magnitude})`);
|
|
310
|
+
lines.push(` ${r.name}: ${ratio}x slower (${pct}%, p=${p}, ${r.comparison.effect_magnitude})`);
|
|
307
311
|
}
|
|
308
312
|
lines.push('');
|
|
309
313
|
}
|
|
@@ -311,8 +315,9 @@ export const benchmark_baseline_format = (result) => {
|
|
|
311
315
|
lines.push(`Improvements (${result.improvements.length}):`);
|
|
312
316
|
for (const r of result.improvements) {
|
|
313
317
|
const ratio = r.comparison.speedup_ratio.toFixed(2);
|
|
318
|
+
const pct = (r.comparison.percent_difference * 100).toFixed(1);
|
|
314
319
|
const p = r.comparison.p_value.toFixed(3);
|
|
315
|
-
lines.push(` ${r.name}: ${ratio}x faster (p=${p}, ${r.comparison.effect_magnitude})`);
|
|
320
|
+
lines.push(` ${r.name}: ${ratio}x faster (${pct}%, p=${p}, ${r.comparison.effect_magnitude})`);
|
|
316
321
|
}
|
|
317
322
|
lines.push('');
|
|
318
323
|
}
|
|
@@ -345,8 +350,8 @@ export const benchmark_baseline_format = (result) => {
|
|
|
345
350
|
/**
|
|
346
351
|
* Format a baseline comparison result as JSON for programmatic consumption.
|
|
347
352
|
*
|
|
348
|
-
* @param result -
|
|
349
|
-
* @param options -
|
|
353
|
+
* @param result - comparison result from `benchmark_baseline_compare`
|
|
354
|
+
* @param options - formatting options
|
|
350
355
|
* @returns JSON string
|
|
351
356
|
*/
|
|
352
357
|
export const benchmark_baseline_format_json = (result, options = {}) => {
|
|
@@ -367,6 +372,7 @@ export const benchmark_baseline_format_json = (result, options = {}) => {
|
|
|
367
372
|
regressions: result.regressions.map((r) => ({
|
|
368
373
|
name: r.name,
|
|
369
374
|
speedup_ratio: r.comparison.speedup_ratio,
|
|
375
|
+
percent_difference: r.comparison.percent_difference,
|
|
370
376
|
effect_size: r.comparison.effect_size,
|
|
371
377
|
effect_magnitude: r.comparison.effect_magnitude,
|
|
372
378
|
p_value: r.comparison.p_value,
|
|
@@ -376,6 +382,7 @@ export const benchmark_baseline_format_json = (result, options = {}) => {
|
|
|
376
382
|
improvements: result.improvements.map((r) => ({
|
|
377
383
|
name: r.name,
|
|
378
384
|
speedup_ratio: r.comparison.speedup_ratio,
|
|
385
|
+
percent_difference: r.comparison.percent_difference,
|
|
379
386
|
effect_size: r.comparison.effect_size,
|
|
380
387
|
effect_magnitude: r.comparison.effect_magnitude,
|
|
381
388
|
p_value: r.comparison.p_value,
|
|
@@ -2,9 +2,9 @@ import type { BenchmarkResult, BenchmarkGroup } from './benchmark_types.js';
|
|
|
2
2
|
/**
|
|
3
3
|
* Format results as an ASCII table with percentiles, min/max, and relative performance.
|
|
4
4
|
* All times use the same unit for easy comparison.
|
|
5
|
-
* @param results -
|
|
6
|
-
* @param baseline -
|
|
7
|
-
* @returns
|
|
5
|
+
* @param results - array of benchmark results
|
|
6
|
+
* @param baseline - optional task name to use as baseline for comparison (defaults to fastest)
|
|
7
|
+
* @returns formatted table string with enhanced metrics
|
|
8
8
|
*
|
|
9
9
|
* @example
|
|
10
10
|
* ```ts
|
|
@@ -21,9 +21,9 @@ export declare const benchmark_format_table: (results: Array<BenchmarkResult>, b
|
|
|
21
21
|
/**
|
|
22
22
|
* Format results as a Markdown table with key metrics.
|
|
23
23
|
* All times use the same unit for easy comparison.
|
|
24
|
-
* @param results -
|
|
25
|
-
* @param baseline -
|
|
26
|
-
* @returns
|
|
24
|
+
* @param results - array of benchmark results
|
|
25
|
+
* @param baseline - optional task name to use as baseline for comparison (defaults to fastest)
|
|
26
|
+
* @returns formatted markdown table string
|
|
27
27
|
*
|
|
28
28
|
* @example
|
|
29
29
|
* ```ts
|
|
@@ -37,9 +37,9 @@ export declare const benchmark_format_table: (results: Array<BenchmarkResult>, b
|
|
|
37
37
|
export declare const benchmark_format_markdown: (results: Array<BenchmarkResult>, baseline?: string) => string;
|
|
38
38
|
/**
|
|
39
39
|
* Format results as grouped Markdown tables with headers between groups.
|
|
40
|
-
* @param results -
|
|
41
|
-
* @param groups -
|
|
42
|
-
* @returns
|
|
40
|
+
* @param results - array of benchmark results
|
|
41
|
+
* @param groups - array of group definitions
|
|
42
|
+
* @returns formatted markdown string with group headers and tables
|
|
43
43
|
*
|
|
44
44
|
* @example
|
|
45
45
|
* ```ts
|
|
@@ -68,8 +68,8 @@ export interface BenchmarkFormatJsonOptions {
|
|
|
68
68
|
}
|
|
69
69
|
/**
|
|
70
70
|
* Format results as JSON.
|
|
71
|
-
* @param results -
|
|
72
|
-
* @param options -
|
|
71
|
+
* @param results - array of benchmark results
|
|
72
|
+
* @param options - formatting options
|
|
73
73
|
* @returns JSON string
|
|
74
74
|
*
|
|
75
75
|
* @example
|
|
@@ -82,9 +82,9 @@ export interface BenchmarkFormatJsonOptions {
|
|
|
82
82
|
export declare const benchmark_format_json: (results: Array<BenchmarkResult>, options?: BenchmarkFormatJsonOptions) => string;
|
|
83
83
|
/**
|
|
84
84
|
* Format results as a grouped table with visual separators between groups.
|
|
85
|
-
* @param results -
|
|
86
|
-
* @param groups -
|
|
87
|
-
* @returns
|
|
85
|
+
* @param results - array of benchmark results
|
|
86
|
+
* @param groups - array of group definitions
|
|
87
|
+
* @returns formatted table string with group separators
|
|
88
88
|
*
|
|
89
89
|
* @example
|
|
90
90
|
* ```ts
|
|
@@ -108,7 +108,7 @@ export declare const benchmark_format_json: (results: Array<BenchmarkResult>, op
|
|
|
108
108
|
export declare const benchmark_format_table_grouped: (results: Array<BenchmarkResult>, groups: Array<BenchmarkGroup>) => string;
|
|
109
109
|
/**
|
|
110
110
|
* Format a number with fixed decimal places and thousands separators.
|
|
111
|
-
* @see
|
|
111
|
+
* @see `format_number` in `maths.ts` for the underlying implementation.
|
|
112
112
|
*/
|
|
113
113
|
export declare const benchmark_format_number: (n: number, decimals?: number) => string;
|
|
114
114
|
//# sourceMappingURL=benchmark_format.d.ts.map
|
package/dist/benchmark_format.js
CHANGED
|
@@ -4,9 +4,9 @@ import { format_number } from './maths.js';
|
|
|
4
4
|
/**
|
|
5
5
|
* Format results as an ASCII table with percentiles, min/max, and relative performance.
|
|
6
6
|
* All times use the same unit for easy comparison.
|
|
7
|
-
* @param results -
|
|
8
|
-
* @param baseline -
|
|
9
|
-
* @returns
|
|
7
|
+
* @param results - array of benchmark results
|
|
8
|
+
* @param baseline - optional task name to use as baseline for comparison (defaults to fastest)
|
|
9
|
+
* @returns formatted table string with enhanced metrics
|
|
10
10
|
*
|
|
11
11
|
* @example
|
|
12
12
|
* ```ts
|
|
@@ -105,9 +105,9 @@ export const benchmark_format_table = (results, baseline) => {
|
|
|
105
105
|
/**
|
|
106
106
|
* Format results as a Markdown table with key metrics.
|
|
107
107
|
* All times use the same unit for easy comparison.
|
|
108
|
-
* @param results -
|
|
109
|
-
* @param baseline -
|
|
110
|
-
* @returns
|
|
108
|
+
* @param results - array of benchmark results
|
|
109
|
+
* @param baseline - optional task name to use as baseline for comparison (defaults to fastest)
|
|
110
|
+
* @returns formatted markdown table string
|
|
111
111
|
*
|
|
112
112
|
* @example
|
|
113
113
|
* ```ts
|
|
@@ -200,9 +200,9 @@ export const benchmark_format_markdown = (results, baseline) => {
|
|
|
200
200
|
};
|
|
201
201
|
/**
|
|
202
202
|
* Format results as grouped Markdown tables with headers between groups.
|
|
203
|
-
* @param results -
|
|
204
|
-
* @param groups -
|
|
205
|
-
* @returns
|
|
203
|
+
* @param results - array of benchmark results
|
|
204
|
+
* @param groups - array of group definitions
|
|
205
|
+
* @returns formatted markdown string with group headers and tables
|
|
206
206
|
*
|
|
207
207
|
* @example
|
|
208
208
|
* ```ts
|
|
@@ -248,8 +248,8 @@ export const benchmark_format_markdown_grouped = (results, groups) => {
|
|
|
248
248
|
};
|
|
249
249
|
/**
|
|
250
250
|
* Format results as JSON.
|
|
251
|
-
* @param results -
|
|
252
|
-
* @param options -
|
|
251
|
+
* @param results - array of benchmark results
|
|
252
|
+
* @param options - formatting options
|
|
253
253
|
* @returns JSON string
|
|
254
254
|
*
|
|
255
255
|
* @example
|
|
@@ -290,9 +290,9 @@ export const benchmark_format_json = (results, options) => {
|
|
|
290
290
|
};
|
|
291
291
|
/**
|
|
292
292
|
* Format results as a grouped table with visual separators between groups.
|
|
293
|
-
* @param results -
|
|
294
|
-
* @param groups -
|
|
295
|
-
* @returns
|
|
293
|
+
* @param results - array of benchmark results
|
|
294
|
+
* @param groups - array of group definitions
|
|
295
|
+
* @returns formatted table string with group separators
|
|
296
296
|
*
|
|
297
297
|
* @example
|
|
298
298
|
* ```ts
|
|
@@ -339,6 +339,6 @@ export const benchmark_format_table_grouped = (results, groups) => {
|
|
|
339
339
|
};
|
|
340
340
|
/**
|
|
341
341
|
* Format a number with fixed decimal places and thousands separators.
|
|
342
|
-
* @see
|
|
342
|
+
* @see `format_number` in `maths.ts` for the underlying implementation.
|
|
343
343
|
*/
|
|
344
344
|
export const benchmark_format_number = format_number;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Benchmark-specific statistical analysis.
|
|
3
|
-
* Uses the general stats utilities from stats.ts for timing/performance analysis.
|
|
3
|
+
* Uses the general stats utilities from `stats.ts` for timing/performance analysis.
|
|
4
4
|
* All timing values are in nanoseconds.
|
|
5
5
|
*
|
|
6
6
|
* @module
|
|
@@ -27,13 +27,15 @@ export interface BenchmarkComparison {
|
|
|
27
27
|
faster: 'a' | 'b' | 'equal';
|
|
28
28
|
/** How much faster the winner is (e.g., 1.5 means 1.5x faster) */
|
|
29
29
|
speedup_ratio: number;
|
|
30
|
-
/** Whether the difference is statistically
|
|
30
|
+
/** Whether the difference is both statistically and practically significant */
|
|
31
31
|
significant: boolean;
|
|
32
32
|
/** P-value from Welch's t-test (lower = more confident the difference is real) */
|
|
33
33
|
p_value: number;
|
|
34
|
-
/**
|
|
34
|
+
/** Percentage difference between means as a ratio (0.05 = 5%, 1.0 = 100%) */
|
|
35
|
+
percent_difference: number;
|
|
36
|
+
/** Cohen's d effect size (informational — not used for classification) */
|
|
35
37
|
effect_size: number;
|
|
36
|
-
/** Interpretation of
|
|
38
|
+
/** Interpretation of practical significance based on percentage difference */
|
|
37
39
|
effect_magnitude: EffectMagnitude;
|
|
38
40
|
/** Whether the 95% confidence intervals overlap */
|
|
39
41
|
ci_overlap: boolean;
|
|
@@ -46,6 +48,20 @@ export interface BenchmarkComparison {
|
|
|
46
48
|
export interface BenchmarkCompareOptions {
|
|
47
49
|
/** Significance level for hypothesis testing (default: 0.05) */
|
|
48
50
|
alpha?: number;
|
|
51
|
+
/**
|
|
52
|
+
* Minimum percentage difference to consider practically meaningful, as a ratio.
|
|
53
|
+
* Below this threshold, differences are classified as 'negligible' and
|
|
54
|
+
* `significant` is forced to `false`, regardless of p-value.
|
|
55
|
+
* This prevents the t-test's oversensitivity at large sample sizes from
|
|
56
|
+
* flagging system-level noise (thermal throttle, OS scheduler, cache pressure)
|
|
57
|
+
* as meaningful differences.
|
|
58
|
+
*
|
|
59
|
+
* Effect magnitude thresholds scale from this value:
|
|
60
|
+
* negligible < min, small < min*3, medium < min*5, large >= min*5.
|
|
61
|
+
*
|
|
62
|
+
* Default: 0.10 (10%).
|
|
63
|
+
*/
|
|
64
|
+
min_percent_difference?: number;
|
|
49
65
|
}
|
|
50
66
|
/**
|
|
51
67
|
* Complete statistical analysis of timing measurements.
|
|
@@ -94,13 +110,17 @@ export declare class BenchmarkStats {
|
|
|
94
110
|
toString(): string;
|
|
95
111
|
}
|
|
96
112
|
/**
|
|
97
|
-
* Compare two benchmark results for statistical significance.
|
|
98
|
-
* Uses
|
|
113
|
+
* Compare two benchmark results for practical and statistical significance.
|
|
114
|
+
* Uses percentage difference for effect magnitude classification, with Welch's
|
|
115
|
+
* t-test for statistical confidence. Cohen's d is computed as an informational
|
|
116
|
+
* metric but does not drive classification — its thresholds (0.2/0.5/0.8) are
|
|
117
|
+
* calibrated for social science and produce false positives in benchmarking
|
|
118
|
+
* where within-run variance is tight.
|
|
99
119
|
*
|
|
100
|
-
* @param a -
|
|
101
|
-
* @param b -
|
|
102
|
-
* @param options -
|
|
103
|
-
* @returns
|
|
120
|
+
* @param a - first benchmark stats (or any object with required properties)
|
|
121
|
+
* @param b - second benchmark stats (or any object with required properties)
|
|
122
|
+
* @param options - comparison options
|
|
123
|
+
* @returns comparison result with significance, effect size, and recommendation
|
|
104
124
|
*
|
|
105
125
|
* @example
|
|
106
126
|
* ```ts
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark_stats.d.ts","sourceRoot":"../src/lib/","sources":["../src/lib/benchmark_stats.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAgBH;;;GAGG;AACH,MAAM,WAAW,wBAAwB;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE1E;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,mFAAmF;IACnF,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,OAAO,CAAC;IAC5B,kEAAkE;IAClE,aAAa,EAAE,MAAM,CAAC;IACtB
|
|
1
|
+
{"version":3,"file":"benchmark_stats.d.ts","sourceRoot":"../src/lib/","sources":["../src/lib/benchmark_stats.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAgBH;;;GAGG;AACH,MAAM,WAAW,wBAAwB;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE1E;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,mFAAmF;IACnF,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,OAAO,CAAC;IAC5B,kEAAkE;IAClE,aAAa,EAAE,MAAM,CAAC;IACtB,+EAA+E;IAC/E,WAAW,EAAE,OAAO,CAAC;IACrB,kFAAkF;IAClF,OAAO,EAAE,MAAM,CAAC;IAChB,6EAA6E;IAC7E,kBAAkB,EAAE,MAAM,CAAC;IAC3B,0EAA0E;IAC1E,WAAW,EAAE,MAAM,CAAC;IACpB,8EAA8E;IAC9E,gBAAgB,EAAE,eAAe,CAAC;IAClC,mDAAmD;IACnD,UAAU,EAAE,OAAO,CAAC;IACpB,sDAAsD;IACtD,cAAc,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,gEAAgE;IAChE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;;;;;;;;OAYG;IACH,sBAAsB,CAAC,EAAE,MAAM,CAAC;CAChC;AAED;;;;GAIG;AACH,qBAAa,cAAc;IAC1B,yCAAyC;IACzC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,mDAAmD;IACnD,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,wCAAwC;IACxC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,kCAAkC;IAClC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,0DAA0D;IAC1D,QAAQ,CAAC,sBAAsB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClD,sDAAsD;IACtD,QAAQ,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACpC,yCAAyC;IACzC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,8CAA8C;IAC9C,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,0DAA0D;IAC1D,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,mDAAmD;IACnD,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,sEAAsE;IACtE,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;gBAEvB,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC;IAiErC;;OAEG;IACH,QAAQ,IAAI,MAAM;CAGlB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,uBAAuB,GACnC,GAAG,wBAAwB,EAC3B,GAAG,wBAAwB,EAC3B,UAAU,uBAAuB,KAC/B,mBA+GF,CAAC"}
|
package/dist/benchmark_stats.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Benchmark-specific statistical analysis.
|
|
3
|
-
* Uses the general stats utilities from stats.ts for timing/performance analysis.
|
|
3
|
+
* Uses the general stats utilities from `stats.ts` for timing/performance analysis.
|
|
4
4
|
* All timing values are in nanoseconds.
|
|
5
5
|
*
|
|
6
6
|
* @module
|
|
@@ -110,13 +110,17 @@ export class BenchmarkStats {
|
|
|
110
110
|
}
|
|
111
111
|
}
|
|
112
112
|
/**
|
|
113
|
-
* Compare two benchmark results for statistical significance.
|
|
114
|
-
* Uses
|
|
113
|
+
* Compare two benchmark results for practical and statistical significance.
|
|
114
|
+
* Uses percentage difference for effect magnitude classification, with Welch's
|
|
115
|
+
* t-test for statistical confidence. Cohen's d is computed as an informational
|
|
116
|
+
* metric but does not drive classification — its thresholds (0.2/0.5/0.8) are
|
|
117
|
+
* calibrated for social science and produce false positives in benchmarking
|
|
118
|
+
* where within-run variance is tight.
|
|
115
119
|
*
|
|
116
|
-
* @param a -
|
|
117
|
-
* @param b -
|
|
118
|
-
* @param options -
|
|
119
|
-
* @returns
|
|
120
|
+
* @param a - first benchmark stats (or any object with required properties)
|
|
121
|
+
* @param b - second benchmark stats (or any object with required properties)
|
|
122
|
+
* @param options - comparison options
|
|
123
|
+
* @returns comparison result with significance, effect size, and recommendation
|
|
120
124
|
*
|
|
121
125
|
* @example
|
|
122
126
|
* ```ts
|
|
@@ -128,6 +132,7 @@ export class BenchmarkStats {
|
|
|
128
132
|
*/
|
|
129
133
|
export const benchmark_stats_compare = (a, b, options) => {
|
|
130
134
|
const alpha = options?.alpha ?? 0.05;
|
|
135
|
+
const min_pct = options?.min_percent_difference ?? 0.1;
|
|
131
136
|
// Handle edge cases
|
|
132
137
|
if (a.sample_size === 0 || b.sample_size === 0) {
|
|
133
138
|
return {
|
|
@@ -135,6 +140,7 @@ export const benchmark_stats_compare = (a, b, options) => {
|
|
|
135
140
|
speedup_ratio: 1,
|
|
136
141
|
significant: false,
|
|
137
142
|
p_value: 1,
|
|
143
|
+
percent_difference: 0,
|
|
138
144
|
effect_size: 0,
|
|
139
145
|
effect_magnitude: 'negligible',
|
|
140
146
|
ci_overlap: true,
|
|
@@ -144,6 +150,8 @@ export const benchmark_stats_compare = (a, b, options) => {
|
|
|
144
150
|
// Calculate speedup ratio (lower time = faster, so compare by time not ops/sec)
|
|
145
151
|
const speedup_ratio = a.mean_ns < b.mean_ns ? b.mean_ns / a.mean_ns : a.mean_ns / b.mean_ns;
|
|
146
152
|
const faster = a.mean_ns < b.mean_ns ? 'a' : a.mean_ns > b.mean_ns ? 'b' : 'equal';
|
|
153
|
+
// Percentage difference relative to the faster mean (always >= 0)
|
|
154
|
+
const percent_difference = speedup_ratio - 1;
|
|
147
155
|
// Welch's t-test (handles unequal variances)
|
|
148
156
|
// Special case: if both have zero variance, t-test is undefined
|
|
149
157
|
let p_value;
|
|
@@ -156,55 +164,54 @@ export const benchmark_stats_compare = (a, b, options) => {
|
|
|
156
164
|
// Calculate two-tailed p-value using t-distribution approximation
|
|
157
165
|
p_value = stats_t_distribution_p_value(Math.abs(t_statistic), degrees_of_freedom);
|
|
158
166
|
}
|
|
159
|
-
// Cohen's d effect size
|
|
167
|
+
// Cohen's d effect size (informational only — not used for classification)
|
|
160
168
|
const pooled_std_dev = Math.sqrt(((a.sample_size - 1) * a.std_dev_ns ** 2 + (b.sample_size - 1) * b.std_dev_ns ** 2) /
|
|
161
169
|
(a.sample_size + b.sample_size - 2));
|
|
162
|
-
// When pooled_std_dev is 0 but means differ, effect is maximal (infinite)
|
|
163
|
-
// When means are equal, effect is 0
|
|
164
170
|
let effect_size;
|
|
165
|
-
let effect_magnitude;
|
|
166
171
|
if (pooled_std_dev === 0) {
|
|
167
|
-
|
|
168
|
-
if (a.mean_ns === b.mean_ns) {
|
|
169
|
-
effect_size = 0;
|
|
170
|
-
effect_magnitude = 'negligible';
|
|
171
|
-
}
|
|
172
|
-
else {
|
|
173
|
-
// Any difference is 100% reliable when there's no variance
|
|
174
|
-
effect_size = Infinity;
|
|
175
|
-
effect_magnitude = 'large';
|
|
176
|
-
}
|
|
172
|
+
effect_size = a.mean_ns === b.mean_ns ? 0 : Infinity;
|
|
177
173
|
}
|
|
178
174
|
else {
|
|
179
175
|
effect_size = Math.abs(a.mean_ns - b.mean_ns) / pooled_std_dev;
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
176
|
+
}
|
|
177
|
+
// Effect magnitude based on percentage difference, not Cohen's d.
|
|
178
|
+
// Cohen's d thresholds (0.2/0.5/0.8) are calibrated for social science, not benchmarking.
|
|
179
|
+
// Within-run variance is tight, so even small system noise (thermal throttle, OS scheduler)
|
|
180
|
+
// produces large Cohen's d. Percentage thresholds directly answer "is this difference
|
|
181
|
+
// meaningful in practice?" Thresholds scale with min_percent_difference so users can
|
|
182
|
+
// tune one knob for their system's noise floor.
|
|
183
|
+
let effect_magnitude;
|
|
184
|
+
if (percent_difference < min_pct) {
|
|
185
|
+
effect_magnitude = 'negligible';
|
|
186
|
+
}
|
|
187
|
+
else if (percent_difference < min_pct * 3) {
|
|
188
|
+
effect_magnitude = 'small';
|
|
189
|
+
}
|
|
190
|
+
else if (percent_difference < min_pct * 5) {
|
|
191
|
+
effect_magnitude = 'medium';
|
|
192
|
+
}
|
|
193
|
+
else {
|
|
194
|
+
effect_magnitude = 'large';
|
|
189
195
|
}
|
|
190
196
|
// Check confidence interval overlap
|
|
191
197
|
const ci_overlap = a.confidence_interval_ns[0] <= b.confidence_interval_ns[1] &&
|
|
192
198
|
b.confidence_interval_ns[0] <= a.confidence_interval_ns[1];
|
|
193
|
-
//
|
|
194
|
-
|
|
199
|
+
// Significance requires both statistical significance (p < alpha)
|
|
200
|
+
// AND practical significance (percent_difference >= min_pct).
|
|
201
|
+
// With large n, the t-test finds p≈0 for any difference because
|
|
202
|
+
// SE = std_dev/sqrt(n) → 0. Gating on practical significance
|
|
203
|
+
// prevents system noise from being flagged as meaningful.
|
|
204
|
+
const significant = p_value < alpha && percent_difference >= min_pct;
|
|
195
205
|
// Generate recommendation
|
|
196
206
|
let recommendation;
|
|
197
|
-
if (
|
|
198
|
-
recommendation =
|
|
199
|
-
effect_magnitude === 'negligible'
|
|
200
|
-
? 'No meaningful difference detected'
|
|
201
|
-
: `Difference not statistically significant (p=${p_value.toFixed(3)}), but effect size suggests ${effect_magnitude} practical difference`;
|
|
207
|
+
if (percent_difference < min_pct) {
|
|
208
|
+
recommendation = 'No meaningful difference detected';
|
|
202
209
|
}
|
|
203
|
-
else if (
|
|
204
|
-
recommendation =
|
|
210
|
+
else if (!significant) {
|
|
211
|
+
recommendation = `${(percent_difference * 100).toFixed(1)}% difference observed but not statistically significant (p=${p_value.toFixed(3)})`;
|
|
205
212
|
}
|
|
206
213
|
else {
|
|
207
|
-
recommendation = `${faster === 'a' ? 'First' : 'Second'} is ${speedup_ratio.toFixed(2)}x faster with ${effect_magnitude} effect size (p=${p_value.toFixed(3)})`;
|
|
214
|
+
recommendation = `${faster === 'a' ? 'First' : 'Second'} is ${speedup_ratio.toFixed(2)}x faster with ${effect_magnitude} effect size (${(percent_difference * 100).toFixed(1)}%, p=${p_value.toFixed(3)})`;
|
|
208
215
|
}
|
|
209
216
|
// Adjust 'faster' to 'equal' if effect is negligible
|
|
210
217
|
const adjusted_faster = effect_magnitude === 'negligible' ? 'equal' : faster;
|
|
@@ -213,6 +220,7 @@ export const benchmark_stats_compare = (a, b, options) => {
|
|
|
213
220
|
speedup_ratio,
|
|
214
221
|
significant,
|
|
215
222
|
p_value,
|
|
223
|
+
percent_difference,
|
|
216
224
|
effect_size,
|
|
217
225
|
effect_magnitude,
|
|
218
226
|
ci_overlap,
|
|
@@ -35,7 +35,7 @@ export interface BenchmarkConfig {
|
|
|
35
35
|
max_iterations?: number;
|
|
36
36
|
/**
|
|
37
37
|
* Custom timer to use for measurements.
|
|
38
|
-
* Default: timer_default (auto-detects environment)
|
|
38
|
+
* Default: `timer_default` (auto-detects environment)
|
|
39
39
|
*/
|
|
40
40
|
timer?: Timer;
|
|
41
41
|
/**
|
|
@@ -47,9 +47,9 @@ export interface BenchmarkConfig {
|
|
|
47
47
|
* after the timing capture. However, frequent GC calls will slow overall benchmark
|
|
48
48
|
* execution time.
|
|
49
49
|
*
|
|
50
|
-
* @param task_name -
|
|
51
|
-
* @param iteration -
|
|
52
|
-
* @param abort -
|
|
50
|
+
* @param task_name - name of the current task being benchmarked
|
|
51
|
+
* @param iteration - current iteration number (1-indexed)
|
|
52
|
+
* @param abort - call to stop the benchmark early for this task
|
|
53
53
|
*
|
|
54
54
|
* @example
|
|
55
55
|
* ```ts
|
|
@@ -82,9 +82,9 @@ export interface BenchmarkConfig {
|
|
|
82
82
|
* Callback invoked after each task completes.
|
|
83
83
|
* Useful for logging progress during long benchmark runs.
|
|
84
84
|
*
|
|
85
|
-
* @param result -
|
|
86
|
-
* @param index -
|
|
87
|
-
* @param total -
|
|
85
|
+
* @param result - the completed benchmark result
|
|
86
|
+
* @param index - zero-based index of the completed task
|
|
87
|
+
* @param total - total number of tasks to run
|
|
88
88
|
*
|
|
89
89
|
* @example
|
|
90
90
|
* ```ts
|
package/dist/bytes.d.ts
CHANGED
|
@@ -7,15 +7,15 @@
|
|
|
7
7
|
* Converts string or binary data to a `Uint8Array`.
|
|
8
8
|
* Strings are UTF-8 encoded. `Uint8Array` inputs are returned as-is.
|
|
9
9
|
*
|
|
10
|
-
* @param data -
|
|
11
|
-
* @returns `Uint8Array` view of the data
|
|
10
|
+
* @param data - string or `BufferSource` to convert
|
|
11
|
+
* @returns `Uint8Array` view of the data
|
|
12
12
|
*/
|
|
13
13
|
export declare const to_bytes: (data: BufferSource | string) => Uint8Array;
|
|
14
14
|
/**
|
|
15
15
|
* Formats a byte count as a human-readable string.
|
|
16
16
|
*
|
|
17
|
-
* @param n - byte count
|
|
18
|
-
* @returns formatted string like `'1.2 KB'` or `'3.4 MB'
|
|
17
|
+
* @param n - byte count
|
|
18
|
+
* @returns formatted string like `'1.2 KB'` or `'3.4 MB'`
|
|
19
19
|
*/
|
|
20
20
|
export declare const format_bytes: (n: number) => string;
|
|
21
21
|
//# sourceMappingURL=bytes.d.ts.map
|
package/dist/bytes.js
CHANGED
|
@@ -8,8 +8,8 @@ const encoder = new TextEncoder();
|
|
|
8
8
|
* Converts string or binary data to a `Uint8Array`.
|
|
9
9
|
* Strings are UTF-8 encoded. `Uint8Array` inputs are returned as-is.
|
|
10
10
|
*
|
|
11
|
-
* @param data -
|
|
12
|
-
* @returns `Uint8Array` view of the data
|
|
11
|
+
* @param data - string or `BufferSource` to convert
|
|
12
|
+
* @returns `Uint8Array` view of the data
|
|
13
13
|
*/
|
|
14
14
|
export const to_bytes = (data) => {
|
|
15
15
|
if (typeof data === 'string')
|
|
@@ -23,8 +23,8 @@ export const to_bytes = (data) => {
|
|
|
23
23
|
/**
|
|
24
24
|
* Formats a byte count as a human-readable string.
|
|
25
25
|
*
|
|
26
|
-
* @param n - byte count
|
|
27
|
-
* @returns formatted string like `'1.2 KB'` or `'3.4 MB'
|
|
26
|
+
* @param n - byte count
|
|
27
|
+
* @returns formatted string like `'1.2 KB'` or `'3.4 MB'`
|
|
28
28
|
*/
|
|
29
29
|
export const format_bytes = (n) => {
|
|
30
30
|
if (n < 1024)
|
package/dist/dag.d.ts
CHANGED
|
@@ -73,8 +73,8 @@ export interface DagResult {
|
|
|
73
73
|
* eligible to start. Failure cascading and stop-on-failure are handled
|
|
74
74
|
* per the options.
|
|
75
75
|
*
|
|
76
|
-
* @param options - DAG execution options
|
|
77
|
-
* @returns
|
|
76
|
+
* @param options - DAG execution options
|
|
77
|
+
* @returns aggregated result with per-node details
|
|
78
78
|
*/
|
|
79
79
|
export declare const run_dag: <T extends DagNode>(options: DagOptions<T>) => Promise<DagResult>;
|
|
80
80
|
//# sourceMappingURL=dag.d.ts.map
|
package/dist/dag.js
CHANGED
|
@@ -18,8 +18,8 @@ import { topological_sort } from './sort.js';
|
|
|
18
18
|
* eligible to start. Failure cascading and stop-on-failure are handled
|
|
19
19
|
* per the options.
|
|
20
20
|
*
|
|
21
|
-
* @param options - DAG execution options
|
|
22
|
-
* @returns
|
|
21
|
+
* @param options - DAG execution options
|
|
22
|
+
* @returns aggregated result with per-node details
|
|
23
23
|
*/
|
|
24
24
|
export const run_dag = async (options) => {
|
|
25
25
|
const { nodes, execute, on_error, on_skip, should_skip, max_concurrency = Infinity, stop_on_failure = true, skip_validation = false, } = options;
|
package/dist/deep_equal.d.ts
CHANGED
|
@@ -10,8 +10,8 @@
|
|
|
10
10
|
* - Promises always return false (cannot be meaningfully compared)
|
|
11
11
|
* - Maps/Sets compare by reference for object keys/values
|
|
12
12
|
*
|
|
13
|
-
* @param a first value to compare
|
|
14
|
-
* @param b second value to compare
|
|
13
|
+
* @param a - first value to compare
|
|
14
|
+
* @param b - second value to compare
|
|
15
15
|
* @returns true if deeply equal, false otherwise
|
|
16
16
|
*/
|
|
17
17
|
export declare const deep_equal: (a: unknown, b: unknown) => boolean;
|
package/dist/deep_equal.js
CHANGED
|
@@ -10,8 +10,8 @@
|
|
|
10
10
|
* - Promises always return false (cannot be meaningfully compared)
|
|
11
11
|
* - Maps/Sets compare by reference for object keys/values
|
|
12
12
|
*
|
|
13
|
-
* @param a first value to compare
|
|
14
|
-
* @param b second value to compare
|
|
13
|
+
* @param a - first value to compare
|
|
14
|
+
* @param b - second value to compare
|
|
15
15
|
* @returns true if deeply equal, false otherwise
|
|
16
16
|
*/
|
|
17
17
|
export const deep_equal = (a, b) => {
|