@fuzdev/fuz_util 0.42.0 → 0.44.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +19 -12
- package/dist/async.d.ts +2 -2
- package/dist/async.d.ts.map +1 -1
- package/dist/async.js +2 -2
- package/dist/benchmark.d.ts +179 -0
- package/dist/benchmark.d.ts.map +1 -0
- package/dist/benchmark.js +400 -0
- package/dist/benchmark_baseline.d.ts +195 -0
- package/dist/benchmark_baseline.d.ts.map +1 -0
- package/dist/benchmark_baseline.js +388 -0
- package/dist/benchmark_format.d.ts +87 -0
- package/dist/benchmark_format.d.ts.map +1 -0
- package/dist/benchmark_format.js +266 -0
- package/dist/benchmark_stats.d.ts +112 -0
- package/dist/benchmark_stats.d.ts.map +1 -0
- package/dist/benchmark_stats.js +219 -0
- package/dist/benchmark_types.d.ts +174 -0
- package/dist/benchmark_types.d.ts.map +1 -0
- package/dist/benchmark_types.js +1 -0
- package/dist/git.d.ts +12 -0
- package/dist/git.d.ts.map +1 -1
- package/dist/git.js +14 -0
- package/dist/library_json.d.ts +3 -3
- package/dist/library_json.d.ts.map +1 -1
- package/dist/library_json.js +1 -1
- package/dist/maths.d.ts +4 -0
- package/dist/maths.d.ts.map +1 -1
- package/dist/maths.js +8 -0
- package/dist/object.js +1 -1
- package/dist/source_json.d.ts +4 -4
- package/dist/stats.d.ts +180 -0
- package/dist/stats.d.ts.map +1 -0
- package/dist/stats.js +402 -0
- package/dist/string.d.ts +13 -0
- package/dist/string.d.ts.map +1 -1
- package/dist/string.js +58 -0
- package/dist/time.d.ts +165 -0
- package/dist/time.d.ts.map +1 -0
- package/dist/time.js +264 -0
- package/dist/timings.d.ts +1 -7
- package/dist/timings.d.ts.map +1 -1
- package/dist/timings.js +16 -16
- package/package.json +21 -19
- package/src/lib/async.ts +3 -3
- package/src/lib/benchmark.ts +498 -0
- package/src/lib/benchmark_baseline.ts +538 -0
- package/src/lib/benchmark_format.ts +314 -0
- package/src/lib/benchmark_stats.ts +311 -0
- package/src/lib/benchmark_types.ts +197 -0
- package/src/lib/git.ts +24 -0
- package/src/lib/library_json.ts +3 -3
- package/src/lib/maths.ts +8 -0
- package/src/lib/object.ts +1 -1
- package/src/lib/stats.ts +534 -0
- package/src/lib/string.ts +66 -0
- package/src/lib/time.ts +319 -0
- package/src/lib/timings.ts +17 -17
- package/src/lib/types.ts +2 -2
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import { time_unit_detect_best, time_format, TIME_UNIT_DISPLAY } from './time.js';
|
|
2
|
+
import { string_display_width, pad_width } from './string.js';
|
|
3
|
+
import { format_number } from './maths.js';
|
|
4
|
+
/**
 * Render benchmark results as a box-drawing table.
 * Columns: task name, ops/sec, median, p75/p90/p95/p99, min, max, and "vs Best".
 * One time unit, detected from the mean timings, is shared by every timing column
 * and shown in the column headers rather than in the cells.
 * "vs Best" is the highest ops/sec divided by the row's ops/sec (`baseline` when that is exactly 1).
 * @param results - Array of benchmark results
 * @returns The table as a newline-joined string, or `'(no results)'` for an empty array
 *
 * @example
 * ```ts
 * console.log(benchmark_format_table(results));
 * // (illustrative, middle columns elided)
 * // ┌────────────┬───────────┬─────────────┬ ... ┬──────────┐
 * // │ Task Name  │ ops/sec   │ median (μs) │ ... │ vs Best  │
 * // ├────────────┼───────────┼─────────────┼ ... ┼──────────┤
 * // │ slugify v2 │ 1,237,144 │        0.81 │ ... │ baseline │
 * // │ slugify    │   261,619 │        3.82 │ ... │    4.73x │
 * // └────────────┴───────────┴─────────────┴ ... ┴──────────┘
 * ```
 */
export const benchmark_format_table = (results) => {
    if (results.length === 0) {
        return '(no results)';
    }
    // A single unit for the whole table keeps the timing columns directly comparable
    const unit = time_unit_detect_best(results.map((result) => result.stats.mean_ns));
    const unit_str = TIME_UNIT_DISPLAY[unit];
    // Highest throughput is the reference point for the "vs Best" column
    const fastest_ops = Math.max(...results.map((result) => result.stats.ops_per_second));
    // Formats nanoseconds in the shared unit, then strips the unit suffix (it is in the header)
    const to_cell = (ns) => time_format(ns, unit, 2).replace(unit_str, '').trim();
    const timing_columns = ['median', 'p75', 'p90', 'p95', 'p99', 'min', 'max'];
    const header_row = [
        'Task Name',
        'ops/sec',
        ...timing_columns.map((label) => `${label} (${unit_str})`),
        'vs Best',
    ];
    const data_rows = results.map(({ name, stats }) => {
        const ops_sec = benchmark_format_number(stats.ops_per_second, 2);
        const timings = timing_columns.map((label) => to_cell(stats[`${label}_ns`]));
        const ratio = fastest_ops / stats.ops_per_second;
        const vs_best = ratio === 1.0 ? 'baseline' : `${ratio.toFixed(2)}x`;
        return [name, ops_sec, ...timings, vs_best];
    });
    // Column widths are measured in display width so wide characters (e.g. emoji) line up
    const all_rows = [header_row, ...data_rows];
    const widths = header_row.map((_, col_i) => Math.max(...all_rows.map((row) => string_display_width(row[col_i]))));
    // Horizontal rule with the given corner/junction characters; each column gets 2 cells of padding
    const rule = (left, joint, right) => `${left}${widths.map((w) => '─'.repeat(w + 2)).join(joint)}${right}`;
    const framed = (cells) => `│${cells.join('│')}│`;
    const lines = [
        rule('┌', '┬', '┐'),
        framed(header_row.map((cell, i) => ` ${pad_width(cell, widths[i])} `)),
        rule('├', '┼', '┤'),
        // Task name is left-aligned, every other column is right-aligned
        ...data_rows.map((row) => framed(row.map((cell, col_i) => ` ${pad_width(cell, widths[col_i], col_i === 0 ? 'left' : 'right')} `))),
        rule('└', '┴', '┘'),
    ];
    return lines.join('\n');
};
|
|
90
|
+
/**
 * Format results as a Markdown table with key metrics.
 * All times use the same unit (detected from the mean timings) for easy comparison.
 * Column widths and padding use display width, matching `benchmark_format_table`,
 * so task names with wide characters stay aligned in the raw text.
 * A `|` inside a task name is escaped as `\|` so it cannot split the cell.
 * @param results - Array of benchmark results
 * @returns Formatted markdown table string, or `'(no results)'` for an empty array
 *
 * @example
 * ```ts
 * console.log(benchmark_format_markdown(results));
 * // | Task Name | ops/sec | median (μs) | p75 (μs) | p90 (μs) | p95 (μs) | p99 (μs) | min (μs) | max (μs) | vs Best |
 * // |------------|------------|-------------|----------|----------|----------|----------|----------|----------|----------|
 * // | slugify v2 | 1,237,144 | 0.81 | 0.85 | 0.89 | 0.95 | 1.20 | 0.72 | 2.45 | baseline |
 * // | slugify | 261,619 | 3.82 | 3.95 | 4.12 | 4.35 | 5.10 | 3.21 | 12.45 | 4.73x |
 * ```
 */
export const benchmark_format_markdown = (results) => {
    if (results.length === 0)
        return '(no results)';
    // Detect best unit for all results
    const mean_times = results.map((r) => r.stats.mean_ns);
    const unit = time_unit_detect_best(mean_times);
    const unit_str = TIME_UNIT_DISPLAY[unit];
    // Find fastest for relative comparison
    const fastest_ops = Math.max(...results.map((r) => r.stats.ops_per_second));
    // Timing cell: value in the shared unit, with the unit suffix removed (it is in the header)
    const format_time = (ns) => time_format(ns, unit, 2).replace(unit_str, '').trim();
    // An unescaped pipe in cell content would be read as a column delimiter
    const escape_cell = (text) => text.replace(/\|/g, '\\|');
    const rows = [];
    // Header with unit
    rows.push([
        'Task Name',
        'ops/sec',
        `median (${unit_str})`,
        `p75 (${unit_str})`,
        `p90 (${unit_str})`,
        `p95 (${unit_str})`,
        `p99 (${unit_str})`,
        `min (${unit_str})`,
        `max (${unit_str})`,
        'vs Best',
    ]);
    // Data rows - all use same unit
    for (const r of results) {
        const ops_sec = benchmark_format_number(r.stats.ops_per_second, 2);
        const median = format_time(r.stats.median_ns);
        const p75 = format_time(r.stats.p75_ns);
        const p90 = format_time(r.stats.p90_ns);
        const p95 = format_time(r.stats.p95_ns);
        const p99 = format_time(r.stats.p99_ns);
        const min = format_time(r.stats.min_ns);
        const max = format_time(r.stats.max_ns);
        // Relative performance: how many times slower than the fastest result
        const ratio = fastest_ops / r.stats.ops_per_second;
        const vs_best = ratio === 1.0 ? 'baseline' : `${ratio.toFixed(2)}x`;
        rows.push([escape_cell(r.name), ops_sec, median, p75, p90, p95, p99, min, max, vs_best]);
    }
    // Calculate column widths (display width, not UTF-16 length, so wide characters align)
    const widths = rows[0].map((_, col_i) => {
        return Math.max(...rows.map((row) => string_display_width(row[col_i])));
    });
    // Build table
    const lines = [];
    // Header
    const header = rows[0].map((cell, i) => pad_width(cell, widths[i], 'left')).join(' | ');
    lines.push('| ' + header + ' |');
    // Separator
    const separator = widths.map((w) => '-'.repeat(w)).join(' | ');
    lines.push('| ' + separator + ' |');
    // Data rows: names left-aligned, numbers right-aligned
    for (let i = 1; i < rows.length; i++) {
        const row = rows[i]
            .map((cell, col_i) => pad_width(cell, widths[col_i], col_i === 0 ? 'left' : 'right'))
            .join(' | ');
        lines.push('| ' + row + ' |');
    }
    return lines.join('\n');
};
|
|
171
|
+
/**
 * Serialize benchmark results to a JSON array string.
 * Each result is flattened: the fields of `stats` are lifted next to `name`,
 * `iterations`, and `total_time_ms`, and the outlier list is reduced to its count (`outliers`).
 * @param results - Array of benchmark results
 * @param options - Formatting options: `pretty` (default `true`) indents with 2 spaces;
 * `include_timings` (default `false`) adds each result's `timings_ns`
 * @returns JSON string
 *
 * @example
 * ```ts
 * console.log(benchmark_format_json(results));
 * console.log(benchmark_format_json(results, {pretty: false}));
 * console.log(benchmark_format_json(results, {include_timings: true}));
 * ```
 */
export const benchmark_format_json = (results, options) => {
    // `undefined` indent makes JSON.stringify emit the compact form
    const indent = (options?.pretty ?? true) ? 2 : undefined;
    const with_timings = options?.include_timings ?? false;
    const flattened = results.map((result) => {
        const { stats } = result;
        const entry = {
            name: result.name,
            iterations: result.iterations,
            total_time_ms: result.total_time_ms,
            ops_per_second: stats.ops_per_second,
            mean_ns: stats.mean_ns,
            median_ns: stats.median_ns,
            std_dev_ns: stats.std_dev_ns,
            min_ns: stats.min_ns,
            max_ns: stats.max_ns,
            p75_ns: stats.p75_ns,
            p90_ns: stats.p90_ns,
            p95_ns: stats.p95_ns,
            p99_ns: stats.p99_ns,
            cv: stats.cv,
            confidence_interval_ns: stats.confidence_interval_ns,
            // Only the number of outliers is reported, not the values themselves
            outliers: stats.outliers_ns.length,
            outlier_ratio: stats.outlier_ratio,
            sample_size: stats.sample_size,
            raw_sample_size: stats.raw_sample_size,
            failed_iterations: stats.failed_iterations,
        };
        // Raw timings go last and only on request
        if (with_timings) {
            entry.timings_ns = result.timings_ns;
        }
        return entry;
    });
    return JSON.stringify(flattened, null, indent);
};
|
|
213
|
+
/**
 * Format results as one table per group, each preceded by a `📦 <group name>` heading
 * (plus the group's description on the next line, when it has one).
 * Groups with no matching results are skipped. A result matching several groups appears
 * in each of them. Results matching no group are collected under a final `📦 Other` section.
 * @param results - Array of benchmark results
 * @param groups - Array of group definitions (`name`, `filter`, optional `description`)
 * @returns Formatted table string with group headings, or `'(no results)'` for an empty array
 *
 * @example
 * ```ts
 * const groups = [
 * { name: 'FAST PATHS', filter: (r) => r.name.includes('fast') },
 * { name: 'SLOW PATHS', filter: (r) => r.name.includes('slow') },
 * ];
 * console.log(benchmark_format_table_grouped(results, groups));
 * //
 * // 📦 FAST PATHS
 * // ┌─────────────┬───────────┬...┐
 * // │ Task Name   │ ops/sec   │...│
 * // ├─────────────┼───────────┼...┤
 * // │ fast test 1 │ 1,237,144 │...│
 * // │ fast test 2 │   261,619 │...│
 * // └─────────────┴───────────┴...┘
 * //
 * // 📦 SLOW PATHS
 * // ┌─────────────┬───────────┬...┐
 * // │ Task Name   │ ops/sec   │...│
 * // ├─────────────┼───────────┼...┤
 * // │ slow test 1 │    10,123 │...│
 * // └─────────────┴───────────┴...┘
 * ```
 */
export const benchmark_format_table_grouped = (results, groups) => {
    if (results.length === 0)
        return '(no results)';
    const sections = [];
    // Track membership by object identity, not by name: two results may share a name,
    // and one that matches no group must still end up in "Other".
    const grouped = new Set();
    for (const group of groups) {
        const group_results = results.filter(group.filter);
        if (group_results.length === 0)
            continue;
        for (const r of group_results) {
            grouped.add(r);
        }
        // Add group header and table
        const header = group.description
            ? `\n📦 ${group.name}\n ${group.description}`
            : `\n📦 ${group.name}`;
        sections.push(header);
        sections.push(benchmark_format_table(group_results));
    }
    // Handle ungrouped results (those that don't match any group)
    const ungrouped = results.filter((r) => !grouped.has(r));
    if (ungrouped.length > 0) {
        sections.push('\n📦 Other');
        sections.push(benchmark_format_table(ungrouped));
    }
    return sections.join('\n');
};
|
|
262
|
+
/**
|
|
263
|
+
* Format a number with fixed decimal places and thousands separators.
|
|
264
|
+
* @see {@link format_number} in maths.ts for the underlying implementation.
|
|
265
|
+
*/
|
|
266
|
+
// Plain alias: this binds the imported `format_number` itself (no wrapper function),
// so both names refer to the same function object.
export const benchmark_format_number = format_number;
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark-specific statistical analysis.
|
|
3
|
+
* Uses the general stats utilities from stats.ts for timing/performance analysis.
|
|
4
|
+
* All timing values are in nanoseconds.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Minimal stats interface for comparison.
 * This allows comparing stats from different sources (e.g., loaded baselines).
 * These four fields are everything `benchmark_stats_compare` requires of its inputs.
 */
export interface BenchmarkStatsComparable {
    /** Mean time in nanoseconds */
    mean_ns: number;
    /** Standard deviation in nanoseconds */
    std_dev_ns: number;
    /** Number of samples behind the mean and standard deviation */
    sample_size: number;
    /** Confidence interval for the mean in nanoseconds, as `[lower, upper]` */
    confidence_interval_ns: [number, number];
}
|
|
16
|
+
/**
 * Effect size magnitude interpretation (Cohen's d).
 * As assigned by `benchmark_stats_compare`: d < 0.2 is 'negligible', d < 0.5 is 'small',
 * d < 0.8 is 'medium', anything larger is 'large'.
 */
export type EffectMagnitude = 'negligible' | 'small' | 'medium' | 'large';
|
|
20
|
+
/**
 * Result from comparing two benchmark stats.
 */
export interface BenchmarkComparison {
    /**
     * Which benchmark is faster, judged by lower mean time: 'a', 'b', or 'equal'.
     * 'equal' is reported when the means are identical or the effect magnitude is 'negligible'.
     */
    faster: 'a' | 'b' | 'equal';
    /**
     * How much faster the winner is (e.g., 1.5 means 1.5x faster):
     * the slower mean time divided by the faster mean time.
     */
    speedup_ratio: number;
    /** Whether the difference is statistically significant at the given alpha (`p_value < alpha`) */
    significant: boolean;
    /** Two-tailed p-value from Welch's t-test (lower = more confident the difference is real) */
    p_value: number;
    /** Cohen's d effect size (magnitude of difference independent of sample size) */
    effect_size: number;
    /** Interpretation of effect size */
    effect_magnitude: EffectMagnitude;
    /** Whether the two inputs' confidence intervals overlap (95% intervals when both are `BenchmarkStats`) */
    ci_overlap: boolean;
    /** Human-readable interpretation of the comparison */
    recommendation: string;
}
|
|
41
|
+
/**
 * Options for benchmark comparison.
 */
export interface BenchmarkCompareOptions {
    /**
     * Significance level for hypothesis testing (default: 0.05).
     * A difference counts as significant when the p-value is strictly below this value.
     */
    alpha?: number;
}
|
|
48
|
+
/**
 * Complete statistical analysis of timing measurements.
 * Includes outlier detection, descriptive statistics, and performance metrics.
 * All timing values are in nanoseconds.
 *
 * Invalid timings are dropped first, then outliers are removed; the descriptive
 * statistics are computed on what remains. When no timing is valid, the timing
 * statistics are `NaN` and `sample_size` / `ops_per_second` are 0.
 */
export declare class BenchmarkStats {
    /** Mean (average) time in nanoseconds */
    readonly mean_ns: number;
    /** Median time in nanoseconds */
    readonly median_ns: number;
    /** Standard deviation in nanoseconds */
    readonly std_dev_ns: number;
    /** Minimum time in nanoseconds */
    readonly min_ns: number;
    /** Maximum time in nanoseconds */
    readonly max_ns: number;
    /** 75th percentile in nanoseconds */
    readonly p75_ns: number;
    /** 90th percentile in nanoseconds */
    readonly p90_ns: number;
    /** 95th percentile in nanoseconds */
    readonly p95_ns: number;
    /** 99th percentile in nanoseconds */
    readonly p99_ns: number;
    /** Coefficient of variation (std_dev / mean) */
    readonly cv: number;
    /** 95% confidence interval for the mean in nanoseconds */
    readonly confidence_interval_ns: [number, number];
    /** Array of detected outlier values in nanoseconds */
    readonly outliers_ns: Array<number>;
    /** Ratio of outliers to valid samples (the timings left after invalid values are dropped) */
    readonly outlier_ratio: number;
    /** Number of samples after outlier removal */
    readonly sample_size: number;
    /** Original number of samples (before invalid values and outliers are removed) */
    readonly raw_sample_size: number;
    /** Operations per second (TIME_NS_PER_SEC / mean_ns), or 0 when the mean is not positive */
    readonly ops_per_second: number;
    /** Number of failed iterations (timings that are NaN, infinite, zero, or negative) */
    readonly failed_iterations: number;
    /**
     * @param timings_ns - Raw per-iteration timings in nanoseconds
     */
    constructor(timings_ns: Array<number>);
    /**
     * Format stats as a human-readable string.
     */
    toString(): string;
}
|
|
94
|
+
/**
 * Compare two benchmark results for statistical significance.
 * Uses Welch's t-test (handles unequal variances) and Cohen's d effect size.
 * If either input has a `sample_size` of 0, a neutral result is returned
 * (`faster: 'equal'`, `significant: false`, "Insufficient data for comparison").
 *
 * @param a - First benchmark stats (or any object with required properties)
 * @param b - Second benchmark stats (or any object with required properties)
 * @param options - Comparison options
 * @returns Comparison result with significance, effect size, and recommendation
 *
 * @example
 * ```ts
 * const comparison = benchmark_stats_compare(result_a.stats, result_b.stats);
 * if (comparison.significant) {
 * console.log(`${comparison.faster} is ${comparison.speedup_ratio.toFixed(2)}x faster`);
 * }
 * ```
 */
export declare const benchmark_stats_compare: (a: BenchmarkStatsComparable, b: BenchmarkStatsComparable, options?: BenchmarkCompareOptions) => BenchmarkComparison;
|
|
112
|
+
//# sourceMappingURL=benchmark_stats.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark_stats.d.ts","sourceRoot":"../src/lib/","sources":["../src/lib/benchmark_stats.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAgBH;;;GAGG;AACH,MAAM,WAAW,wBAAwB;IACxC,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,OAAO,CAAC;AAE1E;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC,mFAAmF;IACnF,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,OAAO,CAAC;IAC5B,kEAAkE;IAClE,aAAa,EAAE,MAAM,CAAC;IACtB,6EAA6E;IAC7E,WAAW,EAAE,OAAO,CAAC;IACrB,kFAAkF;IAClF,OAAO,EAAE,MAAM,CAAC;IAChB,iFAAiF;IACjF,WAAW,EAAE,MAAM,CAAC;IACpB,oCAAoC;IACpC,gBAAgB,EAAE,eAAe,CAAC;IAClC,mDAAmD;IACnD,UAAU,EAAE,OAAO,CAAC;IACpB,sDAAsD;IACtD,cAAc,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,gEAAgE;IAChE,KAAK,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;;;GAIG;AACH,qBAAa,cAAc;IAC1B,yCAAyC;IACzC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,iCAAiC;IACjC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wCAAwC;IACxC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,kCAAkC;IAClC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,kCAAkC;IAClC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,qCAAqC;IACrC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,gDAAgD;IAChD,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,0DAA0D;IAC1D,QAAQ,CAAC,sBAAsB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClD,sDAAsD;IACtD,QAAQ,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACpC,yCAAyC;IACzC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,8CAA8C;IAC9C,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,0DAA0D;IAC1D,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,mDAAmD;IACnD,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,sEAAsE;IACtE,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;gBAEvB,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC;IAiErC;;OAEG;IACH,QAAQ,IAAI,MAAM;CAGlB;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,uBAAuB,GACnC,GAAG,wBAAwB,EAC3B,GAAG,wBAAwB,EAC3B,UAAU,uBAAuB,KAC/B,mBA6GF,CAAC"}
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark-specific statistical analysis.
|
|
3
|
+
* Uses the general stats utilities from stats.ts for timing/performance analysis.
|
|
4
|
+
* All timing values are in nanoseconds.
|
|
5
|
+
*/
|
|
6
|
+
import { TIME_NS_PER_SEC, time_format_adaptive } from './time.js';
|
|
7
|
+
import { stats_mean, stats_median, stats_std_dev, stats_percentile, stats_cv, stats_min_max, stats_confidence_interval, stats_outliers_mad, stats_welch_t_test, stats_t_distribution_p_value, } from './stats.js';
|
|
8
|
+
/**
 * Statistical summary of a set of timing measurements, all in nanoseconds.
 * Construction drops invalid timings, removes outliers, and computes descriptive
 * statistics and throughput on the remaining samples.
 */
export class BenchmarkStats {
    /** Mean (average) time in nanoseconds */
    mean_ns;
    /** Median time in nanoseconds */
    median_ns;
    /** Standard deviation in nanoseconds */
    std_dev_ns;
    /** Minimum time in nanoseconds */
    min_ns;
    /** Maximum time in nanoseconds */
    max_ns;
    /** 75th percentile in nanoseconds */
    p75_ns;
    /** 90th percentile in nanoseconds */
    p90_ns;
    /** 95th percentile in nanoseconds */
    p95_ns;
    /** 99th percentile in nanoseconds */
    p99_ns;
    /** Coefficient of variation (std_dev / mean) */
    cv;
    /** 95% confidence interval for the mean in nanoseconds */
    confidence_interval_ns;
    /** Array of detected outlier values in nanoseconds */
    outliers_ns;
    /** Ratio of outliers to valid samples (timings left after invalid values are dropped) */
    outlier_ratio;
    /** Number of samples after outlier removal */
    sample_size;
    /** Original number of samples (before invalid values and outliers are removed) */
    raw_sample_size;
    /** Operations per second (TIME_NS_PER_SEC / mean_ns), or 0 when the mean is not positive */
    ops_per_second;
    /** Number of failed iterations (timings that are NaN, infinite, zero, or negative) */
    failed_iterations;
    /**
     * @param timings_ns - Raw per-iteration timings in nanoseconds
     */
    constructor(timings_ns) {
        // Keep only finite timings greater than zero; count everything else as a failure.
        // (Global `isFinite` is false for NaN, so no separate NaN check is needed.)
        const valid_timings = [];
        let rejected = 0;
        for (const t of timings_ns) {
            if (isFinite(t) && t > 0) {
                valid_timings.push(t);
                continue;
            }
            rejected += 1;
        }
        this.failed_iterations = rejected;
        this.raw_sample_size = timings_ns.length;
        // Nothing usable: timing stats are NaN, counts and throughput are zero
        if (valid_timings.length === 0) {
            Object.assign(this, {
                mean_ns: NaN,
                median_ns: NaN,
                std_dev_ns: NaN,
                min_ns: NaN,
                max_ns: NaN,
                p75_ns: NaN,
                p90_ns: NaN,
                p95_ns: NaN,
                p99_ns: NaN,
                cv: NaN,
                confidence_interval_ns: [NaN, NaN],
                outliers_ns: [],
                outlier_ratio: 0,
                sample_size: 0,
                ops_per_second: 0,
            });
            return;
        }
        // Split off outliers; order-dependent stats get an ascending copy of the rest
        const { cleaned, outliers } = stats_outliers_mad(valid_timings);
        const ascending = [...cleaned].sort((x, y) => x - y);
        this.outliers_ns = outliers;
        this.outlier_ratio = outliers.length / valid_timings.length;
        this.sample_size = cleaned.length;
        // Descriptive statistics on the cleaned data
        const mean = stats_mean(cleaned);
        const median = stats_median(ascending);
        const std_dev = stats_std_dev(cleaned, mean);
        const extremes = stats_min_max(ascending);
        this.mean_ns = mean;
        this.median_ns = median;
        this.std_dev_ns = std_dev;
        this.min_ns = extremes.min;
        this.max_ns = extremes.max;
        [this.p75_ns, this.p90_ns, this.p95_ns, this.p99_ns] = [0.75, 0.9, 0.95, 0.99].map((p) => stats_percentile(ascending, p));
        this.cv = stats_cv(mean, std_dev);
        this.confidence_interval_ns = stats_confidence_interval(cleaned);
        // Throughput: how many mean-length operations fit in one second
        this.ops_per_second = mean > 0 ? TIME_NS_PER_SEC / mean : 0;
    }
    /**
     * Format stats as a human-readable string.
     */
    toString() {
        const mean = time_format_adaptive(this.mean_ns);
        const ops = this.ops_per_second.toFixed(2);
        const cv_percent = (this.cv * 100).toFixed(1);
        return `BenchmarkStats(mean=${mean}, ops/sec=${ops}, cv=${cv_percent}%, samples=${this.sample_size})`;
    }
}
|
|
110
|
+
/**
 * Compare two benchmark results for statistical significance.
 * Uses Welch's t-test (handles unequal variances) and Cohen's d effect size.
 *
 * Edge cases:
 * - Either side has `sample_size === 0`: returns a neutral "Insufficient data" result.
 * - Both standard deviations are 0: the t-test is skipped; the p-value is 1 for equal
 *   means and 0 otherwise.
 * - Both sides have a single sample: the pooled standard deviation has zero degrees of
 *   freedom, so the root-mean-square of the two standard deviations is used instead.
 *
 * @param a - First benchmark stats (or any object with required properties)
 * @param b - Second benchmark stats (or any object with required properties)
 * @param options - Comparison options
 * @returns Comparison result with significance, effect size, and recommendation
 *
 * @example
 * ```ts
 * const comparison = benchmark_stats_compare(result_a.stats, result_b.stats);
 * if (comparison.significant) {
 * console.log(`${comparison.faster} is ${comparison.speedup_ratio.toFixed(2)}x faster`);
 * }
 * ```
 */
export const benchmark_stats_compare = (a, b, options) => {
    const alpha = options?.alpha ?? 0.05;
    // Handle edge cases
    if (a.sample_size === 0 || b.sample_size === 0) {
        return {
            faster: 'equal',
            speedup_ratio: 1,
            significant: false,
            p_value: 1,
            effect_size: 0,
            effect_magnitude: 'negligible',
            ci_overlap: true,
            recommendation: 'Insufficient data for comparison',
        };
    }
    // Calculate speedup ratio (lower time = faster, so compare by time not ops/sec)
    const speedup_ratio = a.mean_ns < b.mean_ns ? b.mean_ns / a.mean_ns : a.mean_ns / b.mean_ns;
    const faster = a.mean_ns < b.mean_ns ? 'a' : a.mean_ns > b.mean_ns ? 'b' : 'equal';
    // Welch's t-test (handles unequal variances)
    // Special case: if both have zero variance, t-test is undefined
    let p_value;
    if (a.std_dev_ns === 0 && b.std_dev_ns === 0) {
        // When there's no variance, any difference is 100% reliable (p=0) or identical (p=1)
        p_value = a.mean_ns === b.mean_ns ? 1 : 0;
    }
    else {
        const { t_statistic, degrees_of_freedom } = stats_welch_t_test(a.mean_ns, a.std_dev_ns, a.sample_size, b.mean_ns, b.std_dev_ns, b.sample_size);
        // Calculate two-tailed p-value using t-distribution approximation
        p_value = stats_t_distribution_p_value(Math.abs(t_statistic), degrees_of_freedom);
    }
    // Cohen's d effect size.
    // The pooled variance divides by (n_a + n_b - 2), which is 0 when both sides have a
    // single sample. Dividing anyway yields NaN/Infinity, which then gets misclassified
    // below, so fall back to the plain average of the two variances in that case.
    const pooled_df = a.sample_size + b.sample_size - 2;
    const pooled_variance = pooled_df > 0
        ? ((a.sample_size - 1) * a.std_dev_ns ** 2 + (b.sample_size - 1) * b.std_dev_ns ** 2) / pooled_df
        : (a.std_dev_ns ** 2 + b.std_dev_ns ** 2) / 2;
    const pooled_std_dev = Math.sqrt(pooled_variance);
    // When pooled_std_dev is 0 but means differ, effect is maximal (infinite)
    // When means are equal, effect is 0
    let effect_size;
    let effect_magnitude;
    if (pooled_std_dev === 0) {
        // Zero variance case - if means differ, it's a definitive difference
        if (a.mean_ns === b.mean_ns) {
            effect_size = 0;
            effect_magnitude = 'negligible';
        }
        else {
            // Any difference is 100% reliable when there's no variance
            effect_size = Infinity;
            effect_magnitude = 'large';
        }
    }
    else {
        effect_size = Math.abs(a.mean_ns - b.mean_ns) / pooled_std_dev;
        // Interpret effect size (Cohen's conventions)
        effect_magnitude =
            effect_size < 0.2
                ? 'negligible'
                : effect_size < 0.5
                    ? 'small'
                    : effect_size < 0.8
                        ? 'medium'
                        : 'large';
    }
    // Check confidence interval overlap
    const ci_overlap = a.confidence_interval_ns[0] <= b.confidence_interval_ns[1] &&
        b.confidence_interval_ns[0] <= a.confidence_interval_ns[1];
    // Determine significance
    const significant = p_value < alpha;
    // Generate recommendation
    let recommendation;
    if (!significant) {
        recommendation =
            effect_magnitude === 'negligible'
                ? 'No meaningful difference detected'
                : `Difference not statistically significant (p=${p_value.toFixed(3)}), but effect size suggests ${effect_magnitude} practical difference`;
    }
    else if (effect_magnitude === 'negligible') {
        recommendation = `Statistically significant but negligible practical difference (${speedup_ratio.toFixed(2)}x)`;
    }
    else {
        recommendation = `${faster === 'a' ? 'First' : 'Second'} is ${speedup_ratio.toFixed(2)}x faster with ${effect_magnitude} effect size (p=${p_value.toFixed(3)})`;
    }
    // Adjust 'faster' to 'equal' if effect is negligible
    const adjusted_faster = effect_magnitude === 'negligible' ? 'equal' : faster;
    return {
        faster: adjusted_faster,
        speedup_ratio,
        significant,
        p_value,
        effect_size,
        effect_magnitude,
        ci_overlap,
        recommendation,
    };
};
|