benchforge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/README.md +432 -0
  2. package/bin/benchforge +3 -0
  3. package/dist/bin/benchforge.mjs +9 -0
  4. package/dist/bin/benchforge.mjs.map +1 -0
  5. package/dist/browser/index.js +914 -0
  6. package/dist/index.mjs +3 -0
  7. package/dist/src-CGuaC3Wo.mjs +3676 -0
  8. package/dist/src-CGuaC3Wo.mjs.map +1 -0
  9. package/package.json +49 -0
  10. package/src/BenchMatrix.ts +380 -0
  11. package/src/Benchmark.ts +33 -0
  12. package/src/BenchmarkReport.ts +156 -0
  13. package/src/GitUtils.ts +79 -0
  14. package/src/HtmlDataPrep.ts +148 -0
  15. package/src/MeasuredResults.ts +127 -0
  16. package/src/NodeGC.ts +48 -0
  17. package/src/PermutationTest.ts +115 -0
  18. package/src/StandardSections.ts +268 -0
  19. package/src/StatisticalUtils.ts +176 -0
  20. package/src/TypeUtil.ts +8 -0
  21. package/src/bin/benchforge.ts +4 -0
  22. package/src/browser/BrowserGcStats.ts +44 -0
  23. package/src/browser/BrowserHeapSampler.ts +248 -0
  24. package/src/cli/CliArgs.ts +64 -0
  25. package/src/cli/FilterBenchmarks.ts +68 -0
  26. package/src/cli/RunBenchCLI.ts +856 -0
  27. package/src/export/JsonExport.ts +103 -0
  28. package/src/export/JsonFormat.ts +91 -0
  29. package/src/export/PerfettoExport.ts +203 -0
  30. package/src/heap-sample/HeapSampleReport.ts +196 -0
  31. package/src/heap-sample/HeapSampler.ts +78 -0
  32. package/src/html/HtmlReport.ts +131 -0
  33. package/src/html/HtmlTemplate.ts +284 -0
  34. package/src/html/Types.ts +88 -0
  35. package/src/html/browser/CIPlot.ts +287 -0
  36. package/src/html/browser/HistogramKde.ts +118 -0
  37. package/src/html/browser/LegendUtils.ts +163 -0
  38. package/src/html/browser/RenderPlots.ts +263 -0
  39. package/src/html/browser/SampleTimeSeries.ts +389 -0
  40. package/src/html/browser/Types.ts +96 -0
  41. package/src/html/browser/index.ts +1 -0
  42. package/src/html/index.ts +17 -0
  43. package/src/index.ts +92 -0
  44. package/src/matrix/CaseLoader.ts +36 -0
  45. package/src/matrix/MatrixFilter.ts +103 -0
  46. package/src/matrix/MatrixReport.ts +290 -0
  47. package/src/matrix/VariantLoader.ts +46 -0
  48. package/src/runners/AdaptiveWrapper.ts +391 -0
  49. package/src/runners/BasicRunner.ts +368 -0
  50. package/src/runners/BenchRunner.ts +60 -0
  51. package/src/runners/CreateRunner.ts +11 -0
  52. package/src/runners/GcStats.ts +107 -0
  53. package/src/runners/RunnerOrchestrator.ts +374 -0
  54. package/src/runners/RunnerUtils.ts +2 -0
  55. package/src/runners/TimingUtils.ts +13 -0
  56. package/src/runners/WorkerScript.ts +256 -0
  57. package/src/table-util/ConvergenceFormatters.ts +19 -0
  58. package/src/table-util/Formatters.ts +152 -0
  59. package/src/table-util/README.md +70 -0
  60. package/src/table-util/TableReport.ts +293 -0
  61. package/src/table-util/test/TableReport.test.ts +105 -0
  62. package/src/table-util/test/TableValueExtractor.test.ts +41 -0
  63. package/src/table-util/test/TableValueExtractor.ts +100 -0
  64. package/src/test/AdaptiveRunner.test.ts +185 -0
  65. package/src/test/AdaptiveStatistics.integration.ts +119 -0
  66. package/src/test/BenchmarkReport.test.ts +82 -0
  67. package/src/test/BrowserBench.e2e.test.ts +44 -0
  68. package/src/test/BrowserBench.test.ts +79 -0
  69. package/src/test/GcStats.test.ts +94 -0
  70. package/src/test/PermutationTest.test.ts +121 -0
  71. package/src/test/RunBenchCLI.test.ts +166 -0
  72. package/src/test/RunnerOrchestrator.test.ts +102 -0
  73. package/src/test/StatisticalUtils.test.ts +112 -0
  74. package/src/test/TestUtils.ts +93 -0
  75. package/src/test/fixtures/test-bench-script.ts +30 -0
  76. package/src/tests/AdaptiveConvergence.test.ts +177 -0
  77. package/src/tests/AdaptiveSampling.test.ts +240 -0
  78. package/src/tests/BenchMatrix.test.ts +366 -0
  79. package/src/tests/MatrixFilter.test.ts +117 -0
  80. package/src/tests/MatrixReport.test.ts +139 -0
  81. package/src/tests/RealDataValidation.test.ts +177 -0
  82. package/src/tests/fixtures/baseline/impl.ts +4 -0
  83. package/src/tests/fixtures/bevy30-samples.ts +158 -0
  84. package/src/tests/fixtures/cases/asyncCases.ts +7 -0
  85. package/src/tests/fixtures/cases/cases.ts +8 -0
  86. package/src/tests/fixtures/cases/variants/product.ts +2 -0
  87. package/src/tests/fixtures/cases/variants/sum.ts +2 -0
  88. package/src/tests/fixtures/discover/fast.ts +1 -0
  89. package/src/tests/fixtures/discover/slow.ts +4 -0
  90. package/src/tests/fixtures/invalid/bad.ts +1 -0
  91. package/src/tests/fixtures/loader/fast.ts +1 -0
  92. package/src/tests/fixtures/loader/slow.ts +4 -0
  93. package/src/tests/fixtures/loader/stateful.ts +2 -0
  94. package/src/tests/fixtures/stateful/stateful.ts +2 -0
  95. package/src/tests/fixtures/variants/extra.ts +1 -0
  96. package/src/tests/fixtures/variants/impl.ts +1 -0
  97. package/src/tests/fixtures/worker/fast.ts +1 -0
  98. package/src/tests/fixtures/worker/slow.ts +4 -0
@@ -0,0 +1,391 @@
1
+ import type { BenchmarkSpec } from "../Benchmark.ts";
2
+ import type { MeasuredResults } from "../MeasuredResults.ts";
3
+ import {
4
+ coefficientOfVariation,
5
+ medianAbsoluteDeviation,
6
+ percentile,
7
+ } from "../StatisticalUtils.ts";
8
+ import type { BenchRunner, RunnerOptions } from "./BenchRunner.ts";
9
+ import { msToNs } from "./RunnerUtils.ts";
10
+
11
/** Default minimum elapsed time (ms) before the fallback stop rule may fire. */
const minTime = 1000;
/** Default time budget (ms) for the adaptive sampling loop. */
const maxTime = 10000;
/** Default confidence (0-100) required to stop sampling. */
const targetConfidence = 95;
/** Lower bound on the confidence accepted once `minTime` has elapsed. */
const fallbackThreshold = 80;
/** Window size used while fewer than 20 samples are available. */
const windowSize = 50;
/** Maximum drift between windows that still counts as stable (5%; 2% proved too strict for real benchmarks). */
const stability = 0.05;
/** `maxTime` handed to the base runner for the initial batch. */
const initialBatch = 100;
/** `maxTime` handed to the base runner for each continuation batch. */
const continueBatch = 100;
/** `maxIterations` handed to the base runner for each continuation batch. */
const continueIterations = 10;
20
+
21
/** Drift measurements between the two most recent sample windows. */
interface Metrics {
  /** Relative change of the median between windows. */
  medianDrift: number;
  /** Absolute change of the outlier impact ratio between windows. */
  impactDrift: number;
  /** True when `medianDrift` is below the stability threshold. */
  medianStable: boolean;
  /** True when `impactDrift` is below the stability threshold. */
  impactStable: boolean;
}
27
+
28
/** Outcome of a convergence check over the collected samples. */
type ConvergenceResult = {
  /** True once both stability metrics are within the threshold. */
  converged: boolean;
  /** Confidence score on a 0-100 scale. */
  confidence: number;
  /** Human-readable explanation of the current state. */
  reason: string;
};
33
+
34
/** Runner options extended with the knobs understood by the adaptive wrapper. */
export interface AdaptiveOptions extends RunnerOptions {
  /** Adaptive-mode flag; not read in this module (presumably consumed by the caller that decides whether to wrap). */
  adaptive?: boolean;
  /** Minimum elapsed time before the fallback stop rule applies. */
  minTime?: number;
  /** Time budget for the adaptive sampling loop. */
  maxTime?: number;
  /** Confidence threshold (0-100) required to stop sampling. */
  targetConfidence?: number;
  /** Confidence threshold (0-100) */
  convergence?: number;
}
41
+
42
/**
 * Wrap a runner so that every `runBench` call uses adaptive sampling.
 *
 * @param baseRunner runner that performs the actual measurement batches
 * @param options wrapper-level adaptive settings applied to every run
 * @return a runner whose `runBench` delegates to the adaptive strategy
 */
export function createAdaptiveWrapper(
  baseRunner: BenchRunner,
  options: AdaptiveOptions,
): BenchRunner {
  const adaptiveRunner: BenchRunner = {
    async runBench<T = unknown>(
      benchmark: BenchmarkSpec<T>,
      runnerOptions: RunnerOptions,
      params?: T,
    ): Promise<MeasuredResults[]> {
      const measured = runAdaptiveBench(
        baseRunner,
        benchmark,
        runnerOptions,
        options,
        params,
      );
      return measured;
    },
  };
  return adaptiveRunner;
}
63
+
64
/**
 * Run one benchmark with adaptive sampling: an initial batch (which includes
 * warmup), then continuation batches until the samples converge or the time
 * budget is exhausted.
 *
 * Limits are resolved per run first, then from the wrapper options, then from
 * the module defaults. The confidence threshold may be supplied under either
 * `targetConfidence` or `convergence` on either options object; the lookup
 * order is `runnerOptions.targetConfidence`, `options.convergence`,
 * `options.targetConfidence`, `runnerOptions.convergence`, default.
 *
 * @param baseRunner runner that performs the measurement batches
 * @param benchmark benchmark to measure
 * @param runnerOptions per-run options (may carry adaptive overrides)
 * @param options wrapper-level adaptive options
 * @param params optional benchmark parameters, forwarded to the base runner
 * @return a single-element array with the aggregated results
 */
async function runAdaptiveBench<T>(
  baseRunner: BenchRunner,
  benchmark: BenchmarkSpec<T>,
  runnerOptions: RunnerOptions,
  options: AdaptiveOptions,
  params?: T,
): Promise<MeasuredResults[]> {
  const perRun = runnerOptions as AdaptiveOptions;
  const min = perRun.minTime ?? options.minTime ?? minTime;
  const max = perRun.maxTime ?? options.maxTime ?? maxTime;
  // Previously honoured fields come first so existing callers see no change;
  // the two aliases that used to be silently ignored are consulted after them.
  const target =
    perRun.targetConfidence ??
    options.convergence ??
    options.targetConfidence ??
    perRun.convergence ??
    targetConfidence;
  const allSamples: number[] = [];

  // Collect initial batch (includes warmup + settle)
  const warmup = await collectInitial(
    baseRunner,
    benchmark,
    runnerOptions,
    params,
    allSamples,
  );

  // Start timing AFTER warmup - warmup time doesn't count against maxTime
  const startTime = performance.now();

  await collectAdaptive(baseRunner, benchmark, runnerOptions, params, allSamples, {
    minTime: min,
    maxTime: max,
    targetConfidence: target,
    startTime,
  });

  // Final verdict over everything collected; checkConvergence expects ns.
  const convergence = checkConvergence(allSamples.map(s => s * msToNs));
  return buildResults(
    allSamples,
    startTime,
    convergence,
    benchmark.name,
    warmup,
  );
}
115
+
116
/**
 * Run the first batch through the base runner and record its samples.
 *
 * The caller's options are forwarded with `maxTime` capped to `initialBatch`
 * and `maxIterations` cleared.
 *
 * @param allSamples accumulator that receives the batch's samples
 * @return the warmup samples reported by the first result, if any
 */
async function collectInitial<T>(
  baseRunner: BenchRunner,
  benchmark: BenchmarkSpec<T>,
  runnerOptions: RunnerOptions,
  params: T | undefined,
  allSamples: number[],
): Promise<number[] | undefined> {
  const firstBatchOptions = {
    ...(runnerOptions as any),
    maxTime: initialBatch,
    maxIterations: undefined,
  };
  const batch = await baseRunner.runBench(benchmark, firstBatchOptions, params);
  const first = batch[0];
  appendSamples(first, allSamples);
  return first.warmupSamples;
}
134
+
135
/**
 * Keep running continuation batches, appending their samples to `allSamples`,
 * until `shouldStop` accepts the current convergence or the time budget in
 * `limits.maxTime` is used up. Progress is written to stderr at most about
 * once per second and the progress line is cleared on exit.
 *
 * @param allSamples accumulator, mutated in place
 * @param limits stop criteria plus the timestamp the budget is measured from
 */
async function collectAdaptive<T>(
  baseRunner: BenchRunner,
  benchmark: BenchmarkSpec<T>,
  runnerOptions: RunnerOptions,
  params: T | undefined,
  allSamples: number[],
  limits: {
    minTime: number;
    maxTime: number;
    targetConfidence: number;
    startTime: number;
  },
): Promise<void> {
  const {
    minTime: minMs,
    maxTime: budgetMs,
    targetConfidence: target,
    startTime,
  } = limits;
  const sinceStart = (): number => performance.now() - startTime;
  let lastReportAt = 0;

  while (sinceStart() < budgetMs) {
    const status = checkConvergence(allSamples.map(ms => ms * msToNs));
    const elapsed = sinceStart();

    // Throttle progress output to roughly one line per second.
    if (elapsed - lastReportAt > 1000) {
      const seconds = (elapsed / 1000).toFixed(1);
      const percent = status.confidence.toFixed(0);
      process.stderr.write(
        `\r◊ ${benchmark.name}: ${percent}% confident (${seconds}s) `,
      );
      lastReportAt = elapsed;
    }

    if (shouldStop(status, target, elapsed, minMs)) {
      break;
    }

    // Warmup already happened in the initial batch, so skip it here.
    const batchOptions = {
      ...(runnerOptions as any),
      maxTime: continueBatch,
      maxIterations: continueIterations,
      skipWarmup: true,
    };
    const batch = await baseRunner.runBench(benchmark, batchOptions, params);
    appendSamples(batch[0], allSamples);
  }

  // Blank out the progress line.
  process.stderr.write("\r" + " ".repeat(60) + "\r");
}
181
+
182
/**
 * Copy every sample of `result` onto the end of `samples`.
 *
 * Values are pushed individually rather than spread, because spreading a
 * very large array into `push` can overflow the call stack.
 *
 * @param result batch whose samples should be appended (no-op when it has none)
 * @param samples accumulator, mutated in place
 */
function appendSamples(result: MeasuredResults, samples: number[]): void {
  const incoming = result.samples;
  const hasSamples = Boolean(incoming?.length);
  if (hasSamples) {
    for (const value of incoming) {
      samples.push(value);
    }
  }
}
187
+
188
/**
 * Decide whether sampling can end.
 *
 * Stops immediately when the samples have converged with at least the target
 * confidence. Otherwise stops only once `minTime` has elapsed and the
 * confidence reaches the higher of `targetConfidence` and `fallbackThreshold`.
 *
 * @return true when sampling should stop
 */
function shouldStop(
  convergence: ConvergenceResult,
  targetConfidence: number,
  elapsedTime: number,
  minTime: number,
): boolean {
  const { converged, confidence } = convergence;
  const reachedTarget = converged && confidence >= targetConfidence;
  if (reachedTarget) return true;

  const pastMinTime = elapsedTime >= minTime;
  const required = Math.max(targetConfidence, fallbackThreshold);
  return pastMinTime && confidence >= required;
}
202
+
203
/**
 * Package the collected samples into the runner's result shape.
 *
 * @param samplesMs all collected samples, in milliseconds
 * @param startTime `performance.now()` timestamp the run is measured from
 * @param convergence final convergence verdict to attach
 * @param name benchmark name
 * @param warmupSamples warmup samples from the initial batch, if any
 * @return single-element array holding stats, samples and convergence info
 */
function buildResults(
  samplesMs: number[],
  startTime: number,
  convergence: ConvergenceResult,
  name: string,
  warmupSamples?: number[],
): MeasuredResults[] {
  // Elapsed wall time since startTime, in seconds.
  const elapsedSeconds = (performance.now() - startTime) / 1000;
  const time = computeTimeStats(samplesMs.map(ms => ms * msToNs));

  const measured: MeasuredResults = {
    name,
    samples: samplesMs,
    warmupSamples,
    time,
    totalTime: elapsedSeconds,
    convergence,
  };
  return [measured];
}
226
+
227
/**
 * Summarise nanosecond samples as millisecond statistics.
 *
 * @param samplesNs samples in nanoseconds
 * @return min/max/avg, percentiles and robust variability metrics, in ms
 */
function computeTimeStats(samplesNs: number[]) {
  const toMs = (ns: number): number => ns / msToNs;

  const inMs = samplesNs.map(toMs);
  const extremes = getMinMaxSum(samplesNs);
  const percentileStats = getPercentiles(samplesNs);
  const robustStats = getRobustMetrics(inMs);

  const mean = toMs(extremes.sum / samplesNs.length);
  return {
    min: toMs(extremes.min),
    max: toMs(extremes.max),
    avg: mean,
    ...percentileStats,
    ...robustStats,
  };
}
242
+
243
/**
 * Compute the smallest value, largest value and total of `samples`.
 *
 * @return `{ min, max, sum }`; for an empty array `min` is `+Infinity`,
 *   `max` is `-Infinity` and `sum` is 0
 */
function getMinMaxSum(samples: number[]) {
  let min = Number.POSITIVE_INFINITY;
  let max = Number.NEGATIVE_INFINITY;
  let sum = 0;
  for (const value of samples) {
    min = Math.min(min, value);
    max = Math.max(max, value);
    sum = sum + value;
  }
  return { min, max, sum };
}
256
+
257
/**
 * Evaluate the standard set of percentiles for the given samples.
 *
 * @param samples samples in nanoseconds
 * @return p25, p50, p75, p95, p99 and p999, each converted to ms
 */
function getPercentiles(samples: number[]) {
  const atMs = (fraction: number): number =>
    percentile(samples, fraction) / msToNs;

  const p25 = atMs(0.25);
  const p50 = atMs(0.5);
  const p75 = atMs(0.75);
  const p95 = atMs(0.95);
  const p99 = atMs(0.99);
  const p999 = atMs(0.999);
  return { p25, p50, p75, p95, p99, p999 };
}
268
+
269
/**
 * Compute variability metrics for the samples.
 *
 * @param samplesMs samples in milliseconds
 * @return coefficient of variation, median absolute deviation, and the
 *   outlier impact ratio (reported as `outlierRate`)
 */
function getRobustMetrics(samplesMs: number[]) {
  const { ratio: outlierRate } = getOutlierImpact(samplesMs);
  const cv = coefficientOfVariation(samplesMs);
  const mad = medianAbsoluteDeviation(samplesMs);
  return { cv, mad, outlierRate };
}
278
+
279
/**
 * Measure how much of the total time is attributable to slow outliers.
 *
 * A sample is an outlier when it exceeds `median + 1.5 * (p75 - median)`.
 * Each outlier contributes its excess over the median.
 *
 * @return `ratio` = summed excess / summed samples (0 when the total is not
 *   positive or there are no samples) and `count` = number of outliers
 */
function getOutlierImpact(samples: number[]): { ratio: number; count: number } {
  if (samples.length === 0) {
    return { ratio: 0, count: 0 };
  }

  const median = percentile(samples, 0.5);
  const upperQuartile = percentile(samples, 0.75);
  const cutoff = median + 1.5 * (upperQuartile - median);

  const outliers = samples.filter(value => value > cutoff);
  const excess = outliers.reduce((acc, value) => acc + (value - median), 0);
  const total = samples.reduce((acc, value) => acc + value, 0);

  const ratio = total > 0 ? excess / total : 0;
  return { ratio, count: outliers.length };
}
303
+
304
/**
 * Judge whether the sample stream has settled by comparing its two most
 * recent windows.
 *
 * @param samples sample durations in nanoseconds
 * @return a progress result while fewer than two windows of samples exist,
 *   otherwise the convergence verdict derived from window stability
 */
export function checkConvergence(samples: number[]): ConvergenceResult {
  const span = getWindowSize(samples);
  const required = span * 2;
  const collected = samples.length;

  return collected < required
    ? buildProgressResult(collected, required)
    : buildConvergence(getStability(samples, span));
}
316
+
317
/**
 * Build a not-yet-converged result for when too few samples are available.
 *
 * @param currentSamples number of samples collected so far
 * @param minSamples number of samples needed before stability can be judged
 * @return result whose confidence is the percentage of `minSamples` collected
 */
function buildProgressResult(
  currentSamples: number,
  minSamples: number,
): ConvergenceResult {
  const confidence = (currentSamples / minSamples) * 100;
  const reason = `Collecting samples: ${currentSamples}/${minSamples}`;
  return { converged: false, confidence, reason };
}
328
+
329
/**
 * Compare the two most recent sample windows to see how much the
 * distribution is still moving.
 *
 * @param samples sample durations in nanoseconds (converted to ms internally)
 * @param windowSize number of samples in each of the two windows
 * @return relative drift of the median and absolute drift of the outlier
 *   impact ratio, each flagged stable when below `stability`
 */
function getStability(samples: number[], windowSize: number): Metrics {
  const recent = samples.slice(-windowSize);
  const previous = samples.slice(-windowSize * 2, -windowSize);

  const recentMs = recent.map(s => s / msToNs);
  const previousMs = previous.map(s => s / msToNs);

  const medianRecent = percentile(recentMs, 0.5);
  const medianPrevious = percentile(previousMs, 0.5);
  const medianDelta = Math.abs(medianRecent - medianPrevious);
  // Identical medians mean zero drift. Short-circuiting avoids 0 / 0 = NaN
  // when both medians are 0 (e.g. samples below timer resolution); a NaN
  // drift is never "< stability" and would propagate into the confidence.
  const medianDrift = medianDelta === 0 ? 0 : medianDelta / medianPrevious;

  const impactRecent = getOutlierImpact(recentMs);
  const impactPrevious = getOutlierImpact(previousMs);
  const impactDrift = Math.abs(impactRecent.ratio - impactPrevious.ratio);

  return {
    medianDrift,
    impactDrift,
    medianStable: medianDrift < stability,
    impactStable: impactDrift < stability,
  };
}
352
+
353
/**
 * Turn window stability metrics into a convergence verdict.
 *
 * When both metrics are stable the result is converged with confidence 100.
 * Otherwise each drift contributes up to 50 points, scaled by how far it is
 * from the `stability` threshold, and the sum is clamped to 0-100.
 *
 * @return convergence verdict with a reason naming the larger drift
 */
function buildConvergence(metrics: Metrics): ConvergenceResult {
  if (metrics.medianStable && metrics.impactStable) {
    return {
      converged: true,
      confidence: 100,
      reason: "Stable performance pattern",
    };
  }

  const { medianDrift, impactDrift } = metrics;
  const medianScore = (1 - medianDrift / stability) * 50;
  const impactScore = (1 - impactDrift / stability) * 50;
  const capped = Math.min(100, medianScore + impactScore);

  let reason: string;
  if (medianDrift > impactDrift) {
    reason = `Median drifting: ${(medianDrift * 100).toFixed(1)}%`;
  } else {
    reason = `Outlier impact changing: ${(impactDrift * 100).toFixed(1)}%`;
  }

  return { converged: false, confidence: Math.max(0, capped), reason };
}
377
+
378
/**
 * Choose the comparison window size from how long recent samples take:
 * faster benchmarks get larger windows.
 *
 * @param samples sample durations in nanoseconds
 * @return the default `windowSize` while fewer than 20 samples exist,
 *   otherwise a size picked from the median of the last 20 samples
 */
function getWindowSize(samples: number[]): number {
  const probeCount = 20;
  if (samples.length < probeCount) return windowSize;

  const tailMs = samples.slice(-probeCount).map(ns => ns / msToNs);
  const tailMedian = percentile(tailMs, 0.5);

  // [exclusive upper bound in ms, window size], checked in ascending order.
  const tiers: ReadonlyArray<readonly [number, number]> = [
    [0.01, 200], // <10μs
    [0.1, 100], // <100μs
    [1, 50], // <1ms
    [10, 30], // <10ms
  ];
  for (const [upperMs, size] of tiers) {
    if (tailMedian < upperMs) return size;
  }
  return 20; // 10ms and slower
}