benchforge 0.1.0

Files changed (98)
  1. package/README.md +432 -0
  2. package/bin/benchforge +3 -0
  3. package/dist/bin/benchforge.mjs +9 -0
  4. package/dist/bin/benchforge.mjs.map +1 -0
  5. package/dist/browser/index.js +914 -0
  6. package/dist/index.mjs +3 -0
  7. package/dist/src-CGuaC3Wo.mjs +3676 -0
  8. package/dist/src-CGuaC3Wo.mjs.map +1 -0
  9. package/package.json +49 -0
  10. package/src/BenchMatrix.ts +380 -0
  11. package/src/Benchmark.ts +33 -0
  12. package/src/BenchmarkReport.ts +156 -0
  13. package/src/GitUtils.ts +79 -0
  14. package/src/HtmlDataPrep.ts +148 -0
  15. package/src/MeasuredResults.ts +127 -0
  16. package/src/NodeGC.ts +48 -0
  17. package/src/PermutationTest.ts +115 -0
  18. package/src/StandardSections.ts +268 -0
  19. package/src/StatisticalUtils.ts +176 -0
  20. package/src/TypeUtil.ts +8 -0
  21. package/src/bin/benchforge.ts +4 -0
  22. package/src/browser/BrowserGcStats.ts +44 -0
  23. package/src/browser/BrowserHeapSampler.ts +248 -0
  24. package/src/cli/CliArgs.ts +64 -0
  25. package/src/cli/FilterBenchmarks.ts +68 -0
  26. package/src/cli/RunBenchCLI.ts +856 -0
  27. package/src/export/JsonExport.ts +103 -0
  28. package/src/export/JsonFormat.ts +91 -0
  29. package/src/export/PerfettoExport.ts +203 -0
  30. package/src/heap-sample/HeapSampleReport.ts +196 -0
  31. package/src/heap-sample/HeapSampler.ts +78 -0
  32. package/src/html/HtmlReport.ts +131 -0
  33. package/src/html/HtmlTemplate.ts +284 -0
  34. package/src/html/Types.ts +88 -0
  35. package/src/html/browser/CIPlot.ts +287 -0
  36. package/src/html/browser/HistogramKde.ts +118 -0
  37. package/src/html/browser/LegendUtils.ts +163 -0
  38. package/src/html/browser/RenderPlots.ts +263 -0
  39. package/src/html/browser/SampleTimeSeries.ts +389 -0
  40. package/src/html/browser/Types.ts +96 -0
  41. package/src/html/browser/index.ts +1 -0
  42. package/src/html/index.ts +17 -0
  43. package/src/index.ts +92 -0
  44. package/src/matrix/CaseLoader.ts +36 -0
  45. package/src/matrix/MatrixFilter.ts +103 -0
  46. package/src/matrix/MatrixReport.ts +290 -0
  47. package/src/matrix/VariantLoader.ts +46 -0
  48. package/src/runners/AdaptiveWrapper.ts +391 -0
  49. package/src/runners/BasicRunner.ts +368 -0
  50. package/src/runners/BenchRunner.ts +60 -0
  51. package/src/runners/CreateRunner.ts +11 -0
  52. package/src/runners/GcStats.ts +107 -0
  53. package/src/runners/RunnerOrchestrator.ts +374 -0
  54. package/src/runners/RunnerUtils.ts +2 -0
  55. package/src/runners/TimingUtils.ts +13 -0
  56. package/src/runners/WorkerScript.ts +256 -0
  57. package/src/table-util/ConvergenceFormatters.ts +19 -0
  58. package/src/table-util/Formatters.ts +152 -0
  59. package/src/table-util/README.md +70 -0
  60. package/src/table-util/TableReport.ts +293 -0
  61. package/src/table-util/test/TableReport.test.ts +105 -0
  62. package/src/table-util/test/TableValueExtractor.test.ts +41 -0
  63. package/src/table-util/test/TableValueExtractor.ts +100 -0
  64. package/src/test/AdaptiveRunner.test.ts +185 -0
  65. package/src/test/AdaptiveStatistics.integration.ts +119 -0
  66. package/src/test/BenchmarkReport.test.ts +82 -0
  67. package/src/test/BrowserBench.e2e.test.ts +44 -0
  68. package/src/test/BrowserBench.test.ts +79 -0
  69. package/src/test/GcStats.test.ts +94 -0
  70. package/src/test/PermutationTest.test.ts +121 -0
  71. package/src/test/RunBenchCLI.test.ts +166 -0
  72. package/src/test/RunnerOrchestrator.test.ts +102 -0
  73. package/src/test/StatisticalUtils.test.ts +112 -0
  74. package/src/test/TestUtils.ts +93 -0
  75. package/src/test/fixtures/test-bench-script.ts +30 -0
  76. package/src/tests/AdaptiveConvergence.test.ts +177 -0
  77. package/src/tests/AdaptiveSampling.test.ts +240 -0
  78. package/src/tests/BenchMatrix.test.ts +366 -0
  79. package/src/tests/MatrixFilter.test.ts +117 -0
  80. package/src/tests/MatrixReport.test.ts +139 -0
  81. package/src/tests/RealDataValidation.test.ts +177 -0
  82. package/src/tests/fixtures/baseline/impl.ts +4 -0
  83. package/src/tests/fixtures/bevy30-samples.ts +158 -0
  84. package/src/tests/fixtures/cases/asyncCases.ts +7 -0
  85. package/src/tests/fixtures/cases/cases.ts +8 -0
  86. package/src/tests/fixtures/cases/variants/product.ts +2 -0
  87. package/src/tests/fixtures/cases/variants/sum.ts +2 -0
  88. package/src/tests/fixtures/discover/fast.ts +1 -0
  89. package/src/tests/fixtures/discover/slow.ts +4 -0
  90. package/src/tests/fixtures/invalid/bad.ts +1 -0
  91. package/src/tests/fixtures/loader/fast.ts +1 -0
  92. package/src/tests/fixtures/loader/slow.ts +4 -0
  93. package/src/tests/fixtures/loader/stateful.ts +2 -0
  94. package/src/tests/fixtures/stateful/stateful.ts +2 -0
  95. package/src/tests/fixtures/variants/extra.ts +1 -0
  96. package/src/tests/fixtures/variants/impl.ts +1 -0
  97. package/src/tests/fixtures/worker/fast.ts +1 -0
  98. package/src/tests/fixtures/worker/slow.ts +4 -0
package/README.md ADDED
@@ -0,0 +1,432 @@
# Benchforge

A TypeScript benchmarking library with CLI support for running performance tests.

## Browser Profiling

See [Browser Heap Profiling](README-browser.md) for profiling code running in a browser.

## Installation

```bash
npm install benchforge
# or
pnpm add benchforge
```

## Quick Start

```typescript
import { parseBenchArgs, runBenchmarks, reportResults, timeSection, runsSection, type BenchSuite } from 'benchforge';

const suite: BenchSuite = {
  name: "String Operations",
  groups: [
    {
      name: "Concatenation",
      benchmarks: [
        { name: "plus", fn: () => "a" + "b" },
        { name: "template", fn: () => `a${"b"}` },
      ],
    },
  ],
};

const args = parseBenchArgs();
const results = await runBenchmarks(suite, args);
const table = reportResults(results, [timeSection, runsSection]);
console.log(table);
```

### Setup and Baseline Example

Here's a more comprehensive example with shared setup data and baseline comparison:

```typescript
import { parseBenchArgs, runBenchmarks, defaultReport, type BenchGroup, type BenchSuite } from 'benchforge';

const sortingGroup: BenchGroup<number[]> = {
  name: "Array Sorting (1000 numbers)",
  setup: () => Array.from({ length: 1000 }, () => Math.random()),
  baseline: { name: "native sort", fn: nativeSort },
  benchmarks: [
    { name: "quicksort", fn: quickSort },
    { name: "insertion sort", fn: insertionSort },
  ],
};

const suite: BenchSuite = {
  name: "Performance Tests",
  groups: [sortingGroup],
};

const args = parseBenchArgs();
const results = await runBenchmarks(suite, args);
const report = defaultReport(results, args);
console.log(report);
```

See `examples/simple-cli.ts` for a complete runnable example.

### Worker Mode with Module Imports

For worker mode, benchmarks can reference module exports instead of inline functions. This is essential for proper isolation since functions can't be serialized across process boundaries.

```typescript
const group: BenchGroup = {
  name: "Parser Benchmark",
  setup: () => loadTestData(),
  benchmarks: [{
    name: "parse",
    fn: () => {}, // placeholder - not used in worker mode
    modulePath: new URL("./benchmarks.ts", import.meta.url).href,
    exportName: "parse",
    setupExportName: "setup", // optional: called once, result passed to exportName fn
  }],
};
```

When `setupExportName` is provided, the worker:
1. Imports the module
2. Calls `setup(params)` once (where params comes from `BenchGroup.setup()`)
3. Passes the setup result to each benchmark iteration

This eliminates manual caching boilerplate in worker modules.
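
A worker module for the group above might look like the following sketch (a hypothetical `benchmarks.ts`; the `setup`/`parse` export names are the ones referenced by `setupExportName`/`exportName` in the example, and the data shapes are invented for illustration):

```typescript
// benchmarks.ts - hypothetical worker module for the group above.
// `setup` runs once in the worker; its return value is handed to `parse`
// on every iteration, so no module-level caching is needed.

interface TestData {
  source: string;
}

// Called once with the params produced by BenchGroup.setup().
export function setup(params: TestData): string[] {
  return params.source.split("\n");
}

// Called on each benchmark iteration with the setup result.
export function parse(lines: string[]): number {
  return lines.filter((line) => line.startsWith("#")).length;
}
```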

## CLI Options

### Basic Options
- `--time <seconds>` - Benchmark duration per test (default: 0.642s)
- `--iterations <count>` - Exact number of iterations (overrides `--time`)
- `--filter <pattern>` - Run only benchmarks matching regex/substring
- `--worker` / `--no-worker` - Run in isolated worker process (default: true)
- `--profile` - Run once for profiling (single iteration, no warmup)
- `--warmup <count>` - Warmup iterations before measurement (default: 0)
- `--help` - Show all available options

### Memory Profiling
- `--gc-stats` - Collect GC allocation/collection stats via `--trace-gc-nvp`
- `--heap-sample` - Heap sampling allocation attribution (includes garbage)
- `--heap-interval <bytes>` - Sampling interval in bytes (default: 32768)
- `--heap-depth <frames>` - Stack depth to capture (default: 64)
- `--heap-rows <n>` - Number of top allocation sites to show (default: 20)

### Output Options
- `--html` - Generate HTML report, start server, and open in browser
- `--export-html <file>` - Export HTML report to file
- `--json <file>` - Export benchmark data to JSON
- `--perfetto <file>` - Export Perfetto trace file

## CLI Usage

### Filter benchmarks by name

```bash
simple-cli.ts --filter "concat"
simple-cli.ts --filter "^parse" --time 2
```

### Profiling with external debuggers

Use `--profile` to run benchmarks once for attaching external profilers:

```bash
# Use with Chrome DevTools profiler
node --inspect-brk simple-cli.ts --profile

# Use with other profiling tools
node --prof simple-cli.ts --profile
```

The `--profile` flag executes exactly one iteration with no warmup, making it ideal for debugging and performance profiling.

### Key Concepts

**Setup Functions**: Run once per group and provide shared data to all benchmarks in that group. The data returned by setup is automatically passed as the first parameter to benchmark functions that expect it.

**Baseline Comparison**: When a baseline is specified, all benchmarks in the group show percentage differences (Δ%) compared to baseline.

## Output

Results are displayed in a formatted table:

```
╔═════════════════╤═══════════════════════════════════════════╤═══════╤═════════╗
║                 │ time                                      │       │         ║
║ name            │ mean  Δ% CI                   p50   p99   │ conv% │ runs    ║
╟─────────────────┼───────────────────────────────────────────┼───────┼─────────╢
║ quicksort       │ 0.17  +5.5%  [+4.7%, +6.2%]   0.15  0.63  │ 100%  │   1,134 ║
║ insertion sort  │ 0.24  +25.9% [+25.3%, +27.4%] 0.18  0.36  │ 100%  │     807 ║
║ --> native sort │ 0.16                          0.15  0.41  │ 100%  │   1,210 ║
╚═════════════════╧═══════════════════════════════════════════╧═══════╧═════════╝
```

- **Δ% CI**: Percentage difference from baseline with bootstrap confidence interval
- **conv%**: Convergence percentage (100% = stable measurements)

### HTML

The HTML report displays:
- Histogram + KDE: Bar chart showing the distribution
- Time Series: Sample values over iterations
- Allocation Series: Per-sample heap allocation (requires `--heap-sample`)

```bash
# Generate HTML report, start server, and open in browser
simple-cli.ts --html
# Press Ctrl+C to exit when done viewing
```

### Perfetto Trace Export

Export benchmark data as a Perfetto-compatible trace file for detailed analysis:

```bash
# Export trace file
simple-cli.ts --perfetto trace.json

# With V8 GC events (automatically merged after exit)
node --expose-gc --trace-events-enabled --trace-event-categories=v8,v8.gc \
  simple-cli.ts --perfetto trace.json
```

View the trace at https://ui.perfetto.dev by dragging the JSON file.

The trace includes:
- **Heap counter**: Continuous heap usage as a line graph
- **Sample markers**: Each benchmark iteration with timing
- **Pause markers**: V8 optimization pause points
- **V8 GC events**: Automatically merged after process exit (when run with `--trace-events-enabled`)

### GC Statistics

Collect detailed garbage collection statistics via V8's `--trace-gc-nvp`:

```bash
# Collect GC allocation/collection stats (requires worker mode)
simple-cli.ts --gc-stats
```

Adds these columns to the output table:
- **alloc/iter**: Bytes allocated per iteration
- **scav**: Number of scavenge (minor) GCs
- **full**: Number of full (mark-compact) GCs
- **promo%**: Percentage of allocations promoted to old generation
- **pause/iter**: GC pause time per iteration

### Heap Sampling

For allocation profiling including garbage (short-lived objects), use `--heap-sample` mode which uses Node's built-in inspector API:

```bash
# Basic heap sampling
simple-cli.ts --heap-sample --iterations 100

# Smaller interval = more samples = better coverage of rare allocations
simple-cli.ts --heap-sample --heap-interval 4096 --iterations 100

# Verbose output with clickable file:// paths
simple-cli.ts --heap-sample --heap-verbose

# Control call stack display depth
simple-cli.ts --heap-sample --heap-stack 5
```
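
For context, the inspector API involved here is V8's sampling heap profiler. A standalone sketch of the underlying protocol calls (not benchforge's actual implementation; `runWorkload` is a stand-in for the benchmarked code):

```typescript
import { Session } from "node:inspector/promises";

// Sketch: drive V8's sampling heap profiler directly.
// samplingInterval plays the role of --heap-interval.
const session = new Session();
session.connect();
await session.post("HeapProfiler.enable");
await session.post("HeapProfiler.startSampling", { samplingInterval: 32768 });

runWorkload(); // stand-in for the benchmark iterations

// profile.head is a tree of { callFrame, selfSize, children } nodes.
const { profile } = await session.post("HeapProfiler.stopSampling");
console.log(profile.head);
session.disconnect();

function runWorkload(): void {
  const chunks: string[] = [];
  for (let i = 0; i < 10_000; i++) chunks.push("x".repeat(64));
}
```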

**CLI Options:**
- `--heap-sample` - Enable heap sampling allocation attribution
- `--heap-interval <bytes>` - Sampling interval in bytes (default: 32768)
- `--heap-depth <frames>` - Maximum stack depth to capture (default: 64)
- `--heap-rows <n>` - Number of top allocation sites to show (default: 20)
- `--heap-stack <n>` - Call stack depth to display (default: 3)
- `--heap-verbose` - Show full file:// paths with line numbers (cmd-clickable)

**Output (default compact):**
```
─── Heap profile: bevy_env_map ───
Heap allocation sites (top 20, garbage included):
  13.62 MB recursiveResolve <- flattenTreeImport <- bindAndTransform
  12.36 MB nextToken <- parseBlockStatements <- parseCompoundStatement
   5.15 MB coverWithText <- finishElem <- parseVarOrLet

Total (all): 56.98 MB
Total (user-code): 28.45 MB
Samples: 1,842
```

**How V8 Heap Sampling Works:**

V8's sampling profiler uses Poisson-distributed sampling. When an allocation occurs, V8 probabilistically decides whether to record it based on the sampling interval. Key points:

1. **selfSize is scaled**: V8 doesn't report raw sampled bytes. It scales sample counts to estimate total allocations (`selfSize = size × count × scaleFactor`). This means changing `--heap-interval` affects sample count and overhead, but the estimated total converges to the same value.

2. **Smaller intervals = better coverage**: With a smaller interval (e.g., 1024 vs 32768), you get more samples and discover more unique allocation sites, especially rare ones. The total estimate stays similar, but you see more of the distribution.

3. **User-code only**: The report filters out Node.js internals (`node:`, `internal/`). "Total (user-code)" shows filtered allocations; "Total (all)" shows everything.

4. **Measurement window**: Sampling covers benchmark module import + execution. Worker startup and framework init aren't captured (but do appear in `--gc-stats`).

5. **Sites are stack-unique**: The same function appears multiple times with different callers. For example, `nextToken` may show up in several entries with different call stacks, each representing a distinct allocation pattern (see the sketch after this list).
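
To make the stack-unique point concrete, here is a hypothetical reduction of the sampled profile tree into per-site totals like those shown above (benchforge's real report logic lives in `HeapSampleReport.ts` and differs in detail):

```typescript
// Hypothetical flattening of a sampling heap profile into per-site totals.
// Each node carries { callFrame, selfSize, children }; keying on the call
// stack (not just the function name) is what makes sites "stack-unique".
interface ProfileNode {
  callFrame: { functionName: string; url: string };
  selfSize: number;
  children: ProfileNode[];
}

function topSites(head: ProfileNode, rows = 20): Array<[string, number]> {
  const totals = new Map<string, number>();

  const walk = (node: ProfileNode, callers: string[]): void => {
    const frame = node.callFrame.functionName || "(anonymous)";
    if (node.selfSize > 0) {
      // Display depth of 3 frames mirrors the default --heap-stack.
      const key = [frame, ...callers.slice(0, 2)].join(" <- ");
      totals.set(key, (totals.get(key) ?? 0) + node.selfSize);
    }
    for (const child of node.children) walk(child, [frame, ...callers]);
  };

  walk(head, []);
  return [...totals.entries()].sort((a, b) => b[1] - a[1]).slice(0, rows);
}
```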

**Limitations:**
- **Function-level attribution only**: V8 reports the function where allocation occurred, not the specific line. The line:column shown is where the function is *defined*.
- **Statistical sampling**: Results vary between runs. More iterations = more stable results.
- **~50% filtered**: Node.js internals account for roughly half of allocations. Use "Total (all)" to see the full picture.

**When to use which:**

| Tool | Use When |
|------|----------|
| `--gc-stats` | Need total allocation/collection bytes, GC pause times |
| `--heap-sample` | Need to identify which functions allocate the most |
| Both | Cross-reference attribution with totals |

## Requirements

- Node.js 22.6+ (for native TypeScript support)
- Use `--expose-gc --allow-natives-syntax` flags for garbage collection monitoring and V8 native functions

## Adaptive Mode

Adaptive mode automatically adjusts the number of benchmark iterations until measurements stabilize, providing statistically significant results without excessive runtime.

### Using Adaptive Mode

```bash
# Enable adaptive benchmarking with default settings
simple-cli.ts --adaptive

# Customize time limits
simple-cli.ts --adaptive --time 60 --min-time 5

# Combine with other options
simple-cli.ts --adaptive --filter "quicksort"
```

### CLI Options for Adaptive Mode

- `--adaptive` - Enable adaptive sampling mode
- `--min-time <seconds>` - Minimum time before convergence can stop (default: 1s)
- `--convergence <percent>` - Confidence threshold 0-100 (default: 95)
- `--time <seconds>` - Maximum time limit (default: 20s in adaptive mode)

### How It Works

1. **Initial Sampling**: Collects an initial batch of ~100 samples (includes warmup)
2. **Window Comparison**: Compares recent samples against the previous window
3. **Stability Detection**: Checks median drift and outlier impact between windows
4. **Convergence**: Stops when both metrics are stable (<5% drift) or the threshold is reached (see the sketch after this list)
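
A minimal sketch of the window comparison, illustrative only; the real implementation in `AdaptiveWrapper.ts` also weighs outlier impact and the convergence threshold:

```typescript
// Illustrative stability check: compare medians of the two most recent
// windows of samples; "<5% drift" mirrors the rule described above.
function median(xs: number[]): number {
  const s = [...xs].sort((a, b) => a - b);
  const mid = s.length >> 1;
  return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2;
}

function isStable(samples: number[], windowSize = 50, maxDrift = 0.05): boolean {
  if (samples.length < windowSize * 2) return false;
  const recent = median(samples.slice(-windowSize));
  const previous = median(samples.slice(-windowSize * 2, -windowSize));
  return Math.abs(recent - previous) / previous < maxDrift;
}
```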

Progress is shown during execution:
```
◊ quicksort: 75% confident (2.1s)
```

### Output with Adaptive Mode

```
╔═════════════════╤═══════════════════════════════════════════╤═══════╤═════════╤══════╗
║                 │ time                                      │       │         │      ║
║ name            │ median Δ% CI                  mean  p99   │ conv% │ runs    │ time ║
╟─────────────────┼───────────────────────────────────────────┼───────┼─────────┼──────╢
║ quicksort       │ 0.17  +17.3% [+15.4%, +20.0%] 0.20  0.65  │ 100%  │     526 │ 0.0s ║
║ insertion sort  │ 0.18  +24.2% [+23.9%, +24.6%] 0.19  0.36  │ 100%  │     529 │ 0.0s ║
║ --> native sort │ 0.15                          0.15  0.25  │ 100%  │     647 │ 0.0s ║
╚═════════════════╧═══════════════════════════════════════════╧═══════╧═════════╧══════╝
```

- **conv%**: Convergence percentage (100% = stable measurements)
- **time**: Total sampling duration for that benchmark

## Statistical Considerations: Mean vs Median

### When to Use Mean with Confidence Intervals

**Best for:**
- **Normally distributed data** - When benchmark times follow a bell curve
- **Statistical comparison** - Comparing performance between implementations
- **Throughput analysis** - Understanding average system performance
- **Resource planning** - Estimating typical resource usage

**Advantages:**
- Provides confidence intervals for statistical significance
- Captures the full distribution including outliers
- Better for detecting small but consistent performance differences
- Standard in academic performance research

**Example use cases:**
- Comparing algorithm implementations
- Measuring API response times under normal load
- Evaluating compiler optimizations
- Benchmarking pure computational functions

### When to Use Median (p50)

**Best for:**
- **Skewed distributions** - When outliers are common
- **Latency-sensitive applications** - Where typical user experience matters
- **Noisy environments** - Systems with unpredictable interference
- **Service Level Agreements** - "50% of requests complete within X ms"

**Advantages:**
- Robust to outliers and system noise
- Better represents "typical" performance
- More stable in virtualized/cloud environments
- Less affected by GC pauses and OS scheduling

**Example use cases:**
- Web server response times
- Database query performance
- UI responsiveness metrics
- Real-time system benchmarks

### Interpreting Results

#### Baseline Comparison (Δ% CI)
```
0.17  +5.5%  [+4.7%, +6.2%]
```
This shows the benchmark is 5.5% slower than baseline, with a bootstrap confidence interval of [+4.7%, +6.2%]. Use this for comparing implementations.

#### Percentiles
```
p50: 0.15ms, p99: 0.27ms
```
This shows that 50% of runs completed in ≤0.15ms and 99% in ≤0.27ms. Use this when you care about consistency and tail latencies.
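
A percentile here is just an order statistic over the recorded samples. A nearest-rank sketch (benchforge's own implementation is in `StatisticalUtils.ts` and may interpolate differently):

```typescript
// Nearest-rank percentile over benchmark samples (times in ms).
function percentile(samples: number[], p: number): number {
  const sorted = [...samples].sort((a, b) => a - b);
  const rank = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[Math.max(0, rank)];
}

const times = [0.14, 0.15, 0.15, 0.16, 0.27];
console.log(percentile(times, 50)); // 0.15 - half the runs were at least this fast
console.log(percentile(times, 99)); // 0.27 - the tail
```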

### Practical Guidelines

1. **Use adaptive mode when:**
   - You want automatic convergence detection
   - Benchmarks have varying execution times
   - You need stable measurements without guessing iteration counts

2. **Use fixed iterations when:**
   - Comparing across runs/machines (reproducibility)
   - You know roughly how many samples you need
   - Running in CI pipelines with time constraints

3. **Interpreting conv%:**
   - 100% = measurements are stable
   - <100% = still converging or high variance
   - Red color indicates low confidence

### Statistical Notes

- **Bootstrap CI**: Baseline comparison uses permutation testing with bootstrap confidence intervals (sketched after this list)
- **Window Stability**: Adaptive mode compares sliding windows for median drift and outlier impact
- **Independence**: Assumes benchmark iterations are independent (use `--worker` flag for better isolation)
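
The two techniques compose roughly as in this hypothetical sketch; the real versions live in `PermutationTest.ts` and `StatisticalUtils.ts` and will differ in detail:

```typescript
// Illustrative permutation test + bootstrap CI for a baseline comparison.
// `candidate` and `baseline` are per-iteration times in ms.
function mean(xs: number[]): number {
  return xs.reduce((a, b) => a + b, 0) / xs.length;
}

function shuffled<T>(xs: T[]): T[] {
  const a = [...xs]; // Fisher-Yates shuffle
  for (let i = a.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [a[i], a[j]] = [a[j], a[i]];
  }
  return a;
}

// Permutation test: how often does relabeling the pooled samples produce
// a mean difference at least as extreme as the observed one?
function permutationPValue(candidate: number[], baseline: number[], rounds = 1000): number {
  const observed = Math.abs(mean(candidate) - mean(baseline));
  const pooled = [...candidate, ...baseline];
  let extreme = 0;
  for (let r = 0; r < rounds; r++) {
    const perm = shuffled(pooled);
    const diff = mean(perm.slice(0, candidate.length)) - mean(perm.slice(candidate.length));
    if (Math.abs(diff) >= observed) extreme++;
  }
  return extreme / rounds;
}

// Bootstrap percentile CI for the relative difference (Δ%) vs. baseline.
function bootstrapDeltaCI(candidate: number[], baseline: number[], rounds = 1000): [number, number] {
  const resample = (xs: number[]) => xs.map(() => xs[Math.floor(Math.random() * xs.length)]);
  const deltas = Array.from({ length: rounds }, () => {
    const b = mean(resample(baseline));
    return (100 * (mean(resample(candidate)) - b)) / b;
  }).sort((a, b) => a - b);
  return [deltas[Math.floor(rounds * 0.025)], deltas[Math.floor(rounds * 0.975)]];
}
```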

## Understanding GC Time Measurements

### GC Duration in Node.js Performance Hooks

The `duration` field in a GC `PerformanceEntry` records **stop-the-world pause time** - the time when JavaScript execution is actually blocked (see the observer sketch after this list). It does NOT include:

1. **Concurrent GC work** done in parallel threads (concurrent marking, sweeping)
2. **Performance degradation** from CPU contention and cache effects
3. **Total GC overhead** including preparation and cleanup
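
These pause times are what Node surfaces through `perf_hooks`. A small observation sketch (the shape of `entry.detail` is an assumption based on modern Node versions):

```typescript
import { PerformanceObserver } from "node:perf_hooks";

// Log each GC pause; `duration` covers only the blocking portion.
const obs = new PerformanceObserver((list) => {
  for (const entry of list.getEntries()) {
    // On recent Node versions the GC kind (minor/major/incremental) is in entry.detail.
    const detail = entry.detail as { kind?: number } | null;
    console.log(`GC kind=${detail?.kind} pause=${entry.duration.toFixed(2)}ms`);
  }
});
obs.observe({ entryTypes: ["gc"] });

// Churn memory so GCs actually fire.
let junk: number[][] = [];
for (let i = 0; i < 1000; i++) {
  junk.push(new Array(10_000).fill(i));
  if (junk.length > 100) junk = [];
}
```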

### Key Findings

1. **Multiple GC Events**: A single `gc()` call can trigger multiple GC events that are recorded separately
2. **Incremental GC**: V8 breaks up GC work into smaller increments to reduce pause times
3. **Duration < Impact**: The recorded duration is often much less than the actual performance impact

package/bin/benchforge ADDED
@@ -0,0 +1,3 @@
#!/usr/bin/env -S node --experimental-strip-types
import { runDefaultBench } from "../src/index.ts";
await runDefaultBench();
package/dist/bin/benchforge.mjs ADDED
@@ -0,0 +1,9 @@
#!/usr/bin/env node
import { g as runDefaultBench } from "../src-CGuaC3Wo.mjs";

//#region src/bin/benchforge.ts
await runDefaultBench();

//#endregion
export { };
//# sourceMappingURL=benchforge.mjs.map
package/dist/bin/benchforge.mjs.map ADDED
@@ -0,0 +1 @@
{"version":3,"file":"benchforge.mjs","names":[],"sources":["../../src/bin/benchforge.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runDefaultBench } from \"../index.ts\";\n\nawait runDefaultBench();\n"],"mappings":";;;;AAGA,MAAM,iBAAiB"}