benchforge 0.1.11 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/LICENSE +20 -0
  2. package/README.md +99 -294
  3. package/bin/benchforge +1 -2
  4. package/dist/AnalyzeArchive-8NCJhmhS.mjs +145 -0
  5. package/dist/AnalyzeArchive-8NCJhmhS.mjs.map +1 -0
  6. package/dist/BenchMatrix-BZVrBB_h.mjs +1050 -0
  7. package/dist/BenchMatrix-BZVrBB_h.mjs.map +1 -0
  8. package/dist/{BenchRunner-BzyUfiyB.d.mts → BenchRunner-DglX1NOn.d.mts} +119 -66
  9. package/dist/CoverageSampler-D5T9DRqe.mjs +27 -0
  10. package/dist/CoverageSampler-D5T9DRqe.mjs.map +1 -0
  11. package/dist/Formatters-BWj3d4sv.mjs +95 -0
  12. package/dist/Formatters-BWj3d4sv.mjs.map +1 -0
  13. package/dist/{HeapSampler-B8dtKHn1.mjs → HeapSampler-Dq-hpXem.mjs} +4 -4
  14. package/dist/HeapSampler-Dq-hpXem.mjs.map +1 -0
  15. package/dist/RunBenchCLI-C17DrJz8.mjs +3075 -0
  16. package/dist/RunBenchCLI-C17DrJz8.mjs.map +1 -0
  17. package/dist/StatisticalUtils-BD92crgM.mjs +255 -0
  18. package/dist/StatisticalUtils-BD92crgM.mjs.map +1 -0
  19. package/dist/TimeSampler-Ds8n7l2B.mjs +29 -0
  20. package/dist/TimeSampler-Ds8n7l2B.mjs.map +1 -0
  21. package/dist/ViewerServer-BJhdnxlN.mjs +639 -0
  22. package/dist/ViewerServer-BJhdnxlN.mjs.map +1 -0
  23. package/dist/ViewerServer-CuMNdNBz.mjs +2 -0
  24. package/dist/bin/benchforge.mjs +4 -5
  25. package/dist/bin/benchforge.mjs.map +1 -1
  26. package/dist/index.d.mts +711 -558
  27. package/dist/index.mjs +98 -3
  28. package/dist/index.mjs.map +1 -0
  29. package/dist/runners/WorkerScript.d.mts +12 -4
  30. package/dist/runners/WorkerScript.mjs +77 -105
  31. package/dist/runners/WorkerScript.mjs.map +1 -1
  32. package/dist/viewer/assets/CIPlot-BkOvMoMa.js +1 -0
  33. package/dist/viewer/assets/HistogramKde-CmSyUFY0.js +1 -0
  34. package/dist/viewer/assets/LegendUtils-BJpbn_jr.js +55 -0
  35. package/dist/viewer/assets/SampleTimeSeries-C4VBhXr3.js +1 -0
  36. package/dist/viewer/assets/index-Br9bp_cX.js +153 -0
  37. package/dist/viewer/assets/index-NzXXe_CC.css +1 -0
  38. package/dist/viewer/index.html +19 -0
  39. package/dist/viewer/speedscope/LICENSE +21 -0
  40. package/dist/viewer/speedscope/SourceCodePro-Regular.ttf-ILST5JV6.woff2 +0 -0
  41. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js +2 -0
  42. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js.map +7 -0
  43. package/dist/viewer/speedscope/favicon-16x16-VSI62OPJ.png +0 -0
  44. package/dist/viewer/speedscope/favicon-32x32-3EB2YCUY.png +0 -0
  45. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js +2 -0
  46. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js.map +7 -0
  47. package/dist/viewer/speedscope/favicon-FOKUP5Y5.ico +0 -0
  48. package/dist/viewer/speedscope/favicon-M34RF7BI.js +2 -0
  49. package/dist/viewer/speedscope/favicon-M34RF7BI.js.map +7 -0
  50. package/dist/viewer/speedscope/file-format-schema.json +274 -0
  51. package/dist/viewer/speedscope/index.html +19 -0
  52. package/dist/viewer/speedscope/jfrview_bg-BLJXNNQB.wasm +0 -0
  53. package/dist/viewer/speedscope/perf-vertx-stacks-01-collapsed-all-ZNUIGAJL.txt +199 -0
  54. package/dist/viewer/speedscope/release.txt +3 -0
  55. package/dist/viewer/speedscope/source-code-pro.LICENSE.md +93 -0
  56. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css +2 -0
  57. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css.map +7 -0
  58. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js +212 -0
  59. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js.map +7 -0
  60. package/package.json +52 -27
  61. package/src/bin/benchforge.ts +2 -2
  62. package/src/cli/AnalyzeArchive.ts +232 -0
  63. package/src/cli/BrowserBench.ts +322 -0
  64. package/src/cli/CliArgs.ts +164 -51
  65. package/src/cli/CliExport.ts +179 -0
  66. package/src/cli/CliOptions.ts +147 -0
  67. package/src/cli/CliReport.ts +197 -0
  68. package/src/cli/FilterBenchmarks.ts +18 -30
  69. package/src/cli/RunBenchCLI.ts +132 -866
  70. package/src/cli/SuiteRunner.ts +160 -0
  71. package/src/cli/ViewerServer.ts +282 -0
  72. package/src/export/AllocExport.ts +121 -0
  73. package/src/export/ArchiveExport.ts +146 -0
  74. package/src/export/ArchiveFormat.ts +50 -0
  75. package/src/export/CoverageExport.ts +148 -0
  76. package/src/export/EditorUri.ts +10 -0
  77. package/src/export/PerfettoExport.ts +64 -99
  78. package/src/export/SpeedscopeTypes.ts +98 -0
  79. package/src/export/TimeExport.ts +115 -0
  80. package/src/index.ts +86 -67
  81. package/src/matrix/BenchMatrix.ts +230 -0
  82. package/src/matrix/CaseLoader.ts +8 -6
  83. package/src/matrix/MatrixDirRunner.ts +153 -0
  84. package/src/matrix/MatrixFilter.ts +49 -47
  85. package/src/matrix/MatrixInlineRunner.ts +50 -0
  86. package/src/matrix/MatrixReport.ts +90 -250
  87. package/src/matrix/VariantLoader.ts +5 -5
  88. package/src/profiling/browser/BenchLoop.ts +51 -0
  89. package/src/profiling/browser/BrowserCDP.ts +133 -0
  90. package/src/profiling/browser/BrowserGcStats.ts +33 -0
  91. package/src/profiling/browser/BrowserProfiler.ts +160 -0
  92. package/src/profiling/browser/CdpClient.ts +82 -0
  93. package/src/profiling/browser/CdpPage.ts +138 -0
  94. package/src/profiling/browser/ChromeLauncher.ts +158 -0
  95. package/src/profiling/browser/ChromeTraceEvent.ts +28 -0
  96. package/src/profiling/browser/PageLoadMode.ts +61 -0
  97. package/src/profiling/node/CoverageSampler.ts +27 -0
  98. package/src/profiling/node/CoverageTypes.ts +23 -0
  99. package/src/profiling/node/HeapSampleReport.ts +261 -0
  100. package/src/{heap-sample → profiling/node}/HeapSampler.ts +1 -2
  101. package/src/{heap-sample → profiling/node}/ResolvedProfile.ts +18 -9
  102. package/src/profiling/node/TimeSampler.ts +57 -0
  103. package/src/report/BenchmarkReport.ts +146 -0
  104. package/src/report/Colors.ts +9 -0
  105. package/src/report/Formatters.ts +110 -0
  106. package/src/report/GcSections.ts +151 -0
  107. package/src/{GitUtils.ts → report/GitUtils.ts} +18 -19
  108. package/src/report/HtmlReport.ts +223 -0
  109. package/src/report/ParseStats.ts +73 -0
  110. package/src/report/StandardSections.ts +147 -0
  111. package/src/report/ViewerSections.ts +286 -0
  112. package/src/report/text/TableReport.ts +253 -0
  113. package/src/report/text/TextReport.ts +123 -0
  114. package/src/runners/AdaptiveWrapper.ts +116 -236
  115. package/src/runners/BenchRunner.ts +20 -15
  116. package/src/{Benchmark.ts → runners/BenchmarkSpec.ts} +5 -6
  117. package/src/runners/CreateRunner.ts +5 -7
  118. package/src/runners/GcStats.ts +47 -50
  119. package/src/{MeasuredResults.ts → runners/MeasuredResults.ts} +43 -37
  120. package/src/runners/MergeBatches.ts +123 -0
  121. package/src/{NodeGC.ts → runners/NodeGC.ts} +2 -3
  122. package/src/runners/RunnerOrchestrator.ts +127 -243
  123. package/src/runners/RunnerUtils.ts +75 -1
  124. package/src/runners/SampleStats.ts +100 -0
  125. package/src/runners/TimingRunner.ts +244 -0
  126. package/src/runners/TimingUtils.ts +3 -2
  127. package/src/runners/WorkerScript.ts +135 -151
  128. package/src/stats/BootstrapDifference.ts +282 -0
  129. package/src/{PermutationTest.ts → stats/PermutationTest.ts} +8 -17
  130. package/src/stats/StatisticalUtils.ts +445 -0
  131. package/src/{tests → test}/AdaptiveConvergence.test.ts +10 -10
  132. package/src/test/AdaptiveRunner.test.ts +39 -41
  133. package/src/{tests → test}/AdaptiveSampling.test.ts +9 -9
  134. package/src/test/AdaptiveStatistics.integration.ts +2 -2
  135. package/src/{tests → test}/BenchMatrix.test.ts +19 -16
  136. package/src/test/BenchmarkReport.test.ts +63 -13
  137. package/src/test/BrowserBench.e2e.test.ts +186 -17
  138. package/src/test/BrowserBench.test.ts +10 -5
  139. package/src/test/BuildTimeSection.test.ts +130 -0
  140. package/src/test/CapSamples.test.ts +82 -0
  141. package/src/test/CoverageExport.test.ts +115 -0
  142. package/src/test/CoverageSampler.test.ts +33 -0
  143. package/src/test/HeapAttribution.test.ts +14 -14
  144. package/src/{tests → test}/MatrixFilter.test.ts +1 -1
  145. package/src/{tests → test}/MatrixReport.test.ts +1 -1
  146. package/src/test/PermutationTest.test.ts +1 -1
  147. package/src/{tests → test}/RealDataValidation.test.ts +6 -6
  148. package/src/test/RunBenchCLI.test.ts +39 -38
  149. package/src/test/RunnerOrchestrator.test.ts +12 -12
  150. package/src/test/StatisticalUtils.test.ts +48 -12
  151. package/src/{table-util/test → test}/TableReport.test.ts +2 -2
  152. package/src/test/TestUtils.ts +12 -7
  153. package/src/test/TimeExport.test.ts +139 -0
  154. package/src/test/TimeSampler.test.ts +37 -0
  155. package/src/test/ViewerLive.e2e.test.ts +159 -0
  156. package/src/test/ViewerStatic.static.e2e.test.ts +137 -0
  157. package/src/{tests → test}/fixtures/baseline/impl.ts +1 -1
  158. package/src/{tests → test}/fixtures/bevy30-samples.ts +3 -1
  159. package/src/test/fixtures/cases/asyncCases.ts +9 -0
  160. package/src/{tests → test}/fixtures/cases/cases.ts +5 -2
  161. package/src/test/fixtures/cases/variants/product.ts +2 -0
  162. package/src/test/fixtures/cases/variants/sum.ts +2 -0
  163. package/src/test/fixtures/discover/fast.ts +1 -0
  164. package/src/{tests → test}/fixtures/discover/slow.ts +1 -1
  165. package/src/test/fixtures/invalid/bad.ts +1 -0
  166. package/src/test/fixtures/loader/fast.ts +1 -0
  167. package/src/{tests → test}/fixtures/loader/slow.ts +1 -1
  168. package/src/test/fixtures/loader/stateful.ts +2 -0
  169. package/src/test/fixtures/stateful/stateful.ts +2 -0
  170. package/src/test/fixtures/variants/extra.ts +1 -0
  171. package/src/test/fixtures/variants/impl.ts +1 -0
  172. package/src/test/fixtures/worker/fast.ts +1 -0
  173. package/src/{tests → test}/fixtures/worker/slow.ts +1 -1
  174. package/src/viewer/DateFormat.ts +30 -0
  175. package/src/viewer/Helpers.ts +23 -0
  176. package/src/viewer/LineData.ts +120 -0
  177. package/src/viewer/Providers.ts +191 -0
  178. package/src/viewer/ReportData.ts +123 -0
  179. package/src/viewer/State.ts +49 -0
  180. package/src/viewer/Theme.ts +15 -0
  181. package/src/viewer/components/App.tsx +73 -0
  182. package/src/viewer/components/DropZone.tsx +71 -0
  183. package/src/viewer/components/LazyPlot.ts +33 -0
  184. package/src/viewer/components/SamplesPanel.tsx +214 -0
  185. package/src/viewer/components/Shell.tsx +26 -0
  186. package/src/viewer/components/SourcePanel.tsx +216 -0
  187. package/src/viewer/components/SummaryPanel.tsx +332 -0
  188. package/src/viewer/components/TabBar.tsx +131 -0
  189. package/src/viewer/components/TabContent.tsx +46 -0
  190. package/src/viewer/components/ThemeToggle.tsx +50 -0
  191. package/src/viewer/index.html +20 -0
  192. package/src/viewer/main.tsx +4 -0
  193. package/src/viewer/plots/CIPlot.ts +313 -0
  194. package/src/{html/browser → viewer/plots}/HistogramKde.ts +33 -38
  195. package/src/viewer/plots/LegendUtils.ts +134 -0
  196. package/src/viewer/plots/PlotTypes.ts +85 -0
  197. package/src/viewer/plots/RenderPlots.ts +230 -0
  198. package/src/viewer/plots/SampleTimeSeries.ts +306 -0
  199. package/src/viewer/plots/SvgHelpers.ts +136 -0
  200. package/src/viewer/plots/TimeSeriesMarks.ts +319 -0
  201. package/src/viewer/report.css +427 -0
  202. package/src/viewer/shell.css +357 -0
  203. package/src/viewer/tsconfig.json +11 -0
  204. package/dist/BrowserHeapSampler-B6asLKWQ.mjs +0 -202
  205. package/dist/BrowserHeapSampler-B6asLKWQ.mjs.map +0 -1
  206. package/dist/GcStats-wX7Xyblu.mjs +0 -77
  207. package/dist/GcStats-wX7Xyblu.mjs.map +0 -1
  208. package/dist/HeapSampler-B8dtKHn1.mjs.map +0 -1
  209. package/dist/TimingUtils-DwOwkc8G.mjs +0 -597
  210. package/dist/TimingUtils-DwOwkc8G.mjs.map +0 -1
  211. package/dist/browser/index.js +0 -914
  212. package/dist/src-B-DDaCa9.mjs +0 -3108
  213. package/dist/src-B-DDaCa9.mjs.map +0 -1
  214. package/src/BenchMatrix.ts +0 -380
  215. package/src/BenchmarkReport.ts +0 -161
  216. package/src/HtmlDataPrep.ts +0 -148
  217. package/src/StandardSections.ts +0 -261
  218. package/src/StatisticalUtils.ts +0 -175
  219. package/src/TypeUtil.ts +0 -8
  220. package/src/browser/BrowserGcStats.ts +0 -44
  221. package/src/browser/BrowserHeapSampler.ts +0 -271
  222. package/src/export/JsonExport.ts +0 -103
  223. package/src/export/JsonFormat.ts +0 -91
  224. package/src/export/SpeedscopeExport.ts +0 -202
  225. package/src/heap-sample/HeapSampleReport.ts +0 -269
  226. package/src/html/HtmlReport.ts +0 -131
  227. package/src/html/HtmlTemplate.ts +0 -284
  228. package/src/html/Types.ts +0 -88
  229. package/src/html/browser/CIPlot.ts +0 -287
  230. package/src/html/browser/LegendUtils.ts +0 -163
  231. package/src/html/browser/RenderPlots.ts +0 -263
  232. package/src/html/browser/SampleTimeSeries.ts +0 -389
  233. package/src/html/browser/Types.ts +0 -96
  234. package/src/html/browser/index.ts +0 -1
  235. package/src/html/index.ts +0 -17
  236. package/src/runners/BasicRunner.ts +0 -364
  237. package/src/table-util/ConvergenceFormatters.ts +0 -19
  238. package/src/table-util/Formatters.ts +0 -157
  239. package/src/table-util/README.md +0 -70
  240. package/src/table-util/TableReport.ts +0 -293
  241. package/src/tests/fixtures/cases/asyncCases.ts +0 -7
  242. package/src/tests/fixtures/cases/variants/product.ts +0 -2
  243. package/src/tests/fixtures/cases/variants/sum.ts +0 -2
  244. package/src/tests/fixtures/discover/fast.ts +0 -1
  245. package/src/tests/fixtures/invalid/bad.ts +0 -1
  246. package/src/tests/fixtures/loader/fast.ts +0 -1
  247. package/src/tests/fixtures/loader/stateful.ts +0 -2
  248. package/src/tests/fixtures/stateful/stateful.ts +0 -2
  249. package/src/tests/fixtures/variants/extra.ts +0 -1
  250. package/src/tests/fixtures/variants/impl.ts +0 -1
  251. package/src/tests/fixtures/worker/fast.ts +0 -1
  252. /package/src/{table-util/test → test}/TableValueExtractor.test.ts +0 -0
  253. /package/src/{table-util/test → test}/TableValueExtractor.ts +0 -0
package/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Benchforge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md CHANGED
@@ -1,23 +1,39 @@
1
1
  # Benchforge
2
2
 
3
- Traditional benchmarking tools either ignore GC or try to avoid it.
4
- Benchforge captures GC impact.
5
-
6
- Garbage collection makes benchmarks noisy — statistics like mean and max
7
- stabilize poorly when collection is intermittent. Most tools work around
8
- this by isolating microbenchmarks from GC, but that hides a key part of
9
- real-world performance. And heap snapshots are useful for finding leaks,
10
- but they can't show you where garbage is being generated.
11
-
12
- - **Heap allocation profiling** attribute allocations to call sites, including short-lived objects already collected by GC.
13
- - **GC-aware statistics** — bootstrap confidence intervals and baseline comparison that account for GC variance instead of hiding it.
14
- - **GC collection reports** — allocation rates, scavenge/full GC counts, promotion %, and pause times per iteration.
15
-
16
- Also:
17
- - **Zero-config CLI** export a function, run `benchforge file.ts`.
18
- - **Multiple export formats** — HTML reports, Perfetto traces, Speedscope flame charts, JSON.
19
- - **Worker isolation** node benchmarks run in child processes by default.
20
- - **Browser support** benchmark in Chromium via [Playwright + CDP](README-browser.md).
3
+ Benchforge helps you make faster JavaScript programs with integrated tools for
4
+ benchmarking and performance analysis in Node.js and Chrome, including features
5
+ designed specifically for analyzing garbage-collected programs.
6
+
7
+ Garbage collection is intermittent and infrequent, which makes it harder to
8
+ identify true performance issues. Typical perf tools isolate microbenchmarks
9
+ from GC, but that hides a key part of real-world performance. Intermittent
10
+ events also lead to statistically skewed measurement distributions. Perf tools
11
+ that assume normal distributions and noise-free test runs can easily create
12
+ misleading false-positive performance reports. Benchforge captures a truer
13
+ picture of garbage-collected programs:
14
+
15
+ - **GC-aware statistics** -- bootstrap confidence intervals account for GC
16
+ variance instead of hiding it.
17
+ - **Heap allocation profiling** -- see which functions allocate the most,
18
+ including short-lived objects already collected.
19
+ - **GC collection reports** -- allocation rates, scavenge/full GC counts,
20
+ promotion %, and pause times per iteration.
21
+ - **Visualization** -- distribution plots, icicle charts for allocators, source
22
+ annotations with allocation and call count metrics.
23
+ - **Archive** -- save traces and source code together to share with your team.
24
+
25
+ ## Timing Distributions
26
+ <img width="326" height="363" alt="stats with distribution curves" src="https://github.com/user-attachments/assets/532702bd-faa1-4cb3-8b33-ad5409631427" />
27
+
28
+ ## Heap Allocation
29
+ Explore memory _allocation_ per function:
30
+ <img width="4444" height="2706" alt="allocation view" src="https://github.com/user-attachments/assets/6d4e2dee-bb72-41ce-a71d-d036bebedb3d" />
31
+
32
+ ## Benchmark Iteration Time Series
33
+ <img width="387" height="306" alt="time series" src="https://github.com/user-attachments/assets/f5676b64-7906-422b-aef3-4eedc325c422" />
34
+
35
+ ## Source Code Annotated with Performance Info
36
+ <img width="1946" height="460" alt="src annotations" src="https://github.com/user-attachments/assets/102cc574-ecf3-4f5f-8143-d20ee7008a72" />
21
37
 
22
38
  ## Installation
23
39
 
@@ -27,9 +43,10 @@ npm install benchforge
27
43
  pnpm add benchforge
28
44
  ```
29
45
 
30
- ## Quick Start
46
+ ## Quick Start: Node
31
47
 
32
- The simplest way to benchmark a function: export it as the default export and pass the file to `benchforge`.
48
+ The simplest benchmark: export a default function and pass the file to
49
+ `benchforge`.
33
50
 
34
51
  ```typescript
35
52
  // my-bench.ts
@@ -42,304 +59,92 @@ export default function (): string {
42
59
  benchforge my-bench.ts --gc-stats
43
60
  ```
44
61
 
45
- ### BenchSuite Export
62
+ For suites with multiple benchmarks, groups, and baseline comparison, see
63
+ [Node.md](Node.md).
46
64
 
47
- For multiple benchmarks with groups, setup data, and baseline comparison, export a `BenchSuite`:
65
+ ## Quick Start: Browser
48
66
 
49
- ```typescript
50
- // sorting.ts
51
- import type { BenchGroup, BenchSuite } from 'benchforge';
52
-
53
- const sortingGroup: BenchGroup<number[]> = {
54
- name: "Array Sorting (1000 numbers)",
55
- setup: () => Array.from({ length: 1000 }, () => Math.random()),
56
- baseline: { name: "native sort", fn: (arr) => [...arr].sort((a, b) => a - b) },
57
- benchmarks: [
58
- { name: "quicksort", fn: quickSort },
59
- { name: "insertion sort", fn: insertionSort },
60
- ],
61
- };
67
+ `benchforge --url <page>` opens Chromium and runs your program.
62
68
 
63
- const suite: BenchSuite = {
64
- name: "Performance Tests",
65
- groups: [sortingGroup],
66
- };
67
-
68
- export default suite;
69
- ```
69
+ You can time any page without modification, and compare against a baseline.
70
70
 
71
71
  ```bash
72
- benchforge sorting.ts --gc-stats
72
+ benchforge --url http://localhost:5173 --baseline-url http://localhost:5174 \
73
+ --gc-stats --batches 20 --iterations 10 --headless
73
74
  ```
74
75
 
75
- A `MatrixSuite` export (`.matrices`) is also recognized and runs via `matrixBenchExports`.
76
-
77
- See `examples/simple-cli.ts` for a complete runnable example.
78
-
79
- ### Worker Mode with Module Imports
80
-
81
- For worker mode, benchmarks can reference module exports instead of inline functions. This is essential for proper isolation since functions can't be serialized across process boundaries.
76
+ If you export your test function as `window.__bench`, benchforge can run
77
+ multiple iterations in the same tab, which helps reveal the accumulated effect
78
+ of heap allocation over time. Tests also run faster.
82
79
 
83
- ```typescript
84
- const group: BenchGroup = {
85
- name: "Parser Benchmark",
86
- setup: () => loadTestData(),
87
- benchmarks: [{
88
- name: "parse",
89
- fn: () => {}, // placeholder - not used in worker mode
90
- modulePath: new URL("./benchmarks.ts", import.meta.url).href,
91
- exportName: "parse",
92
- setupExportName: "setup", // optional: called once, result passed to exportName fn
93
- }],
80
+ ```html
81
+ <!-- bench function mode -->
82
+ <script>
83
+ window.__bench = () => {
84
+ const arr = Array.from({ length: 10000 }, () => Math.random());
85
+ arr.sort((a, b) => a - b);
94
86
  };
87
+ </script>
95
88
  ```
96
89
 
97
- When `setupExportName` is provided, the worker:
98
- 1. Imports the module
99
- 2. Calls `setup(params)` once (where params comes from `BenchGroup.setup()`)
100
- 3. Passes the setup result to each benchmark iteration
101
-
102
- This eliminates manual caching boilerplate in worker modules.
103
-
104
- ## CLI Options
105
-
106
- ### Basic Options
107
- - `--time <seconds>` - Benchmark duration per test (default: 0.642s)
108
- - `--iterations <count>` - Exact number of iterations (overrides --time)
109
- - `--filter <pattern>` - Run only benchmarks matching regex/substring
110
- - `--worker` / `--no-worker` - Run in isolated worker process (default: true)
111
- - `--profile` - Run once for profiling (single iteration, no warmup)
112
- - `--warmup <count>` - Warmup iterations before measurement (default: 0)
113
- - `--help` - Show all available options
114
-
115
- ### Memory Profiling
116
- - `--gc-stats` - Collect GC allocation/collection stats via --trace-gc-nvp
117
- - `--heap-sample` - Heap sampling allocation attribution (includes garbage)
118
- - `--heap-interval <bytes>` - Sampling interval in bytes (default: 32768)
119
- - `--heap-depth <frames>` - Stack depth to capture (default: 64)
120
- - `--heap-rows <n>` - Number of top allocation sites to show (default: 20)
121
-
122
- ### Output Options
123
- - `--html` - Generate HTML report, start server, and open in browser
124
- - `--export-html <file>` - Export HTML report to file
125
- - `--json <file>` - Export benchmark data to JSON
126
- - `--export-perfetto <file>` - Export Perfetto trace file
127
- - `--speedscope` - Open heap profile in speedscope viewer (via npx)
128
- - `--export-speedscope <file>` - Export heap profile as speedscope JSON
129
-
130
- ## CLI Usage
131
-
132
- ### Filter benchmarks by name
133
-
134
- ```bash
135
- benchforge my-bench.ts --filter "concat"
136
- benchforge my-bench.ts --filter "^parse" --time 2
137
- ```
138
-
139
- ### Profiling with external debuggers
140
-
141
- Use `--profile` to run benchmarks once for attaching external profilers:
142
-
143
- ```bash
144
- # Use with Chrome DevTools profiler
145
- node --inspect-brk $(which benchforge) my-bench.ts --profile
146
-
147
- # Use with other profiling tools
148
- node --prof $(which benchforge) my-bench.ts --profile
149
- ```
150
-
151
- The `--profile` flag executes exactly one iteration with no warmup, making it ideal for debugging and performance profiling.
152
-
153
- ### Key Concepts
154
-
155
- **Setup Functions**: Run once per group and provide shared data to all benchmarks in that group. The data returned by setup is automatically passed as the first parameter to benchmark functions that expect it.
156
-
157
- **Baseline Comparison**: When a baseline is specified, all benchmarks in the group show percentage differences (Δ%) compared to baseline.
158
-
159
- ## Output
160
-
161
- Results are displayed in a formatted table:
162
-
163
- ```
164
- ╔═════════════════╤═══════════════════════════════════════════╤═════════╗
165
- ║ │ time │ ║
166
- ║ name │ mean Δ% CI p50 p99 │ runs ║
167
- ╟─────────────────┼───────────────────────────────────────────┼─────────╢
168
- ║ quicksort │ 0.17 +5.5% [+4.7%, +6.2%] 0.15 0.63 │ 1,134 ║
169
- ║ insertion sort │ 0.24 +25.9% [+25.3%, +27.4%] 0.18 0.36 │ 807 ║
170
- ║ --> native sort │ 0.16 0.15 0.41 │ 1,210 ║
171
- ╚═════════════════╧═══════════════════════════════════════════╧═════════╝
172
- ```
173
-
174
- - **Δ% CI**: Percentage difference from baseline with bootstrap confidence interval
175
-
176
- ### HTML
177
-
178
- The HTML report displays:
179
- - Histogram + KDE: Bar chart showing the distribution
180
- - Time Series: Sample values over iterations
181
- - Allocation Series: Per-sample heap allocation (requires `--heap-sample`)
182
-
183
- ```bash
184
- # Generate HTML report, start server, and open in browser
185
- benchforge my-bench.ts --html
186
- # Press Ctrl+C to exit when done viewing
187
- ```
188
-
189
- ### Perfetto Trace Export
190
-
191
- Export benchmark data as a Perfetto-compatible trace file for detailed analysis:
192
-
193
- ```bash
194
- # Export trace file
195
- benchforge my-bench.ts --export-perfetto trace.json
196
-
197
- # With V8 GC events (automatically merged after exit)
198
- node --expose-gc --trace-events-enabled --trace-event-categories=v8,v8.gc \
199
- benchforge my-bench.ts --export-perfetto trace.json
200
- ```
201
-
202
- View the trace at https://ui.perfetto.dev by dragging the JSON file.
203
-
204
- The trace includes:
205
- - **Heap counter**: Continuous heap usage as a line graph
206
- - **Sample markers**: Each benchmark iteration with timing
207
- - **Pause markers**: V8 optimization pause points
208
- - **V8 GC events**: Automatically merged after process exit (when run with `--trace-events-enabled`)
209
-
210
- ### Speedscope Export
211
-
212
- View heap allocation profiles as flame charts in speedscope:
213
-
214
- ```bash
215
- # Open directly in speedscope (launches via npx)
216
- benchforge my-bench.ts --heap-sample --speedscope
217
-
218
- # Export to file
219
- benchforge my-bench.ts --heap-sample --export-speedscope profile.json
220
- ```
90
+ See [Browser.md](Browser.md) for setup patterns, completion signals, and the CDP
91
+ flow.
221
92
 
222
- Each benchmark with a heap profile becomes a separate speedscope profile, with samples ordered temporally and weighted by allocation size in bytes.
93
+ ## CLI Overview
223
94
 
224
- ### GC Statistics
95
+ Core flags for common workflows. Run `benchforge --help` for the full list.
225
96
 
226
- Collect detailed garbage collection statistics via V8's `--trace-gc-nvp`:
97
+ | Flag | What it does |
98
+ |------|-------------|
99
+ | `--gc-stats` | GC allocation/collection stats |
100
+ | `--alloc` | Heap allocation sampling attribution |
101
+ | `--profile` | V8 CPU time sampling profiler |
102
+ | `--call-counts` | Per-function execution counts |
103
+ | `--stats <list>` | Timing columns to display (default: mean,p50,p99) |
104
+ | `--view` | Open interactive viewer in browser |
105
+ | `--archive [file]` | Archive profiles + sources to `.benchforge` file |
106
+ | `--duration <sec>` | Duration per batch (default: 0.642s) |
107
+ | `--iterations <n>` | Exact iterations (overrides --duration) |
108
+ | `--batches <n>` | Interleaved batches for baseline comparison |
109
+ | `--filter <pattern>` | Run only benchmarks matching regex/substring |
110
+ | `--url <url>` | Benchmark a browser page |
111
+ | `--baseline-url <url>` | A/B comparison in browser |
112
+ | `--equiv-margin <pct>` | Equivalence margin (default: 2%) |
227
113
 
228
- ```bash
229
- # Collect GC allocation/collection stats (requires worker mode)
230
- benchforge my-bench.ts --gc-stats
231
- ```
114
+ See [Profiling.md](Profiling.md) for detailed profiling options and V8 flags.
232
115
 
233
- Adds these columns to the output table:
234
- - **alloc/iter**: Bytes allocated per iteration
235
- - **scav**: Number of scavenge (minor) GCs
236
- - **full**: Number of full (mark-compact) GCs
237
- - **promo%**: Percentage of allocations promoted to old generation
238
- - **pause/iter**: GC pause time per iteration
116
+ ## Key Concepts
239
117
 
240
- ### Heap Sampling
118
+ ### Batching
241
119
 
242
- For allocation profiling including garbage (short-lived objects), use `--heap-sample` mode which uses Node's built-in inspector API:
120
+ When comparing against a baseline, use `--batches` to interleave runs and reduce
121
+ ordering bias. Batch 0 is dropped by default (OS cache warmup). For reliable
122
+ comparisons, use 40+ batches.
243
123
 
244
124
  ```bash
245
- # Basic heap sampling
246
- benchforge my-bench.ts --heap-sample --iterations 100
247
-
248
- # Smaller interval = more samples = better coverage of rare allocations
249
- benchforge my-bench.ts --heap-sample --heap-interval 4096 --iterations 100
250
-
251
- # Verbose output with clickable file:// paths
252
- benchforge my-bench.ts --heap-sample --heap-verbose
253
-
254
- # Control call stack display depth
255
- benchforge my-bench.ts --heap-sample --heap-stack 5
256
- ```
257
-
258
- **CLI Options:**
259
- - `--heap-sample` - Enable heap sampling allocation attribution
260
- - `--heap-interval <bytes>` - Sampling interval in bytes (default: 32768)
261
- - `--heap-depth <frames>` - Maximum stack depth to capture (default: 64)
262
- - `--heap-rows <n>` - Number of top allocation sites to show (default: 20)
263
- - `--heap-stack <n>` - Call stack depth to display (default: 3)
264
- - `--heap-verbose` - Show full file:// paths with line numbers (cmd-clickable)
265
- - `--heap-raw` - Dump every raw heap sample (ordinal, size, stack)
266
- - `--heap-user-only` - Filter to user code only (hide node internals)
267
-
268
- **Output (default compact):**
269
- ```
270
- ─── Heap profile: bevy_env_map ───
271
- Heap allocation sites (top 20, garbage included):
272
- 13.62 MB recursiveResolve <- flattenTreeImport <- bindAndTransform
273
- 12.36 MB nextToken <- parseBlockStatements <- parseCompoundStatement
274
- 5.15 MB coverWithText <- finishElem <- parseVarOrLet
275
-
276
- Total (all): 56.98 MB
277
- Total (user-code): 28.45 MB
278
- Samples: 1,842
279
- ```
280
-
281
- **How V8 Heap Sampling Works:**
282
-
283
- V8's sampling profiler uses Poisson-distributed sampling. When an allocation occurs, V8 probabilistically decides whether to record it based on the sampling interval. Key points:
284
-
285
- 1. **selfSize is scaled**: V8 doesn't report raw sampled bytes. It scales sample counts to estimate total allocations (`selfSize = size × count × scaleFactor`). This means changing `--heap-interval` affects sample count and overhead, but the estimated total converges to the same value.
286
-
287
- 2. **Smaller intervals = better coverage**: With a smaller interval (e.g., 1024 vs 32768), you get more samples and discover more unique allocation sites, especially rare ones. The total estimate stays similar, but you see more of the distribution.
288
-
289
- 3. **User-code only**: The report filters out Node.js internals (`node:`, `internal/`). "Total (user-code)" shows filtered allocations; "Total (all)" shows everything.
290
-
291
- 4. **Measurement window**: Sampling covers benchmark module import + execution. Worker startup and framework init aren't captured (but do appear in `--gc-stats`).
292
-
293
- 5. **Sites are stack-unique**: The same function appears multiple times with different callers. For example, `nextToken` may show up in several entries with different call stacks, each representing a distinct allocation pattern.
294
-
295
- **Limitations:**
296
- - **Function-level attribution only**: V8 reports the function where allocation occurred, not the specific line. The line:column shown is where the function is *defined*.
297
- - **Inlining shifts attribution**: V8 may inline a function into its caller, causing allocations to be reported against the caller instead. If attribution looks wrong, disable inlining to isolate: `node --js-flags='--no-turbo-inlining --no-maglev-inlining' benchforge ...` (or `--jitless` to disable JIT entirely, though this changes performance characteristics).
298
- - **Statistical sampling**: Results vary between runs. More iterations = more stable results.
299
- - **~50% filtered**: Node.js internals account for roughly half of allocations. Use "Total (all)" to see the full picture.
300
-
301
- **When to use which:**
302
- | Tool | Use When |
303
- |------|----------|
304
- | `--gc-stats` | Need total allocation/collection bytes, GC pause times |
305
- | `--heap-sample` | Need to identify which functions allocate the most |
306
- | Both | Cross-reference attribution with totals |
307
-
308
- ## Requirements
309
-
310
- - Node.js 22.6+ (for native TypeScript support)
311
- - Use `--expose-gc --allow-natives-syntax` flags for garbage collection monitoring and V8 native functions
312
-
313
- ## Adaptive Mode (Experimental)
314
-
315
- Adaptive mode (`--adaptive`) automatically adjusts iteration count until measurements stabilize. The algorithm is still being tuned — use `--help` for available options.
316
-
317
- ## Interpreting Results
318
-
319
- ### Baseline Comparison (Δ% CI)
320
- ```
321
- 0.17 +5.5% [+4.7%, +6.2%]
322
- ```
323
- The benchmark is 5.5% slower than baseline, with a bootstrap confidence interval of [+4.7%, +6.2%].
324
-
325
- ### Percentiles
326
- ```
327
- p50: 0.15ms, p99: 0.27ms
125
+ benchforge sorting.ts --batches 40 --duration 2
328
126
  ```
329
- 50% of runs completed in ≤0.15ms and 99% in ≤0.27ms. Use percentiles when you care about consistency and tail latencies.
330
127
 
331
- ## Understanding GC Time Measurements
128
+ See [Statistics.md](Statistics.md) for the full explanation of batched
129
+ execution, block bootstrap, and Tukey trimming.
332
130
 
333
- ### GC Duration in Node.js Performance Hooks
131
+ ### Baseline Comparison
334
132
 
335
- The `duration` field in GC PerformanceEntry records **stop-the-world pause time** - the time when JavaScript execution is actually blocked. This does NOT include:
133
+ When a group has a `baseline`, all benchmarks show Δ% with a bootstrap
134
+ confidence interval. The result is classified as faster, slower, equivalent, or
135
+ inconclusive based on the equivalence margin.
336
136
 
337
- 1. **Concurrent GC work** done in parallel threads (concurrent marking, sweeping)
338
- 2. **Performance degradation** from CPU contention and cache effects
339
- 3. **Total GC overhead** including preparation and cleanup
137
+ See [Statistics.md](Statistics.md#equivalence-margin) for how the four verdicts
138
+ work and how to calibrate the margin.
340
139
 
341
- ### Key Findings
140
+ ## Further Reading
342
141
 
343
- 1. **Multiple GC Events**: A single `gc()` call can trigger multiple GC events that are recorded separately
344
- 2. **Incremental GC**: V8 breaks up GC work into smaller increments to reduce pause times
345
- 3. **Duration < Impact**: The recorded duration is often much less than the actual performance impact
142
+ - [Node.md](Node.md) -- Worker mode, module imports, custom metric sections,
143
+ external debugger attachment
144
+ - [Browser.md](Browser.md) -- Bench function and page-load modes, completion
145
+ signals, CDP flow
146
+ - [Profiling.md](Profiling.md) -- Allocation sampling, GC stats, V8 flags,
147
+ Perfetto export
148
+ - [Statistics.md](Statistics.md) -- Column selection (`--stats`), bootstrap
149
+ methods, batching, equivalence testing
150
+ - [README-tachometer.md](README-tachometer.md) -- Coming from tachometer
package/bin/benchforge CHANGED
@@ -1,3 +1,2 @@
1
1
  #!/usr/bin/env -S node --experimental-strip-types
2
- import { runDefaultBench } from "../src/index.ts";
3
- await runDefaultBench();
2
+ import "../src/bin/benchforge.ts";
@@ -0,0 +1,145 @@
1
+ import { b as splitByOffsets, g as percentile, p as median, t as average, w as tukeyFences } from "./StatisticalUtils-BD92crgM.mjs";
2
+ import { a as formatSignedPercent, c as timeMs, u as colors } from "./Formatters-BWj3d4sv.mjs";
3
+ import { resolve } from "node:path";
4
+ import { readFile } from "node:fs/promises";
5
//#region src/cli/AnalyzeArchive.ts
/** Diagnostic analysis of a .benchforge archive's per-batch statistics. */
// Terminal color helpers, destructured once from the shared colors object.
const { bold, dim, red, green, yellow } = colors;
// IQR fence multiplier for flagging outlier batch means (3x IQR = lenient,
// only extreme blocks are flagged). Consumed by tukeyFences in printSideTrim.
const blockFenceMultiplier = 3;
9
/** Read an archive and print per-batch diagnostic analysis.
 * (for benchforge debugging/development purposes, not a general user tool)
 */
async function analyzeArchive(filePath) {
	const raw = await readFile(resolve(filePath), "utf-8");
	const { report } = JSON.parse(raw);
	// Bail out when the archive carries no report or no groups at all.
	if (!(report && report.groups && report.groups.length)) {
		console.error("No report data found in archive.");
		return;
	}
	// Batch count recorded from the original CLI invocation, if present.
	const batchCount = report.metadata?.cliArgs?.batches;
	for (const group of report.groups) {
		analyzeGroup(group, batchCount);
	}
}
22
/** Print analysis for all benchmarks in a group. */
function analyzeGroup(group, batchCount) {
	console.log(bold(`\n=== ${group.name} ===\n`));
	const { baseline, benchmarks } = group;
	benchmarks.forEach((bench) => analyzeBenchmark(bench, baseline, batchCount));
}
28
/** Print per-batch analysis for one benchmark entry. */
function analyzeBenchmark(bench, baseline, batchCount) {
	// Prefer explicit offsets from the archive; otherwise infer equal splits.
	const benchOffsets = bench.batchOffsets ?? inferOffsets(bench.samples, batchCount);
	if (!benchOffsets?.length) {
		console.log(dim(" No batch data (single batch run)"));
		return;
	}
	const baseOffsets = baseline?.batchOffsets ?? inferOffsets(baseline?.samples, batchCount);
	const batches = splitByOffsets(bench.samples, benchOffsets);
	let baseBatches;
	if (baseOffsets && baseline) {
		baseBatches = splitByOffsets(baseline.samples, baseOffsets);
	}
	printBatchHeader(bench, baseline, batches.length);
	printBatchTable(batches, baseBatches);
	// Paired diagnostics only make sense when both sides have matching batches.
	if (baseBatches && baseBatches.length === batches.length) {
		printOrderEffect(batches, baseBatches);
		printPairedDeltas(batches, baseBatches);
		printTrimmedBlocks(batches, baseBatches, bench.name);
	}
	console.log();
}
47
/** Infer equal-sized batch offsets when batchOffsets isn't in the archive.
 * Returns undefined when samples are missing or there is at most one batch.
 */
function inferOffsets(samples, batchCount) {
	const total = samples?.length;
	if (!total || !batchCount || batchCount <= 1) return undefined;
	const batchSize = Math.floor(total / batchCount);
	const offsets = [];
	for (let i = 0; i < batchCount; i++) {
		offsets.push(i * batchSize);
	}
	return offsets;
}
53
/** Print benchmark name with batch/run summary. */
function printBatchHeader(bench, baseline, nBatches) {
	const baselineRuns = baseline?.samples?.length;
	// "X+Y runs" when a baseline exists, otherwise just "X runs".
	const runText = baselineRuns
		? `${bench.samples.length}+${baselineRuns} runs`
		: `${bench.samples.length} runs`;
	let perBatch = "?";
	if (bench.totalTime) {
		perBatch = (bench.totalTime / nBatches).toFixed(1) + "s";
	}
	const info = dim(` (${nBatches} batches, ${runText}, ~${perBatch}/batch)`);
	console.log(bold(` ${bench.name}`) + info);
}
60
/** Print per-batch median table for current and baseline. */
function printBatchTable(benches, baselines) {
	const batchCol = "batch".padEnd(7);
	const countCol = "n".padStart(4);
	const header = baselines
		? ` ${batchCol} ${countCol} ${"current".padStart(10)} ${"baseline".padStart(10)} ${"delta".padStart(8)}`
		: ` ${batchCol} ${countCol} ${"median".padStart(10)}`;
	console.log(dim(header));
	benches.forEach((batch, i) => {
		const count = String(batch.length).padStart(4);
		const med = (timeMs(median(batch)) ?? "").padStart(10);
		const label = String(i).padEnd(7);
		const base = baselines?.[i];
		if (!base) {
			console.log(` ${label} ${count} ${med}`);
			return;
		}
		const baseMed = (timeMs(median(base)) ?? "").padStart(10);
		const delta = formatDelta(medianDelta(batch, base)).padStart(8);
		// Even batches ran baseline first (B>C), odd batches current first (C>B).
		const order = dim(i % 2 === 0 ? " B>C" : " C>B");
		console.log(` ${label} ${count} ${med} ${baseMed} ${delta}${order}`);
	});
}
78
/** Analyze order effect: does running second make a difference? */
function printOrderEffect(benches, baselines) {
	// Even batches: baseline runs first (B>C); odd batches: current first (C>B).
	const deltas = benches.map((batch, i) => medianDelta(batch, baselines[i]));
	const baseFirst = [];
	const currFirst = [];
	deltas.forEach((d, i) => {
		(i % 2 === 0 ? baseFirst : currFirst).push(d);
	});
	const baseFirstAvg = baseFirst.length ? average(baseFirst) : 0;
	const currFirstAvg = currFirst.length ? average(currFirst) : 0;
	console.log();
	console.log(bold(" Order effect:"));
	console.log(` baseline first (B>C): avg delta ${formatDelta(baseFirstAvg)}` + dim(` (${baseFirst.length} batches)`));
	console.log(` current first (C>B): avg delta ${formatDelta(currFirstAvg)}` + dim(` (${currFirst.length} batches)`));
	// A large gap between the two orderings suggests interference between runs.
	const gap = Math.abs(baseFirstAvg - currFirstAvg);
	if (gap > 2) {
		console.log(yellow(` ==> ${gap.toFixed(1)}% order effect detected`));
	} else {
		console.log(dim(` order effect: ${gap.toFixed(1)}% (small)`));
	}
}
93
/** Print paired batch deltas and their consistency. */
function printPairedDeltas(benches, baselines) {
	const deltas = benches.map((batch, i) => medianDelta(batch, baselines[i]));
	// Count how many batches lean each way; exact zeros count for neither side.
	let slower = 0;
	let faster = 0;
	for (const d of deltas) {
		if (d > 0) slower++;
		else if (d < 0) faster++;
	}
	const meanDelta = average(deltas);
	const medDelta = median(deltas);
	const iqr = percentile(deltas, 0.75) - percentile(deltas, 0.25);
	console.log();
	console.log(bold(" Paired deltas:"));
	console.log(` mean: ${formatDelta(meanDelta)} median: ${formatDelta(medDelta)} IQR: ${iqr.toFixed(1)}%`);
	console.log(` direction: ${slower} slower, ${faster} faster` + dim(` (${deltas.length} batches)`));
	if (slower > 0 && faster > 0) {
		console.log(green(" ==> batches disagree on direction"));
	} else {
		console.log(red(" ==> all batches agree on direction (systematic bias?)"));
	}
}
108
/** Show which blocks would be Tukey-trimmed per side. */
function printTrimmedBlocks(benches, baselines, name) {
	console.log();
	console.log(bold(" Trimmed blocks:"));
	// Reduce each batch to its mean, then report trimming per side
	// (baseline side first, then the current benchmark).
	const baselineMeans = baselines.map((block) => average(block));
	const currentMeans = benches.map((block) => average(block));
	printSideTrim("baseline", baselineMeans);
	printSideTrim(name, currentMeans);
}
117
/** Color a percent delta: red if >1%, green if <-1%. */
function formatDelta(pct) {
	const text = formatSignedPercent(pct);
	if (pct > 1) {
		return red(text);
	}
	return pct < -1 ? green(text) : text;
}
124
/** Percent delta between two medians (positive = current is slower). */
function medianDelta(samples, baseSamples) {
	const currentMed = median(samples);
	const baselineMed = median(baseSamples);
	const ratio = (currentMed - baselineMed) / baselineMed;
	return ratio * 100;
}
130
/** Print trimming info for one side using 3x IQR fences. */
function printSideTrim(label, means) {
	// Only the high fence matters here: we flag suspiciously slow blocks.
	const [, hi] = tukeyFences(means, blockFenceMultiplier);
	const trimmed = [];
	means.forEach((mean, i) => {
		if (mean > hi) trimmed.push(i);
	});
	if (trimmed.length === 0) {
		console.log(dim(` ${label}: 0 trimmed`));
		return;
	}
	const vals = trimmed.map((i) => timeMs(means[i]) ?? "?").join(", ");
	const fence = `hi: ${timeMs(hi)}`;
	console.log(` ${label}: ${yellow(`${trimmed.length} trimmed`)} (${vals})` + dim(` fence: ${fence}`));
}
142
+ //#endregion
143
+ export { analyzeArchive };
144
+
145
+ //# sourceMappingURL=AnalyzeArchive-8NCJhmhS.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AnalyzeArchive-8NCJhmhS.mjs","names":[],"sources":["../src/cli/AnalyzeArchive.ts"],"sourcesContent":["/** Diagnostic analysis of a .benchforge archive's per-batch statistics. */\nimport { readFile } from \"node:fs/promises\";\nimport { resolve } from \"node:path\";\nimport colors from \"../report/Colors.ts\";\nimport { formatSignedPercent, timeMs } from \"../report/Formatters.ts\";\nimport {\n average,\n median,\n percentile,\n splitByOffsets,\n tukeyFences,\n} from \"../stats/StatisticalUtils.ts\";\nimport type { BenchmarkEntry, BenchmarkGroup } from \"../viewer/ReportData.ts\";\n\nconst { bold, dim, red, green, yellow } = colors;\n\nconst blockFenceMultiplier = 3;\n\n/** Read an archive and print per-batch diagnostic analysis.\n * (for benchforge debugging/development purposes, not a general user tool)\n */\nexport async function analyzeArchive(filePath: string): Promise<void> {\n const absPath = resolve(filePath);\n const content = await readFile(absPath, \"utf-8\");\n const { report } = JSON.parse(content);\n if (!report?.groups?.length) {\n console.error(\"No report data found in archive.\");\n return;\n }\n const batchCount = report.metadata?.cliArgs?.batches as number | undefined;\n for (const group of report.groups) {\n analyzeGroup(group, batchCount);\n }\n}\n\n/** Print analysis for all benchmarks in a group. */\nfunction analyzeGroup(group: BenchmarkGroup, batchCount?: number): void {\n console.log(bold(`\\n=== ${group.name} ===\\n`));\n\n const baseline = group.baseline;\n for (const bench of group.benchmarks) {\n analyzeBenchmark(bench, baseline, batchCount);\n }\n}\n\n/** Print per-batch analysis for one benchmark entry. */\nfunction analyzeBenchmark(\n bench: BenchmarkEntry,\n baseline: BenchmarkEntry | undefined,\n batchCount?: number,\n): void {\n const bOffsets =\n bench.batchOffsets ?? inferOffsets(bench.samples, batchCount);\n const baseOffsets =\n baseline?.batchOffsets ?? 
inferOffsets(baseline?.samples, batchCount);\n if (!bOffsets?.length) {\n console.log(dim(\" No batch data (single batch run)\"));\n return;\n }\n\n const batches = splitByOffsets(bench.samples, bOffsets);\n const baseBatches =\n baseOffsets && baseline\n ? splitByOffsets(baseline.samples, baseOffsets)\n : undefined;\n\n printBatchHeader(bench, baseline, batches.length);\n printBatchTable(batches, baseBatches);\n\n if (baseBatches && baseBatches.length === batches.length) {\n printOrderEffect(batches, baseBatches);\n printPairedDeltas(batches, baseBatches);\n printTrimmedBlocks(batches, baseBatches, bench.name);\n }\n console.log();\n}\n\n/** Infer equal-sized batch offsets when batchOffsets isn't in the archive. */\nfunction inferOffsets(\n samples: number[] | undefined,\n batchCount?: number,\n): number[] | undefined {\n if (!samples?.length || !batchCount || batchCount <= 1) return undefined;\n const size = Math.floor(samples.length / batchCount);\n return Array.from({ length: batchCount }, (_, i) => i * size);\n}\n\n/** Print benchmark name with batch/run summary. */\nfunction printBatchHeader(\n bench: BenchmarkEntry,\n baseline: BenchmarkEntry | undefined,\n nBatches: number,\n): void {\n const baseRuns = baseline?.samples?.length;\n const dur = bench.totalTime\n ? (bench.totalTime / nBatches).toFixed(1) + \"s\"\n : \"?\";\n const runs = baseRuns\n ? `${bench.samples.length}+${baseRuns} runs`\n : `${bench.samples.length} runs`;\n const info = dim(` (${nBatches} batches, ${runs}, ~${dur}/batch)`);\n console.log(bold(` ${bench.name}`) + info);\n}\n\n/** Print per-batch median table for current and baseline. */\nfunction printBatchTable(\n benches: number[][],\n baselines: number[][] | undefined,\n): void {\n const header = baselines\n ? 
` ${\"batch\".padEnd(7)} ${\"n\".padStart(4)} ${\"current\".padStart(10)} ${\"baseline\".padStart(10)} ${\"delta\".padStart(8)}`\n : ` ${\"batch\".padEnd(7)} ${\"n\".padStart(4)} ${\"median\".padStart(10)}`;\n console.log(dim(header));\n\n for (let i = 0; i < benches.length; i++) {\n const n = String(benches[i].length).padStart(4);\n const med = (timeMs(median(benches[i])) ?? \"\").padStart(10);\n const idx = String(i).padEnd(7);\n if (!baselines?.[i]) {\n console.log(` ${idx} ${n} ${med}`);\n continue;\n }\n const baseMed = (timeMs(median(baselines[i])) ?? \"\").padStart(10);\n const delta = formatDelta(medianDelta(benches[i], baselines[i])).padStart(\n 8,\n );\n const order = i % 2 === 0 ? dim(\" B>C\") : dim(\" C>B\");\n console.log(` ${idx} ${n} ${med} ${baseMed} ${delta}${order}`);\n }\n}\n\n/** Analyze order effect: does running second make a difference? */\nfunction printOrderEffect(benches: number[][], baselines: number[][]): void {\n // Even batches: baseline runs first (B>C), odd: current runs first (C>B)\n const deltas = benches.map((b, i) => medianDelta(b, baselines[i]));\n const baseFirstDeltas = deltas.filter((_, i) => i % 2 === 0);\n const currFirstDeltas = deltas.filter((_, i) => i % 2 === 1);\n const baseFirstAvg = baseFirstDeltas.length ? average(baseFirstDeltas) : 0;\n const currFirstAvg = currFirstDeltas.length ? 
average(currFirstDeltas) : 0;\n\n console.log();\n console.log(bold(\" Order effect:\"));\n console.log(\n ` baseline first (B>C): avg delta ${formatDelta(baseFirstAvg)}` +\n dim(` (${baseFirstDeltas.length} batches)`),\n );\n console.log(\n ` current first (C>B): avg delta ${formatDelta(currFirstAvg)}` +\n dim(` (${currFirstDeltas.length} batches)`),\n );\n\n const diff = Math.abs(baseFirstAvg - currFirstAvg);\n if (diff > 2) {\n console.log(yellow(` ==> ${diff.toFixed(1)}% order effect detected`));\n } else {\n console.log(dim(` order effect: ${diff.toFixed(1)}% (small)`));\n }\n}\n\n/** Print paired batch deltas and their consistency. */\nfunction printPairedDeltas(benches: number[][], baselines: number[][]): void {\n const deltas = benches.map((b, i) => medianDelta(b, baselines[i]));\n\n const positive = deltas.filter(d => d > 0).length;\n const negative = deltas.filter(d => d < 0).length;\n const avgDelta = average(deltas);\n const med = median(deltas);\n const spread = percentile(deltas, 0.75) - percentile(deltas, 0.25);\n\n console.log();\n console.log(bold(\" Paired deltas:\"));\n console.log(\n ` mean: ${formatDelta(avgDelta)} median: ${formatDelta(med)} IQR: ${spread.toFixed(1)}%`,\n );\n console.log(\n ` direction: ${positive} slower, ${negative} faster` +\n dim(` (${deltas.length} batches)`),\n );\n\n if (positive > 0 && negative > 0) {\n console.log(green(\" ==> batches disagree on direction\"));\n } else {\n console.log(\n red(\" ==> all batches agree on direction (systematic bias?)\"),\n );\n }\n}\n\n/** Show which blocks would be Tukey-trimmed per side. 
*/\nfunction printTrimmedBlocks(\n benches: number[][],\n baselines: number[][],\n name: string,\n): void {\n console.log();\n console.log(bold(\" Trimmed blocks:\"));\n const baseMeans = baselines.map(b => average(b));\n const benchMeans = benches.map(b => average(b));\n printSideTrim(\"baseline\", baseMeans);\n printSideTrim(name, benchMeans);\n}\n\n/** Color a percent delta: red if >1%, green if <-1%. */\nfunction formatDelta(pct: number): string {\n const str = formatSignedPercent(pct);\n if (pct > 1) return red(str);\n if (pct < -1) return green(str);\n return str;\n}\n\n/** Percent delta between two medians. */\nfunction medianDelta(samples: number[], baseSamples: number[]): number {\n const med = median(samples);\n const baseMed = median(baseSamples);\n return ((med - baseMed) / baseMed) * 100;\n}\n\n/** Print trimming info for one side using 3x IQR fences. */\nfunction printSideTrim(label: string, means: number[]): void {\n const [, hi] = tukeyFences(means, blockFenceMultiplier);\n const indices = means.map((v, i) => (v > hi ? i : -1)).filter(i => i >= 0);\n if (indices.length === 0) {\n console.log(dim(` ${label}: 0 trimmed`));\n return;\n }\n const vals = indices.map(i => timeMs(means[i]) ?? 
\"?\").join(\", \");\n const fence = `hi: ${timeMs(hi)}`;\n console.log(\n ` ${label}: ${yellow(`${indices.length} trimmed`)} (${vals})` +\n dim(` fence: ${fence}`),\n );\n}\n"],"mappings":";;;;;;AAcA,MAAM,EAAE,MAAM,KAAK,KAAK,OAAO,WAAW;AAE1C,MAAM,uBAAuB;;;;AAK7B,eAAsB,eAAe,UAAiC;CAEpE,MAAM,UAAU,MAAM,SADN,QAAQ,SAAS,EACO,QAAQ;CAChD,MAAM,EAAE,WAAW,KAAK,MAAM,QAAQ;AACtC,KAAI,CAAC,QAAQ,QAAQ,QAAQ;AAC3B,UAAQ,MAAM,mCAAmC;AACjD;;CAEF,MAAM,aAAa,OAAO,UAAU,SAAS;AAC7C,MAAK,MAAM,SAAS,OAAO,OACzB,cAAa,OAAO,WAAW;;;AAKnC,SAAS,aAAa,OAAuB,YAA2B;AACtE,SAAQ,IAAI,KAAK,SAAS,MAAM,KAAK,QAAQ,CAAC;CAE9C,MAAM,WAAW,MAAM;AACvB,MAAK,MAAM,SAAS,MAAM,WACxB,kBAAiB,OAAO,UAAU,WAAW;;;AAKjD,SAAS,iBACP,OACA,UACA,YACM;CACN,MAAM,WACJ,MAAM,gBAAgB,aAAa,MAAM,SAAS,WAAW;CAC/D,MAAM,cACJ,UAAU,gBAAgB,aAAa,UAAU,SAAS,WAAW;AACvE,KAAI,CAAC,UAAU,QAAQ;AACrB,UAAQ,IAAI,IAAI,qCAAqC,CAAC;AACtD;;CAGF,MAAM,UAAU,eAAe,MAAM,SAAS,SAAS;CACvD,MAAM,cACJ,eAAe,WACX,eAAe,SAAS,SAAS,YAAY,GAC7C,KAAA;AAEN,kBAAiB,OAAO,UAAU,QAAQ,OAAO;AACjD,iBAAgB,SAAS,YAAY;AAErC,KAAI,eAAe,YAAY,WAAW,QAAQ,QAAQ;AACxD,mBAAiB,SAAS,YAAY;AACtC,oBAAkB,SAAS,YAAY;AACvC,qBAAmB,SAAS,aAAa,MAAM,KAAK;;AAEtD,SAAQ,KAAK;;;AAIf,SAAS,aACP,SACA,YACsB;AACtB,KAAI,CAAC,SAAS,UAAU,CAAC,cAAc,cAAc,EAAG,QAAO,KAAA;CAC/D,MAAM,OAAO,KAAK,MAAM,QAAQ,SAAS,WAAW;AACpD,QAAO,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,GAAG,MAAM,IAAI,KAAK;;;AAI/D,SAAS,iBACP,OACA,UACA,UACM;CACN,MAAM,WAAW,UAAU,SAAS;CACpC,MAAM,MAAM,MAAM,aACb,MAAM,YAAY,UAAU,QAAQ,EAAE,GAAG,MAC1C;CAIJ,MAAM,OAAO,IAAI,KAAK,SAAS,YAHlB,WACT,GAAG,MAAM,QAAQ,OAAO,GAAG,SAAS,SACpC,GAAG,MAAM,QAAQ,OAAO,OACoB,KAAK,IAAI,SAAS;AAClE,SAAQ,IAAI,KAAK,KAAK,MAAM,OAAO,GAAG,KAAK;;;AAI7C,SAAS,gBACP,SACA,WACM;CACN,MAAM,SAAS,YACX,KAAK,QAAQ,OAAO,EAAE,CAAC,GAAG,IAAI,SAAS,EAAE,CAAC,IAAI,UAAU,SAAS,GAAG,CAAC,IAAI,WAAW,SAAS,GAAG,CAAC,IAAI,QAAQ,SAAS,EAAE,KACxH,KAAK,QAAQ,OAAO,EAAE,CAAC,GAAG,IAAI,SAAS,EAAE,CAAC,IAAI,SAAS,SAAS,GAAG;AACvE,SAAQ,IAAI,IAAI,OAAO,CAAC;AAExB,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACvC,MAAM,IAAI,OAAO,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE;EAC/C,MAAM,OAAO,OAAO,OAAO,QAAQ,
GAAG,CAAC,IAAI,IAAI,SAAS,GAAG;EAC3D,MAAM,MAAM,OAAO,EAAE,CAAC,OAAO,EAAE;AAC/B,MAAI,CAAC,YAAY,IAAI;AACnB,WAAQ,IAAI,KAAK,IAAI,GAAG,EAAE,IAAI,MAAM;AACpC;;EAEF,MAAM,WAAW,OAAO,OAAO,UAAU,GAAG,CAAC,IAAI,IAAI,SAAS,GAAG;EACjE,MAAM,QAAQ,YAAY,YAAY,QAAQ,IAAI,UAAU,GAAG,CAAC,CAAC,SAC/D,EACD;EACD,MAAM,QAAQ,IAAI,MAAM,IAAI,IAAI,OAAO,GAAG,IAAI,OAAO;AACrD,UAAQ,IAAI,KAAK,IAAI,GAAG,EAAE,IAAI,IAAI,IAAI,QAAQ,IAAI,QAAQ,QAAQ;;;;AAKtE,SAAS,iBAAiB,SAAqB,WAA6B;CAE1E,MAAM,SAAS,QAAQ,KAAK,GAAG,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC;CAClE,MAAM,kBAAkB,OAAO,QAAQ,GAAG,MAAM,IAAI,MAAM,EAAE;CAC5D,MAAM,kBAAkB,OAAO,QAAQ,GAAG,MAAM,IAAI,MAAM,EAAE;CAC5D,MAAM,eAAe,gBAAgB,SAAS,QAAQ,gBAAgB,GAAG;CACzE,MAAM,eAAe,gBAAgB,SAAS,QAAQ,gBAAgB,GAAG;AAEzE,SAAQ,KAAK;AACb,SAAQ,IAAI,KAAK,kBAAkB,CAAC;AACpC,SAAQ,IACN,uCAAuC,YAAY,aAAa,KAC9D,IAAI,KAAK,gBAAgB,OAAO,WAAW,CAC9C;AACD,SAAQ,IACN,uCAAuC,YAAY,aAAa,KAC9D,IAAI,KAAK,gBAAgB,OAAO,WAAW,CAC9C;CAED,MAAM,OAAO,KAAK,IAAI,eAAe,aAAa;AAClD,KAAI,OAAO,EACT,SAAQ,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC,yBAAyB,CAAC;KAExE,SAAQ,IAAI,IAAI,qBAAqB,KAAK,QAAQ,EAAE,CAAC,WAAW,CAAC;;;AAKrE,SAAS,kBAAkB,SAAqB,WAA6B;CAC3E,MAAM,SAAS,QAAQ,KAAK,GAAG,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC;CAElE,MAAM,WAAW,OAAO,QAAO,MAAK,IAAI,EAAE,CAAC;CAC3C,MAAM,WAAW,OAAO,QAAO,MAAK,IAAI,EAAE,CAAC;CAC3C,MAAM,WAAW,QAAQ,OAAO;CAChC,MAAM,MAAM,OAAO,OAAO;CAC1B,MAAM,SAAS,WAAW,QAAQ,IAAK,GAAG,WAAW,QAAQ,IAAK;AAElE,SAAQ,KAAK;AACb,SAAQ,IAAI,KAAK,mBAAmB,CAAC;AACrC,SAAQ,IACN,aAAa,YAAY,SAAS,CAAC,YAAY,YAAY,IAAI,CAAC,SAAS,OAAO,QAAQ,EAAE,CAAC,GAC5F;AACD,SAAQ,IACN,kBAAkB,SAAS,WAAW,SAAS,WAC7C,IAAI,KAAK,OAAO,OAAO,WAAW,CACrC;AAED,KAAI,WAAW,KAAK,WAAW,EAC7B,SAAQ,IAAI,MAAM,wCAAwC,CAAC;KAE3D,SAAQ,IACN,IAAI,4DAA4D,CACjE;;;AAKL,SAAS,mBACP,SACA,WACA,MACM;AACN,SAAQ,KAAK;AACb,SAAQ,IAAI,KAAK,oBAAoB,CAAC;CACtC,MAAM,YAAY,UAAU,KAAI,MAAK,QAAQ,EAAE,CAAC;CAChD,MAAM,aAAa,QAAQ,KAAI,MAAK,QAAQ,EAAE,CAAC;AAC/C,eAAc,YAAY,UAAU;AACpC,eAAc,MAAM,WAAW;;;AAIjC,SAAS,YAAY,KAAqB;CACxC,MAAM,MAAM,oBAAoB,IAAI;AACpC,KAAI,MAAM,EAAG,QAAO,IAAI,IAAI;AAC5B,KAAI,MAAM,GAAI,QAAO,MAAM,IAAI;AAC/B,QAAO;;;AAIT,SAAS,
YAAY,SAAmB,aAA+B;CACrE,MAAM,MAAM,OAAO,QAAQ;CAC3B,MAAM,UAAU,OAAO,YAAY;AACnC,SAAS,MAAM,WAAW,UAAW;;;AAIvC,SAAS,cAAc,OAAe,OAAuB;CAC3D,MAAM,GAAG,MAAM,YAAY,OAAO,qBAAqB;CACvD,MAAM,UAAU,MAAM,KAAK,GAAG,MAAO,IAAI,KAAK,IAAI,GAAI,CAAC,QAAO,MAAK,KAAK,EAAE;AAC1E,KAAI,QAAQ,WAAW,GAAG;AACxB,UAAQ,IAAI,IAAI,OAAO,MAAM,aAAa,CAAC;AAC3C;;CAEF,MAAM,OAAO,QAAQ,KAAI,MAAK,OAAO,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,KAAK;CACjE,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC/B,SAAQ,IACN,OAAO,MAAM,IAAI,OAAO,GAAG,QAAQ,OAAO,UAAU,CAAC,IAAI,KAAK,KAC5D,IAAI,YAAY,QAAQ,CAC3B"}