benchforge 0.1.9 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +99 -260
- package/bin/benchforge +1 -2
- package/dist/AnalyzeArchive-8NCJhmhS.mjs +145 -0
- package/dist/AnalyzeArchive-8NCJhmhS.mjs.map +1 -0
- package/dist/BenchMatrix-BZVrBB_h.mjs +1050 -0
- package/dist/BenchMatrix-BZVrBB_h.mjs.map +1 -0
- package/dist/BenchRunner-DglX1NOn.d.mts +302 -0
- package/dist/CoverageSampler-D5T9DRqe.mjs +27 -0
- package/dist/CoverageSampler-D5T9DRqe.mjs.map +1 -0
- package/dist/Formatters-BWj3d4sv.mjs +95 -0
- package/dist/Formatters-BWj3d4sv.mjs.map +1 -0
- package/dist/{HeapSampler-B8dtKHn1.mjs → HeapSampler-Dq-hpXem.mjs} +4 -4
- package/dist/HeapSampler-Dq-hpXem.mjs.map +1 -0
- package/dist/RunBenchCLI-C17DrJz8.mjs +3075 -0
- package/dist/RunBenchCLI-C17DrJz8.mjs.map +1 -0
- package/dist/StatisticalUtils-BD92crgM.mjs +255 -0
- package/dist/StatisticalUtils-BD92crgM.mjs.map +1 -0
- package/dist/TimeSampler-Ds8n7l2B.mjs +29 -0
- package/dist/TimeSampler-Ds8n7l2B.mjs.map +1 -0
- package/dist/ViewerServer-BJhdnxlN.mjs +639 -0
- package/dist/ViewerServer-BJhdnxlN.mjs.map +1 -0
- package/dist/ViewerServer-CuMNdNBz.mjs +2 -0
- package/dist/bin/benchforge.mjs +4 -5
- package/dist/bin/benchforge.mjs.map +1 -1
- package/dist/index.d.mts +731 -522
- package/dist/index.mjs +98 -3
- package/dist/index.mjs.map +1 -0
- package/dist/runners/WorkerScript.d.mts +12 -4
- package/dist/runners/WorkerScript.mjs +92 -120
- package/dist/runners/WorkerScript.mjs.map +1 -1
- package/dist/viewer/assets/CIPlot-BkOvMoMa.js +1 -0
- package/dist/viewer/assets/HistogramKde-CmSyUFY0.js +1 -0
- package/dist/viewer/assets/LegendUtils-BJpbn_jr.js +55 -0
- package/dist/viewer/assets/SampleTimeSeries-C4VBhXr3.js +1 -0
- package/dist/viewer/assets/index-Br9bp_cX.js +153 -0
- package/dist/viewer/assets/index-NzXXe_CC.css +1 -0
- package/dist/viewer/index.html +19 -0
- package/dist/viewer/speedscope/LICENSE +21 -0
- package/dist/viewer/speedscope/SourceCodePro-Regular.ttf-ILST5JV6.woff2 +0 -0
- package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js +2 -0
- package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js.map +7 -0
- package/dist/viewer/speedscope/favicon-16x16-VSI62OPJ.png +0 -0
- package/dist/viewer/speedscope/favicon-32x32-3EB2YCUY.png +0 -0
- package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js +2 -0
- package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js.map +7 -0
- package/dist/viewer/speedscope/favicon-FOKUP5Y5.ico +0 -0
- package/dist/viewer/speedscope/favicon-M34RF7BI.js +2 -0
- package/dist/viewer/speedscope/favicon-M34RF7BI.js.map +7 -0
- package/dist/viewer/speedscope/file-format-schema.json +274 -0
- package/dist/viewer/speedscope/index.html +19 -0
- package/dist/viewer/speedscope/jfrview_bg-BLJXNNQB.wasm +0 -0
- package/dist/viewer/speedscope/perf-vertx-stacks-01-collapsed-all-ZNUIGAJL.txt +199 -0
- package/dist/viewer/speedscope/release.txt +3 -0
- package/dist/viewer/speedscope/source-code-pro.LICENSE.md +93 -0
- package/dist/viewer/speedscope/speedscope-GHPHNKXC.css +2 -0
- package/dist/viewer/speedscope/speedscope-GHPHNKXC.css.map +7 -0
- package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js +212 -0
- package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js.map +7 -0
- package/package.json +52 -26
- package/src/bin/benchforge.ts +2 -2
- package/src/cli/AnalyzeArchive.ts +232 -0
- package/src/cli/BrowserBench.ts +322 -0
- package/src/cli/CliArgs.ts +164 -48
- package/src/cli/CliExport.ts +179 -0
- package/src/cli/CliOptions.ts +147 -0
- package/src/cli/CliReport.ts +197 -0
- package/src/cli/FilterBenchmarks.ts +18 -30
- package/src/cli/RunBenchCLI.ts +138 -844
- package/src/cli/SuiteRunner.ts +160 -0
- package/src/cli/ViewerServer.ts +282 -0
- package/src/export/AllocExport.ts +121 -0
- package/src/export/ArchiveExport.ts +146 -0
- package/src/export/ArchiveFormat.ts +50 -0
- package/src/export/CoverageExport.ts +148 -0
- package/src/export/EditorUri.ts +10 -0
- package/src/export/PerfettoExport.ts +91 -126
- package/src/export/SpeedscopeTypes.ts +98 -0
- package/src/export/TimeExport.ts +115 -0
- package/src/index.ts +87 -62
- package/src/matrix/BenchMatrix.ts +230 -0
- package/src/matrix/CaseLoader.ts +8 -6
- package/src/matrix/MatrixDirRunner.ts +153 -0
- package/src/matrix/MatrixFilter.ts +55 -53
- package/src/matrix/MatrixInlineRunner.ts +50 -0
- package/src/matrix/MatrixReport.ts +94 -254
- package/src/matrix/VariantLoader.ts +9 -9
- package/src/profiling/browser/BenchLoop.ts +51 -0
- package/src/profiling/browser/BrowserCDP.ts +133 -0
- package/src/profiling/browser/BrowserGcStats.ts +33 -0
- package/src/profiling/browser/BrowserProfiler.ts +160 -0
- package/src/profiling/browser/CdpClient.ts +82 -0
- package/src/profiling/browser/CdpPage.ts +138 -0
- package/src/profiling/browser/ChromeLauncher.ts +158 -0
- package/src/profiling/browser/ChromeTraceEvent.ts +28 -0
- package/src/profiling/browser/PageLoadMode.ts +61 -0
- package/src/profiling/node/CoverageSampler.ts +27 -0
- package/src/profiling/node/CoverageTypes.ts +23 -0
- package/src/profiling/node/HeapSampleReport.ts +261 -0
- package/src/{heap-sample → profiling/node}/HeapSampler.ts +55 -13
- package/src/profiling/node/ResolvedProfile.ts +98 -0
- package/src/profiling/node/TimeSampler.ts +57 -0
- package/src/report/BenchmarkReport.ts +146 -0
- package/src/report/Colors.ts +9 -0
- package/src/report/Formatters.ts +110 -0
- package/src/report/GcSections.ts +151 -0
- package/src/{GitUtils.ts → report/GitUtils.ts} +18 -19
- package/src/report/HtmlReport.ts +223 -0
- package/src/report/ParseStats.ts +73 -0
- package/src/report/StandardSections.ts +147 -0
- package/src/report/ViewerSections.ts +286 -0
- package/src/report/text/TableReport.ts +253 -0
- package/src/report/text/TextReport.ts +123 -0
- package/src/runners/AdaptiveWrapper.ts +167 -287
- package/src/runners/BenchRunner.ts +27 -22
- package/src/{Benchmark.ts → runners/BenchmarkSpec.ts} +5 -6
- package/src/runners/CreateRunner.ts +5 -7
- package/src/runners/GcStats.ts +58 -61
- package/src/{MeasuredResults.ts → runners/MeasuredResults.ts} +43 -37
- package/src/runners/MergeBatches.ts +123 -0
- package/src/{NodeGC.ts → runners/NodeGC.ts} +2 -3
- package/src/runners/RunnerOrchestrator.ts +180 -296
- package/src/runners/RunnerUtils.ts +75 -1
- package/src/runners/SampleStats.ts +100 -0
- package/src/runners/TimingRunner.ts +244 -0
- package/src/runners/TimingUtils.ts +3 -2
- package/src/runners/WorkerScript.ts +162 -178
- package/src/stats/BootstrapDifference.ts +282 -0
- package/src/{PermutationTest.ts → stats/PermutationTest.ts} +31 -40
- package/src/stats/StatisticalUtils.ts +445 -0
- package/src/{tests → test}/AdaptiveConvergence.test.ts +10 -10
- package/src/test/AdaptiveRunner.test.ts +39 -41
- package/src/{tests → test}/AdaptiveSampling.test.ts +9 -9
- package/src/test/AdaptiveStatistics.integration.ts +9 -41
- package/src/{tests → test}/BenchMatrix.test.ts +31 -28
- package/src/test/BenchmarkReport.test.ts +63 -13
- package/src/test/BrowserBench.e2e.test.ts +186 -17
- package/src/test/BrowserBench.test.ts +10 -5
- package/src/test/BuildTimeSection.test.ts +130 -0
- package/src/test/CapSamples.test.ts +82 -0
- package/src/test/CoverageExport.test.ts +115 -0
- package/src/test/CoverageSampler.test.ts +33 -0
- package/src/test/HeapAttribution.test.ts +51 -0
- package/src/{tests → test}/MatrixFilter.test.ts +16 -16
- package/src/{tests → test}/MatrixReport.test.ts +1 -1
- package/src/test/PermutationTest.test.ts +1 -1
- package/src/{tests → test}/RealDataValidation.test.ts +6 -6
- package/src/test/RunBenchCLI.test.ts +57 -56
- package/src/test/RunnerOrchestrator.test.ts +12 -12
- package/src/test/StatisticalUtils.test.ts +48 -12
- package/src/{table-util/test → test}/TableReport.test.ts +2 -2
- package/src/test/TestUtils.ts +35 -30
- package/src/test/TimeExport.test.ts +139 -0
- package/src/test/TimeSampler.test.ts +37 -0
- package/src/test/ViewerLive.e2e.test.ts +159 -0
- package/src/test/ViewerStatic.static.e2e.test.ts +137 -0
- package/src/{tests → test}/fixtures/baseline/impl.ts +1 -1
- package/src/{tests → test}/fixtures/bevy30-samples.ts +3 -1
- package/src/test/fixtures/cases/asyncCases.ts +9 -0
- package/src/{tests → test}/fixtures/cases/cases.ts +5 -2
- package/src/test/fixtures/cases/variants/product.ts +2 -0
- package/src/test/fixtures/cases/variants/sum.ts +2 -0
- package/src/test/fixtures/discover/fast.ts +1 -0
- package/src/{tests → test}/fixtures/discover/slow.ts +1 -1
- package/src/test/fixtures/invalid/bad.ts +1 -0
- package/src/test/fixtures/loader/fast.ts +1 -0
- package/src/{tests → test}/fixtures/loader/slow.ts +1 -1
- package/src/test/fixtures/loader/stateful.ts +2 -0
- package/src/test/fixtures/stateful/stateful.ts +2 -0
- package/src/test/fixtures/variants/extra.ts +1 -0
- package/src/test/fixtures/variants/impl.ts +1 -0
- package/src/test/fixtures/worker/fast.ts +1 -0
- package/src/{tests → test}/fixtures/worker/slow.ts +1 -1
- package/src/viewer/DateFormat.ts +30 -0
- package/src/viewer/Helpers.ts +23 -0
- package/src/viewer/LineData.ts +120 -0
- package/src/viewer/Providers.ts +191 -0
- package/src/viewer/ReportData.ts +123 -0
- package/src/viewer/State.ts +49 -0
- package/src/viewer/Theme.ts +15 -0
- package/src/viewer/components/App.tsx +73 -0
- package/src/viewer/components/DropZone.tsx +71 -0
- package/src/viewer/components/LazyPlot.ts +33 -0
- package/src/viewer/components/SamplesPanel.tsx +214 -0
- package/src/viewer/components/Shell.tsx +26 -0
- package/src/viewer/components/SourcePanel.tsx +216 -0
- package/src/viewer/components/SummaryPanel.tsx +332 -0
- package/src/viewer/components/TabBar.tsx +131 -0
- package/src/viewer/components/TabContent.tsx +46 -0
- package/src/viewer/components/ThemeToggle.tsx +50 -0
- package/src/viewer/index.html +20 -0
- package/src/viewer/main.tsx +4 -0
- package/src/viewer/plots/CIPlot.ts +313 -0
- package/src/{html/browser → viewer/plots}/HistogramKde.ts +42 -47
- package/src/viewer/plots/LegendUtils.ts +134 -0
- package/src/viewer/plots/PlotTypes.ts +85 -0
- package/src/viewer/plots/RenderPlots.ts +230 -0
- package/src/viewer/plots/SampleTimeSeries.ts +306 -0
- package/src/viewer/plots/SvgHelpers.ts +136 -0
- package/src/viewer/plots/TimeSeriesMarks.ts +319 -0
- package/src/viewer/report.css +427 -0
- package/src/viewer/shell.css +357 -0
- package/src/viewer/tsconfig.json +11 -0
- package/dist/BenchRunner-CSKN9zPy.d.mts +0 -225
- package/dist/BrowserHeapSampler-DCeL42RE.mjs +0 -202
- package/dist/BrowserHeapSampler-DCeL42RE.mjs.map +0 -1
- package/dist/GcStats-ByEovUi1.mjs +0 -77
- package/dist/GcStats-ByEovUi1.mjs.map +0 -1
- package/dist/HeapSampler-B8dtKHn1.mjs.map +0 -1
- package/dist/TimingUtils-ClclVQ7E.mjs +0 -597
- package/dist/TimingUtils-ClclVQ7E.mjs.map +0 -1
- package/dist/browser/index.js +0 -914
- package/dist/src-Cf_LXwlp.mjs +0 -2873
- package/dist/src-Cf_LXwlp.mjs.map +0 -1
- package/src/BenchMatrix.ts +0 -380
- package/src/BenchmarkReport.ts +0 -156
- package/src/HtmlDataPrep.ts +0 -148
- package/src/StandardSections.ts +0 -261
- package/src/StatisticalUtils.ts +0 -176
- package/src/TypeUtil.ts +0 -8
- package/src/browser/BrowserGcStats.ts +0 -44
- package/src/browser/BrowserHeapSampler.ts +0 -271
- package/src/export/JsonExport.ts +0 -103
- package/src/export/JsonFormat.ts +0 -91
- package/src/heap-sample/HeapSampleReport.ts +0 -196
- package/src/html/HtmlReport.ts +0 -131
- package/src/html/HtmlTemplate.ts +0 -284
- package/src/html/Types.ts +0 -88
- package/src/html/browser/CIPlot.ts +0 -287
- package/src/html/browser/LegendUtils.ts +0 -163
- package/src/html/browser/RenderPlots.ts +0 -263
- package/src/html/browser/SampleTimeSeries.ts +0 -389
- package/src/html/browser/Types.ts +0 -96
- package/src/html/browser/index.ts +0 -1
- package/src/html/index.ts +0 -17
- package/src/runners/BasicRunner.ts +0 -364
- package/src/table-util/ConvergenceFormatters.ts +0 -19
- package/src/table-util/Formatters.ts +0 -152
- package/src/table-util/README.md +0 -70
- package/src/table-util/TableReport.ts +0 -293
- package/src/tests/fixtures/cases/asyncCases.ts +0 -7
- package/src/tests/fixtures/cases/variants/product.ts +0 -2
- package/src/tests/fixtures/cases/variants/sum.ts +0 -2
- package/src/tests/fixtures/discover/fast.ts +0 -1
- package/src/tests/fixtures/invalid/bad.ts +0 -1
- package/src/tests/fixtures/loader/fast.ts +0 -1
- package/src/tests/fixtures/loader/stateful.ts +0 -2
- package/src/tests/fixtures/stateful/stateful.ts +0 -2
- package/src/tests/fixtures/variants/extra.ts +0 -1
- package/src/tests/fixtures/variants/impl.ts +0 -1
- package/src/tests/fixtures/worker/fast.ts +0 -1
- package/src/{table-util/test → test}/TableValueExtractor.test.ts +0 -0
- package/src/{table-util/test → test}/TableValueExtractor.ts +9 -9
package/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Benchforge Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
7
|
+
the Software without restriction, including without limitation the rights to
|
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
|
10
|
+
subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,10 +1,39 @@
|
|
|
1
1
|
# Benchforge
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
Benchforge helps you make faster JavaScript programs with integrated tools for
|
|
4
|
+
benchmarking and performance analysis in Node.js and Chrome, including features
|
|
5
|
+
designed specifically for analyzing garbage-collected programs.
|
|
6
|
+
|
|
7
|
+
Garbage collection is intermittent and infrequent, which makes it harder to
|
|
8
|
+
identify true performance issues. Typical perf tools isolate microbenchmarks
|
|
9
|
+
from GC, but that hides a key part of real-world performance. Intermittent
|
|
10
|
+
events also lead to statistically skewed measurement distributions. Perf tools
|
|
11
|
+
that assume normal distributions and noise-free test runs can easily create
|
|
12
|
+
misleading false-positive performance reports. Benchforge captures a truer
|
|
13
|
+
picture of garbage-collected programs:
|
|
14
|
+
|
|
15
|
+
- **GC-aware statistics** -- bootstrap confidence intervals account for GC
|
|
16
|
+
variance instead of hiding it.
|
|
17
|
+
- **Heap allocation profiling** -- see which functions allocate the most,
|
|
18
|
+
including short-lived objects already collected.
|
|
19
|
+
- **GC collection reports** -- allocation rates, scavenge/full GC counts,
|
|
20
|
+
promotion %, and pause times per iteration.
|
|
21
|
+
- **Visualization** -- distribution plots, icicle charts for allocators, source
|
|
22
|
+
annotations with allocation and call count metrics.
|
|
23
|
+
- **Archive** -- save traces and source code together to share with your team.
|
|
24
|
+
|
|
25
|
+
## Timing Distributions
|
|
26
|
+
<img width="326" height="363" alt="stats with distribution curves" src="https://github.com/user-attachments/assets/532702bd-faa1-4cb3-8b33-ad5409631427" />
|
|
27
|
+
|
|
28
|
+
## Heap Allocation
|
|
29
|
+
Explore memory _allocation_ per function:
|
|
30
|
+
<img width="4444" height="2706" alt="allocation view" src="https://github.com/user-attachments/assets/6d4e2dee-bb72-41ce-a71d-d036bebedb3d" />
|
|
31
|
+
|
|
32
|
+
## Benchmark Iteration Time Series
|
|
33
|
+
<img width="387" height="306" alt="time series" src="https://github.com/user-attachments/assets/f5676b64-7906-422b-aef3-4eedc325c422" />
|
|
34
|
+
|
|
35
|
+
## Source Code Annotated with Performance Info
|
|
36
|
+
<img width="1946" height="460" alt="src annotations" src="https://github.com/user-attachments/assets/102cc574-ecf3-4f5f-8143-d20ee7008a72" />
|
|
8
37
|
|
|
9
38
|
## Installation
|
|
10
39
|
|
|
@@ -14,9 +43,10 @@ npm install benchforge
|
|
|
14
43
|
pnpm add benchforge
|
|
15
44
|
```
|
|
16
45
|
|
|
17
|
-
## Quick Start
|
|
46
|
+
## Quick Start: Node
|
|
18
47
|
|
|
19
|
-
The simplest
|
|
48
|
+
The simplest benchmark: export a default function and pass the file to
|
|
49
|
+
`benchforge`.
|
|
20
50
|
|
|
21
51
|
```typescript
|
|
22
52
|
// my-bench.ts
|
|
@@ -29,283 +59,92 @@ export default function (): string {
|
|
|
29
59
|
benchforge my-bench.ts --gc-stats
|
|
30
60
|
```
|
|
31
61
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
For multiple benchmarks with groups, setup data, and baseline comparison, export a `BenchSuite`:
|
|
62
|
+
For suites with multiple benchmarks, groups, and baseline comparison, see
|
|
63
|
+
[Node.md](Node.md).
|
|
35
64
|
|
|
36
|
-
|
|
37
|
-
// sorting.ts
|
|
38
|
-
import type { BenchGroup, BenchSuite } from 'benchforge';
|
|
39
|
-
|
|
40
|
-
const sortingGroup: BenchGroup<number[]> = {
|
|
41
|
-
name: "Array Sorting (1000 numbers)",
|
|
42
|
-
setup: () => Array.from({ length: 1000 }, () => Math.random()),
|
|
43
|
-
baseline: { name: "native sort", fn: (arr) => [...arr].sort((a, b) => a - b) },
|
|
44
|
-
benchmarks: [
|
|
45
|
-
{ name: "quicksort", fn: quickSort },
|
|
46
|
-
{ name: "insertion sort", fn: insertionSort },
|
|
47
|
-
],
|
|
48
|
-
};
|
|
65
|
+
## Quick Start: Browser
|
|
49
66
|
|
|
50
|
-
|
|
51
|
-
name: "Performance Tests",
|
|
52
|
-
groups: [sortingGroup],
|
|
53
|
-
};
|
|
67
|
+
`benchforge --url <page>` opens Chromium and runs your program.
|
|
54
68
|
|
|
55
|
-
|
|
56
|
-
```
|
|
69
|
+
You can time any page without modification, and compare against a baseline.
|
|
57
70
|
|
|
58
71
|
```bash
|
|
59
|
-
benchforge
|
|
72
|
+
benchforge --url http://localhost:5173 --baseline-url http://localhost:5174 \
|
|
73
|
+
--gc-stats --batches 20 --iterations 10 --headless
|
|
60
74
|
```
|
|
61
75
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
76
|
+
If you export your test function as `window.__bench`, benchforge can run
|
|
77
|
+
multiple iterations in the same tab, which helps reveal the accumulated effect
|
|
78
|
+
of heap allocation over time. Tests also run faster.
|
|
65
79
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
benchmarks: [{
|
|
73
|
-
name: "parse",
|
|
74
|
-
fn: () => {}, // placeholder - not used in worker mode
|
|
75
|
-
modulePath: new URL("./benchmarks.ts", import.meta.url).href,
|
|
76
|
-
exportName: "parse",
|
|
77
|
-
setupExportName: "setup", // optional: called once, result passed to exportName fn
|
|
78
|
-
}],
|
|
80
|
+
```html
|
|
81
|
+
<!-- bench function mode -->
|
|
82
|
+
<script>
|
|
83
|
+
window.__bench = () => {
|
|
84
|
+
const arr = Array.from({ length: 10000 }, () => Math.random());
|
|
85
|
+
arr.sort((a, b) => a - b);
|
|
79
86
|
};
|
|
87
|
+
</script>
|
|
80
88
|
```
|
|
81
89
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
2. Calls `setup(params)` once (where params comes from `BenchGroup.setup()`)
|
|
85
|
-
3. Passes the setup result to each benchmark iteration
|
|
86
|
-
|
|
87
|
-
This eliminates manual caching boilerplate in worker modules.
|
|
88
|
-
|
|
89
|
-
## CLI Options
|
|
90
|
-
|
|
91
|
-
### Basic Options
|
|
92
|
-
- `--time <seconds>` - Benchmark duration per test (default: 0.642s)
|
|
93
|
-
- `--iterations <count>` - Exact number of iterations (overrides --time)
|
|
94
|
-
- `--filter <pattern>` - Run only benchmarks matching regex/substring
|
|
95
|
-
- `--worker` / `--no-worker` - Run in isolated worker process (default: true)
|
|
96
|
-
- `--profile` - Run once for profiling (single iteration, no warmup)
|
|
97
|
-
- `--warmup <count>` - Warmup iterations before measurement (default: 0)
|
|
98
|
-
- `--help` - Show all available options
|
|
99
|
-
|
|
100
|
-
### Memory Profiling
|
|
101
|
-
- `--gc-stats` - Collect GC allocation/collection stats via --trace-gc-nvp
|
|
102
|
-
- `--heap-sample` - Heap sampling allocation attribution (includes garbage)
|
|
103
|
-
- `--heap-interval <bytes>` - Sampling interval in bytes (default: 32768)
|
|
104
|
-
- `--heap-depth <frames>` - Stack depth to capture (default: 64)
|
|
105
|
-
- `--heap-rows <n>` - Number of top allocation sites to show (default: 20)
|
|
106
|
-
|
|
107
|
-
### Output Options
|
|
108
|
-
- `--html` - Generate HTML report, start server, and open in browser
|
|
109
|
-
- `--export-html <file>` - Export HTML report to file
|
|
110
|
-
- `--json <file>` - Export benchmark data to JSON
|
|
111
|
-
- `--perfetto <file>` - Export Perfetto trace file
|
|
112
|
-
|
|
113
|
-
## CLI Usage
|
|
114
|
-
|
|
115
|
-
### Filter benchmarks by name
|
|
116
|
-
|
|
117
|
-
```bash
|
|
118
|
-
benchforge my-bench.ts --filter "concat"
|
|
119
|
-
benchforge my-bench.ts --filter "^parse" --time 2
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
### Profiling with external debuggers
|
|
123
|
-
|
|
124
|
-
Use `--profile` to run benchmarks once for attaching external profilers:
|
|
125
|
-
|
|
126
|
-
```bash
|
|
127
|
-
# Use with Chrome DevTools profiler
|
|
128
|
-
node --inspect-brk $(which benchforge) my-bench.ts --profile
|
|
129
|
-
|
|
130
|
-
# Use with other profiling tools
|
|
131
|
-
node --prof $(which benchforge) my-bench.ts --profile
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
The `--profile` flag executes exactly one iteration with no warmup, making it ideal for debugging and performance profiling.
|
|
135
|
-
|
|
136
|
-
### Key Concepts
|
|
137
|
-
|
|
138
|
-
**Setup Functions**: Run once per group and provide shared data to all benchmarks in that group. The data returned by setup is automatically passed as the first parameter to benchmark functions that expect it.
|
|
139
|
-
|
|
140
|
-
**Baseline Comparison**: When a baseline is specified, all benchmarks in the group show percentage differences (Δ%) compared to baseline.
|
|
141
|
-
|
|
142
|
-
## Output
|
|
143
|
-
|
|
144
|
-
Results are displayed in a formatted table:
|
|
145
|
-
|
|
146
|
-
```
|
|
147
|
-
╔═════════════════╤═══════════════════════════════════════════╤═════════╗
|
|
148
|
-
║ │ time │ ║
|
|
149
|
-
║ name │ mean Δ% CI p50 p99 │ runs ║
|
|
150
|
-
╟─────────────────┼───────────────────────────────────────────┼─────────╢
|
|
151
|
-
║ quicksort │ 0.17 +5.5% [+4.7%, +6.2%] 0.15 0.63 │ 1,134 ║
|
|
152
|
-
║ insertion sort │ 0.24 +25.9% [+25.3%, +27.4%] 0.18 0.36 │ 807 ║
|
|
153
|
-
║ --> native sort │ 0.16 0.15 0.41 │ 1,210 ║
|
|
154
|
-
╚═════════════════╧═══════════════════════════════════════════╧═════════╝
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
- **Δ% CI**: Percentage difference from baseline with bootstrap confidence interval
|
|
158
|
-
|
|
159
|
-
### HTML
|
|
160
|
-
|
|
161
|
-
The HTML report displays:
|
|
162
|
-
- Histogram + KDE: Bar chart showing the distribution
|
|
163
|
-
- Time Series: Sample values over iterations
|
|
164
|
-
- Allocation Series: Per-sample heap allocation (requires `--heap-sample`)
|
|
165
|
-
|
|
166
|
-
```bash
|
|
167
|
-
# Generate HTML report, start server, and open in browser
|
|
168
|
-
benchforge my-bench.ts --html
|
|
169
|
-
# Press Ctrl+C to exit when done viewing
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
### Perfetto Trace Export
|
|
173
|
-
|
|
174
|
-
Export benchmark data as a Perfetto-compatible trace file for detailed analysis:
|
|
175
|
-
|
|
176
|
-
```bash
|
|
177
|
-
# Export trace file
|
|
178
|
-
benchforge my-bench.ts --perfetto trace.json
|
|
179
|
-
|
|
180
|
-
# With V8 GC events (automatically merged after exit)
|
|
181
|
-
node --expose-gc --trace-events-enabled --trace-event-categories=v8,v8.gc \
|
|
182
|
-
benchforge my-bench.ts --perfetto trace.json
|
|
183
|
-
```
|
|
184
|
-
|
|
185
|
-
View the trace at https://ui.perfetto.dev by dragging the JSON file.
|
|
90
|
+
See [Browser.md](Browser.md) for setup patterns, completion signals, and the CDP
|
|
91
|
+
flow.
|
|
186
92
|
|
|
187
|
-
|
|
188
|
-
- **Heap counter**: Continuous heap usage as a line graph
|
|
189
|
-
- **Sample markers**: Each benchmark iteration with timing
|
|
190
|
-
- **Pause markers**: V8 optimization pause points
|
|
191
|
-
- **V8 GC events**: Automatically merged after process exit (when run with `--trace-events-enabled`)
|
|
93
|
+
## CLI Overview
|
|
192
94
|
|
|
193
|
-
|
|
95
|
+
Core flags for common workflows. Run `benchforge --help` for the full list.
|
|
194
96
|
|
|
195
|
-
|
|
97
|
+
| Flag | What it does |
|
|
98
|
+
|------|-------------|
|
|
99
|
+
| `--gc-stats` | GC allocation/collection stats |
|
|
100
|
+
| `--alloc` | Heap allocation sampling attribution |
|
|
101
|
+
| `--profile` | V8 CPU time sampling profiler |
|
|
102
|
+
| `--call-counts` | Per-function execution counts |
|
|
103
|
+
| `--stats <list>` | Timing columns to display (default: mean,p50,p99) |
|
|
104
|
+
| `--view` | Open interactive viewer in browser |
|
|
105
|
+
| `--archive [file]` | Archive profiles + sources to `.benchforge` file |
|
|
106
|
+
| `--duration <sec>` | Duration per batch (default: 0.642s) |
|
|
107
|
+
| `--iterations <n>` | Exact iterations (overrides --duration) |
|
|
108
|
+
| `--batches <n>` | Interleaved batches for baseline comparison |
|
|
109
|
+
| `--filter <pattern>` | Run only benchmarks matching regex/substring |
|
|
110
|
+
| `--url <url>` | Benchmark a browser page |
|
|
111
|
+
| `--baseline-url <url>` | A/B comparison in browser |
|
|
112
|
+
| `--equiv-margin <pct>` | Equivalence margin (default: 2%) |
|
|
196
113
|
|
|
197
|
-
|
|
198
|
-
# Collect GC allocation/collection stats (requires worker mode)
|
|
199
|
-
benchforge my-bench.ts --gc-stats
|
|
200
|
-
```
|
|
114
|
+
See [Profiling.md](Profiling.md) for detailed profiling options and V8 flags.
|
|
201
115
|
|
|
202
|
-
|
|
203
|
-
- **alloc/iter**: Bytes allocated per iteration
|
|
204
|
-
- **scav**: Number of scavenge (minor) GCs
|
|
205
|
-
- **full**: Number of full (mark-compact) GCs
|
|
206
|
-
- **promo%**: Percentage of allocations promoted to old generation
|
|
207
|
-
- **pause/iter**: GC pause time per iteration
|
|
116
|
+
## Key Concepts
|
|
208
117
|
|
|
209
|
-
###
|
|
118
|
+
### Batching
|
|
210
119
|
|
|
211
|
-
|
|
120
|
+
When comparing against a baseline, use `--batches` to interleave runs and reduce
|
|
121
|
+
ordering bias. Batch 0 is dropped by default (OS cache warmup). For reliable
|
|
122
|
+
comparisons, use 40+ batches.
|
|
212
123
|
|
|
213
124
|
```bash
|
|
214
|
-
|
|
215
|
-
benchforge my-bench.ts --heap-sample --iterations 100
|
|
216
|
-
|
|
217
|
-
# Smaller interval = more samples = better coverage of rare allocations
|
|
218
|
-
benchforge my-bench.ts --heap-sample --heap-interval 4096 --iterations 100
|
|
219
|
-
|
|
220
|
-
# Verbose output with clickable file:// paths
|
|
221
|
-
benchforge my-bench.ts --heap-sample --heap-verbose
|
|
222
|
-
|
|
223
|
-
# Control call stack display depth
|
|
224
|
-
benchforge my-bench.ts --heap-sample --heap-stack 5
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
**CLI Options:**
|
|
228
|
-
- `--heap-sample` - Enable heap sampling allocation attribution
|
|
229
|
-
- `--heap-interval <bytes>` - Sampling interval in bytes (default: 32768)
|
|
230
|
-
- `--heap-depth <frames>` - Maximum stack depth to capture (default: 64)
|
|
231
|
-
- `--heap-rows <n>` - Number of top allocation sites to show (default: 20)
|
|
232
|
-
- `--heap-stack <n>` - Call stack depth to display (default: 3)
|
|
233
|
-
- `--heap-verbose` - Show full file:// paths with line numbers (cmd-clickable)
|
|
234
|
-
|
|
235
|
-
**Output (default compact):**
|
|
236
|
-
```
|
|
237
|
-
─── Heap profile: bevy_env_map ───
|
|
238
|
-
Heap allocation sites (top 20, garbage included):
|
|
239
|
-
13.62 MB recursiveResolve <- flattenTreeImport <- bindAndTransform
|
|
240
|
-
12.36 MB nextToken <- parseBlockStatements <- parseCompoundStatement
|
|
241
|
-
5.15 MB coverWithText <- finishElem <- parseVarOrLet
|
|
242
|
-
|
|
243
|
-
Total (all): 56.98 MB
|
|
244
|
-
Total (user-code): 28.45 MB
|
|
245
|
-
Samples: 1,842
|
|
246
|
-
```
|
|
247
|
-
|
|
248
|
-
**How V8 Heap Sampling Works:**
|
|
249
|
-
|
|
250
|
-
V8's sampling profiler uses Poisson-distributed sampling. When an allocation occurs, V8 probabilistically decides whether to record it based on the sampling interval. Key points:
|
|
251
|
-
|
|
252
|
-
1. **selfSize is scaled**: V8 doesn't report raw sampled bytes. It scales sample counts to estimate total allocations (`selfSize = size × count × scaleFactor`). This means changing `--heap-interval` affects sample count and overhead, but the estimated total converges to the same value.
|
|
253
|
-
|
|
254
|
-
2. **Smaller intervals = better coverage**: With a smaller interval (e.g., 1024 vs 32768), you get more samples and discover more unique allocation sites, especially rare ones. The total estimate stays similar, but you see more of the distribution.
|
|
255
|
-
|
|
256
|
-
3. **User-code only**: The report filters out Node.js internals (`node:`, `internal/`). "Total (user-code)" shows filtered allocations; "Total (all)" shows everything.
|
|
257
|
-
|
|
258
|
-
4. **Measurement window**: Sampling covers benchmark module import + execution. Worker startup and framework init aren't captured (but do appear in `--gc-stats`).
|
|
259
|
-
|
|
260
|
-
5. **Sites are stack-unique**: The same function appears multiple times with different callers. For example, `nextToken` may show up in several entries with different call stacks, each representing a distinct allocation pattern.
|
|
261
|
-
|
|
262
|
-
**Limitations:**
|
|
263
|
-
- **Function-level attribution only**: V8 reports the function where allocation occurred, not the specific line. The line:column shown is where the function is *defined*.
|
|
264
|
-
- **Statistical sampling**: Results vary between runs. More iterations = more stable results.
|
|
265
|
-
- **~50% filtered**: Node.js internals account for roughly half of allocations. Use "Total (all)" to see the full picture.
|
|
266
|
-
|
|
267
|
-
**When to use which:**
|
|
268
|
-
| Tool | Use When |
|
|
269
|
-
|------|----------|
|
|
270
|
-
| `--gc-stats` | Need total allocation/collection bytes, GC pause times |
|
|
271
|
-
| `--heap-sample` | Need to identify which functions allocate the most |
|
|
272
|
-
| Both | Cross-reference attribution with totals |
|
|
273
|
-
|
|
274
|
-
## Requirements
|
|
275
|
-
|
|
276
|
-
- Node.js 22.6+ (for native TypeScript support)
|
|
277
|
-
- Use `--expose-gc --allow-natives-syntax` flags for garbage collection monitoring and V8 native functions
|
|
278
|
-
|
|
279
|
-
## Adaptive Mode (Experimental)
|
|
280
|
-
|
|
281
|
-
Adaptive mode (`--adaptive`) automatically adjusts iteration count until measurements stabilize. The algorithm is still being tuned — use `--help` for available options.
|
|
282
|
-
|
|
283
|
-
## Interpreting Results
|
|
284
|
-
|
|
285
|
-
### Baseline Comparison (Δ% CI)
|
|
286
|
-
```
|
|
287
|
-
0.17 +5.5% [+4.7%, +6.2%]
|
|
288
|
-
```
|
|
289
|
-
The benchmark is 5.5% slower than baseline, with a bootstrap confidence interval of [+4.7%, +6.2%].
|
|
290
|
-
|
|
291
|
-
### Percentiles
|
|
292
|
-
```
|
|
293
|
-
p50: 0.15ms, p99: 0.27ms
|
|
125
|
+
benchforge sorting.ts --batches 40 --duration 2
|
|
294
126
|
```
|
|
295
|
-
50% of runs completed in ≤0.15ms and 99% in ≤0.27ms. Use percentiles when you care about consistency and tail latencies.
|
|
296
127
|
|
|
297
|
-
|
|
128
|
+
See [Statistics.md](Statistics.md) for the full explanation of batched
|
|
129
|
+
execution, block bootstrap, and Tukey trimming.
|
|
298
130
|
|
|
299
|
-
###
|
|
131
|
+
### Baseline Comparison
|
|
300
132
|
|
|
301
|
-
|
|
133
|
+
When a group has a `baseline`, all benchmarks show Δ% with a bootstrap
|
|
134
|
+
confidence interval. The result is classified as faster, slower, equivalent, or
|
|
135
|
+
inconclusive based on the equivalence margin.
|
|
302
136
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
3. **Total GC overhead** including preparation and cleanup
|
|
137
|
+
See [Statistics.md](Statistics.md#equivalence-margin) for how the four verdicts
|
|
138
|
+
work and how to calibrate the margin.
|
|
306
139
|
|
|
307
|
-
|
|
140
|
+
## Further Reading
|
|
308
141
|
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
142
|
+
- [Node.md](Node.md) -- Worker mode, module imports, custom metric sections,
|
|
143
|
+
external debugger attachment
|
|
144
|
+
- [Browser.md](Browser.md) -- Bench function and page-load modes, completion
|
|
145
|
+
signals, CDP flow
|
|
146
|
+
- [Profiling.md](Profiling.md) -- Allocation sampling, GC stats, V8 flags,
|
|
147
|
+
Perfetto export
|
|
148
|
+
- [Statistics.md](Statistics.md) -- Column selection (`--stats`), bootstrap
|
|
149
|
+
methods, batching, equivalence testing
|
|
150
|
+
- [README-tachometer.md](README-tachometer.md) -- Coming from tachometer
|
package/bin/benchforge
CHANGED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { b as splitByOffsets, g as percentile, p as median, t as average, w as tukeyFences } from "./StatisticalUtils-BD92crgM.mjs";
|
|
2
|
+
import { a as formatSignedPercent, c as timeMs, u as colors } from "./Formatters-BWj3d4sv.mjs";
|
|
3
|
+
import { resolve } from "node:path";
|
|
4
|
+
import { readFile } from "node:fs/promises";
|
|
5
|
+
//#region src/cli/AnalyzeArchive.ts
/** Diagnostic analysis of a .benchforge archive's per-batch statistics. */
// Terminal styling helpers shared by all printers in this module.
const { bold, dim, red, green, yellow } = colors;
// IQR multiplier passed to tukeyFences when flagging outlier batch means
// (3x IQR — wider than the conventional 1.5x, so only extreme blocks trim).
const blockFenceMultiplier = 3;
|
|
9
|
+
/** Read a .benchforge archive from disk and print its per-batch diagnostic
 * analysis, one group at a time.
 * (for benchforge debugging/development purposes, not a general user tool)
 */
async function analyzeArchive(filePath) {
  const absolutePath = resolve(filePath);
  const raw = await readFile(absolutePath, "utf-8");
  const { report } = JSON.parse(raw);
  const groups = report?.groups;
  // Bail out early when the archive carries no report payload at all.
  if (!groups?.length) {
    console.error("No report data found in archive.");
    return;
  }
  // Batch count comes from the recorded CLI args; used to infer offsets
  // for older archives that lack explicit batchOffsets.
  const batchCount = report.metadata?.cliArgs?.batches;
  for (const group of groups) {
    analyzeGroup(group, batchCount);
  }
}
|
|
22
|
+
/** Print the per-batch analysis for every benchmark inside one group. */
function analyzeGroup(group, batchCount) {
  const { baseline, benchmarks } = group;
  console.log(bold(`\n=== ${group.name} ===\n`));
  for (const bench of benchmarks) {
    analyzeBenchmark(bench, baseline, batchCount);
  }
}
|
|
28
|
+
/** Print per-batch analysis for one benchmark entry: batch table, plus
 * order-effect / paired-delta / trimming sections when a baseline with the
 * same batch count is available.
 */
function analyzeBenchmark(bench, baseline, batchCount) {
  // Prefer explicit offsets from the archive; otherwise infer equal batches.
  const benchOffsets = bench.batchOffsets ?? inferOffsets(bench.samples, batchCount);
  const baselineOffsets = baseline?.batchOffsets ?? inferOffsets(baseline?.samples, batchCount);
  if (!benchOffsets?.length) {
    console.log(dim(" No batch data (single batch run)"));
    return;
  }
  const batches = splitByOffsets(bench.samples, benchOffsets);
  let baseBatches;
  if (baselineOffsets && baseline) {
    baseBatches = splitByOffsets(baseline.samples, baselineOffsets);
  }
  printBatchHeader(bench, baseline, batches.length);
  printBatchTable(batches, baseBatches);
  // Paired diagnostics only make sense when both sides have matching batches.
  const comparable = baseBatches !== undefined && baseBatches.length === batches.length;
  if (comparable) {
    printOrderEffect(batches, baseBatches);
    printPairedDeltas(batches, baseBatches);
    printTrimmedBlocks(batches, baseBatches, bench.name);
  }
  console.log();
}
|
|
47
|
+
/** Infer equal-sized batch start offsets when batchOffsets isn't stored in
 * the archive. Returns undefined when samples are missing/empty or when the
 * run used a single batch (nothing to split).
 */
function inferOffsets(samples, batchCount) {
  const total = samples?.length ?? 0;
  if (total === 0 || !batchCount || batchCount <= 1) return undefined;
  const batchSize = Math.floor(total / batchCount);
  const offsets = [];
  for (let i = 0; i < batchCount; i++) {
    offsets.push(i * batchSize);
  }
  return offsets;
}
|
|
53
|
+
/** Print the benchmark name followed by a dimmed batch/run/duration summary. */
function printBatchHeader(bench, baseline, nBatches) {
  const baselineRuns = baseline?.samples?.length;
  // Approximate per-batch wall time; "?" when the archive lacks totalTime.
  let perBatch = "?";
  if (bench.totalTime) {
    perBatch = `${(bench.totalTime / nBatches).toFixed(1)}s`;
  }
  const runs = baselineRuns
    ? `${bench.samples.length}+${baselineRuns} runs`
    : `${bench.samples.length} runs`;
  console.log(bold(` ${bench.name}`) + dim(` (${nBatches} batches, ${runs}, ~${perBatch}/batch)`));
}
|
|
60
|
+
/** Print the per-batch median table; with a baseline present, each row also
 * shows the baseline median, the colored delta, and the run order (B>C/C>B).
 */
function printBatchTable(benches, baselines) {
  const batchCol = "batch".padEnd(7);
  const countCol = "n".padStart(4);
  const header = baselines
    ? ` ${batchCol} ${countCol} ${"current".padStart(10)} ${"baseline".padStart(10)} ${"delta".padStart(8)}`
    : ` ${batchCol} ${countCol} ${"median".padStart(10)}`;
  console.log(dim(header));
  benches.forEach((batch, i) => {
    const idx = String(i).padEnd(7);
    const n = String(batch.length).padStart(4);
    const med = (timeMs(median(batch)) ?? "").padStart(10);
    const baseBatch = baselines?.[i];
    if (!baseBatch) {
      console.log(` ${idx} ${n} ${med}`);
      return;
    }
    const baseMed = (timeMs(median(baseBatch)) ?? "").padStart(10);
    const delta = formatDelta(medianDelta(batch, baseBatch)).padStart(8);
    // Even batches ran baseline first (B>C), odd ran current first (C>B).
    const order = dim(i % 2 === 0 ? " B>C" : " C>B");
    console.log(` ${idx} ${n} ${med} ${baseMed} ${delta}${order}`);
  });
}
|
|
78
|
+
/** Analyze order effect: does running second make a difference?
 * Even batches ran baseline first (B>C); odd batches ran current first (C>B).
 * Compares the average delta between the two orderings.
 */
function printOrderEffect(benches, baselines) {
  const evenDeltas = [];
  const oddDeltas = [];
  benches.forEach((batch, i) => {
    const target = i % 2 === 0 ? evenDeltas : oddDeltas;
    target.push(medianDelta(batch, baselines[i]));
  });
  const evenAvg = evenDeltas.length ? average(evenDeltas) : 0;
  const oddAvg = oddDeltas.length ? average(oddDeltas) : 0;
  console.log();
  console.log(bold(" Order effect:"));
  console.log(` baseline first (B>C): avg delta ${formatDelta(evenAvg)}` + dim(` (${evenDeltas.length} batches)`));
  console.log(` current first (C>B): avg delta ${formatDelta(oddAvg)}` + dim(` (${oddDeltas.length} batches)`));
  const gap = Math.abs(evenAvg - oddAvg);
  // A gap above 2 percentage points is flagged as a real order effect.
  if (gap > 2) {
    console.log(yellow(` ==> ${gap.toFixed(1)}% order effect detected`));
  } else {
    console.log(dim(` order effect: ${gap.toFixed(1)}% (small)`));
  }
}
|
|
93
|
+
/** Print paired per-batch deltas (mean/median/IQR) and whether the batches
 * agree on direction — unanimous agreement may indicate systematic bias.
 */
function printPairedDeltas(benches, baselines) {
  const deltas = benches.map((batch, i) => medianDelta(batch, baselines[i]));
  let slower = 0;
  let faster = 0;
  for (const d of deltas) {
    if (d > 0) slower++;
    else if (d < 0) faster++;
  }
  const iqr = percentile(deltas, 0.75) - percentile(deltas, 0.25);
  console.log();
  console.log(bold(" Paired deltas:"));
  console.log(` mean: ${formatDelta(average(deltas))} median: ${formatDelta(median(deltas))} IQR: ${iqr.toFixed(1)}%`);
  console.log(` direction: ${slower} slower, ${faster} faster` + dim(` (${deltas.length} batches)`));
  const mixed = slower > 0 && faster > 0;
  if (mixed) {
    console.log(green(" ==> batches disagree on direction"));
  } else {
    console.log(red(" ==> all batches agree on direction (systematic bias?)"));
  }
}
|
|
108
|
+
/** Show which batch blocks (by mean) would be Tukey-trimmed on each side. */
function printTrimmedBlocks(benches, baselines, name) {
  console.log();
  console.log(bold(" Trimmed blocks:"));
  printSideTrim("baseline", baselines.map((batch) => average(batch)));
  printSideTrim(name, benches.map((batch) => average(batch)));
}
|
|
117
|
+
/** Format a percent delta with color: red if >1%, green if <-1%, plain otherwise. */
function formatDelta(pct) {
  const text = formatSignedPercent(pct);
  if (pct > 1) {
    return red(text);
  }
  return pct < -1 ? green(text) : text;
}
|
|
124
|
+
/** Percent delta of the sample median relative to the baseline median. */
function medianDelta(samples, baseSamples) {
  const base = median(baseSamples);
  return ((median(samples) - base) / base) * 100;
}
|
|
130
|
+
/** Print trimming info for one side using 3x IQR Tukey fences: only block
 * means above the upper fence are considered trimmed.
 */
function printSideTrim(label, means) {
  const [, upperFence] = tukeyFences(means, blockFenceMultiplier);
  const trimmed = [];
  means.forEach((value, i) => {
    if (value > upperFence) trimmed.push(i);
  });
  if (!trimmed.length) {
    console.log(dim(` ${label}: 0 trimmed`));
    return;
  }
  const vals = trimmed.map((i) => timeMs(means[i]) ?? "?").join(", ");
  console.log(` ${label}: ${yellow(`${trimmed.length} trimmed`)} (${vals})` + dim(` fence: hi: ${timeMs(upperFence)}`));
}
|
|
142
|
+
//#endregion
|
|
143
|
+
export { analyzeArchive };
|
|
144
|
+
|
|
145
|
+
//# sourceMappingURL=AnalyzeArchive-8NCJhmhS.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AnalyzeArchive-8NCJhmhS.mjs","names":[],"sources":["../src/cli/AnalyzeArchive.ts"],"sourcesContent":["/** Diagnostic analysis of a .benchforge archive's per-batch statistics. */\nimport { readFile } from \"node:fs/promises\";\nimport { resolve } from \"node:path\";\nimport colors from \"../report/Colors.ts\";\nimport { formatSignedPercent, timeMs } from \"../report/Formatters.ts\";\nimport {\n average,\n median,\n percentile,\n splitByOffsets,\n tukeyFences,\n} from \"../stats/StatisticalUtils.ts\";\nimport type { BenchmarkEntry, BenchmarkGroup } from \"../viewer/ReportData.ts\";\n\nconst { bold, dim, red, green, yellow } = colors;\n\nconst blockFenceMultiplier = 3;\n\n/** Read an archive and print per-batch diagnostic analysis.\n * (for benchforge debugging/development purposes, not a general user tool)\n */\nexport async function analyzeArchive(filePath: string): Promise<void> {\n const absPath = resolve(filePath);\n const content = await readFile(absPath, \"utf-8\");\n const { report } = JSON.parse(content);\n if (!report?.groups?.length) {\n console.error(\"No report data found in archive.\");\n return;\n }\n const batchCount = report.metadata?.cliArgs?.batches as number | undefined;\n for (const group of report.groups) {\n analyzeGroup(group, batchCount);\n }\n}\n\n/** Print analysis for all benchmarks in a group. */\nfunction analyzeGroup(group: BenchmarkGroup, batchCount?: number): void {\n console.log(bold(`\\n=== ${group.name} ===\\n`));\n\n const baseline = group.baseline;\n for (const bench of group.benchmarks) {\n analyzeBenchmark(bench, baseline, batchCount);\n }\n}\n\n/** Print per-batch analysis for one benchmark entry. */\nfunction analyzeBenchmark(\n bench: BenchmarkEntry,\n baseline: BenchmarkEntry | undefined,\n batchCount?: number,\n): void {\n const bOffsets =\n bench.batchOffsets ?? inferOffsets(bench.samples, batchCount);\n const baseOffsets =\n baseline?.batchOffsets ?? 
inferOffsets(baseline?.samples, batchCount);\n if (!bOffsets?.length) {\n console.log(dim(\" No batch data (single batch run)\"));\n return;\n }\n\n const batches = splitByOffsets(bench.samples, bOffsets);\n const baseBatches =\n baseOffsets && baseline\n ? splitByOffsets(baseline.samples, baseOffsets)\n : undefined;\n\n printBatchHeader(bench, baseline, batches.length);\n printBatchTable(batches, baseBatches);\n\n if (baseBatches && baseBatches.length === batches.length) {\n printOrderEffect(batches, baseBatches);\n printPairedDeltas(batches, baseBatches);\n printTrimmedBlocks(batches, baseBatches, bench.name);\n }\n console.log();\n}\n\n/** Infer equal-sized batch offsets when batchOffsets isn't in the archive. */\nfunction inferOffsets(\n samples: number[] | undefined,\n batchCount?: number,\n): number[] | undefined {\n if (!samples?.length || !batchCount || batchCount <= 1) return undefined;\n const size = Math.floor(samples.length / batchCount);\n return Array.from({ length: batchCount }, (_, i) => i * size);\n}\n\n/** Print benchmark name with batch/run summary. */\nfunction printBatchHeader(\n bench: BenchmarkEntry,\n baseline: BenchmarkEntry | undefined,\n nBatches: number,\n): void {\n const baseRuns = baseline?.samples?.length;\n const dur = bench.totalTime\n ? (bench.totalTime / nBatches).toFixed(1) + \"s\"\n : \"?\";\n const runs = baseRuns\n ? `${bench.samples.length}+${baseRuns} runs`\n : `${bench.samples.length} runs`;\n const info = dim(` (${nBatches} batches, ${runs}, ~${dur}/batch)`);\n console.log(bold(` ${bench.name}`) + info);\n}\n\n/** Print per-batch median table for current and baseline. */\nfunction printBatchTable(\n benches: number[][],\n baselines: number[][] | undefined,\n): void {\n const header = baselines\n ? 
` ${\"batch\".padEnd(7)} ${\"n\".padStart(4)} ${\"current\".padStart(10)} ${\"baseline\".padStart(10)} ${\"delta\".padStart(8)}`\n : ` ${\"batch\".padEnd(7)} ${\"n\".padStart(4)} ${\"median\".padStart(10)}`;\n console.log(dim(header));\n\n for (let i = 0; i < benches.length; i++) {\n const n = String(benches[i].length).padStart(4);\n const med = (timeMs(median(benches[i])) ?? \"\").padStart(10);\n const idx = String(i).padEnd(7);\n if (!baselines?.[i]) {\n console.log(` ${idx} ${n} ${med}`);\n continue;\n }\n const baseMed = (timeMs(median(baselines[i])) ?? \"\").padStart(10);\n const delta = formatDelta(medianDelta(benches[i], baselines[i])).padStart(\n 8,\n );\n const order = i % 2 === 0 ? dim(\" B>C\") : dim(\" C>B\");\n console.log(` ${idx} ${n} ${med} ${baseMed} ${delta}${order}`);\n }\n}\n\n/** Analyze order effect: does running second make a difference? */\nfunction printOrderEffect(benches: number[][], baselines: number[][]): void {\n // Even batches: baseline runs first (B>C), odd: current runs first (C>B)\n const deltas = benches.map((b, i) => medianDelta(b, baselines[i]));\n const baseFirstDeltas = deltas.filter((_, i) => i % 2 === 0);\n const currFirstDeltas = deltas.filter((_, i) => i % 2 === 1);\n const baseFirstAvg = baseFirstDeltas.length ? average(baseFirstDeltas) : 0;\n const currFirstAvg = currFirstDeltas.length ? 
average(currFirstDeltas) : 0;\n\n console.log();\n console.log(bold(\" Order effect:\"));\n console.log(\n ` baseline first (B>C): avg delta ${formatDelta(baseFirstAvg)}` +\n dim(` (${baseFirstDeltas.length} batches)`),\n );\n console.log(\n ` current first (C>B): avg delta ${formatDelta(currFirstAvg)}` +\n dim(` (${currFirstDeltas.length} batches)`),\n );\n\n const diff = Math.abs(baseFirstAvg - currFirstAvg);\n if (diff > 2) {\n console.log(yellow(` ==> ${diff.toFixed(1)}% order effect detected`));\n } else {\n console.log(dim(` order effect: ${diff.toFixed(1)}% (small)`));\n }\n}\n\n/** Print paired batch deltas and their consistency. */\nfunction printPairedDeltas(benches: number[][], baselines: number[][]): void {\n const deltas = benches.map((b, i) => medianDelta(b, baselines[i]));\n\n const positive = deltas.filter(d => d > 0).length;\n const negative = deltas.filter(d => d < 0).length;\n const avgDelta = average(deltas);\n const med = median(deltas);\n const spread = percentile(deltas, 0.75) - percentile(deltas, 0.25);\n\n console.log();\n console.log(bold(\" Paired deltas:\"));\n console.log(\n ` mean: ${formatDelta(avgDelta)} median: ${formatDelta(med)} IQR: ${spread.toFixed(1)}%`,\n );\n console.log(\n ` direction: ${positive} slower, ${negative} faster` +\n dim(` (${deltas.length} batches)`),\n );\n\n if (positive > 0 && negative > 0) {\n console.log(green(\" ==> batches disagree on direction\"));\n } else {\n console.log(\n red(\" ==> all batches agree on direction (systematic bias?)\"),\n );\n }\n}\n\n/** Show which blocks would be Tukey-trimmed per side. 
*/\nfunction printTrimmedBlocks(\n benches: number[][],\n baselines: number[][],\n name: string,\n): void {\n console.log();\n console.log(bold(\" Trimmed blocks:\"));\n const baseMeans = baselines.map(b => average(b));\n const benchMeans = benches.map(b => average(b));\n printSideTrim(\"baseline\", baseMeans);\n printSideTrim(name, benchMeans);\n}\n\n/** Color a percent delta: red if >1%, green if <-1%. */\nfunction formatDelta(pct: number): string {\n const str = formatSignedPercent(pct);\n if (pct > 1) return red(str);\n if (pct < -1) return green(str);\n return str;\n}\n\n/** Percent delta between two medians. */\nfunction medianDelta(samples: number[], baseSamples: number[]): number {\n const med = median(samples);\n const baseMed = median(baseSamples);\n return ((med - baseMed) / baseMed) * 100;\n}\n\n/** Print trimming info for one side using 3x IQR fences. */\nfunction printSideTrim(label: string, means: number[]): void {\n const [, hi] = tukeyFences(means, blockFenceMultiplier);\n const indices = means.map((v, i) => (v > hi ? i : -1)).filter(i => i >= 0);\n if (indices.length === 0) {\n console.log(dim(` ${label}: 0 trimmed`));\n return;\n }\n const vals = indices.map(i => timeMs(means[i]) ?? 
\"?\").join(\", \");\n const fence = `hi: ${timeMs(hi)}`;\n console.log(\n ` ${label}: ${yellow(`${indices.length} trimmed`)} (${vals})` +\n dim(` fence: ${fence}`),\n );\n}\n"],"mappings":";;;;;;AAcA,MAAM,EAAE,MAAM,KAAK,KAAK,OAAO,WAAW;AAE1C,MAAM,uBAAuB;;;;AAK7B,eAAsB,eAAe,UAAiC;CAEpE,MAAM,UAAU,MAAM,SADN,QAAQ,SAAS,EACO,QAAQ;CAChD,MAAM,EAAE,WAAW,KAAK,MAAM,QAAQ;AACtC,KAAI,CAAC,QAAQ,QAAQ,QAAQ;AAC3B,UAAQ,MAAM,mCAAmC;AACjD;;CAEF,MAAM,aAAa,OAAO,UAAU,SAAS;AAC7C,MAAK,MAAM,SAAS,OAAO,OACzB,cAAa,OAAO,WAAW;;;AAKnC,SAAS,aAAa,OAAuB,YAA2B;AACtE,SAAQ,IAAI,KAAK,SAAS,MAAM,KAAK,QAAQ,CAAC;CAE9C,MAAM,WAAW,MAAM;AACvB,MAAK,MAAM,SAAS,MAAM,WACxB,kBAAiB,OAAO,UAAU,WAAW;;;AAKjD,SAAS,iBACP,OACA,UACA,YACM;CACN,MAAM,WACJ,MAAM,gBAAgB,aAAa,MAAM,SAAS,WAAW;CAC/D,MAAM,cACJ,UAAU,gBAAgB,aAAa,UAAU,SAAS,WAAW;AACvE,KAAI,CAAC,UAAU,QAAQ;AACrB,UAAQ,IAAI,IAAI,qCAAqC,CAAC;AACtD;;CAGF,MAAM,UAAU,eAAe,MAAM,SAAS,SAAS;CACvD,MAAM,cACJ,eAAe,WACX,eAAe,SAAS,SAAS,YAAY,GAC7C,KAAA;AAEN,kBAAiB,OAAO,UAAU,QAAQ,OAAO;AACjD,iBAAgB,SAAS,YAAY;AAErC,KAAI,eAAe,YAAY,WAAW,QAAQ,QAAQ;AACxD,mBAAiB,SAAS,YAAY;AACtC,oBAAkB,SAAS,YAAY;AACvC,qBAAmB,SAAS,aAAa,MAAM,KAAK;;AAEtD,SAAQ,KAAK;;;AAIf,SAAS,aACP,SACA,YACsB;AACtB,KAAI,CAAC,SAAS,UAAU,CAAC,cAAc,cAAc,EAAG,QAAO,KAAA;CAC/D,MAAM,OAAO,KAAK,MAAM,QAAQ,SAAS,WAAW;AACpD,QAAO,MAAM,KAAK,EAAE,QAAQ,YAAY,GAAG,GAAG,MAAM,IAAI,KAAK;;;AAI/D,SAAS,iBACP,OACA,UACA,UACM;CACN,MAAM,WAAW,UAAU,SAAS;CACpC,MAAM,MAAM,MAAM,aACb,MAAM,YAAY,UAAU,QAAQ,EAAE,GAAG,MAC1C;CAIJ,MAAM,OAAO,IAAI,KAAK,SAAS,YAHlB,WACT,GAAG,MAAM,QAAQ,OAAO,GAAG,SAAS,SACpC,GAAG,MAAM,QAAQ,OAAO,OACoB,KAAK,IAAI,SAAS;AAClE,SAAQ,IAAI,KAAK,KAAK,MAAM,OAAO,GAAG,KAAK;;;AAI7C,SAAS,gBACP,SACA,WACM;CACN,MAAM,SAAS,YACX,KAAK,QAAQ,OAAO,EAAE,CAAC,GAAG,IAAI,SAAS,EAAE,CAAC,IAAI,UAAU,SAAS,GAAG,CAAC,IAAI,WAAW,SAAS,GAAG,CAAC,IAAI,QAAQ,SAAS,EAAE,KACxH,KAAK,QAAQ,OAAO,EAAE,CAAC,GAAG,IAAI,SAAS,EAAE,CAAC,IAAI,SAAS,SAAS,GAAG;AACvE,SAAQ,IAAI,IAAI,OAAO,CAAC;AAExB,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACvC,MAAM,IAAI,OAAO,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE;EAC/C,MAAM,OAAO,OAAO,OAAO,QAAQ,
GAAG,CAAC,IAAI,IAAI,SAAS,GAAG;EAC3D,MAAM,MAAM,OAAO,EAAE,CAAC,OAAO,EAAE;AAC/B,MAAI,CAAC,YAAY,IAAI;AACnB,WAAQ,IAAI,KAAK,IAAI,GAAG,EAAE,IAAI,MAAM;AACpC;;EAEF,MAAM,WAAW,OAAO,OAAO,UAAU,GAAG,CAAC,IAAI,IAAI,SAAS,GAAG;EACjE,MAAM,QAAQ,YAAY,YAAY,QAAQ,IAAI,UAAU,GAAG,CAAC,CAAC,SAC/D,EACD;EACD,MAAM,QAAQ,IAAI,MAAM,IAAI,IAAI,OAAO,GAAG,IAAI,OAAO;AACrD,UAAQ,IAAI,KAAK,IAAI,GAAG,EAAE,IAAI,IAAI,IAAI,QAAQ,IAAI,QAAQ,QAAQ;;;;AAKtE,SAAS,iBAAiB,SAAqB,WAA6B;CAE1E,MAAM,SAAS,QAAQ,KAAK,GAAG,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC;CAClE,MAAM,kBAAkB,OAAO,QAAQ,GAAG,MAAM,IAAI,MAAM,EAAE;CAC5D,MAAM,kBAAkB,OAAO,QAAQ,GAAG,MAAM,IAAI,MAAM,EAAE;CAC5D,MAAM,eAAe,gBAAgB,SAAS,QAAQ,gBAAgB,GAAG;CACzE,MAAM,eAAe,gBAAgB,SAAS,QAAQ,gBAAgB,GAAG;AAEzE,SAAQ,KAAK;AACb,SAAQ,IAAI,KAAK,kBAAkB,CAAC;AACpC,SAAQ,IACN,uCAAuC,YAAY,aAAa,KAC9D,IAAI,KAAK,gBAAgB,OAAO,WAAW,CAC9C;AACD,SAAQ,IACN,uCAAuC,YAAY,aAAa,KAC9D,IAAI,KAAK,gBAAgB,OAAO,WAAW,CAC9C;CAED,MAAM,OAAO,KAAK,IAAI,eAAe,aAAa;AAClD,KAAI,OAAO,EACT,SAAQ,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC,yBAAyB,CAAC;KAExE,SAAQ,IAAI,IAAI,qBAAqB,KAAK,QAAQ,EAAE,CAAC,WAAW,CAAC;;;AAKrE,SAAS,kBAAkB,SAAqB,WAA6B;CAC3E,MAAM,SAAS,QAAQ,KAAK,GAAG,MAAM,YAAY,GAAG,UAAU,GAAG,CAAC;CAElE,MAAM,WAAW,OAAO,QAAO,MAAK,IAAI,EAAE,CAAC;CAC3C,MAAM,WAAW,OAAO,QAAO,MAAK,IAAI,EAAE,CAAC;CAC3C,MAAM,WAAW,QAAQ,OAAO;CAChC,MAAM,MAAM,OAAO,OAAO;CAC1B,MAAM,SAAS,WAAW,QAAQ,IAAK,GAAG,WAAW,QAAQ,IAAK;AAElE,SAAQ,KAAK;AACb,SAAQ,IAAI,KAAK,mBAAmB,CAAC;AACrC,SAAQ,IACN,aAAa,YAAY,SAAS,CAAC,YAAY,YAAY,IAAI,CAAC,SAAS,OAAO,QAAQ,EAAE,CAAC,GAC5F;AACD,SAAQ,IACN,kBAAkB,SAAS,WAAW,SAAS,WAC7C,IAAI,KAAK,OAAO,OAAO,WAAW,CACrC;AAED,KAAI,WAAW,KAAK,WAAW,EAC7B,SAAQ,IAAI,MAAM,wCAAwC,CAAC;KAE3D,SAAQ,IACN,IAAI,4DAA4D,CACjE;;;AAKL,SAAS,mBACP,SACA,WACA,MACM;AACN,SAAQ,KAAK;AACb,SAAQ,IAAI,KAAK,oBAAoB,CAAC;CACtC,MAAM,YAAY,UAAU,KAAI,MAAK,QAAQ,EAAE,CAAC;CAChD,MAAM,aAAa,QAAQ,KAAI,MAAK,QAAQ,EAAE,CAAC;AAC/C,eAAc,YAAY,UAAU;AACpC,eAAc,MAAM,WAAW;;;AAIjC,SAAS,YAAY,KAAqB;CACxC,MAAM,MAAM,oBAAoB,IAAI;AACpC,KAAI,MAAM,EAAG,QAAO,IAAI,IAAI;AAC5B,KAAI,MAAM,GAAI,QAAO,MAAM,IAAI;AAC/B,QAAO;;;AAIT,SAAS,
YAAY,SAAmB,aAA+B;CACrE,MAAM,MAAM,OAAO,QAAQ;CAC3B,MAAM,UAAU,OAAO,YAAY;AACnC,SAAS,MAAM,WAAW,UAAW;;;AAIvC,SAAS,cAAc,OAAe,OAAuB;CAC3D,MAAM,GAAG,MAAM,YAAY,OAAO,qBAAqB;CACvD,MAAM,UAAU,MAAM,KAAK,GAAG,MAAO,IAAI,KAAK,IAAI,GAAI,CAAC,QAAO,MAAK,KAAK,EAAE;AAC1E,KAAI,QAAQ,WAAW,GAAG;AACxB,UAAQ,IAAI,IAAI,OAAO,MAAM,aAAa,CAAC;AAC3C;;CAEF,MAAM,OAAO,QAAQ,KAAI,MAAK,OAAO,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,KAAK;CACjE,MAAM,QAAQ,OAAO,OAAO,GAAG;AAC/B,SAAQ,IACN,OAAO,MAAM,IAAI,OAAO,GAAG,QAAQ,OAAO,UAAU,CAAC,IAAI,KAAK,KAC5D,IAAI,YAAY,QAAQ,CAC3B"}
|