benchforge 0.1.9 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. package/LICENSE +20 -0
  2. package/README.md +99 -260
  3. package/bin/benchforge +1 -2
  4. package/dist/AnalyzeArchive-8NCJhmhS.mjs +145 -0
  5. package/dist/AnalyzeArchive-8NCJhmhS.mjs.map +1 -0
  6. package/dist/BenchMatrix-BZVrBB_h.mjs +1050 -0
  7. package/dist/BenchMatrix-BZVrBB_h.mjs.map +1 -0
  8. package/dist/BenchRunner-DglX1NOn.d.mts +302 -0
  9. package/dist/CoverageSampler-D5T9DRqe.mjs +27 -0
  10. package/dist/CoverageSampler-D5T9DRqe.mjs.map +1 -0
  11. package/dist/Formatters-BWj3d4sv.mjs +95 -0
  12. package/dist/Formatters-BWj3d4sv.mjs.map +1 -0
  13. package/dist/{HeapSampler-B8dtKHn1.mjs → HeapSampler-Dq-hpXem.mjs} +4 -4
  14. package/dist/HeapSampler-Dq-hpXem.mjs.map +1 -0
  15. package/dist/RunBenchCLI-C17DrJz8.mjs +3075 -0
  16. package/dist/RunBenchCLI-C17DrJz8.mjs.map +1 -0
  17. package/dist/StatisticalUtils-BD92crgM.mjs +255 -0
  18. package/dist/StatisticalUtils-BD92crgM.mjs.map +1 -0
  19. package/dist/TimeSampler-Ds8n7l2B.mjs +29 -0
  20. package/dist/TimeSampler-Ds8n7l2B.mjs.map +1 -0
  21. package/dist/ViewerServer-BJhdnxlN.mjs +639 -0
  22. package/dist/ViewerServer-BJhdnxlN.mjs.map +1 -0
  23. package/dist/ViewerServer-CuMNdNBz.mjs +2 -0
  24. package/dist/bin/benchforge.mjs +4 -5
  25. package/dist/bin/benchforge.mjs.map +1 -1
  26. package/dist/index.d.mts +731 -522
  27. package/dist/index.mjs +98 -3
  28. package/dist/index.mjs.map +1 -0
  29. package/dist/runners/WorkerScript.d.mts +12 -4
  30. package/dist/runners/WorkerScript.mjs +92 -120
  31. package/dist/runners/WorkerScript.mjs.map +1 -1
  32. package/dist/viewer/assets/CIPlot-BkOvMoMa.js +1 -0
  33. package/dist/viewer/assets/HistogramKde-CmSyUFY0.js +1 -0
  34. package/dist/viewer/assets/LegendUtils-BJpbn_jr.js +55 -0
  35. package/dist/viewer/assets/SampleTimeSeries-C4VBhXr3.js +1 -0
  36. package/dist/viewer/assets/index-Br9bp_cX.js +153 -0
  37. package/dist/viewer/assets/index-NzXXe_CC.css +1 -0
  38. package/dist/viewer/index.html +19 -0
  39. package/dist/viewer/speedscope/LICENSE +21 -0
  40. package/dist/viewer/speedscope/SourceCodePro-Regular.ttf-ILST5JV6.woff2 +0 -0
  41. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js +2 -0
  42. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js.map +7 -0
  43. package/dist/viewer/speedscope/favicon-16x16-VSI62OPJ.png +0 -0
  44. package/dist/viewer/speedscope/favicon-32x32-3EB2YCUY.png +0 -0
  45. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js +2 -0
  46. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js.map +7 -0
  47. package/dist/viewer/speedscope/favicon-FOKUP5Y5.ico +0 -0
  48. package/dist/viewer/speedscope/favicon-M34RF7BI.js +2 -0
  49. package/dist/viewer/speedscope/favicon-M34RF7BI.js.map +7 -0
  50. package/dist/viewer/speedscope/file-format-schema.json +274 -0
  51. package/dist/viewer/speedscope/index.html +19 -0
  52. package/dist/viewer/speedscope/jfrview_bg-BLJXNNQB.wasm +0 -0
  53. package/dist/viewer/speedscope/perf-vertx-stacks-01-collapsed-all-ZNUIGAJL.txt +199 -0
  54. package/dist/viewer/speedscope/release.txt +3 -0
  55. package/dist/viewer/speedscope/source-code-pro.LICENSE.md +93 -0
  56. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css +2 -0
  57. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css.map +7 -0
  58. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js +212 -0
  59. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js.map +7 -0
  60. package/package.json +52 -26
  61. package/src/bin/benchforge.ts +2 -2
  62. package/src/cli/AnalyzeArchive.ts +232 -0
  63. package/src/cli/BrowserBench.ts +322 -0
  64. package/src/cli/CliArgs.ts +164 -48
  65. package/src/cli/CliExport.ts +179 -0
  66. package/src/cli/CliOptions.ts +147 -0
  67. package/src/cli/CliReport.ts +197 -0
  68. package/src/cli/FilterBenchmarks.ts +18 -30
  69. package/src/cli/RunBenchCLI.ts +138 -844
  70. package/src/cli/SuiteRunner.ts +160 -0
  71. package/src/cli/ViewerServer.ts +282 -0
  72. package/src/export/AllocExport.ts +121 -0
  73. package/src/export/ArchiveExport.ts +146 -0
  74. package/src/export/ArchiveFormat.ts +50 -0
  75. package/src/export/CoverageExport.ts +148 -0
  76. package/src/export/EditorUri.ts +10 -0
  77. package/src/export/PerfettoExport.ts +91 -126
  78. package/src/export/SpeedscopeTypes.ts +98 -0
  79. package/src/export/TimeExport.ts +115 -0
  80. package/src/index.ts +87 -62
  81. package/src/matrix/BenchMatrix.ts +230 -0
  82. package/src/matrix/CaseLoader.ts +8 -6
  83. package/src/matrix/MatrixDirRunner.ts +153 -0
  84. package/src/matrix/MatrixFilter.ts +55 -53
  85. package/src/matrix/MatrixInlineRunner.ts +50 -0
  86. package/src/matrix/MatrixReport.ts +94 -254
  87. package/src/matrix/VariantLoader.ts +9 -9
  88. package/src/profiling/browser/BenchLoop.ts +51 -0
  89. package/src/profiling/browser/BrowserCDP.ts +133 -0
  90. package/src/profiling/browser/BrowserGcStats.ts +33 -0
  91. package/src/profiling/browser/BrowserProfiler.ts +160 -0
  92. package/src/profiling/browser/CdpClient.ts +82 -0
  93. package/src/profiling/browser/CdpPage.ts +138 -0
  94. package/src/profiling/browser/ChromeLauncher.ts +158 -0
  95. package/src/profiling/browser/ChromeTraceEvent.ts +28 -0
  96. package/src/profiling/browser/PageLoadMode.ts +61 -0
  97. package/src/profiling/node/CoverageSampler.ts +27 -0
  98. package/src/profiling/node/CoverageTypes.ts +23 -0
  99. package/src/profiling/node/HeapSampleReport.ts +261 -0
  100. package/src/{heap-sample → profiling/node}/HeapSampler.ts +55 -13
  101. package/src/profiling/node/ResolvedProfile.ts +98 -0
  102. package/src/profiling/node/TimeSampler.ts +57 -0
  103. package/src/report/BenchmarkReport.ts +146 -0
  104. package/src/report/Colors.ts +9 -0
  105. package/src/report/Formatters.ts +110 -0
  106. package/src/report/GcSections.ts +151 -0
  107. package/src/{GitUtils.ts → report/GitUtils.ts} +18 -19
  108. package/src/report/HtmlReport.ts +223 -0
  109. package/src/report/ParseStats.ts +73 -0
  110. package/src/report/StandardSections.ts +147 -0
  111. package/src/report/ViewerSections.ts +286 -0
  112. package/src/report/text/TableReport.ts +253 -0
  113. package/src/report/text/TextReport.ts +123 -0
  114. package/src/runners/AdaptiveWrapper.ts +167 -287
  115. package/src/runners/BenchRunner.ts +27 -22
  116. package/src/{Benchmark.ts → runners/BenchmarkSpec.ts} +5 -6
  117. package/src/runners/CreateRunner.ts +5 -7
  118. package/src/runners/GcStats.ts +58 -61
  119. package/src/{MeasuredResults.ts → runners/MeasuredResults.ts} +43 -37
  120. package/src/runners/MergeBatches.ts +123 -0
  121. package/src/{NodeGC.ts → runners/NodeGC.ts} +2 -3
  122. package/src/runners/RunnerOrchestrator.ts +180 -296
  123. package/src/runners/RunnerUtils.ts +75 -1
  124. package/src/runners/SampleStats.ts +100 -0
  125. package/src/runners/TimingRunner.ts +244 -0
  126. package/src/runners/TimingUtils.ts +3 -2
  127. package/src/runners/WorkerScript.ts +162 -178
  128. package/src/stats/BootstrapDifference.ts +282 -0
  129. package/src/{PermutationTest.ts → stats/PermutationTest.ts} +31 -40
  130. package/src/stats/StatisticalUtils.ts +445 -0
  131. package/src/{tests → test}/AdaptiveConvergence.test.ts +10 -10
  132. package/src/test/AdaptiveRunner.test.ts +39 -41
  133. package/src/{tests → test}/AdaptiveSampling.test.ts +9 -9
  134. package/src/test/AdaptiveStatistics.integration.ts +9 -41
  135. package/src/{tests → test}/BenchMatrix.test.ts +31 -28
  136. package/src/test/BenchmarkReport.test.ts +63 -13
  137. package/src/test/BrowserBench.e2e.test.ts +186 -17
  138. package/src/test/BrowserBench.test.ts +10 -5
  139. package/src/test/BuildTimeSection.test.ts +130 -0
  140. package/src/test/CapSamples.test.ts +82 -0
  141. package/src/test/CoverageExport.test.ts +115 -0
  142. package/src/test/CoverageSampler.test.ts +33 -0
  143. package/src/test/HeapAttribution.test.ts +51 -0
  144. package/src/{tests → test}/MatrixFilter.test.ts +16 -16
  145. package/src/{tests → test}/MatrixReport.test.ts +1 -1
  146. package/src/test/PermutationTest.test.ts +1 -1
  147. package/src/{tests → test}/RealDataValidation.test.ts +6 -6
  148. package/src/test/RunBenchCLI.test.ts +57 -56
  149. package/src/test/RunnerOrchestrator.test.ts +12 -12
  150. package/src/test/StatisticalUtils.test.ts +48 -12
  151. package/src/{table-util/test → test}/TableReport.test.ts +2 -2
  152. package/src/test/TestUtils.ts +35 -30
  153. package/src/test/TimeExport.test.ts +139 -0
  154. package/src/test/TimeSampler.test.ts +37 -0
  155. package/src/test/ViewerLive.e2e.test.ts +159 -0
  156. package/src/test/ViewerStatic.static.e2e.test.ts +137 -0
  157. package/src/{tests → test}/fixtures/baseline/impl.ts +1 -1
  158. package/src/{tests → test}/fixtures/bevy30-samples.ts +3 -1
  159. package/src/test/fixtures/cases/asyncCases.ts +9 -0
  160. package/src/{tests → test}/fixtures/cases/cases.ts +5 -2
  161. package/src/test/fixtures/cases/variants/product.ts +2 -0
  162. package/src/test/fixtures/cases/variants/sum.ts +2 -0
  163. package/src/test/fixtures/discover/fast.ts +1 -0
  164. package/src/{tests → test}/fixtures/discover/slow.ts +1 -1
  165. package/src/test/fixtures/invalid/bad.ts +1 -0
  166. package/src/test/fixtures/loader/fast.ts +1 -0
  167. package/src/{tests → test}/fixtures/loader/slow.ts +1 -1
  168. package/src/test/fixtures/loader/stateful.ts +2 -0
  169. package/src/test/fixtures/stateful/stateful.ts +2 -0
  170. package/src/test/fixtures/variants/extra.ts +1 -0
  171. package/src/test/fixtures/variants/impl.ts +1 -0
  172. package/src/test/fixtures/worker/fast.ts +1 -0
  173. package/src/{tests → test}/fixtures/worker/slow.ts +1 -1
  174. package/src/viewer/DateFormat.ts +30 -0
  175. package/src/viewer/Helpers.ts +23 -0
  176. package/src/viewer/LineData.ts +120 -0
  177. package/src/viewer/Providers.ts +191 -0
  178. package/src/viewer/ReportData.ts +123 -0
  179. package/src/viewer/State.ts +49 -0
  180. package/src/viewer/Theme.ts +15 -0
  181. package/src/viewer/components/App.tsx +73 -0
  182. package/src/viewer/components/DropZone.tsx +71 -0
  183. package/src/viewer/components/LazyPlot.ts +33 -0
  184. package/src/viewer/components/SamplesPanel.tsx +214 -0
  185. package/src/viewer/components/Shell.tsx +26 -0
  186. package/src/viewer/components/SourcePanel.tsx +216 -0
  187. package/src/viewer/components/SummaryPanel.tsx +332 -0
  188. package/src/viewer/components/TabBar.tsx +131 -0
  189. package/src/viewer/components/TabContent.tsx +46 -0
  190. package/src/viewer/components/ThemeToggle.tsx +50 -0
  191. package/src/viewer/index.html +20 -0
  192. package/src/viewer/main.tsx +4 -0
  193. package/src/viewer/plots/CIPlot.ts +313 -0
  194. package/src/{html/browser → viewer/plots}/HistogramKde.ts +42 -47
  195. package/src/viewer/plots/LegendUtils.ts +134 -0
  196. package/src/viewer/plots/PlotTypes.ts +85 -0
  197. package/src/viewer/plots/RenderPlots.ts +230 -0
  198. package/src/viewer/plots/SampleTimeSeries.ts +306 -0
  199. package/src/viewer/plots/SvgHelpers.ts +136 -0
  200. package/src/viewer/plots/TimeSeriesMarks.ts +319 -0
  201. package/src/viewer/report.css +427 -0
  202. package/src/viewer/shell.css +357 -0
  203. package/src/viewer/tsconfig.json +11 -0
  204. package/dist/BenchRunner-CSKN9zPy.d.mts +0 -225
  205. package/dist/BrowserHeapSampler-DCeL42RE.mjs +0 -202
  206. package/dist/BrowserHeapSampler-DCeL42RE.mjs.map +0 -1
  207. package/dist/GcStats-ByEovUi1.mjs +0 -77
  208. package/dist/GcStats-ByEovUi1.mjs.map +0 -1
  209. package/dist/HeapSampler-B8dtKHn1.mjs.map +0 -1
  210. package/dist/TimingUtils-ClclVQ7E.mjs +0 -597
  211. package/dist/TimingUtils-ClclVQ7E.mjs.map +0 -1
  212. package/dist/browser/index.js +0 -914
  213. package/dist/src-Cf_LXwlp.mjs +0 -2873
  214. package/dist/src-Cf_LXwlp.mjs.map +0 -1
  215. package/src/BenchMatrix.ts +0 -380
  216. package/src/BenchmarkReport.ts +0 -156
  217. package/src/HtmlDataPrep.ts +0 -148
  218. package/src/StandardSections.ts +0 -261
  219. package/src/StatisticalUtils.ts +0 -176
  220. package/src/TypeUtil.ts +0 -8
  221. package/src/browser/BrowserGcStats.ts +0 -44
  222. package/src/browser/BrowserHeapSampler.ts +0 -271
  223. package/src/export/JsonExport.ts +0 -103
  224. package/src/export/JsonFormat.ts +0 -91
  225. package/src/heap-sample/HeapSampleReport.ts +0 -196
  226. package/src/html/HtmlReport.ts +0 -131
  227. package/src/html/HtmlTemplate.ts +0 -284
  228. package/src/html/Types.ts +0 -88
  229. package/src/html/browser/CIPlot.ts +0 -287
  230. package/src/html/browser/LegendUtils.ts +0 -163
  231. package/src/html/browser/RenderPlots.ts +0 -263
  232. package/src/html/browser/SampleTimeSeries.ts +0 -389
  233. package/src/html/browser/Types.ts +0 -96
  234. package/src/html/browser/index.ts +0 -1
  235. package/src/html/index.ts +0 -17
  236. package/src/runners/BasicRunner.ts +0 -364
  237. package/src/table-util/ConvergenceFormatters.ts +0 -19
  238. package/src/table-util/Formatters.ts +0 -152
  239. package/src/table-util/README.md +0 -70
  240. package/src/table-util/TableReport.ts +0 -293
  241. package/src/tests/fixtures/cases/asyncCases.ts +0 -7
  242. package/src/tests/fixtures/cases/variants/product.ts +0 -2
  243. package/src/tests/fixtures/cases/variants/sum.ts +0 -2
  244. package/src/tests/fixtures/discover/fast.ts +0 -1
  245. package/src/tests/fixtures/invalid/bad.ts +0 -1
  246. package/src/tests/fixtures/loader/fast.ts +0 -1
  247. package/src/tests/fixtures/loader/stateful.ts +0 -2
  248. package/src/tests/fixtures/stateful/stateful.ts +0 -2
  249. package/src/tests/fixtures/variants/extra.ts +0 -1
  250. package/src/tests/fixtures/variants/impl.ts +0 -1
  251. package/src/tests/fixtures/worker/fast.ts +0 -1
  252. package/src/{table-util/test → test}/TableValueExtractor.test.ts +0 -0
  253. package/src/{table-util/test → test}/TableValueExtractor.ts +9 -9
@@ -0,0 +1,123 @@
1
+ import {
2
+ type BenchmarkReport,
3
+ type ComparisonOptions,
4
+ computeDiffCI,
5
+ extractSectionValues,
6
+ findPrimaryColumn,
7
+ isHigherIsBetter,
8
+ type ReportColumn,
9
+ type ReportGroup,
10
+ type ReportSection,
11
+ } from "../BenchmarkReport.ts";
12
+ import { formatDiffWithCI, truncate } from "../Formatters.ts";
13
+ import {
14
+ buildTable,
15
+ type ColumnGroup,
16
+ type ResultGroup,
17
+ } from "./TableReport.ts";
18
+
19
+ /** Options for text report rendering, including baseline comparison settings. */
20
+ export interface TextReportOptions extends ComparisonOptions {}
21
+
22
+ type Row = Record<string, unknown> & { name: string };
23
+
24
+ /** Build a formatted text table from benchmark groups, with baseline diff columns when present. */
25
+ export function reportResults(
26
+ groups: ReportGroup[],
27
+ sections: ReportSection[],
28
+ options?: TextReportOptions,
29
+ ): string {
30
+ const primary = findPrimaryColumn(sections);
31
+ const results = groups.map(g =>
32
+ resultGroupValues(g, sections, primary, options),
33
+ );
34
+ const hasBaseline = results.some(g => g.baseline);
35
+ const table = buildTable(sectionColumnGroups(sections, hasBaseline), results);
36
+ const hasSampleCI = results.some(g =>
37
+ g.results.some(r => r.diffCI && (r.diffCI as any).ciLevel === "sample"),
38
+ );
39
+ if (!hasSampleCI) return table;
40
+ return (
41
+ table +
42
+ "\n* Confidence intervals may be too narrow (single batch)." +
43
+ " Use --batches for more accurate intervals.\n"
44
+ );
45
+ }
46
+
47
+ /** Extract stats from all sections into row objects for each report. */
48
+ export function valuesForReports(
49
+ reports: BenchmarkReport[],
50
+ sections: ReportSection[],
51
+ ): Row[] {
52
+ return reports.map(r => ({
53
+ name: truncate(r.name),
54
+ ...extractSectionValues(r.measuredResults, sections, r.metadata),
55
+ }));
56
+ }
57
+
58
+ /** Insert a "delta% CI" column after the first comparable column. */
59
+ export function injectDiffColumns(
60
+ groups: ColumnGroup<Row>[],
61
+ ): ColumnGroup<Row>[] {
62
+ const asSections = groups.map(g => ({
63
+ title: g.groupTitle ?? "",
64
+ columns: g.columns as ReportColumn[],
65
+ }));
66
+ const higher = isHigherIsBetter(asSections);
67
+ const fmt = (v: unknown) => formatDiffWithCI(v, higher);
68
+ const ciCol = { title: "Δ% CI", key: "diffCI" as keyof Row, formatter: fmt };
69
+
70
+ let ciAdded = false;
71
+ return groups.map(group => ({
72
+ groupTitle: group.groupTitle,
73
+ columns: group.columns.flatMap(col => {
74
+ if ((col as ReportColumn).comparable && !ciAdded) {
75
+ ciAdded = true;
76
+ return [col, ciCol];
77
+ }
78
+ return [col];
79
+ }),
80
+ }));
81
+ }
82
+
83
+ /** Build table columns from sections, with name column and optional CI diff columns. */
84
+ export function sectionColumnGroups(
85
+ sections: ReportSection[],
86
+ hasBaseline: boolean,
87
+ nameTitle = "name",
88
+ ): ColumnGroup<Row>[] {
89
+ const nameCol: ColumnGroup<Row> = {
90
+ columns: [{ key: "name" as keyof Row, title: nameTitle }],
91
+ };
92
+ const groups: ColumnGroup<Row>[] = sections.map(s => ({
93
+ groupTitle: s.title || undefined,
94
+ columns: s.columns.map(c => ({
95
+ ...c,
96
+ key: (c.key ?? c.title) as keyof Row,
97
+ })),
98
+ }));
99
+ const cols = hasBaseline ? injectDiffColumns(groups) : groups;
100
+ return [nameCol, ...cols];
101
+ }
102
+
103
+ /** Extract section stats and bootstrap CI diffs for all reports in a group. */
104
+ function resultGroupValues(
105
+ group: ReportGroup,
106
+ sections: ReportSection[],
107
+ primary?: ReportColumn,
108
+ options?: TextReportOptions,
109
+ ): ResultGroup<Row> {
110
+ const { reports, baseline } = group;
111
+ const baseM = baseline?.measuredResults;
112
+ const { statKind, higherIsBetter } = primary ?? {};
113
+ const results = reports.map(r => {
114
+ const { measuredResults: m, metadata } = r;
115
+ const diffCI = statKind
116
+ ? computeDiffCI(baseM, m, statKind, options, higherIsBetter)
117
+ : undefined;
118
+ const values = extractSectionValues(m, sections, metadata);
119
+ return { name: truncate(r.name), ...values, ...(diffCI && { diffCI }) };
120
+ });
121
+ const baseRow = baseline && valuesForReports([baseline], sections)[0];
122
+ return { results, baseline: baseRow };
123
+ }
@@ -1,22 +1,23 @@
1
- import type { BenchmarkSpec } from "../Benchmark.ts";
2
- import type { MeasuredResults } from "../MeasuredResults.ts";
3
- import {
4
- coefficientOfVariation,
5
- medianAbsoluteDeviation,
6
- percentile,
7
- } from "../StatisticalUtils.ts";
1
+ import { median } from "../stats/StatisticalUtils.ts";
2
+ import type { BenchmarkSpec } from "./BenchmarkSpec.ts";
8
3
  import type { BenchRunner, RunnerOptions } from "./BenchRunner.ts";
4
+ import type { MeasuredResults } from "./MeasuredResults.ts";
9
5
  import { msToNs } from "./RunnerUtils.ts";
6
+ import { computeStats, outlierImpactRatio } from "./SampleStats.ts";
10
7
 
11
- const minTime = 1000;
12
- const maxTime = 10000;
13
- const targetConfidence = 95;
14
- const fallbackThreshold = 80;
15
- const windowSize = 50;
16
- const stability = 0.05; // 5% drift threshold (was 2%, too strict for real benchmarks)
17
- const initialBatch = 100;
18
- const continueBatch = 100;
19
- const continueIterations = 10;
8
+ /** Options for adaptive sampling: collects until statistical convergence or timeout. */
9
+ export interface AdaptiveOptions extends RunnerOptions {
10
+ /** Enable adaptive sampling (default: true when using adaptive runner) */
11
+ adaptive?: boolean;
12
+ /** Minimum measurement time in ms before convergence can stop sampling (default: 1000) */
13
+ minTime?: number;
14
+ /** Maximum measurement time in ms, hard stop (default: 10000) */
15
+ maxTime?: number;
16
+ /** Target confidence percentage to stop early (default: 95) */
17
+ targetConfidence?: number;
18
+ /** Confidence threshold 0-100 (alias for targetConfidence) */
19
+ convergence?: number;
20
+ }
20
21
 
21
22
  type Metrics = {
22
23
  medianDrift: number;
@@ -31,112 +32,163 @@ interface ConvergenceResult {
31
32
  reason: string;
32
33
  }
33
34
 
34
- export interface AdaptiveOptions extends RunnerOptions {
35
- adaptive?: boolean;
36
- minTime?: number;
37
- maxTime?: number;
38
- targetConfidence?: number;
39
- convergence?: number; // Confidence threshold (0-100)
40
- }
35
+ const minTime = 1000;
36
+ const maxTime = 10000;
37
+ const targetConfidence = 95;
38
+ const fallbackThreshold = 80;
39
+ const windowSize = 50;
40
+ const stability = 0.05; // 5% drift threshold (was 2%, too strict for real benchmarks)
41
+ const initialBatch = 100;
42
+ const continueBatch = 100;
43
+ const continueIterations = 10;
41
44
 
42
- /** @return adaptive sampling runner wrapper */
45
+ /** Wrap a runner with adaptive sampling (convergence detection or timeout). */
43
46
  export function createAdaptiveWrapper(
44
47
  baseRunner: BenchRunner,
45
48
  options: AdaptiveOptions,
46
49
  ): BenchRunner {
47
50
  return {
48
51
  async runBench<T = unknown>(
49
- benchmark: BenchmarkSpec<T>,
50
- runnerOptions: RunnerOptions,
52
+ bench: BenchmarkSpec<T>,
53
+ opts: RunnerOptions,
51
54
  params?: T,
52
55
  ): Promise<MeasuredResults[]> {
53
- return runAdaptiveBench(
54
- baseRunner,
55
- benchmark,
56
- runnerOptions,
57
- options,
58
- params,
59
- );
56
+ return runAdaptiveBench(baseRunner, bench, opts, options, params);
60
57
  },
61
58
  };
62
59
  }
63
60
 
64
- /** @return results using adaptive sampling strategy */
61
+ /** Check convergence by comparing sliding windows of samples for stability. */
62
+ export function checkConvergence(samples: number[]): ConvergenceResult {
63
+ const windowSize = getWindowSize(samples);
64
+ const minSamples = windowSize * 2;
65
+ if (samples.length < minSamples) {
66
+ const confidence = (samples.length / minSamples) * 100;
67
+ const reason = `Collecting samples: ${samples.length}/${minSamples}`;
68
+ return { converged: false, confidence, reason };
69
+ }
70
+ return buildConvergence(getStability(samples, windowSize));
71
+ }
72
+
73
+ /** Run benchmark with adaptive sampling until convergence or timeout. */
65
74
  async function runAdaptiveBench<T>(
66
- baseRunner: BenchRunner,
67
- benchmark: BenchmarkSpec<T>,
68
- runnerOptions: RunnerOptions,
69
- options: AdaptiveOptions,
75
+ runner: BenchRunner,
76
+ bench: BenchmarkSpec<T>,
77
+ opts: RunnerOptions,
78
+ adaptive: AdaptiveOptions,
70
79
  params?: T,
71
80
  ): Promise<MeasuredResults[]> {
72
- const {
73
- minTime: min = options.minTime ?? minTime,
74
- maxTime: max = options.maxTime ?? maxTime,
75
- targetConfidence: target = options.convergence ?? targetConfidence,
76
- } = runnerOptions as AdaptiveOptions;
81
+ const overrides = opts as AdaptiveOptions;
82
+ const min = overrides.minTime ?? adaptive.minTime ?? minTime;
83
+ const max = overrides.maxTime ?? adaptive.maxTime ?? maxTime;
84
+ const target =
85
+ overrides.convergence ?? adaptive.convergence ?? targetConfidence;
77
86
  const allSamples: number[] = [];
78
87
 
79
- // Collect initial batch (includes warmup + settle)
80
- const warmup = await collectInitial(
81
- baseRunner,
82
- benchmark,
83
- runnerOptions,
88
+ const { warmup, startTime: hrtimeStart } = await collectInitial(
89
+ runner,
90
+ bench,
91
+ opts,
84
92
  params,
85
93
  allSamples,
86
94
  );
87
-
88
- // Start timing AFTER warmup - warmup time doesn't count against maxTime
95
+ // Start timing after warmup so warmup time doesn't count against maxTime
89
96
  const startTime = performance.now();
90
-
91
97
  const limits = {
92
98
  minTime: min,
93
99
  maxTime: max,
94
100
  targetConfidence: target,
95
101
  startTime,
96
102
  };
97
- await collectAdaptive(
98
- baseRunner,
99
- benchmark,
100
- runnerOptions,
101
- params,
102
- allSamples,
103
- limits,
104
- );
103
+ await collectAdaptive(runner, bench, opts, params, allSamples, limits);
105
104
 
106
- const convergence = checkConvergence(allSamples.map(s => s * msToNs));
105
+ const samplesNs = allSamples.map(s => s * msToNs);
106
+ const convergence = checkConvergence(samplesNs);
107
107
  return buildResults(
108
108
  allSamples,
109
109
  startTime,
110
110
  convergence,
111
- benchmark.name,
111
+ bench.name,
112
112
  warmup,
113
+ hrtimeStart,
113
114
  );
114
115
  }
115
116
 
116
- /** @return warmupSamples from initial batch */
117
+ /** Scale window size inversely with execution time -- fast ops need more samples. */
118
+ function getWindowSize(samples: number[]): number {
119
+ if (samples.length < 20) return windowSize;
120
+
121
+ const recentMs = samples.slice(-20).map(s => s / msToNs);
122
+ const recentMedian = median(recentMs);
123
+
124
+ if (recentMedian < 0.01) return 200; // <10μs
125
+ if (recentMedian < 0.1) return 100; // <100μs
126
+ if (recentMedian < 1) return 50; // <1ms
127
+ if (recentMedian < 10) return 30; // <10ms
128
+ return 20; // >10ms
129
+ }
130
+
131
+ /** Convert stability metrics to a convergence result with confidence score. */
132
+ function buildConvergence(metrics: Metrics): ConvergenceResult {
133
+ const { medianDrift, impactDrift, medianStable, impactStable } = metrics;
134
+ if (medianStable && impactStable)
135
+ return {
136
+ converged: true,
137
+ confidence: 100,
138
+ reason: "Stable performance pattern",
139
+ };
140
+ const raw =
141
+ (1 - medianDrift / stability) * 50 + (1 - impactDrift / stability) * 50;
142
+ const confidence = Math.max(0, Math.min(100, raw));
143
+ const reason =
144
+ medianDrift > impactDrift
145
+ ? `Median drifting: ${(medianDrift * 100).toFixed(1)}%`
146
+ : `Outlier impact changing: ${(impactDrift * 100).toFixed(1)}%`;
147
+ return { converged: false, confidence, reason };
148
+ }
149
+
150
+ /** Compare median and outlier-impact drift between recent and previous windows. */
151
+ function getStability(samples: number[], windowSize: number): Metrics {
152
+ const toMs = (s: number) => s / msToNs;
153
+ const recentMs = samples.slice(-windowSize).map(toMs);
154
+ const previousMs = samples.slice(-windowSize * 2, -windowSize).map(toMs);
155
+
156
+ const medianRecent = median(recentMs);
157
+ const medianPrevious = median(previousMs);
158
+ const medianDrift = Math.abs(medianRecent - medianPrevious) / medianPrevious;
159
+
160
+ const impactRecent = outlierImpactRatio(recentMs);
161
+ const impactPrevious = outlierImpactRatio(previousMs);
162
+ const impactDrift = Math.abs(impactRecent - impactPrevious);
163
+
164
+ const medianStable = medianDrift < stability;
165
+ const impactStable = impactDrift < stability;
166
+ return { medianDrift, impactDrift, medianStable, impactStable };
167
+ }
168
+
169
+ /** Collect the initial batch (warmup + settle), returning warmup samples. */
117
170
  async function collectInitial<T>(
118
- baseRunner: BenchRunner,
119
- benchmark: BenchmarkSpec<T>,
120
- runnerOptions: RunnerOptions,
171
+ runner: BenchRunner,
172
+ bench: BenchmarkSpec<T>,
173
+ opts: RunnerOptions,
121
174
  params: T | undefined,
122
175
  allSamples: number[],
123
- ): Promise<number[] | undefined> {
124
- // Don't pass adaptive flag to base runner to avoid double wrapping
125
- const opts = {
126
- ...(runnerOptions as any),
176
+ ): Promise<{ warmup?: number[]; startTime?: number }> {
177
+ const batchOpts = {
178
+ ...(opts as any),
127
179
  maxTime: initialBatch,
128
180
  maxIterations: undefined,
129
181
  };
130
- const results = await baseRunner.runBench(benchmark, opts, params);
182
+ const results = await runner.runBench(bench, batchOpts, params);
131
183
  appendSamples(results[0], allSamples);
132
- return results[0].warmupSamples;
184
+ return { warmup: results[0].warmupSamples, startTime: results[0].startTime };
133
185
  }
134
186
 
135
- /** @return samples until convergence or timeout */
187
+ /** Collect batches until convergence or timeout, with progress logging. */
136
188
  async function collectAdaptive<T>(
137
- baseRunner: BenchRunner,
138
- benchmark: BenchmarkSpec<T>,
139
- runnerOptions: RunnerOptions,
189
+ runner: BenchRunner,
190
+ bench: BenchmarkSpec<T>,
191
+ opts: RunnerOptions,
140
192
  params: T | undefined,
141
193
  allSamples: number[],
142
194
  limits: {
@@ -153,239 +205,67 @@ async function collectAdaptive<T>(
153
205
  const convergence = checkConvergence(samplesNs);
154
206
  const elapsed = performance.now() - startTime;
155
207
 
156
- if (elapsed - lastLog > 1000) {
157
- const elapsedSec = (elapsed / 1000).toFixed(1);
158
- const conf = convergence.confidence.toFixed(0);
159
- process.stderr.write(
160
- `\r◊ ${benchmark.name}: ${conf}% confident (${elapsedSec}s) `,
161
- );
162
- lastLog = elapsed;
163
- }
164
-
165
- if (shouldStop(convergence, targetConfidence, elapsed, minTime)) {
166
- break;
167
- }
208
+ lastLog = logProgress(bench.name, convergence, elapsed, lastLog);
209
+ if (shouldStop(convergence, targetConfidence, elapsed, minTime)) break;
168
210
 
169
- // Skip warmup for continuation batches (warmup done in initial batch)
170
- const opts = {
171
- ...(runnerOptions as any),
211
+ const batch = {
212
+ ...(opts as any),
172
213
  maxTime: continueBatch,
173
214
  maxIterations: continueIterations,
174
215
  skipWarmup: true,
175
216
  };
176
- const batchResults = await baseRunner.runBench(benchmark, opts, params);
177
- appendSamples(batchResults[0], allSamples);
217
+ const results = await runner.runBench(bench, batch, params);
218
+ appendSamples(results[0], allSamples);
178
219
  }
179
220
  process.stderr.write("\r" + " ".repeat(60) + "\r");
180
221
  }
181
222
 
182
- /** Append samples one-by-one to avoid stack overflow from spread on large arrays */
183
- function appendSamples(result: MeasuredResults, samples: number[]): void {
184
- if (!result.samples?.length) return;
185
- for (const sample of result.samples) samples.push(sample);
186
- }
187
-
188
- /** @return true if convergence reached or timeout */
189
- function shouldStop(
190
- convergence: ConvergenceResult,
191
- targetConfidence: number,
192
- elapsedTime: number,
193
- minTime: number,
194
- ): boolean {
195
- if (convergence.converged && convergence.confidence >= targetConfidence) {
196
- return true;
197
- }
198
- // After minTime, accept whichever is higher: targetConfidence or fallbackThreshold
199
- const threshold = Math.max(targetConfidence, fallbackThreshold);
200
- return elapsedTime >= minTime && convergence.confidence >= threshold;
201
- }
202
-
203
- /** @return measured results with convergence metrics */
223
+ /** Build final MeasuredResults from collected samples and convergence state. */
204
224
  function buildResults(
205
- samplesMs: number[],
206
- startTime: number,
225
+ samples: number[],
226
+ elapsedStart: number,
207
227
  convergence: ConvergenceResult,
208
228
  name: string,
209
229
  warmupSamples?: number[],
230
+ startTime?: number,
210
231
  ): MeasuredResults[] {
211
- const totalTime = (performance.now() - startTime) / 1000;
212
- const samplesNs = samplesMs.map(s => s * msToNs);
213
- const timeStats = computeTimeStats(samplesNs);
214
-
232
+ const totalTime = (performance.now() - elapsedStart) / 1000;
233
+ const time = computeStats(samples);
215
234
  return [
216
- {
217
- name,
218
- samples: samplesMs,
219
- warmupSamples,
220
- time: timeStats,
221
- totalTime,
222
- convergence,
223
- },
235
+ { name, samples, warmupSamples, time, totalTime, startTime, convergence },
224
236
  ];
225
237
  }
226
238
 
227
- /** @return time percentiles and statistics in ms */
228
- function computeTimeStats(samplesNs: number[]) {
229
- const samplesMs = samplesNs.map(s => s / msToNs);
230
- const { min, max, sum } = getMinMaxSum(samplesNs);
231
- const percentiles = getPercentiles(samplesNs);
232
- const robust = getRobustMetrics(samplesMs);
233
-
234
- return {
235
- min: min / msToNs,
236
- max: max / msToNs,
237
- avg: sum / samplesNs.length / msToNs,
238
- ...percentiles,
239
- ...robust,
240
- };
241
- }
242
-
243
- /** @return min, max, sum of samples */
244
- function getMinMaxSum(samples: number[]) {
245
- const min = samples.reduce(
246
- (a, b) => Math.min(a, b),
247
- Number.POSITIVE_INFINITY,
248
- );
249
- const max = samples.reduce(
250
- (a, b) => Math.max(a, b),
251
- Number.NEGATIVE_INFINITY,
252
- );
253
- const sum = samples.reduce((a, b) => a + b, 0);
254
- return { min, max, sum };
255
- }
256
-
257
- /** @return percentiles in ms */
258
- function getPercentiles(samples: number[]) {
259
- return {
260
- p25: percentile(samples, 0.25) / msToNs,
261
- p50: percentile(samples, 0.5) / msToNs,
262
- p75: percentile(samples, 0.75) / msToNs,
263
- p95: percentile(samples, 0.95) / msToNs,
264
- p99: percentile(samples, 0.99) / msToNs,
265
- p999: percentile(samples, 0.999) / msToNs,
266
- };
267
- }
268
-
269
- /** @return robust variability metrics */
270
- function getRobustMetrics(samplesMs: number[]) {
271
- const impact = getOutlierImpact(samplesMs);
272
- return {
273
- cv: coefficientOfVariation(samplesMs),
274
- mad: medianAbsoluteDeviation(samplesMs),
275
- outlierRate: impact.ratio,
276
- };
277
- }
278
-
279
- /** @return outlier impact as proportion of total time */
280
- function getOutlierImpact(samples: number[]): { ratio: number; count: number } {
281
- if (samples.length === 0) return { ratio: 0, count: 0 };
282
-
283
- const median = percentile(samples, 0.5);
284
- const q75 = percentile(samples, 0.75);
285
- const threshold = median + 1.5 * (q75 - median);
286
-
287
- let excessTime = 0;
288
- let count = 0;
289
-
290
- for (const sample of samples) {
291
- if (sample > threshold) {
292
- excessTime += sample - median;
293
- count++;
294
- }
295
- }
296
-
297
- const totalTime = samples.reduce((a, b) => a + b, 0);
298
- return {
299
- ratio: totalTime > 0 ? excessTime / totalTime : 0,
300
- count,
301
- };
302
- }
303
-
304
- /** @return convergence based on window stability */
305
- export function checkConvergence(samples: number[]): ConvergenceResult {
306
- const windowSize = getWindowSize(samples);
307
- const minSamples = windowSize * 2;
308
-
309
- if (samples.length < minSamples) {
310
- return buildProgressResult(samples.length, minSamples);
311
- }
312
-
313
- const metrics = getStability(samples, windowSize);
314
- return buildConvergence(metrics);
315
- }
316
-
317
- /** @return progress when samples insufficient */
318
- function buildProgressResult(
319
- currentSamples: number,
320
- minSamples: number,
321
- ): ConvergenceResult {
322
- return {
323
- converged: false,
324
- confidence: (currentSamples / minSamples) * 100,
325
- reason: `Collecting samples: ${currentSamples}/${minSamples}`,
326
- };
239
+ /** Append samples one-by-one to avoid stack overflow from spread on large arrays. */
240
+ function appendSamples(result: MeasuredResults, samples: number[]): void {
241
+ if (!result.samples?.length) return;
242
+ for (const sample of result.samples) samples.push(sample);
327
243
  }
328
244
 
329
- /** @return stability metrics between windows */
330
- function getStability(samples: number[], windowSize: number): Metrics {
331
- const recent = samples.slice(-windowSize);
332
- const previous = samples.slice(-windowSize * 2, -windowSize);
333
-
334
- const recentMs = recent.map(s => s / msToNs);
335
- const previousMs = previous.map(s => s / msToNs);
336
-
337
- const medianRecent = percentile(recentMs, 0.5);
338
- const medianPrevious = percentile(previousMs, 0.5);
339
- const medianDrift = Math.abs(medianRecent - medianPrevious) / medianPrevious;
340
-
341
- const impactRecent = getOutlierImpact(recentMs);
342
- const impactPrevious = getOutlierImpact(previousMs);
343
- const impactDrift = Math.abs(impactRecent.ratio - impactPrevious.ratio);
344
-
345
- return {
346
- medianDrift,
347
- impactDrift,
348
- medianStable: medianDrift < stability,
349
- impactStable: impactDrift < stability,
350
- };
245
+ /** Log adaptive sampling progress at ~1s intervals. */
246
+ function logProgress(
247
+ name: string,
248
+ convergence: ConvergenceResult,
249
+ elapsed: number,
250
+ lastLog: number,
251
+ ): number {
252
+ if (elapsed - lastLog <= 1000) return lastLog;
253
+ const sec = (elapsed / 1000).toFixed(1);
254
+ const conf = convergence.confidence.toFixed(0);
255
+ process.stderr.write(`\r◊ ${name}: ${conf}% confident (${sec}s) `);
256
+ return elapsed;
351
257
  }
352
258
 
353
- /** @return convergence from stability metrics */
354
- function buildConvergence(metrics: Metrics): ConvergenceResult {
355
- const { medianDrift, impactDrift, medianStable, impactStable } = metrics;
356
-
357
- if (medianStable && impactStable) {
358
- return {
359
- converged: true,
360
- confidence: 100,
361
- reason: "Stable performance pattern",
362
- };
363
- }
364
-
365
- const confidence = Math.min(
366
- 100,
367
- (1 - medianDrift / stability) * 50 + (1 - impactDrift / stability) * 50,
259
+ /** @return true if convergence target met, or minTime elapsed with fallback confidence. */
260
+ function shouldStop(
261
+ convergence: ConvergenceResult,
262
+ target: number,
263
+ elapsed: number,
264
+ minElapsed: number,
265
+ ): boolean {
266
+ if (convergence.converged && convergence.confidence >= target) return true;
267
+ return (
268
+ elapsed >= minElapsed &&
269
+ convergence.confidence >= Math.max(target, fallbackThreshold)
368
270
  );
369
-
370
- const reason =
371
- medianDrift > impactDrift
372
- ? `Median drifting: ${(medianDrift * 100).toFixed(1)}%`
373
- : `Outlier impact changing: ${(impactDrift * 100).toFixed(1)}%`;
374
-
375
- return { converged: false, confidence: Math.max(0, confidence), reason };
376
- }
377
-
378
- /** @return window size scaled to execution time */
379
- function getWindowSize(samples: number[]): number {
380
- if (samples.length < 20) return windowSize; // Default for initial samples
381
-
382
- const recentMs = samples.slice(-20).map(s => s / msToNs);
383
- const recentMedian = percentile(recentMs, 0.5);
384
-
385
- // Inverse scaling with execution time
386
- if (recentMedian < 0.01) return 200; // <10μs
387
- if (recentMedian < 0.1) return 100; // <100μs
388
- if (recentMedian < 1) return 50; // <1ms
389
- if (recentMedian < 10) return 30; // <10ms
390
- return 20; // >10ms
391
271
  }