benchforge 0.1.11 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. package/LICENSE +20 -0
  2. package/README.md +99 -294
  3. package/bin/benchforge +1 -2
  4. package/dist/AnalyzeArchive-8NCJhmhS.mjs +145 -0
  5. package/dist/AnalyzeArchive-8NCJhmhS.mjs.map +1 -0
  6. package/dist/BenchMatrix-BZVrBB_h.mjs +1050 -0
  7. package/dist/BenchMatrix-BZVrBB_h.mjs.map +1 -0
  8. package/dist/{BenchRunner-BzyUfiyB.d.mts → BenchRunner-DglX1NOn.d.mts} +119 -66
  9. package/dist/CoverageSampler-D5T9DRqe.mjs +27 -0
  10. package/dist/CoverageSampler-D5T9DRqe.mjs.map +1 -0
  11. package/dist/Formatters-BWj3d4sv.mjs +95 -0
  12. package/dist/Formatters-BWj3d4sv.mjs.map +1 -0
  13. package/dist/{HeapSampler-B8dtKHn1.mjs → HeapSampler-Dq-hpXem.mjs} +4 -4
  14. package/dist/HeapSampler-Dq-hpXem.mjs.map +1 -0
  15. package/dist/RunBenchCLI-C17DrJz8.mjs +3075 -0
  16. package/dist/RunBenchCLI-C17DrJz8.mjs.map +1 -0
  17. package/dist/StatisticalUtils-BD92crgM.mjs +255 -0
  18. package/dist/StatisticalUtils-BD92crgM.mjs.map +1 -0
  19. package/dist/TimeSampler-Ds8n7l2B.mjs +29 -0
  20. package/dist/TimeSampler-Ds8n7l2B.mjs.map +1 -0
  21. package/dist/ViewerServer-BJhdnxlN.mjs +639 -0
  22. package/dist/ViewerServer-BJhdnxlN.mjs.map +1 -0
  23. package/dist/ViewerServer-CuMNdNBz.mjs +2 -0
  24. package/dist/bin/benchforge.mjs +4 -5
  25. package/dist/bin/benchforge.mjs.map +1 -1
  26. package/dist/index.d.mts +711 -558
  27. package/dist/index.mjs +98 -3
  28. package/dist/index.mjs.map +1 -0
  29. package/dist/runners/WorkerScript.d.mts +12 -4
  30. package/dist/runners/WorkerScript.mjs +77 -105
  31. package/dist/runners/WorkerScript.mjs.map +1 -1
  32. package/dist/viewer/assets/CIPlot-BkOvMoMa.js +1 -0
  33. package/dist/viewer/assets/HistogramKde-CmSyUFY0.js +1 -0
  34. package/dist/viewer/assets/LegendUtils-BJpbn_jr.js +55 -0
  35. package/dist/viewer/assets/SampleTimeSeries-C4VBhXr3.js +1 -0
  36. package/dist/viewer/assets/index-Br9bp_cX.js +153 -0
  37. package/dist/viewer/assets/index-NzXXe_CC.css +1 -0
  38. package/dist/viewer/index.html +19 -0
  39. package/dist/viewer/speedscope/LICENSE +21 -0
  40. package/dist/viewer/speedscope/SourceCodePro-Regular.ttf-ILST5JV6.woff2 +0 -0
  41. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js +2 -0
  42. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js.map +7 -0
  43. package/dist/viewer/speedscope/favicon-16x16-VSI62OPJ.png +0 -0
  44. package/dist/viewer/speedscope/favicon-32x32-3EB2YCUY.png +0 -0
  45. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js +2 -0
  46. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js.map +7 -0
  47. package/dist/viewer/speedscope/favicon-FOKUP5Y5.ico +0 -0
  48. package/dist/viewer/speedscope/favicon-M34RF7BI.js +2 -0
  49. package/dist/viewer/speedscope/favicon-M34RF7BI.js.map +7 -0
  50. package/dist/viewer/speedscope/file-format-schema.json +274 -0
  51. package/dist/viewer/speedscope/index.html +19 -0
  52. package/dist/viewer/speedscope/jfrview_bg-BLJXNNQB.wasm +0 -0
  53. package/dist/viewer/speedscope/perf-vertx-stacks-01-collapsed-all-ZNUIGAJL.txt +199 -0
  54. package/dist/viewer/speedscope/release.txt +3 -0
  55. package/dist/viewer/speedscope/source-code-pro.LICENSE.md +93 -0
  56. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css +2 -0
  57. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css.map +7 -0
  58. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js +212 -0
  59. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js.map +7 -0
  60. package/package.json +52 -27
  61. package/src/bin/benchforge.ts +2 -2
  62. package/src/cli/AnalyzeArchive.ts +232 -0
  63. package/src/cli/BrowserBench.ts +322 -0
  64. package/src/cli/CliArgs.ts +164 -51
  65. package/src/cli/CliExport.ts +179 -0
  66. package/src/cli/CliOptions.ts +147 -0
  67. package/src/cli/CliReport.ts +197 -0
  68. package/src/cli/FilterBenchmarks.ts +18 -30
  69. package/src/cli/RunBenchCLI.ts +132 -866
  70. package/src/cli/SuiteRunner.ts +160 -0
  71. package/src/cli/ViewerServer.ts +282 -0
  72. package/src/export/AllocExport.ts +121 -0
  73. package/src/export/ArchiveExport.ts +146 -0
  74. package/src/export/ArchiveFormat.ts +50 -0
  75. package/src/export/CoverageExport.ts +148 -0
  76. package/src/export/EditorUri.ts +10 -0
  77. package/src/export/PerfettoExport.ts +64 -99
  78. package/src/export/SpeedscopeTypes.ts +98 -0
  79. package/src/export/TimeExport.ts +115 -0
  80. package/src/index.ts +86 -67
  81. package/src/matrix/BenchMatrix.ts +230 -0
  82. package/src/matrix/CaseLoader.ts +8 -6
  83. package/src/matrix/MatrixDirRunner.ts +153 -0
  84. package/src/matrix/MatrixFilter.ts +49 -47
  85. package/src/matrix/MatrixInlineRunner.ts +50 -0
  86. package/src/matrix/MatrixReport.ts +90 -250
  87. package/src/matrix/VariantLoader.ts +5 -5
  88. package/src/profiling/browser/BenchLoop.ts +51 -0
  89. package/src/profiling/browser/BrowserCDP.ts +133 -0
  90. package/src/profiling/browser/BrowserGcStats.ts +33 -0
  91. package/src/profiling/browser/BrowserProfiler.ts +160 -0
  92. package/src/profiling/browser/CdpClient.ts +82 -0
  93. package/src/profiling/browser/CdpPage.ts +138 -0
  94. package/src/profiling/browser/ChromeLauncher.ts +158 -0
  95. package/src/profiling/browser/ChromeTraceEvent.ts +28 -0
  96. package/src/profiling/browser/PageLoadMode.ts +61 -0
  97. package/src/profiling/node/CoverageSampler.ts +27 -0
  98. package/src/profiling/node/CoverageTypes.ts +23 -0
  99. package/src/profiling/node/HeapSampleReport.ts +261 -0
  100. package/src/{heap-sample → profiling/node}/HeapSampler.ts +1 -2
  101. package/src/{heap-sample → profiling/node}/ResolvedProfile.ts +18 -9
  102. package/src/profiling/node/TimeSampler.ts +57 -0
  103. package/src/report/BenchmarkReport.ts +146 -0
  104. package/src/report/Colors.ts +9 -0
  105. package/src/report/Formatters.ts +110 -0
  106. package/src/report/GcSections.ts +151 -0
  107. package/src/{GitUtils.ts → report/GitUtils.ts} +18 -19
  108. package/src/report/HtmlReport.ts +223 -0
  109. package/src/report/ParseStats.ts +73 -0
  110. package/src/report/StandardSections.ts +147 -0
  111. package/src/report/ViewerSections.ts +286 -0
  112. package/src/report/text/TableReport.ts +253 -0
  113. package/src/report/text/TextReport.ts +123 -0
  114. package/src/runners/AdaptiveWrapper.ts +116 -236
  115. package/src/runners/BenchRunner.ts +20 -15
  116. package/src/{Benchmark.ts → runners/BenchmarkSpec.ts} +5 -6
  117. package/src/runners/CreateRunner.ts +5 -7
  118. package/src/runners/GcStats.ts +47 -50
  119. package/src/{MeasuredResults.ts → runners/MeasuredResults.ts} +43 -37
  120. package/src/runners/MergeBatches.ts +123 -0
  121. package/src/{NodeGC.ts → runners/NodeGC.ts} +2 -3
  122. package/src/runners/RunnerOrchestrator.ts +127 -243
  123. package/src/runners/RunnerUtils.ts +75 -1
  124. package/src/runners/SampleStats.ts +100 -0
  125. package/src/runners/TimingRunner.ts +244 -0
  126. package/src/runners/TimingUtils.ts +3 -2
  127. package/src/runners/WorkerScript.ts +135 -151
  128. package/src/stats/BootstrapDifference.ts +282 -0
  129. package/src/{PermutationTest.ts → stats/PermutationTest.ts} +8 -17
  130. package/src/stats/StatisticalUtils.ts +445 -0
  131. package/src/{tests → test}/AdaptiveConvergence.test.ts +10 -10
  132. package/src/test/AdaptiveRunner.test.ts +39 -41
  133. package/src/{tests → test}/AdaptiveSampling.test.ts +9 -9
  134. package/src/test/AdaptiveStatistics.integration.ts +2 -2
  135. package/src/{tests → test}/BenchMatrix.test.ts +19 -16
  136. package/src/test/BenchmarkReport.test.ts +63 -13
  137. package/src/test/BrowserBench.e2e.test.ts +186 -17
  138. package/src/test/BrowserBench.test.ts +10 -5
  139. package/src/test/BuildTimeSection.test.ts +130 -0
  140. package/src/test/CapSamples.test.ts +82 -0
  141. package/src/test/CoverageExport.test.ts +115 -0
  142. package/src/test/CoverageSampler.test.ts +33 -0
  143. package/src/test/HeapAttribution.test.ts +14 -14
  144. package/src/{tests → test}/MatrixFilter.test.ts +1 -1
  145. package/src/{tests → test}/MatrixReport.test.ts +1 -1
  146. package/src/test/PermutationTest.test.ts +1 -1
  147. package/src/{tests → test}/RealDataValidation.test.ts +6 -6
  148. package/src/test/RunBenchCLI.test.ts +39 -38
  149. package/src/test/RunnerOrchestrator.test.ts +12 -12
  150. package/src/test/StatisticalUtils.test.ts +48 -12
  151. package/src/{table-util/test → test}/TableReport.test.ts +2 -2
  152. package/src/test/TestUtils.ts +12 -7
  153. package/src/test/TimeExport.test.ts +139 -0
  154. package/src/test/TimeSampler.test.ts +37 -0
  155. package/src/test/ViewerLive.e2e.test.ts +159 -0
  156. package/src/test/ViewerStatic.static.e2e.test.ts +137 -0
  157. package/src/{tests → test}/fixtures/baseline/impl.ts +1 -1
  158. package/src/{tests → test}/fixtures/bevy30-samples.ts +3 -1
  159. package/src/test/fixtures/cases/asyncCases.ts +9 -0
  160. package/src/{tests → test}/fixtures/cases/cases.ts +5 -2
  161. package/src/test/fixtures/cases/variants/product.ts +2 -0
  162. package/src/test/fixtures/cases/variants/sum.ts +2 -0
  163. package/src/test/fixtures/discover/fast.ts +1 -0
  164. package/src/{tests → test}/fixtures/discover/slow.ts +1 -1
  165. package/src/test/fixtures/invalid/bad.ts +1 -0
  166. package/src/test/fixtures/loader/fast.ts +1 -0
  167. package/src/{tests → test}/fixtures/loader/slow.ts +1 -1
  168. package/src/test/fixtures/loader/stateful.ts +2 -0
  169. package/src/test/fixtures/stateful/stateful.ts +2 -0
  170. package/src/test/fixtures/variants/extra.ts +1 -0
  171. package/src/test/fixtures/variants/impl.ts +1 -0
  172. package/src/test/fixtures/worker/fast.ts +1 -0
  173. package/src/{tests → test}/fixtures/worker/slow.ts +1 -1
  174. package/src/viewer/DateFormat.ts +30 -0
  175. package/src/viewer/Helpers.ts +23 -0
  176. package/src/viewer/LineData.ts +120 -0
  177. package/src/viewer/Providers.ts +191 -0
  178. package/src/viewer/ReportData.ts +123 -0
  179. package/src/viewer/State.ts +49 -0
  180. package/src/viewer/Theme.ts +15 -0
  181. package/src/viewer/components/App.tsx +73 -0
  182. package/src/viewer/components/DropZone.tsx +71 -0
  183. package/src/viewer/components/LazyPlot.ts +33 -0
  184. package/src/viewer/components/SamplesPanel.tsx +214 -0
  185. package/src/viewer/components/Shell.tsx +26 -0
  186. package/src/viewer/components/SourcePanel.tsx +216 -0
  187. package/src/viewer/components/SummaryPanel.tsx +332 -0
  188. package/src/viewer/components/TabBar.tsx +131 -0
  189. package/src/viewer/components/TabContent.tsx +46 -0
  190. package/src/viewer/components/ThemeToggle.tsx +50 -0
  191. package/src/viewer/index.html +20 -0
  192. package/src/viewer/main.tsx +4 -0
  193. package/src/viewer/plots/CIPlot.ts +313 -0
  194. package/src/{html/browser → viewer/plots}/HistogramKde.ts +33 -38
  195. package/src/viewer/plots/LegendUtils.ts +134 -0
  196. package/src/viewer/plots/PlotTypes.ts +85 -0
  197. package/src/viewer/plots/RenderPlots.ts +230 -0
  198. package/src/viewer/plots/SampleTimeSeries.ts +306 -0
  199. package/src/viewer/plots/SvgHelpers.ts +136 -0
  200. package/src/viewer/plots/TimeSeriesMarks.ts +319 -0
  201. package/src/viewer/report.css +427 -0
  202. package/src/viewer/shell.css +357 -0
  203. package/src/viewer/tsconfig.json +11 -0
  204. package/dist/BrowserHeapSampler-B6asLKWQ.mjs +0 -202
  205. package/dist/BrowserHeapSampler-B6asLKWQ.mjs.map +0 -1
  206. package/dist/GcStats-wX7Xyblu.mjs +0 -77
  207. package/dist/GcStats-wX7Xyblu.mjs.map +0 -1
  208. package/dist/HeapSampler-B8dtKHn1.mjs.map +0 -1
  209. package/dist/TimingUtils-DwOwkc8G.mjs +0 -597
  210. package/dist/TimingUtils-DwOwkc8G.mjs.map +0 -1
  211. package/dist/browser/index.js +0 -914
  212. package/dist/src-B-DDaCa9.mjs +0 -3108
  213. package/dist/src-B-DDaCa9.mjs.map +0 -1
  214. package/src/BenchMatrix.ts +0 -380
  215. package/src/BenchmarkReport.ts +0 -161
  216. package/src/HtmlDataPrep.ts +0 -148
  217. package/src/StandardSections.ts +0 -261
  218. package/src/StatisticalUtils.ts +0 -175
  219. package/src/TypeUtil.ts +0 -8
  220. package/src/browser/BrowserGcStats.ts +0 -44
  221. package/src/browser/BrowserHeapSampler.ts +0 -271
  222. package/src/export/JsonExport.ts +0 -103
  223. package/src/export/JsonFormat.ts +0 -91
  224. package/src/export/SpeedscopeExport.ts +0 -202
  225. package/src/heap-sample/HeapSampleReport.ts +0 -269
  226. package/src/html/HtmlReport.ts +0 -131
  227. package/src/html/HtmlTemplate.ts +0 -284
  228. package/src/html/Types.ts +0 -88
  229. package/src/html/browser/CIPlot.ts +0 -287
  230. package/src/html/browser/LegendUtils.ts +0 -163
  231. package/src/html/browser/RenderPlots.ts +0 -263
  232. package/src/html/browser/SampleTimeSeries.ts +0 -389
  233. package/src/html/browser/Types.ts +0 -96
  234. package/src/html/browser/index.ts +0 -1
  235. package/src/html/index.ts +0 -17
  236. package/src/runners/BasicRunner.ts +0 -364
  237. package/src/table-util/ConvergenceFormatters.ts +0 -19
  238. package/src/table-util/Formatters.ts +0 -157
  239. package/src/table-util/README.md +0 -70
  240. package/src/table-util/TableReport.ts +0 -293
  241. package/src/tests/fixtures/cases/asyncCases.ts +0 -7
  242. package/src/tests/fixtures/cases/variants/product.ts +0 -2
  243. package/src/tests/fixtures/cases/variants/sum.ts +0 -2
  244. package/src/tests/fixtures/discover/fast.ts +0 -1
  245. package/src/tests/fixtures/invalid/bad.ts +0 -1
  246. package/src/tests/fixtures/loader/fast.ts +0 -1
  247. package/src/tests/fixtures/loader/stateful.ts +0 -2
  248. package/src/tests/fixtures/stateful/stateful.ts +0 -2
  249. package/src/tests/fixtures/variants/extra.ts +0 -1
  250. package/src/tests/fixtures/variants/impl.ts +0 -1
  251. package/src/tests/fixtures/worker/fast.ts +0 -1
  252. /package/src/{table-util/test → test}/TableValueExtractor.test.ts +0 -0
  253. /package/src/{table-util/test → test}/TableValueExtractor.ts +0 -0
@@ -0,0 +1,282 @@
1
+ import type {
2
+ BootstrapResult,
3
+ CIDirection,
4
+ DifferenceCI,
5
+ HistogramBin,
6
+ StatKind,
7
+ } from "./StatisticalUtils.ts";
8
+ import {
9
+ average,
10
+ bootstrapSamples,
11
+ computeInterval,
12
+ createResample,
13
+ defaultConfidence,
14
+ isBootstrappable,
15
+ maxBootstrapInput,
16
+ maxOf,
17
+ minOf,
18
+ percentile,
19
+ prepareBlocks,
20
+ quickSelect,
21
+ resampleInto,
22
+ statKindToFn,
23
+ subsample,
24
+ } from "./StatisticalUtils.ts";
25
+
26
/** Options accepted by blockDifferenceCI: every DiffOptions field plus block-specific settings. */
export type BlockDiffOptions = DiffOptions & {
  /** Block boundaries for the second sample array; blocksA is used when omitted. */
  blocksB?: number[];
  /** Set to turn off Tukey trimming of outlier batches. */
  noBatchTrim?: boolean;
};
33
+
34
/** Settings shared by the difference-CI functions in this module. */
type DiffOptions = {
  /** How many bootstrap resamples to draw (default: 10000). */
  resamples?: number;
  /** Confidence level, expressed in the range 0-1 (default: 0.95). */
  confidence?: number;
  /**
   * Equivalence margin in percent. A CI that lies entirely inside
   * [-margin, +margin] is classified as "equivalent".
   */
  equivMargin?: number;
};
43
+
44
/** A point estimate with its confidence interval and a histogram of the bootstrap values. */
type BinnedCI = {
  /** Point estimate copied from the bootstrap result. */
  estimate: number;
  /** Confidence interval as a [lower, upper] pair. */
  ci: [number, number];
  /** Bootstrap values reduced to histogram bins. */
  histogram: HistogramBin[];
};
49
+
50
/** One statistic to evaluate inside the shared-resample loop of multiSampleDifferenceCI. */
interface DiffOp {
  /** Position of this statistic in the caller's `stats` array (results are written back here). */
  origIndex: number;
  /** Position of this statistic in the sorted execution order. */
  execIndex: number;
  /** Statistic evaluated on the resample buffer of side A (baseline). */
  computeA: (buf: number[]) => number;
  /** Statistic evaluated on the resample buffer of side B (current). */
  computeB: (buf: number[]) => number;
  /** Statistic evaluated on the original data to obtain the observed value. */
  pointEstimate: (s: number[]) => number;
}
57
+
58
/**
 * Sample-level bootstrap of the percentage change in `statFn` from baseline (a) to current (b).
 *
 * The observed percentage is computed on the full inputs; the bootstrap draws from
 * `subsample(x, maxBootstrapInput)` of each side. When either side comes back as a
 * different array than the input, the result carries `subsampled` set to the larger
 * of the two input lengths.
 *
 * @param a baseline samples
 * @param b current samples
 * @param statFn statistic to compare (applied to full data and to every resample)
 * @param options resample count, confidence level and equivalence margin
 * @return DifferenceCI with ciLevel "sample"
 */
export function sampleDifferenceCI(
  a: number[],
  b: number[],
  statFn: (s: number[]) => number,
  options: DiffOptions = {},
): DifferenceCI {
  const { resamples = bootstrapSamples, confidence = defaultConfidence } =
    options;

  // Observed change, measured on the untouched inputs.
  const observedBase = statFn(a);
  const observedCurr = statFn(b);
  const percent = ((observedCurr - observedBase) / observedBase) * 100;

  const drawnA = subsample(a, maxBootstrapInput);
  const drawnB = subsample(b, maxBootstrapInput);
  // Scratch buffers reused for every resample.
  const scratchA = new Array(drawnA.length);
  const scratchB = new Array(drawnB.length);

  const drawPctDiff = (): number => {
    resampleInto(drawnA, scratchA);
    resampleInto(drawnB, scratchB);
    const base = statFn(scratchA);
    const curr = statFn(scratchB);
    return ((curr - base) / base) * 100;
  };
  const diffs = Array.from({ length: resamples }, drawPctDiff);

  const ci = computeInterval(diffs, confidence);
  // Identity check: a different array reference means subsample() replaced the input.
  const wasCapped = drawnA !== a || drawnB !== b;
  return {
    percent,
    ci,
    direction: classifyDirection(ci, percent, options.equivMargin),
    histogram: binValues(diffs),
    ciLevel: "sample",
    ...(wasCapped ? { subsampled: Math.max(a.length, b.length) } : {}),
  };
}
92
+
93
/**
 * Shared-resample difference CI: each iteration draws one resample pair (A, B) and
 * evaluates every requested statistic on that same pair.
 *
 * Observed percentages come from the original `a` and `b`; the bootstrap draws from
 * `subsample(x, maxBootstrapInput)` of each side. Statistics run in the order chosen
 * by buildDiffOps and results are written back by `origIndex`.
 *
 * @param a baseline samples
 * @param b current samples
 * @param stats statistics to compare
 * @param options resample count, confidence level and equivalence margin
 * @return DifferenceCI[] in the same order as `stats`
 */
export function multiSampleDifferenceCI(
  a: number[],
  b: number[],
  stats: StatKind[],
  options: DiffOptions = {},
): DifferenceCI[] {
  const { resamples = bootstrapSamples, confidence = defaultConfidence } =
    options;
  const drawnA = subsample(a, maxBootstrapInput);
  const drawnB = subsample(b, maxBootstrapInput);
  const scratchA = new Array(drawnA.length);
  const scratchB = new Array(drawnB.length);
  const ops = buildDiffOps(stats, drawnA.length, drawnB.length);
  // One column of bootstrap differences per statistic, indexed by execution order.
  const diffsByOp = ops.map(() => new Array<number>(resamples));

  // Observed values from the original (not subsampled) data: all of A first, then all of B.
  const observedBase = ops.map(op => op.pointEstimate(a));
  const observedCurr = ops.map(op => op.pointEstimate(b));
  const observedPcts = observedBase.map(
    (base, k) => ((observedCurr[k] - base) / base) * 100,
  );

  for (let round = 0; round < resamples; round++) {
    resampleInto(drawnA, scratchA);
    resampleInto(drawnB, scratchB);
    // Keep the op order from buildDiffOps; later ops see whatever earlier ops left in the buffers.
    for (let k = 0; k < ops.length; k++) {
      const op = ops[k];
      const base = op.computeA(scratchA);
      const curr = op.computeB(scratchB);
      diffsByOp[k][round] = ((curr - base) / base) * 100;
    }
  }

  // Identity check: a different array reference means subsample() replaced the input.
  const wasCapped = drawnA !== a || drawnB !== b;
  const results = new Array<DifferenceCI>(stats.length);
  ops.forEach(op => {
    const k = op.execIndex;
    const ci = computeInterval(diffsByOp[k], confidence);
    const percent = observedPcts[k];
    results[op.origIndex] = {
      percent,
      ci,
      direction: classifyDirection(ci, percent, options.equivMargin),
      histogram: binValues(diffsByOp[k]),
      ciLevel: "sample",
      ...(wasCapped ? { subsampled: Math.max(a.length, b.length) } : {}),
    };
  });
  return results;
}
143
+
144
/**
 * Difference CIs for several statistics at once.
 *
 * Uses blockDifferenceCI (one call per statistic) when both offset arrays are present
 * with at least two entries; otherwise falls back to multiSampleDifferenceCI.
 * Statistics rejected by isBootstrappable (min/max) yield `undefined`.
 *
 * @param a baseline samples
 * @param aOffsets block boundaries for `a`, if any
 * @param b current samples
 * @param bOffsets block boundaries for `b`, if any
 * @param stats statistics to compare
 * @param options forwarded to the chosen CI function
 * @return one entry per element of `stats`, in the same order
 */
export function diffCIs(
  a: number[],
  aOffsets: number[] | undefined,
  b: number[],
  bOffsets: number[] | undefined,
  stats: StatKind[],
  options: BlockDiffOptions = {},
): (DifferenceCI | undefined)[] {
  const bootstrappable = stats.filter(isBootstrappable);
  if (bootstrappable.length === 0) return stats.map(() => undefined);

  let computed: DifferenceCI[];
  const blocksUsable =
    aOffsets != null &&
    bOffsets != null &&
    aOffsets.length >= 2 &&
    bOffsets.length >= 2;
  if (blocksUsable) {
    const blockOptions = { ...options, blocksB: bOffsets };
    computed = bootstrappable.map(kind =>
      blockDifferenceCI(a, aOffsets, b, statKindToFn(kind), blockOptions),
    );
  } else {
    computed = multiSampleDifferenceCI(a, b, bootstrappable, options);
  }

  // Re-expand to the caller's ordering, leaving gaps for non-bootstrappable stats.
  const results: (DifferenceCI | undefined)[] = new Array(stats.length);
  let cursor = 0;
  for (let i = 0; i < stats.length; i++) {
    if (isBootstrappable(stats[i])) {
      results[i] = computed[cursor];
      cursor++;
    } else {
      results[i] = undefined;
    }
  }
  return results;
}
175
+
176
/**
 * Block bootstrap CI for the percentage change from baseline (a) to current (b).
 *
 * Each side is first passed through prepareBlocks (which, per the option docs,
 * Tukey-trims outlier batches unless `noBatchTrim` is set). The observed percentage is
 * `statFn` over each side's filtered samples; every bootstrap draw averages a resample
 * of that side's per-block values. Requires 2+ blocks.
 *
 * @param a baseline samples
 * @param blocksA block boundaries for `a` (also used for `b` when `options.blocksB` is absent)
 * @param b current samples
 * @param statFn statistic to compare
 * @param options resample count, confidence, equivalence margin, blocksB, noBatchTrim
 * @return DifferenceCI with ciLevel "block" and per-side trim counts in `trimmed`
 */
export function blockDifferenceCI(
  a: number[],
  blocksA: number[],
  b: number[],
  statFn: (s: number[]) => number,
  options: BlockDiffOptions = {},
): DifferenceCI {
  const { resamples = bootstrapSamples, confidence = defaultConfidence } =
    options;
  const boundariesB = options.blocksB ?? blocksA;
  const skipTrim = options.noBatchTrim;
  const baseline = prepareBlocks(a, blocksA, statFn, skipTrim);
  const current = prepareBlocks(b, boundariesB, statFn, skipTrim);

  const observedBase = statFn(baseline.filtered);
  const observedCurr = statFn(current.filtered);
  const percent = ((observedCurr - observedBase) / observedBase) * 100;

  // One bootstrap draw: mean of a resample of the per-block statistic values.
  const resampledMean = (blockVals: number[]) =>
    average(createResample(blockVals));
  const diffs = Array.from({ length: resamples }, () => {
    const base = resampledMean(baseline.blockVals);
    const curr = resampledMean(current.blockVals);
    return ((curr - base) / base) * 100;
  });

  const ci = computeInterval(diffs, confidence);
  return {
    percent,
    ci,
    direction: classifyDirection(ci, percent, options.equivMargin),
    histogram: binValues(diffs),
    trimmed: [baseline.trimCount, current.trimCount],
    ciLevel: "block",
  };
}
212
+
213
/**
 * Convert a BootstrapResult into its compact form.
 * @param result bootstrap output carrying `estimate`, `ci` and raw `samples`
 * @return the same estimate and CI, with `samples` reduced to a histogram
 */
export function binBootstrapResult(result: BootstrapResult): BinnedCI {
  return {
    estimate: result.estimate,
    ci: result.ci,
    histogram: binValues(result.samples),
  };
}
218
+
219
/**
 * Classify a percentage-difference CI.
 *
 * - "equivalent": a positive `margin` is given and the CI lies within [-margin, +margin]
 * - "faster" / "slower": the CI excludes zero; the sign of `observed` picks the label
 *   (negative means faster)
 * - "uncertain": everything else, including a CI with NaN bounds
 *
 * @param ci [lower, upper] bounds in percent
 * @param observed observed percentage difference
 * @param margin optional equivalence margin in percent
 * @return CI direction
 */
function classifyDirection(
  ci: [number, number],
  observed: number,
  margin?: number,
): CIDirection {
  const [lower, upper] = ci;

  if (margin != null && margin > 0 && lower >= -margin && upper <= margin) {
    return "equivalent";
  }

  // Written as a negation so NaN bounds fall through to "uncertain".
  const straddlesZero = !(lower > 0 || upper < 0);
  if (straddlesZero) return "uncertain";

  if (observed < 0) return "faster";
  return "slower";
}
232
+
233
/**
 * Bin values into an equal-width histogram for compact visualization.
 *
 * Non-finite values (NaN, ±Infinity) are skipped: percentage differences are computed
 * by dividing by a resampled baseline, so a zero baseline can produce them, and a
 * single such value would otherwise corrupt the bin range and centres.
 *
 * @param values numbers to bin
 * @param binCount number of equal-width bins (default 30)
 * @return `[]` when there is no finite value; a single bin when all finite values are
 *   equal; otherwise `binCount` bins, where `x` is the bin centre and `count` the
 *   number of values falling in that bin
 */
function binValues(values: number[], binCount = 30): HistogramBin[] {
  let min = Infinity;
  let max = -Infinity;
  let finiteCount = 0;
  for (const v of values) {
    if (!Number.isFinite(v)) continue;
    finiteCount++;
    if (v < min) min = v;
    if (v > max) max = v;
  }
  if (finiteCount === 0) return [];
  if (min === max) return [{ x: min, count: finiteCount }];

  const step = (max - min) / binCount;
  const counts = new Array<number>(binCount).fill(0);
  for (const v of values) {
    if (!Number.isFinite(v)) continue;
    // The maximum lands exactly on the upper edge; clamp it into the last bin.
    const bin = Math.min(Math.floor((v - min) / step), binCount - 1);
    counts[bin]++;
  }
  return counts.map((count, i) => ({ x: min + (i + 0.5) * step, count }));
}
251
+
252
/**
 * Build the per-statistic operations for multiSampleDifferenceCI, in execution order.
 *
 * mean/min/max sort ahead of every percentile (orders -3, -2, -1); percentiles follow
 * in ascending order of `p`. The original notes that mean/min/max are non-destructive,
 * presumably because quickSelect reorders the buffer it is given (confirm against
 * StatisticalUtils). Each side gets its own quickSelect rank because the two sample
 * sizes may differ.
 *
 * @param stats statistics requested by the caller
 * @param nA length of the side-A resample buffer
 * @param nB length of the side-B resample buffer
 * @return operations sorted by execution order, with `execIndex` set to the sorted
 *   position and `origIndex` to the position in `stats`
 */
function buildDiffOps(stats: StatKind[], nA: number, nB: number): DiffOp[] {
  type StatFn = (s: number[]) => number;

  // Same function serves both sides and the point estimate.
  const sameEverywhere = (order: number, origIndex: number, fn: StatFn) => ({
    order,
    origIndex,
    execIndex: 0,
    computeA: fn,
    computeB: fn,
    pointEstimate: fn,
  });

  // Zero-based rank handed to quickSelect for percentile p over n samples.
  const rankFor = (n: number, p: number) => Math.max(0, Math.ceil(n * p) - 1);

  const toOp = (stat: StatKind, origIndex: number) => {
    switch (stat) {
      case "mean":
        return sameEverywhere(-3, origIndex, average);
      case "min":
        return sameEverywhere(-2, origIndex, minOf);
      case "max":
        return sameEverywhere(-1, origIndex, maxOf);
    }
    const p = stat.percentile;
    const rankA = rankFor(nA, p);
    const rankB = rankFor(nB, p);
    return {
      order: p,
      origIndex,
      execIndex: 0,
      computeA: (buf: number[]) => quickSelect(buf, rankA),
      computeB: (buf: number[]) => quickSelect(buf, rankB),
      pointEstimate: (v: number[]) => percentile(v, p),
    };
  };

  const ordered = stats.map((stat, i) => toOp(stat, i));
  ordered.sort((x, y) => x.order - y.order);
  ordered.forEach((op, position) => {
    op.execIndex = position;
  });
  return ordered;
}
@@ -1,14 +1,10 @@
1
1
  /**
2
2
  * Permutation-based hypothesis testing for benchmark comparisons.
3
- *
4
- * Currently unused - the main reporting pipeline uses bootstrapDifferenceCI()
5
- * from StatisticalUtils.ts instead, which provides confidence intervals on
6
- * the difference rather than p-values.
7
- *
8
- * Kept for potential future use cases where p-values are needed.
3
+ * Currently unused -- the reporting pipeline uses blockDifferenceCI() instead,
4
+ * which provides confidence intervals rather than p-values.
9
5
  */
10
6
 
11
- import { average, percentile } from "./StatisticalUtils.ts";
7
+ import { average, bootstrapSamples, median } from "./StatisticalUtils.ts";
12
8
 
13
9
  /** Statistical comparison between baseline and current benchmark samples */
14
10
  export interface ComparisonResult {
@@ -37,19 +33,17 @@ export interface ComparisonResult {
37
33
  const significanceThreshold = 0.05;
38
34
  const strongSignificance = 0.001;
39
35
  const goodSignificance = 0.01;
40
- const defaultBootstrapSamples = 10000;
41
36
 
42
37
  /** @return statistical comparison between baseline and current samples */
43
38
  export function compareWithBaseline(
44
39
  baseline: number[],
45
40
  current: number[],
46
41
  ): ComparisonResult {
47
- const baselineMedian = percentile(baseline, 0.5);
48
- const currentMedian = percentile(current, 0.5);
42
+ const baselineMedian = median(baseline);
43
+ const currentMedian = median(current);
49
44
  const baselineMean = average(baseline);
50
45
  const currentMean = average(current);
51
46
 
52
- const median = (s: number[]) => percentile(s, 0.5);
53
47
  const medianPValue = bootstrapDifferenceTest(baseline, current, median);
54
48
  const meanPValue = bootstrapDifferenceTest(baseline, current, average);
55
49
 
@@ -74,12 +68,12 @@ function bootstrapDifferenceTest(
74
68
  const n1 = sample1.length;
75
69
 
76
70
  let moreExtreme = 0;
77
- for (let i = 0; i < defaultBootstrapSamples; i++) {
71
+ for (let i = 0; i < bootstrapSamples; i++) {
78
72
  const { resample1, resample2 } = shuffleAndSplit(combined, n1);
79
73
  const diff = statistic(resample2) - statistic(resample1);
80
74
  if (Math.abs(diff) >= Math.abs(observedDiff)) moreExtreme++;
81
75
  }
82
- return moreExtreme / defaultBootstrapSamples;
76
+ return moreExtreme / bootstrapSamples;
83
77
  }
84
78
 
85
79
  /** @return change statistics for a current vs baseline comparison */
@@ -100,10 +94,7 @@ function shuffleAndSplit(combined: number[], n1: number) {
100
94
  const j = Math.floor(Math.random() * (i + 1));
101
95
  [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
102
96
  }
103
- return {
104
- resample1: shuffled.slice(0, n1),
105
- resample2: shuffled.slice(n1),
106
- };
97
+ return { resample1: shuffled.slice(0, n1), resample2: shuffled.slice(n1) };
107
98
  }
108
99
 
109
100
  /** @return significance level based on p-value thresholds */