benchforge 0.1.9 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. package/LICENSE +20 -0
  2. package/README.md +99 -260
  3. package/bin/benchforge +1 -2
  4. package/dist/AnalyzeArchive-8NCJhmhS.mjs +145 -0
  5. package/dist/AnalyzeArchive-8NCJhmhS.mjs.map +1 -0
  6. package/dist/BenchMatrix-BZVrBB_h.mjs +1050 -0
  7. package/dist/BenchMatrix-BZVrBB_h.mjs.map +1 -0
  8. package/dist/BenchRunner-DglX1NOn.d.mts +302 -0
  9. package/dist/CoverageSampler-D5T9DRqe.mjs +27 -0
  10. package/dist/CoverageSampler-D5T9DRqe.mjs.map +1 -0
  11. package/dist/Formatters-BWj3d4sv.mjs +95 -0
  12. package/dist/Formatters-BWj3d4sv.mjs.map +1 -0
  13. package/dist/{HeapSampler-B8dtKHn1.mjs → HeapSampler-Dq-hpXem.mjs} +4 -4
  14. package/dist/HeapSampler-Dq-hpXem.mjs.map +1 -0
  15. package/dist/RunBenchCLI-C17DrJz8.mjs +3075 -0
  16. package/dist/RunBenchCLI-C17DrJz8.mjs.map +1 -0
  17. package/dist/StatisticalUtils-BD92crgM.mjs +255 -0
  18. package/dist/StatisticalUtils-BD92crgM.mjs.map +1 -0
  19. package/dist/TimeSampler-Ds8n7l2B.mjs +29 -0
  20. package/dist/TimeSampler-Ds8n7l2B.mjs.map +1 -0
  21. package/dist/ViewerServer-BJhdnxlN.mjs +639 -0
  22. package/dist/ViewerServer-BJhdnxlN.mjs.map +1 -0
  23. package/dist/ViewerServer-CuMNdNBz.mjs +2 -0
  24. package/dist/bin/benchforge.mjs +4 -5
  25. package/dist/bin/benchforge.mjs.map +1 -1
  26. package/dist/index.d.mts +731 -522
  27. package/dist/index.mjs +98 -3
  28. package/dist/index.mjs.map +1 -0
  29. package/dist/runners/WorkerScript.d.mts +12 -4
  30. package/dist/runners/WorkerScript.mjs +92 -120
  31. package/dist/runners/WorkerScript.mjs.map +1 -1
  32. package/dist/viewer/assets/CIPlot-BkOvMoMa.js +1 -0
  33. package/dist/viewer/assets/HistogramKde-CmSyUFY0.js +1 -0
  34. package/dist/viewer/assets/LegendUtils-BJpbn_jr.js +55 -0
  35. package/dist/viewer/assets/SampleTimeSeries-C4VBhXr3.js +1 -0
  36. package/dist/viewer/assets/index-Br9bp_cX.js +153 -0
  37. package/dist/viewer/assets/index-NzXXe_CC.css +1 -0
  38. package/dist/viewer/index.html +19 -0
  39. package/dist/viewer/speedscope/LICENSE +21 -0
  40. package/dist/viewer/speedscope/SourceCodePro-Regular.ttf-ILST5JV6.woff2 +0 -0
  41. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js +2 -0
  42. package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js.map +7 -0
  43. package/dist/viewer/speedscope/favicon-16x16-VSI62OPJ.png +0 -0
  44. package/dist/viewer/speedscope/favicon-32x32-3EB2YCUY.png +0 -0
  45. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js +2 -0
  46. package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js.map +7 -0
  47. package/dist/viewer/speedscope/favicon-FOKUP5Y5.ico +0 -0
  48. package/dist/viewer/speedscope/favicon-M34RF7BI.js +2 -0
  49. package/dist/viewer/speedscope/favicon-M34RF7BI.js.map +7 -0
  50. package/dist/viewer/speedscope/file-format-schema.json +274 -0
  51. package/dist/viewer/speedscope/index.html +19 -0
  52. package/dist/viewer/speedscope/jfrview_bg-BLJXNNQB.wasm +0 -0
  53. package/dist/viewer/speedscope/perf-vertx-stacks-01-collapsed-all-ZNUIGAJL.txt +199 -0
  54. package/dist/viewer/speedscope/release.txt +3 -0
  55. package/dist/viewer/speedscope/source-code-pro.LICENSE.md +93 -0
  56. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css +2 -0
  57. package/dist/viewer/speedscope/speedscope-GHPHNKXC.css.map +7 -0
  58. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js +212 -0
  59. package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js.map +7 -0
  60. package/package.json +52 -26
  61. package/src/bin/benchforge.ts +2 -2
  62. package/src/cli/AnalyzeArchive.ts +232 -0
  63. package/src/cli/BrowserBench.ts +322 -0
  64. package/src/cli/CliArgs.ts +164 -48
  65. package/src/cli/CliExport.ts +179 -0
  66. package/src/cli/CliOptions.ts +147 -0
  67. package/src/cli/CliReport.ts +197 -0
  68. package/src/cli/FilterBenchmarks.ts +18 -30
  69. package/src/cli/RunBenchCLI.ts +138 -844
  70. package/src/cli/SuiteRunner.ts +160 -0
  71. package/src/cli/ViewerServer.ts +282 -0
  72. package/src/export/AllocExport.ts +121 -0
  73. package/src/export/ArchiveExport.ts +146 -0
  74. package/src/export/ArchiveFormat.ts +50 -0
  75. package/src/export/CoverageExport.ts +148 -0
  76. package/src/export/EditorUri.ts +10 -0
  77. package/src/export/PerfettoExport.ts +91 -126
  78. package/src/export/SpeedscopeTypes.ts +98 -0
  79. package/src/export/TimeExport.ts +115 -0
  80. package/src/index.ts +87 -62
  81. package/src/matrix/BenchMatrix.ts +230 -0
  82. package/src/matrix/CaseLoader.ts +8 -6
  83. package/src/matrix/MatrixDirRunner.ts +153 -0
  84. package/src/matrix/MatrixFilter.ts +55 -53
  85. package/src/matrix/MatrixInlineRunner.ts +50 -0
  86. package/src/matrix/MatrixReport.ts +94 -254
  87. package/src/matrix/VariantLoader.ts +9 -9
  88. package/src/profiling/browser/BenchLoop.ts +51 -0
  89. package/src/profiling/browser/BrowserCDP.ts +133 -0
  90. package/src/profiling/browser/BrowserGcStats.ts +33 -0
  91. package/src/profiling/browser/BrowserProfiler.ts +160 -0
  92. package/src/profiling/browser/CdpClient.ts +82 -0
  93. package/src/profiling/browser/CdpPage.ts +138 -0
  94. package/src/profiling/browser/ChromeLauncher.ts +158 -0
  95. package/src/profiling/browser/ChromeTraceEvent.ts +28 -0
  96. package/src/profiling/browser/PageLoadMode.ts +61 -0
  97. package/src/profiling/node/CoverageSampler.ts +27 -0
  98. package/src/profiling/node/CoverageTypes.ts +23 -0
  99. package/src/profiling/node/HeapSampleReport.ts +261 -0
  100. package/src/{heap-sample → profiling/node}/HeapSampler.ts +55 -13
  101. package/src/profiling/node/ResolvedProfile.ts +98 -0
  102. package/src/profiling/node/TimeSampler.ts +57 -0
  103. package/src/report/BenchmarkReport.ts +146 -0
  104. package/src/report/Colors.ts +9 -0
  105. package/src/report/Formatters.ts +110 -0
  106. package/src/report/GcSections.ts +151 -0
  107. package/src/{GitUtils.ts → report/GitUtils.ts} +18 -19
  108. package/src/report/HtmlReport.ts +223 -0
  109. package/src/report/ParseStats.ts +73 -0
  110. package/src/report/StandardSections.ts +147 -0
  111. package/src/report/ViewerSections.ts +286 -0
  112. package/src/report/text/TableReport.ts +253 -0
  113. package/src/report/text/TextReport.ts +123 -0
  114. package/src/runners/AdaptiveWrapper.ts +167 -287
  115. package/src/runners/BenchRunner.ts +27 -22
  116. package/src/{Benchmark.ts → runners/BenchmarkSpec.ts} +5 -6
  117. package/src/runners/CreateRunner.ts +5 -7
  118. package/src/runners/GcStats.ts +58 -61
  119. package/src/{MeasuredResults.ts → runners/MeasuredResults.ts} +43 -37
  120. package/src/runners/MergeBatches.ts +123 -0
  121. package/src/{NodeGC.ts → runners/NodeGC.ts} +2 -3
  122. package/src/runners/RunnerOrchestrator.ts +180 -296
  123. package/src/runners/RunnerUtils.ts +75 -1
  124. package/src/runners/SampleStats.ts +100 -0
  125. package/src/runners/TimingRunner.ts +244 -0
  126. package/src/runners/TimingUtils.ts +3 -2
  127. package/src/runners/WorkerScript.ts +162 -178
  128. package/src/stats/BootstrapDifference.ts +282 -0
  129. package/src/{PermutationTest.ts → stats/PermutationTest.ts} +31 -40
  130. package/src/stats/StatisticalUtils.ts +445 -0
  131. package/src/{tests → test}/AdaptiveConvergence.test.ts +10 -10
  132. package/src/test/AdaptiveRunner.test.ts +39 -41
  133. package/src/{tests → test}/AdaptiveSampling.test.ts +9 -9
  134. package/src/test/AdaptiveStatistics.integration.ts +9 -41
  135. package/src/{tests → test}/BenchMatrix.test.ts +31 -28
  136. package/src/test/BenchmarkReport.test.ts +63 -13
  137. package/src/test/BrowserBench.e2e.test.ts +186 -17
  138. package/src/test/BrowserBench.test.ts +10 -5
  139. package/src/test/BuildTimeSection.test.ts +130 -0
  140. package/src/test/CapSamples.test.ts +82 -0
  141. package/src/test/CoverageExport.test.ts +115 -0
  142. package/src/test/CoverageSampler.test.ts +33 -0
  143. package/src/test/HeapAttribution.test.ts +51 -0
  144. package/src/{tests → test}/MatrixFilter.test.ts +16 -16
  145. package/src/{tests → test}/MatrixReport.test.ts +1 -1
  146. package/src/test/PermutationTest.test.ts +1 -1
  147. package/src/{tests → test}/RealDataValidation.test.ts +6 -6
  148. package/src/test/RunBenchCLI.test.ts +57 -56
  149. package/src/test/RunnerOrchestrator.test.ts +12 -12
  150. package/src/test/StatisticalUtils.test.ts +48 -12
  151. package/src/{table-util/test → test}/TableReport.test.ts +2 -2
  152. package/src/test/TestUtils.ts +35 -30
  153. package/src/test/TimeExport.test.ts +139 -0
  154. package/src/test/TimeSampler.test.ts +37 -0
  155. package/src/test/ViewerLive.e2e.test.ts +159 -0
  156. package/src/test/ViewerStatic.static.e2e.test.ts +137 -0
  157. package/src/{tests → test}/fixtures/baseline/impl.ts +1 -1
  158. package/src/{tests → test}/fixtures/bevy30-samples.ts +3 -1
  159. package/src/test/fixtures/cases/asyncCases.ts +9 -0
  160. package/src/{tests → test}/fixtures/cases/cases.ts +5 -2
  161. package/src/test/fixtures/cases/variants/product.ts +2 -0
  162. package/src/test/fixtures/cases/variants/sum.ts +2 -0
  163. package/src/test/fixtures/discover/fast.ts +1 -0
  164. package/src/{tests → test}/fixtures/discover/slow.ts +1 -1
  165. package/src/test/fixtures/invalid/bad.ts +1 -0
  166. package/src/test/fixtures/loader/fast.ts +1 -0
  167. package/src/{tests → test}/fixtures/loader/slow.ts +1 -1
  168. package/src/test/fixtures/loader/stateful.ts +2 -0
  169. package/src/test/fixtures/stateful/stateful.ts +2 -0
  170. package/src/test/fixtures/variants/extra.ts +1 -0
  171. package/src/test/fixtures/variants/impl.ts +1 -0
  172. package/src/test/fixtures/worker/fast.ts +1 -0
  173. package/src/{tests → test}/fixtures/worker/slow.ts +1 -1
  174. package/src/viewer/DateFormat.ts +30 -0
  175. package/src/viewer/Helpers.ts +23 -0
  176. package/src/viewer/LineData.ts +120 -0
  177. package/src/viewer/Providers.ts +191 -0
  178. package/src/viewer/ReportData.ts +123 -0
  179. package/src/viewer/State.ts +49 -0
  180. package/src/viewer/Theme.ts +15 -0
  181. package/src/viewer/components/App.tsx +73 -0
  182. package/src/viewer/components/DropZone.tsx +71 -0
  183. package/src/viewer/components/LazyPlot.ts +33 -0
  184. package/src/viewer/components/SamplesPanel.tsx +214 -0
  185. package/src/viewer/components/Shell.tsx +26 -0
  186. package/src/viewer/components/SourcePanel.tsx +216 -0
  187. package/src/viewer/components/SummaryPanel.tsx +332 -0
  188. package/src/viewer/components/TabBar.tsx +131 -0
  189. package/src/viewer/components/TabContent.tsx +46 -0
  190. package/src/viewer/components/ThemeToggle.tsx +50 -0
  191. package/src/viewer/index.html +20 -0
  192. package/src/viewer/main.tsx +4 -0
  193. package/src/viewer/plots/CIPlot.ts +313 -0
  194. package/src/{html/browser → viewer/plots}/HistogramKde.ts +42 -47
  195. package/src/viewer/plots/LegendUtils.ts +134 -0
  196. package/src/viewer/plots/PlotTypes.ts +85 -0
  197. package/src/viewer/plots/RenderPlots.ts +230 -0
  198. package/src/viewer/plots/SampleTimeSeries.ts +306 -0
  199. package/src/viewer/plots/SvgHelpers.ts +136 -0
  200. package/src/viewer/plots/TimeSeriesMarks.ts +319 -0
  201. package/src/viewer/report.css +427 -0
  202. package/src/viewer/shell.css +357 -0
  203. package/src/viewer/tsconfig.json +11 -0
  204. package/dist/BenchRunner-CSKN9zPy.d.mts +0 -225
  205. package/dist/BrowserHeapSampler-DCeL42RE.mjs +0 -202
  206. package/dist/BrowserHeapSampler-DCeL42RE.mjs.map +0 -1
  207. package/dist/GcStats-ByEovUi1.mjs +0 -77
  208. package/dist/GcStats-ByEovUi1.mjs.map +0 -1
  209. package/dist/HeapSampler-B8dtKHn1.mjs.map +0 -1
  210. package/dist/TimingUtils-ClclVQ7E.mjs +0 -597
  211. package/dist/TimingUtils-ClclVQ7E.mjs.map +0 -1
  212. package/dist/browser/index.js +0 -914
  213. package/dist/src-Cf_LXwlp.mjs +0 -2873
  214. package/dist/src-Cf_LXwlp.mjs.map +0 -1
  215. package/src/BenchMatrix.ts +0 -380
  216. package/src/BenchmarkReport.ts +0 -156
  217. package/src/HtmlDataPrep.ts +0 -148
  218. package/src/StandardSections.ts +0 -261
  219. package/src/StatisticalUtils.ts +0 -176
  220. package/src/TypeUtil.ts +0 -8
  221. package/src/browser/BrowserGcStats.ts +0 -44
  222. package/src/browser/BrowserHeapSampler.ts +0 -271
  223. package/src/export/JsonExport.ts +0 -103
  224. package/src/export/JsonFormat.ts +0 -91
  225. package/src/heap-sample/HeapSampleReport.ts +0 -196
  226. package/src/html/HtmlReport.ts +0 -131
  227. package/src/html/HtmlTemplate.ts +0 -284
  228. package/src/html/Types.ts +0 -88
  229. package/src/html/browser/CIPlot.ts +0 -287
  230. package/src/html/browser/LegendUtils.ts +0 -163
  231. package/src/html/browser/RenderPlots.ts +0 -263
  232. package/src/html/browser/SampleTimeSeries.ts +0 -389
  233. package/src/html/browser/Types.ts +0 -96
  234. package/src/html/browser/index.ts +0 -1
  235. package/src/html/index.ts +0 -17
  236. package/src/runners/BasicRunner.ts +0 -364
  237. package/src/table-util/ConvergenceFormatters.ts +0 -19
  238. package/src/table-util/Formatters.ts +0 -152
  239. package/src/table-util/README.md +0 -70
  240. package/src/table-util/TableReport.ts +0 -293
  241. package/src/tests/fixtures/cases/asyncCases.ts +0 -7
  242. package/src/tests/fixtures/cases/variants/product.ts +0 -2
  243. package/src/tests/fixtures/cases/variants/sum.ts +0 -2
  244. package/src/tests/fixtures/discover/fast.ts +0 -1
  245. package/src/tests/fixtures/invalid/bad.ts +0 -1
  246. package/src/tests/fixtures/loader/fast.ts +0 -1
  247. package/src/tests/fixtures/loader/stateful.ts +0 -2
  248. package/src/tests/fixtures/stateful/stateful.ts +0 -2
  249. package/src/tests/fixtures/variants/extra.ts +0 -1
  250. package/src/tests/fixtures/variants/impl.ts +0 -1
  251. package/src/tests/fixtures/worker/fast.ts +0 -1
  252. package/src/{table-util/test → test}/TableValueExtractor.test.ts +0 -0
  253. package/src/{table-util/test → test}/TableValueExtractor.ts +9 -9
@@ -0,0 +1,282 @@
1
+ import type {
2
+ BootstrapResult,
3
+ CIDirection,
4
+ DifferenceCI,
5
+ HistogramBin,
6
+ StatKind,
7
+ } from "./StatisticalUtils.ts";
8
+ import {
9
+ average,
10
+ bootstrapSamples,
11
+ computeInterval,
12
+ createResample,
13
+ defaultConfidence,
14
+ isBootstrappable,
15
+ maxBootstrapInput,
16
+ maxOf,
17
+ minOf,
18
+ percentile,
19
+ prepareBlocks,
20
+ quickSelect,
21
+ resampleInto,
22
+ statKindToFn,
23
+ subsample,
24
+ } from "./StatisticalUtils.ts";
25
+
26
/** Tuning knobs shared by every difference-CI function in this module. */
type DiffOptions = {
  /** How many bootstrap resamples to draw (default: 10000) */
  resamples?: number;
  /** Confidence level, expressed in the range 0-1 (default: 0.95) */
  confidence?: number;
  /** Equivalence margin in percent: a CI inside [-margin, +margin] is reported as "equivalent" */
  equivMargin?: number;
};

/** DiffOptions plus the block-specific settings accepted by blockDifferenceCI. */
export type BlockDiffOptions = DiffOptions & {
  /** Block boundaries for the second sample array; blocksA is used when omitted */
  blocksB?: number[];
  /** Turn off Tukey trimming of outlier batches */
  noBatchTrim?: boolean;
};

/** A point estimate and its interval, bundled with a histogram of the bootstrap values. */
type BinnedCI = {
  estimate: number;
  ci: [number, number];
  histogram: HistogramBin[];
};

/** One statistic scheduled for evaluation on every resample pair. */
interface DiffOp {
  /** Position of the statistic in the caller's stats array */
  origIndex: number;
  /** Position of the statistic in execution order */
  execIndex: number;
  /** Evaluates the statistic on the resample buffer for side A */
  computeA: (buf: number[]) => number;
  /** Evaluates the statistic on the resample buffer for side B */
  computeB: (buf: number[]) => number;
  /** Evaluates the statistic on the original (un-resampled) data */
  pointEstimate: (s: number[]) => number;
}
57
+
58
/**
 * Sample-level bootstrap CI for the percentage change from baseline (a) to current (b).
 *
 * The reported percent is statFn evaluated on the full inputs; the interval and the
 * histogram come from resampling the arrays returned by subsample().
 *
 * @param a baseline samples
 * @param b current samples
 * @param statFn statistic evaluated on each sample set
 * @param options resample count, confidence level and equivalence margin
 * @return percent difference with CI, direction and histogram; `subsampled` (the larger
 *   input length) is present only when subsample() returned a different array
 */
export function sampleDifferenceCI(
  a: number[],
  b: number[],
  statFn: (s: number[]) => number,
  options: DiffOptions = {},
): DifferenceCI {
  const {
    resamples: iterations = bootstrapSamples,
    confidence: level = defaultConfidence,
  } = options;

  const baseline = statFn(a);
  const current = statFn(b);
  const percent = ((current - baseline) / baseline) * 100;

  const poolA = subsample(a, maxBootstrapInput);
  const poolB = subsample(b, maxBootstrapInput);
  // Scratch buffers are allocated once and overwritten by every draw
  const scratchA = new Array<number>(poolA.length);
  const scratchB = new Array<number>(poolB.length);

  const drawPercent = (): number => {
    resampleInto(poolA, scratchA);
    resampleInto(poolB, scratchB);
    const resampledBase = statFn(scratchA);
    const resampledCurr = statFn(scratchB);
    return ((resampledCurr - resampledBase) / resampledBase) * 100;
  };
  const diffs = Array.from({ length: iterations }, drawPercent);

  const ci = computeInterval(diffs, level);
  const wasCapped = poolA !== a || poolB !== b;
  return {
    percent,
    ci,
    direction: classifyDirection(ci, percent, options.equivMargin),
    histogram: binValues(diffs),
    ciLevel: "sample",
    ...(wasCapped ? { subsampled: Math.max(a.length, b.length) } : {}),
  };
}
92
+
93
/**
 * Difference CIs for several statistics that share one resample pair per iteration.
 *
 * Point estimates are taken from the original arrays; the bootstrap distribution is
 * built from the arrays returned by subsample(). Within an iteration the statistics run
 * in the order chosen by buildDiffOps.
 *
 * @param a baseline samples
 * @param b current samples
 * @param stats statistics to evaluate
 * @param options resample count, confidence level and equivalence margin
 * @return one DifferenceCI per entry of `stats`, in the same order as `stats`
 */
export function multiSampleDifferenceCI(
  a: number[],
  b: number[],
  stats: StatKind[],
  options: DiffOptions = {},
): DifferenceCI[] {
  const {
    resamples: iterations = bootstrapSamples,
    confidence: level = defaultConfidence,
  } = options;

  const poolA = subsample(a, maxBootstrapInput);
  const poolB = subsample(b, maxBootstrapInput);
  const scratchA = new Array<number>(poolA.length);
  const scratchB = new Array<number>(poolB.length);
  const ops = buildDiffOps(stats, poolA.length, poolB.length);
  const diffsByOp = ops.map(() => new Array<number>(iterations));

  // Observed percentages come from the un-resampled inputs
  const baseline = ops.map(op => op.pointEstimate(a));
  const current = ops.map(op => op.pointEstimate(b));
  const percents = baseline.map(
    (base, k) => ((current[k] - base) / base) * 100,
  );

  for (let iter = 0; iter < iterations; iter++) {
    resampleInto(poolA, scratchA);
    resampleInto(poolB, scratchB);
    for (let k = 0; k < ops.length; k++) {
      const op = ops[k];
      const base = op.computeA(scratchA);
      const curr = op.computeB(scratchB);
      diffsByOp[k][iter] = ((curr - base) / base) * 100;
    }
  }

  const wasCapped = poolA !== a || poolB !== b;
  const results = new Array<DifferenceCI>(stats.length);
  ops.forEach(op => {
    const slot = op.execIndex;
    const diffs = diffsByOp[slot];
    const percent = percents[slot];
    const ci = computeInterval(diffs, level);
    // Write back at the caller's index so the output mirrors the input order
    results[op.origIndex] = {
      percent,
      ci,
      direction: classifyDirection(ci, percent, options.equivMargin),
      histogram: binValues(diffs),
      ciLevel: "sample",
      ...(wasCapped ? { subsampled: Math.max(a.length, b.length) } : {}),
    };
  });
  return results;
}
143
+
144
/**
 * Difference CIs for a list of statistics, choosing block or sample bootstrap automatically.
 *
 * The block path (blockDifferenceCI, once per statistic) is taken only when both offset
 * arrays hold at least two entries; otherwise multiSampleDifferenceCI handles all
 * bootstrappable statistics in one call.
 *
 * @param a baseline samples
 * @param aOffsets block boundaries for `a`, if any
 * @param b current samples
 * @param bOffsets block boundaries for `b`, if any
 * @param stats statistics to evaluate
 * @param options forwarded to the chosen CI function
 * @return one slot per entry of `stats`; undefined where isBootstrappable rejects the statistic
 */
export function diffCIs(
  a: number[],
  aOffsets: number[] | undefined,
  b: number[],
  bOffsets: number[] | undefined,
  stats: StatKind[],
  options: BlockDiffOptions = {},
): (DifferenceCI | undefined)[] {
  const bootstrappable = stats.filter(isBootstrappable);
  if (bootstrappable.length === 0) return stats.map(() => undefined);

  let computed: DifferenceCI[];
  if (
    aOffsets != null &&
    bOffsets != null &&
    aOffsets.length >= 2 &&
    bOffsets.length >= 2
  ) {
    computed = [];
    for (const stat of bootstrappable) {
      const ci = blockDifferenceCI(a, aOffsets, b, statKindToFn(stat), {
        ...options,
        blocksB: bOffsets,
      });
      computed.push(ci);
    }
  } else {
    computed = multiSampleDifferenceCI(a, b, bootstrappable, options);
  }

  // Re-expand to the caller's ordering, leaving gaps for skipped statistics
  let cursor = 0;
  return stats.map(stat =>
    isBootstrappable(stat) ? computed[cursor++] : undefined,
  );
}
175
+
176
/**
 * Block bootstrap CI for the percentage change from baseline (a) to current (b).
 *
 * Each side is first passed through prepareBlocks (Tukey trimming of outlier batches
 * unless `noBatchTrim` is set). The observed percent is statFn on each side's filtered
 * samples, while every bootstrap draw compares the mean of resampled per-block values.
 * Requires 2+ blocks.
 *
 * @param a baseline samples
 * @param blocksA block boundaries for `a`
 * @param b current samples
 * @param statFn statistic evaluated per block and on the filtered samples
 * @param options resample count, confidence, equivalence margin, `blocksB`, `noBatchTrim`
 * @return percent difference with CI, direction, histogram and per-side trim counts
 */
export function blockDifferenceCI(
  a: number[],
  blocksA: number[],
  b: number[],
  statFn: (s: number[]) => number,
  options: BlockDiffOptions = {},
): DifferenceCI {
  const {
    resamples: iterations = bootstrapSamples,
    confidence: level = defaultConfidence,
  } = options;
  const boundariesB = options.blocksB ?? blocksA;
  const skipTrim = options.noBatchTrim;

  const baselineSide = prepareBlocks(a, blocksA, statFn, skipTrim);
  const currentSide = prepareBlocks(b, boundariesB, statFn, skipTrim);

  const baseline = statFn(baselineSide.filtered);
  const current = statFn(currentSide.filtered);
  const percent = ((current - baseline) / baseline) * 100;

  const diffs = Array.from({ length: iterations }, () => {
    const base = average(createResample(baselineSide.blockVals));
    const curr = average(createResample(currentSide.blockVals));
    return ((curr - base) / base) * 100;
  });

  const ci = computeInterval(diffs, level);
  return {
    percent,
    ci,
    direction: classifyDirection(ci, percent, options.equivMargin),
    histogram: binValues(diffs),
    trimmed: [baselineSide.trimCount, currentSide.trimCount],
    ciLevel: "block",
  };
}
212
+
213
/**
 * Convert a BootstrapResult into its compact, plottable form.
 *
 * @param result bootstrap outcome whose `samples` are to be binned
 * @return the estimate and CI unchanged, plus a histogram of `result.samples`
 */
export function binBootstrapResult(result: BootstrapResult): BinnedCI {
  return {
    estimate: result.estimate,
    ci: result.ci,
    histogram: binValues(result.samples),
  };
}
218
+
219
/**
 * Classify a percentage-difference CI.
 *
 * - "equivalent": a positive margin is given and the CI lies inside [-margin, +margin]
 * - "faster" / "slower": the CI excludes zero; the label follows the sign of `observed`
 * - "uncertain": anything else
 *
 * NOTE(review): the label is taken from `observed`, not from the side of zero the CI is
 * on, so an observed value outside its own CI could yield a label that disagrees with
 * the interval -- confirm this is intended.
 *
 * @param ci lower and upper bound, in percent
 * @param observed observed percentage difference
 * @param margin optional equivalence margin, in percent
 * @return the CI direction
 */
function classifyDirection(
  ci: [number, number],
  observed: number,
  margin?: number,
): CIDirection {
  const [lower, upper] = ci;

  if (margin != null && margin > 0 && lower >= -margin && upper <= margin) {
    return "equivalent";
  }

  const entirelyAbove = lower > 0;
  const entirelyBelow = upper < 0;
  if (!entirelyAbove && !entirelyBelow) return "uncertain";

  return observed < 0 ? "faster" : "slower";
}
232
+
233
/**
 * Bin values into an equal-width histogram for compact visualization.
 *
 * Non-finite entries (NaN, +/-Infinity) are skipped: they cannot be placed in a
 * finite-width bin and would otherwise produce a NaN bin index or non-finite bin centres.
 *
 * @param values numbers to bin
 * @param binCount requested number of bins (default 30); values below 1 are treated as 1
 *   and fractional values are rounded down
 * @return one entry per bin with `x` at the bin centre; a single bin when all finite
 *   values are equal; an empty array when there is no finite value
 */
function binValues(values: number[], binCount = 30): HistogramBin[] {
  let min = Infinity;
  let max = -Infinity;
  let kept = 0;
  for (const v of values) {
    if (!Number.isFinite(v)) continue;
    if (v < min) min = v;
    if (v > max) max = v;
    kept++;
  }
  if (kept === 0) return [];
  if (min === max) return [{ x: min, count: kept }];

  // Guard against zero, negative, fractional or NaN bin counts
  const bins = binCount >= 1 ? Math.floor(binCount) : 1;
  const step = (max - min) / bins;
  const counts = new Array<number>(bins).fill(0);
  for (const v of values) {
    if (!Number.isFinite(v)) continue;
    // The maximum lands exactly on the upper edge; clamp it into the last bin
    const bin = Math.min(Math.floor((v - min) / step), bins - 1);
    counts[bin]++;
  }
  return counts.map((count, i) => ({ x: min + (i + 0.5) * step, count }));
}
251
+
252
/**
 * Build the per-statistic operations used by the shared-resample bootstrap.
 *
 * Execution order is mean, min, max (non-destructive) followed by percentiles in
 * ascending order. Percentile operations get a separate quickSelect index for each side
 * because the two sample sizes may differ.
 *
 * @param stats statistics requested by the caller
 * @param nA length of the side-A resample buffer
 * @param nB length of the side-B resample buffer
 * @return operations sorted into execution order; `execIndex` is the position in the
 *   returned array and `origIndex` the position in `stats`
 */
function buildDiffOps(stats: StatKind[], nA: number, nB: number): DiffOp[] {
  type OrderedOp = DiffOp & { order: number };

  // Statistics that use the same function for both sides and for the point estimate
  const sameEverywhere = (
    order: number,
    origIndex: number,
    fn: (s: number[]) => number,
  ): OrderedOp => ({
    order,
    origIndex,
    execIndex: 0,
    computeA: fn,
    computeB: fn,
    pointEstimate: fn,
  });

  const toOp = (stat: StatKind, origIndex: number): OrderedOp => {
    switch (stat) {
      case "mean":
        return sameEverywhere(-3, origIndex, average);
      case "min":
        return sameEverywhere(-2, origIndex, minOf);
      case "max":
        return sameEverywhere(-1, origIndex, maxOf);
      default: {
        const fraction = stat.percentile;
        const rankA = Math.max(0, Math.ceil(nA * fraction) - 1);
        const rankB = Math.max(0, Math.ceil(nB * fraction) - 1);
        return {
          order: fraction,
          origIndex,
          execIndex: 0,
          computeA: (buf: number[]) => quickSelect(buf, rankA),
          computeB: (buf: number[]) => quickSelect(buf, rankB),
          pointEstimate: (v: number[]) => percentile(v, fraction),
        };
      }
    }
  };

  const ordered = stats.map(toOp);
  ordered.sort((left, right) => left.order - right.order);
  ordered.forEach((op, position) => {
    op.execIndex = position;
  });
  return ordered;
}
@@ -1,19 +1,10 @@
1
1
  /**
2
2
  * Permutation-based hypothesis testing for benchmark comparisons.
3
- *
4
- * Currently unused - the main reporting pipeline uses bootstrapDifferenceCI()
5
- * from StatisticalUtils.ts instead, which provides confidence intervals on
6
- * the difference rather than p-values.
7
- *
8
- * Kept for potential future use cases where p-values are needed.
3
+ * Currently unused -- the reporting pipeline uses blockDifferenceCI() instead,
4
+ * which provides confidence intervals rather than p-values.
9
5
  */
10
6
 
11
- import { average, percentile } from "./StatisticalUtils.ts";
12
-
13
- const significanceThreshold = 0.05;
14
- const strongSignificance = 0.001;
15
- const goodSignificance = 0.01;
16
- const defaultBootstrapSamples = 10000;
7
+ import { average, bootstrapSamples, median } from "./StatisticalUtils.ts";
17
8
 
18
9
  /** Statistical comparison between baseline and current benchmark samples */
19
10
  export interface ComparisonResult {
@@ -39,17 +30,20 @@ export interface ComparisonResult {
39
30
  };
40
31
  }
41
32
 
33
+ const significanceThreshold = 0.05;
34
+ const strongSignificance = 0.001;
35
+ const goodSignificance = 0.01;
36
+
42
37
  /** @return statistical comparison between baseline and current samples */
43
38
  export function compareWithBaseline(
44
39
  baseline: number[],
45
40
  current: number[],
46
41
  ): ComparisonResult {
47
- const baselineMedian = percentile(baseline, 0.5);
48
- const currentMedian = percentile(current, 0.5);
42
+ const baselineMedian = median(baseline);
43
+ const currentMedian = median(current);
49
44
  const baselineMean = average(baseline);
50
45
  const currentMean = average(current);
51
46
 
52
- const median = (s: number[]) => percentile(s, 0.5);
53
47
  const medianPValue = bootstrapDifferenceTest(baseline, current, median);
54
48
  const meanPValue = bootstrapDifferenceTest(baseline, current, average);
55
49
 
@@ -63,25 +57,6 @@ export function compareWithBaseline(
63
57
  };
64
58
  }
65
59
 
66
- /** @return change statistics for a current vs baseline comparison */
67
- function changeStats(current: number, base: number, pValue: number) {
68
- return {
69
- absolute: current - base,
70
- percent: ((current - base) / base) * 100,
71
- pValue,
72
- significant: pValue < significanceThreshold,
73
- significance: getSignificance(pValue),
74
- };
75
- }
76
-
77
- /** @return significance level based on p-value thresholds */
78
- function getSignificance(pValue: number): "strong" | "good" | "weak" | "none" {
79
- if (pValue < strongSignificance) return "strong";
80
- if (pValue < goodSignificance) return "good";
81
- if (pValue < significanceThreshold) return "weak";
82
- return "none";
83
- }
84
-
85
60
  /** @return p-value from permutation test for difference in statistics */
86
61
  function bootstrapDifferenceTest(
87
62
  sample1: number[],
@@ -93,12 +68,23 @@ function bootstrapDifferenceTest(
93
68
  const n1 = sample1.length;
94
69
 
95
70
  let moreExtreme = 0;
96
- for (let i = 0; i < defaultBootstrapSamples; i++) {
71
+ for (let i = 0; i < bootstrapSamples; i++) {
97
72
  const { resample1, resample2 } = shuffleAndSplit(combined, n1);
98
73
  const diff = statistic(resample2) - statistic(resample1);
99
74
  if (Math.abs(diff) >= Math.abs(observedDiff)) moreExtreme++;
100
75
  }
101
- return moreExtreme / defaultBootstrapSamples;
76
+ return moreExtreme / bootstrapSamples;
77
+ }
78
+
79
+ /** @return change statistics for a current vs baseline comparison */
80
+ function changeStats(current: number, base: number, pValue: number) {
81
+ return {
82
+ absolute: current - base,
83
+ percent: ((current - base) / base) * 100,
84
+ pValue,
85
+ significant: pValue < significanceThreshold,
86
+ significance: getSignificance(pValue),
87
+ };
102
88
  }
103
89
 
104
90
  /** @return randomly shuffled samples split at n1 (Fisher-Yates shuffle) */
@@ -108,8 +94,13 @@ function shuffleAndSplit(combined: number[], n1: number) {
108
94
  const j = Math.floor(Math.random() * (i + 1));
109
95
  [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
110
96
  }
111
- return {
112
- resample1: shuffled.slice(0, n1),
113
- resample2: shuffled.slice(n1),
114
- };
97
+ return { resample1: shuffled.slice(0, n1), resample2: shuffled.slice(n1) };
98
+ }
99
+
100
+ /** @return significance level based on p-value thresholds */
101
+ function getSignificance(pValue: number): "strong" | "good" | "weak" | "none" {
102
+ if (pValue < strongSignificance) return "strong";
103
+ if (pValue < goodSignificance) return "good";
104
+ if (pValue < significanceThreshold) return "weak";
105
+ return "none";
115
106
  }