benchforge 0.1.11 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/README.md +99 -294
- package/bin/benchforge +1 -2
- package/dist/AnalyzeArchive-8NCJhmhS.mjs +145 -0
- package/dist/AnalyzeArchive-8NCJhmhS.mjs.map +1 -0
- package/dist/BenchMatrix-BZVrBB_h.mjs +1050 -0
- package/dist/BenchMatrix-BZVrBB_h.mjs.map +1 -0
- package/dist/{BenchRunner-BzyUfiyB.d.mts → BenchRunner-DglX1NOn.d.mts} +119 -66
- package/dist/CoverageSampler-D5T9DRqe.mjs +27 -0
- package/dist/CoverageSampler-D5T9DRqe.mjs.map +1 -0
- package/dist/Formatters-BWj3d4sv.mjs +95 -0
- package/dist/Formatters-BWj3d4sv.mjs.map +1 -0
- package/dist/{HeapSampler-B8dtKHn1.mjs → HeapSampler-Dq-hpXem.mjs} +4 -4
- package/dist/HeapSampler-Dq-hpXem.mjs.map +1 -0
- package/dist/RunBenchCLI-C17DrJz8.mjs +3075 -0
- package/dist/RunBenchCLI-C17DrJz8.mjs.map +1 -0
- package/dist/StatisticalUtils-BD92crgM.mjs +255 -0
- package/dist/StatisticalUtils-BD92crgM.mjs.map +1 -0
- package/dist/TimeSampler-Ds8n7l2B.mjs +29 -0
- package/dist/TimeSampler-Ds8n7l2B.mjs.map +1 -0
- package/dist/ViewerServer-BJhdnxlN.mjs +639 -0
- package/dist/ViewerServer-BJhdnxlN.mjs.map +1 -0
- package/dist/ViewerServer-CuMNdNBz.mjs +2 -0
- package/dist/bin/benchforge.mjs +4 -5
- package/dist/bin/benchforge.mjs.map +1 -1
- package/dist/index.d.mts +711 -558
- package/dist/index.mjs +98 -3
- package/dist/index.mjs.map +1 -0
- package/dist/runners/WorkerScript.d.mts +12 -4
- package/dist/runners/WorkerScript.mjs +77 -105
- package/dist/runners/WorkerScript.mjs.map +1 -1
- package/dist/viewer/assets/CIPlot-BkOvMoMa.js +1 -0
- package/dist/viewer/assets/HistogramKde-CmSyUFY0.js +1 -0
- package/dist/viewer/assets/LegendUtils-BJpbn_jr.js +55 -0
- package/dist/viewer/assets/SampleTimeSeries-C4VBhXr3.js +1 -0
- package/dist/viewer/assets/index-Br9bp_cX.js +153 -0
- package/dist/viewer/assets/index-NzXXe_CC.css +1 -0
- package/dist/viewer/index.html +19 -0
- package/dist/viewer/speedscope/LICENSE +21 -0
- package/dist/viewer/speedscope/SourceCodePro-Regular.ttf-ILST5JV6.woff2 +0 -0
- package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js +2 -0
- package/dist/viewer/speedscope/favicon-16x16-V2DMIAZS.js.map +7 -0
- package/dist/viewer/speedscope/favicon-16x16-VSI62OPJ.png +0 -0
- package/dist/viewer/speedscope/favicon-32x32-3EB2YCUY.png +0 -0
- package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js +2 -0
- package/dist/viewer/speedscope/favicon-32x32-THY3JDJL.js.map +7 -0
- package/dist/viewer/speedscope/favicon-FOKUP5Y5.ico +0 -0
- package/dist/viewer/speedscope/favicon-M34RF7BI.js +2 -0
- package/dist/viewer/speedscope/favicon-M34RF7BI.js.map +7 -0
- package/dist/viewer/speedscope/file-format-schema.json +274 -0
- package/dist/viewer/speedscope/index.html +19 -0
- package/dist/viewer/speedscope/jfrview_bg-BLJXNNQB.wasm +0 -0
- package/dist/viewer/speedscope/perf-vertx-stacks-01-collapsed-all-ZNUIGAJL.txt +199 -0
- package/dist/viewer/speedscope/release.txt +3 -0
- package/dist/viewer/speedscope/source-code-pro.LICENSE.md +93 -0
- package/dist/viewer/speedscope/speedscope-GHPHNKXC.css +2 -0
- package/dist/viewer/speedscope/speedscope-GHPHNKXC.css.map +7 -0
- package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js +212 -0
- package/dist/viewer/speedscope/speedscope-QZFMJ7VP.js.map +7 -0
- package/package.json +52 -27
- package/src/bin/benchforge.ts +2 -2
- package/src/cli/AnalyzeArchive.ts +232 -0
- package/src/cli/BrowserBench.ts +322 -0
- package/src/cli/CliArgs.ts +164 -51
- package/src/cli/CliExport.ts +179 -0
- package/src/cli/CliOptions.ts +147 -0
- package/src/cli/CliReport.ts +197 -0
- package/src/cli/FilterBenchmarks.ts +18 -30
- package/src/cli/RunBenchCLI.ts +132 -866
- package/src/cli/SuiteRunner.ts +160 -0
- package/src/cli/ViewerServer.ts +282 -0
- package/src/export/AllocExport.ts +121 -0
- package/src/export/ArchiveExport.ts +146 -0
- package/src/export/ArchiveFormat.ts +50 -0
- package/src/export/CoverageExport.ts +148 -0
- package/src/export/EditorUri.ts +10 -0
- package/src/export/PerfettoExport.ts +64 -99
- package/src/export/SpeedscopeTypes.ts +98 -0
- package/src/export/TimeExport.ts +115 -0
- package/src/index.ts +86 -67
- package/src/matrix/BenchMatrix.ts +230 -0
- package/src/matrix/CaseLoader.ts +8 -6
- package/src/matrix/MatrixDirRunner.ts +153 -0
- package/src/matrix/MatrixFilter.ts +49 -47
- package/src/matrix/MatrixInlineRunner.ts +50 -0
- package/src/matrix/MatrixReport.ts +90 -250
- package/src/matrix/VariantLoader.ts +5 -5
- package/src/profiling/browser/BenchLoop.ts +51 -0
- package/src/profiling/browser/BrowserCDP.ts +133 -0
- package/src/profiling/browser/BrowserGcStats.ts +33 -0
- package/src/profiling/browser/BrowserProfiler.ts +160 -0
- package/src/profiling/browser/CdpClient.ts +82 -0
- package/src/profiling/browser/CdpPage.ts +138 -0
- package/src/profiling/browser/ChromeLauncher.ts +158 -0
- package/src/profiling/browser/ChromeTraceEvent.ts +28 -0
- package/src/profiling/browser/PageLoadMode.ts +61 -0
- package/src/profiling/node/CoverageSampler.ts +27 -0
- package/src/profiling/node/CoverageTypes.ts +23 -0
- package/src/profiling/node/HeapSampleReport.ts +261 -0
- package/src/{heap-sample → profiling/node}/HeapSampler.ts +1 -2
- package/src/{heap-sample → profiling/node}/ResolvedProfile.ts +18 -9
- package/src/profiling/node/TimeSampler.ts +57 -0
- package/src/report/BenchmarkReport.ts +146 -0
- package/src/report/Colors.ts +9 -0
- package/src/report/Formatters.ts +110 -0
- package/src/report/GcSections.ts +151 -0
- package/src/{GitUtils.ts → report/GitUtils.ts} +18 -19
- package/src/report/HtmlReport.ts +223 -0
- package/src/report/ParseStats.ts +73 -0
- package/src/report/StandardSections.ts +147 -0
- package/src/report/ViewerSections.ts +286 -0
- package/src/report/text/TableReport.ts +253 -0
- package/src/report/text/TextReport.ts +123 -0
- package/src/runners/AdaptiveWrapper.ts +116 -236
- package/src/runners/BenchRunner.ts +20 -15
- package/src/{Benchmark.ts → runners/BenchmarkSpec.ts} +5 -6
- package/src/runners/CreateRunner.ts +5 -7
- package/src/runners/GcStats.ts +47 -50
- package/src/{MeasuredResults.ts → runners/MeasuredResults.ts} +43 -37
- package/src/runners/MergeBatches.ts +123 -0
- package/src/{NodeGC.ts → runners/NodeGC.ts} +2 -3
- package/src/runners/RunnerOrchestrator.ts +127 -243
- package/src/runners/RunnerUtils.ts +75 -1
- package/src/runners/SampleStats.ts +100 -0
- package/src/runners/TimingRunner.ts +244 -0
- package/src/runners/TimingUtils.ts +3 -2
- package/src/runners/WorkerScript.ts +135 -151
- package/src/stats/BootstrapDifference.ts +282 -0
- package/src/{PermutationTest.ts → stats/PermutationTest.ts} +8 -17
- package/src/stats/StatisticalUtils.ts +445 -0
- package/src/{tests → test}/AdaptiveConvergence.test.ts +10 -10
- package/src/test/AdaptiveRunner.test.ts +39 -41
- package/src/{tests → test}/AdaptiveSampling.test.ts +9 -9
- package/src/test/AdaptiveStatistics.integration.ts +2 -2
- package/src/{tests → test}/BenchMatrix.test.ts +19 -16
- package/src/test/BenchmarkReport.test.ts +63 -13
- package/src/test/BrowserBench.e2e.test.ts +186 -17
- package/src/test/BrowserBench.test.ts +10 -5
- package/src/test/BuildTimeSection.test.ts +130 -0
- package/src/test/CapSamples.test.ts +82 -0
- package/src/test/CoverageExport.test.ts +115 -0
- package/src/test/CoverageSampler.test.ts +33 -0
- package/src/test/HeapAttribution.test.ts +14 -14
- package/src/{tests → test}/MatrixFilter.test.ts +1 -1
- package/src/{tests → test}/MatrixReport.test.ts +1 -1
- package/src/test/PermutationTest.test.ts +1 -1
- package/src/{tests → test}/RealDataValidation.test.ts +6 -6
- package/src/test/RunBenchCLI.test.ts +39 -38
- package/src/test/RunnerOrchestrator.test.ts +12 -12
- package/src/test/StatisticalUtils.test.ts +48 -12
- package/src/{table-util/test → test}/TableReport.test.ts +2 -2
- package/src/test/TestUtils.ts +12 -7
- package/src/test/TimeExport.test.ts +139 -0
- package/src/test/TimeSampler.test.ts +37 -0
- package/src/test/ViewerLive.e2e.test.ts +159 -0
- package/src/test/ViewerStatic.static.e2e.test.ts +137 -0
- package/src/{tests → test}/fixtures/baseline/impl.ts +1 -1
- package/src/{tests → test}/fixtures/bevy30-samples.ts +3 -1
- package/src/test/fixtures/cases/asyncCases.ts +9 -0
- package/src/{tests → test}/fixtures/cases/cases.ts +5 -2
- package/src/test/fixtures/cases/variants/product.ts +2 -0
- package/src/test/fixtures/cases/variants/sum.ts +2 -0
- package/src/test/fixtures/discover/fast.ts +1 -0
- package/src/{tests → test}/fixtures/discover/slow.ts +1 -1
- package/src/test/fixtures/invalid/bad.ts +1 -0
- package/src/test/fixtures/loader/fast.ts +1 -0
- package/src/{tests → test}/fixtures/loader/slow.ts +1 -1
- package/src/test/fixtures/loader/stateful.ts +2 -0
- package/src/test/fixtures/stateful/stateful.ts +2 -0
- package/src/test/fixtures/variants/extra.ts +1 -0
- package/src/test/fixtures/variants/impl.ts +1 -0
- package/src/test/fixtures/worker/fast.ts +1 -0
- package/src/{tests → test}/fixtures/worker/slow.ts +1 -1
- package/src/viewer/DateFormat.ts +30 -0
- package/src/viewer/Helpers.ts +23 -0
- package/src/viewer/LineData.ts +120 -0
- package/src/viewer/Providers.ts +191 -0
- package/src/viewer/ReportData.ts +123 -0
- package/src/viewer/State.ts +49 -0
- package/src/viewer/Theme.ts +15 -0
- package/src/viewer/components/App.tsx +73 -0
- package/src/viewer/components/DropZone.tsx +71 -0
- package/src/viewer/components/LazyPlot.ts +33 -0
- package/src/viewer/components/SamplesPanel.tsx +214 -0
- package/src/viewer/components/Shell.tsx +26 -0
- package/src/viewer/components/SourcePanel.tsx +216 -0
- package/src/viewer/components/SummaryPanel.tsx +332 -0
- package/src/viewer/components/TabBar.tsx +131 -0
- package/src/viewer/components/TabContent.tsx +46 -0
- package/src/viewer/components/ThemeToggle.tsx +50 -0
- package/src/viewer/index.html +20 -0
- package/src/viewer/main.tsx +4 -0
- package/src/viewer/plots/CIPlot.ts +313 -0
- package/src/{html/browser → viewer/plots}/HistogramKde.ts +33 -38
- package/src/viewer/plots/LegendUtils.ts +134 -0
- package/src/viewer/plots/PlotTypes.ts +85 -0
- package/src/viewer/plots/RenderPlots.ts +230 -0
- package/src/viewer/plots/SampleTimeSeries.ts +306 -0
- package/src/viewer/plots/SvgHelpers.ts +136 -0
- package/src/viewer/plots/TimeSeriesMarks.ts +319 -0
- package/src/viewer/report.css +427 -0
- package/src/viewer/shell.css +357 -0
- package/src/viewer/tsconfig.json +11 -0
- package/dist/BrowserHeapSampler-B6asLKWQ.mjs +0 -202
- package/dist/BrowserHeapSampler-B6asLKWQ.mjs.map +0 -1
- package/dist/GcStats-wX7Xyblu.mjs +0 -77
- package/dist/GcStats-wX7Xyblu.mjs.map +0 -1
- package/dist/HeapSampler-B8dtKHn1.mjs.map +0 -1
- package/dist/TimingUtils-DwOwkc8G.mjs +0 -597
- package/dist/TimingUtils-DwOwkc8G.mjs.map +0 -1
- package/dist/browser/index.js +0 -914
- package/dist/src-B-DDaCa9.mjs +0 -3108
- package/dist/src-B-DDaCa9.mjs.map +0 -1
- package/src/BenchMatrix.ts +0 -380
- package/src/BenchmarkReport.ts +0 -161
- package/src/HtmlDataPrep.ts +0 -148
- package/src/StandardSections.ts +0 -261
- package/src/StatisticalUtils.ts +0 -175
- package/src/TypeUtil.ts +0 -8
- package/src/browser/BrowserGcStats.ts +0 -44
- package/src/browser/BrowserHeapSampler.ts +0 -271
- package/src/export/JsonExport.ts +0 -103
- package/src/export/JsonFormat.ts +0 -91
- package/src/export/SpeedscopeExport.ts +0 -202
- package/src/heap-sample/HeapSampleReport.ts +0 -269
- package/src/html/HtmlReport.ts +0 -131
- package/src/html/HtmlTemplate.ts +0 -284
- package/src/html/Types.ts +0 -88
- package/src/html/browser/CIPlot.ts +0 -287
- package/src/html/browser/LegendUtils.ts +0 -163
- package/src/html/browser/RenderPlots.ts +0 -263
- package/src/html/browser/SampleTimeSeries.ts +0 -389
- package/src/html/browser/Types.ts +0 -96
- package/src/html/browser/index.ts +0 -1
- package/src/html/index.ts +0 -17
- package/src/runners/BasicRunner.ts +0 -364
- package/src/table-util/ConvergenceFormatters.ts +0 -19
- package/src/table-util/Formatters.ts +0 -157
- package/src/table-util/README.md +0 -70
- package/src/table-util/TableReport.ts +0 -293
- package/src/tests/fixtures/cases/asyncCases.ts +0 -7
- package/src/tests/fixtures/cases/variants/product.ts +0 -2
- package/src/tests/fixtures/cases/variants/sum.ts +0 -2
- package/src/tests/fixtures/discover/fast.ts +0 -1
- package/src/tests/fixtures/invalid/bad.ts +0 -1
- package/src/tests/fixtures/loader/fast.ts +0 -1
- package/src/tests/fixtures/loader/stateful.ts +0 -2
- package/src/tests/fixtures/stateful/stateful.ts +0 -2
- package/src/tests/fixtures/variants/extra.ts +0 -1
- package/src/tests/fixtures/variants/impl.ts +0 -1
- package/src/tests/fixtures/worker/fast.ts +0 -1
- /package/src/{table-util/test → test}/TableValueExtractor.test.ts +0 -0
- /package/src/{table-util/test → test}/TableValueExtractor.ts +0 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
BootstrapResult,
|
|
3
|
+
CIDirection,
|
|
4
|
+
DifferenceCI,
|
|
5
|
+
HistogramBin,
|
|
6
|
+
StatKind,
|
|
7
|
+
} from "./StatisticalUtils.ts";
|
|
8
|
+
import {
|
|
9
|
+
average,
|
|
10
|
+
bootstrapSamples,
|
|
11
|
+
computeInterval,
|
|
12
|
+
createResample,
|
|
13
|
+
defaultConfidence,
|
|
14
|
+
isBootstrappable,
|
|
15
|
+
maxBootstrapInput,
|
|
16
|
+
maxOf,
|
|
17
|
+
minOf,
|
|
18
|
+
percentile,
|
|
19
|
+
prepareBlocks,
|
|
20
|
+
quickSelect,
|
|
21
|
+
resampleInto,
|
|
22
|
+
statKindToFn,
|
|
23
|
+
subsample,
|
|
24
|
+
} from "./StatisticalUtils.ts";
|
|
25
|
+
|
|
26
|
+
/** Settings accepted by blockDifferenceCI: every DiffOptions field plus block-specific controls. */
export type BlockDiffOptions = DiffOptions & {
  /** When true, outlier batches are not Tukey-trimmed before resampling. */
  noBatchTrim?: boolean;
  /** Block boundaries for the second sample array; blocksA is used when omitted. */
  blocksB?: number[];
};
|
|
33
|
+
|
|
34
|
+
/** Tuning knobs shared by the difference-CI functions in this module. */
type DiffOptions = {
  /** How many bootstrap resamples to draw (falls back to `bootstrapSamples`). */
  resamples?: number;
  /** Confidence level as a fraction in 0-1 (falls back to `defaultConfidence`). */
  confidence?: number;
  /** Equivalence margin in percent: a CI lying inside [-margin, +margin] is reported as "equivalent". */
  equivMargin?: number;
};
|
|
43
|
+
|
|
44
|
+
/** A bootstrap estimate with its confidence interval and a histogram of the bootstrap samples. */
type BinnedCI = {
  /** Point estimate, copied from the BootstrapResult. */
  estimate: number;
  /** Confidence interval bounds, copied from the BootstrapResult. */
  ci: [number, number];
  /** Bootstrap samples reduced to histogram bins. */
  histogram: HistogramBin[];
};
|
|
49
|
+
|
|
50
|
+
/** One statistic to evaluate per resample pair, as planned by buildDiffOps. */
interface DiffOp {
  /** Position of this statistic in the caller's `stats` array (used to restore output order). */
  origIndex: number;
  /** Position of this op in the sorted execution order. */
  execIndex: number;
  /** Evaluates the statistic on the baseline (A) resample buffer. */
  computeA: (buf: number[]) => number;
  /** Evaluates the statistic on the current (B) resample buffer. */
  computeB: (buf: number[]) => number;
  /** Evaluates the statistic on an original, un-resampled sample array. */
  pointEstimate: (s: number[]) => number;
}
|
|
57
|
+
|
|
58
|
+
/** Sample-level bootstrap CI for the percentage change from baseline (a) to current (b).
 *
 * The reported percent applies statFn to the full inputs; the interval and histogram
 * come from resampling the inputs after they have been passed through subsample().
 *
 * @param a baseline samples
 * @param b current samples
 * @param statFn statistic evaluated on the inputs and on every resample
 * @param options resample count, confidence level and optional equivalence margin
 * @return DifferenceCI tagged ciLevel "sample"; `subsampled` is present only when
 *   subsample() returned a different array than it was given. */
export function sampleDifferenceCI(
  a: number[],
  b: number[],
  statFn: (s: number[]) => number,
  options: DiffOptions = {},
): DifferenceCI {
  const { resamples = bootstrapSamples, confidence = defaultConfidence } =
    options;
  const pctChange = (base: number, curr: number) =>
    ((curr - base) / base) * 100;

  // Point estimate is taken from the full inputs, before any subsampling.
  const baseline = statFn(a);
  const current = statFn(b);
  const percent = pctChange(baseline, current);

  const cappedA = subsample(a, maxBootstrapInput);
  const cappedB = subsample(b, maxBootstrapInput);
  // Scratch buffers are refilled on every draw instead of allocating new arrays.
  const scratchA = new Array(cappedA.length);
  const scratchB = new Array(cappedB.length);
  const drawDiff = () => {
    resampleInto(cappedA, scratchA);
    resampleInto(cappedB, scratchB);
    const base = statFn(scratchA);
    const curr = statFn(scratchB);
    return pctChange(base, curr);
  };
  const diffs = Array.from({ length: resamples }, drawDiff);

  const ci = computeInterval(diffs, confidence);
  const wasCapped = cappedA !== a || cappedB !== b;
  return {
    percent,
    ci,
    direction: classifyDirection(ci, percent, options.equivMargin),
    histogram: binValues(diffs),
    ciLevel: "sample",
    ...(wasCapped ? { subsampled: Math.max(a.length, b.length) } : {}),
  };
}
|
|
92
|
+
|
|
93
|
+
/** Difference CIs for several statistics from one shared stream of resamples.
 *
 * Each iteration draws a single resample pair (A, B) and evaluates every requested
 * statistic on it, in the execution order chosen by buildDiffOps.
 *
 * @param a baseline samples
 * @param b current samples
 * @param stats statistics to compare
 * @param options resample count, confidence level and optional equivalence margin
 * @return one DifferenceCI per entry of `stats`, in the same order as `stats`. */
export function multiSampleDifferenceCI(
  a: number[],
  b: number[],
  stats: StatKind[],
  options: DiffOptions = {},
): DifferenceCI[] {
  const { resamples = bootstrapSamples, confidence = defaultConfidence } =
    options;
  const pctChange = (base: number, curr: number) =>
    ((curr - base) / base) * 100;

  const cappedA = subsample(a, maxBootstrapInput);
  const cappedB = subsample(b, maxBootstrapInput);
  const scratchA = new Array(cappedA.length);
  const scratchB = new Array(cappedB.length);
  const ops = buildDiffOps(stats, cappedA.length, cappedB.length);
  const diffsByOp = ops.map(() => new Array<number>(resamples));

  // Point estimates come from the original (uncapped) data.
  const baseline = ops.map(op => op.pointEstimate(a));
  const current = ops.map(op => op.pointEstimate(b));
  const percents = baseline.map((base, j) => pctChange(base, current[j]));

  for (let round = 0; round < resamples; round++) {
    resampleInto(cappedA, scratchA);
    resampleInto(cappedB, scratchB);
    for (let j = 0; j < ops.length; j++) {
      const op = ops[j];
      const base = op.computeA(scratchA);
      const curr = op.computeB(scratchB);
      diffsByOp[j][round] = pctChange(base, curr);
    }
  }

  const wasCapped = cappedA !== a || cappedB !== b;
  const results = new Array<DifferenceCI>(stats.length);
  for (const { origIndex, execIndex } of ops) {
    const diffs = diffsByOp[execIndex];
    const percent = percents[execIndex];
    const ci = computeInterval(diffs, confidence);
    // Write back at the caller's index so output order matches `stats`.
    results[origIndex] = {
      percent,
      ci,
      direction: classifyDirection(ci, percent, options.equivMargin),
      histogram: binValues(diffs),
      ciLevel: "sample",
      ...(wasCapped ? { subsampled: Math.max(a.length, b.length) } : {}),
    };
  }
  return results;
}
|
|
143
|
+
|
|
144
|
+
/** Compute difference CIs for a list of statistics, choosing block or sample bootstrap.
 *
 * The block bootstrap is used only when both offset arrays hold at least two entries;
 * otherwise all bootstrappable stats go through multiSampleDifferenceCI.
 *
 * @param a baseline samples
 * @param aOffsets block boundaries for `a`, if available
 * @param b current samples
 * @param bOffsets block boundaries for `b`, if available
 * @param stats statistics to compare
 * @param options forwarded to the chosen CI function
 * @return one slot per entry of `stats`; slots for stats rejected by isBootstrappable
 *   are undefined. */
export function diffCIs(
  a: number[],
  aOffsets: number[] | undefined,
  b: number[],
  bOffsets: number[] | undefined,
  stats: StatKind[],
  options: BlockDiffOptions = {},
): (DifferenceCI | undefined)[] {
  const bootstrappable = stats.filter(isBootstrappable);
  if (bootstrappable.length === 0) return stats.map(() => undefined);

  let computed: DifferenceCI[];
  if (aOffsets && bOffsets && aOffsets.length >= 2 && bOffsets.length >= 2) {
    computed = bootstrappable.map(stat =>
      blockDifferenceCI(a, aOffsets, b, statKindToFn(stat), {
        ...options,
        blocksB: bOffsets,
      }),
    );
  } else {
    computed = multiSampleDifferenceCI(a, b, bootstrappable, options);
  }

  // Re-expand to the caller's layout: consume computed results in order,
  // leaving undefined where a stat was filtered out.
  let next = 0;
  return stats.map(stat =>
    isBootstrappable(stat) ? computed[next++] : undefined,
  );
}
|
|
175
|
+
|
|
176
|
+
/** Block bootstrap CI for the percentage change from baseline (a) to current (b).
 *
 * Each side is passed through prepareBlocks (outlier batches are Tukey-trimmed there
 * unless `noBatchTrim` is set). The point estimate applies statFn to each side's
 * filtered samples; the interval comes from resampling the per-block values and
 * averaging each resample. Requires 2+ blocks.
 *
 * @param a baseline samples
 * @param blocksA block boundaries for `a`
 * @param b current samples
 * @param statFn statistic handed to prepareBlocks and used for the point estimate
 * @param options resample count, confidence, equivalence margin, `blocksB`, `noBatchTrim`
 * @return DifferenceCI tagged ciLevel "block", with per-side trim counts in `trimmed`. */
export function blockDifferenceCI(
  a: number[],
  blocksA: number[],
  b: number[],
  statFn: (s: number[]) => number,
  options: BlockDiffOptions = {},
): DifferenceCI {
  const { resamples = bootstrapSamples, confidence = defaultConfidence } =
    options;
  const pctChange = (base: number, curr: number) =>
    ((curr - base) / base) * 100;

  // Side B reuses A's block boundaries when none are given.
  const blocksB = options.blocksB ?? blocksA;
  const skipTrim = options.noBatchTrim;
  const baselineSide = prepareBlocks(a, blocksA, statFn, skipTrim);
  const currentSide = prepareBlocks(b, blocksB, statFn, skipTrim);

  const percent = pctChange(
    statFn(baselineSide.filtered),
    statFn(currentSide.filtered),
  );

  const diffs = Array.from({ length: resamples }, () => {
    const base = average(createResample(baselineSide.blockVals));
    const curr = average(createResample(currentSide.blockVals));
    return pctChange(base, curr);
  });
  const ci = computeInterval(diffs, confidence);

  return {
    percent,
    ci,
    direction: classifyDirection(ci, percent, options.equivMargin),
    histogram: binValues(diffs),
    trimmed: [baselineSide.trimCount, currentSide.trimCount],
    ciLevel: "block",
  };
}
|
|
212
|
+
|
|
213
|
+
/** Convert a BootstrapResult into a BinnedCI.
 * @param result bootstrap output carrying `estimate`, `ci` and raw `samples`
 * @return the same estimate and interval, with the samples reduced to a histogram */
export function binBootstrapResult(result: BootstrapResult): BinnedCI {
  return {
    estimate: result.estimate,
    ci: result.ci,
    histogram: binValues(result.samples),
  };
}
|
|
218
|
+
|
|
219
|
+
/** Classify a percentage-difference CI.
 *
 * @param ci interval bounds; index 0 is treated as the lower bound, index 1 as the upper
 * @param observed observed percentage difference; its sign picks "faster" vs "slower"
 * @param margin optional equivalence margin in percent (ignored unless > 0)
 * @return "equivalent" when the CI sits inside [-margin, +margin]; otherwise "faster" or
 *   "slower" when the CI excludes zero; otherwise "uncertain". */
function classifyDirection(
  ci: [number, number],
  observed: number,
  margin?: number,
): CIDirection {
  const [lower, upper] = ci;

  if (margin != null && margin > 0) {
    const insideBand = lower >= -margin && upper <= margin;
    if (insideBand) return "equivalent";
  }

  // Written as a positive test so NaN bounds fall through to "uncertain".
  const excludesZero = lower > 0 || upper < 0;
  if (!excludesZero) return "uncertain";

  return observed < 0 ? "faster" : "slower";
}
|
|
232
|
+
|
|
233
|
+
/** Bin values into equal-width buckets for compact visualization.
 *
 * Non-finite entries (NaN, +/-Infinity, e.g. a percentage difference computed against
 * a zero baseline) are ignored: they have no position on the axis and would otherwise
 * turn the bucket width into NaN or Infinity.
 *
 * @param values numbers to bin (not mutated)
 * @param binCount number of buckets (default 30)
 * @return one entry per bucket with `x` at the bucket midpoint; a single bin when all
 *   finite values are equal; an empty array when there are no finite values. */
function binValues(values: number[], binCount = 30): HistogramBin[] {
  let min = Infinity;
  let max = -Infinity;
  let finiteCount = 0;
  for (const v of values) {
    if (!Number.isFinite(v)) continue;
    finiteCount++;
    if (v < min) min = v;
    if (v > max) max = v;
  }
  // Nothing binnable: avoid emitting a bin with an undefined/NaN position.
  if (finiteCount === 0) return [];
  if (min === max) return [{ x: min, count: finiteCount }];

  const step = (max - min) / binCount;
  const counts = new Array<number>(binCount).fill(0);
  for (const v of values) {
    if (!Number.isFinite(v)) continue;
    // The maximum lands exactly on the upper edge; clamp it into the last bucket.
    const bin = Math.min(Math.floor((v - min) / step), binCount - 1);
    counts[bin]++;
  }
  return counts.map((count, i) => ({ x: min + (i + 0.5) * step, count }));
}
|
|
251
|
+
|
|
252
|
+
/** Plan the per-resample operations for a list of statistics.
 *
 * Ops are sorted so mean, min and max run first (the original comment calls these
 * non-destructive), followed by percentiles in ascending order. Percentile ops carry
 * a separate quickSelect index for each side because the two samples may differ in size.
 *
 * @param stats statistics requested by the caller
 * @param nA length of the baseline resample buffer
 * @param nB length of the current resample buffer
 * @return ops in execution order; `origIndex` maps each op back to its slot in `stats`
 *   and `execIndex` is its position in the returned array. */
function buildDiffOps(stats: StatKind[], nA: number, nB: number): DiffOp[] {
  // 0-based index of the p-th percentile in a buffer of n items, never below 0.
  const rankFor = (n: number, p: number) => Math.max(0, Math.ceil(n * p) - 1);

  // Stats that use the same function for both sides and for the point estimate.
  const sameOnBothSides = (
    order: number,
    origIndex: number,
    fn: (s: number[]) => number,
  ) => ({
    order,
    origIndex,
    execIndex: 0,
    computeA: fn,
    computeB: fn,
    pointEstimate: fn,
  });

  const planned = stats.map((stat, origIndex) => {
    switch (stat) {
      case "mean":
        return sameOnBothSides(-3, origIndex, average);
      case "min":
        return sameOnBothSides(-2, origIndex, minOf);
      case "max":
        return sameOnBothSides(-1, origIndex, maxOf);
    }
    const p = stat.percentile;
    const rankA = rankFor(nA, p);
    const rankB = rankFor(nB, p);
    return {
      order: p,
      origIndex,
      execIndex: 0,
      computeA: (buf: number[]) => quickSelect(buf, rankA),
      computeB: (buf: number[]) => quickSelect(buf, rankB),
      pointEstimate: (v: number[]) => percentile(v, p),
    };
  });

  planned.sort((x, y) => x.order - y.order);
  planned.forEach((entry, position) => {
    entry.execIndex = position;
  });
  return planned;
}
|
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Permutation-based hypothesis testing for benchmark comparisons.
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
* from StatisticalUtils.ts instead, which provides confidence intervals on
|
|
6
|
-
* the difference rather than p-values.
|
|
7
|
-
*
|
|
8
|
-
* Kept for potential future use cases where p-values are needed.
|
|
3
|
+
* Currently unused -- the reporting pipeline uses blockDifferenceCI() instead,
|
|
4
|
+
* which provides confidence intervals rather than p-values.
|
|
9
5
|
*/
|
|
10
6
|
|
|
11
|
-
import { average,
|
|
7
|
+
import { average, bootstrapSamples, median } from "./StatisticalUtils.ts";
|
|
12
8
|
|
|
13
9
|
/** Statistical comparison between baseline and current benchmark samples */
|
|
14
10
|
export interface ComparisonResult {
|
|
@@ -37,19 +33,17 @@ export interface ComparisonResult {
|
|
|
37
33
|
const significanceThreshold = 0.05;
|
|
38
34
|
const strongSignificance = 0.001;
|
|
39
35
|
const goodSignificance = 0.01;
|
|
40
|
-
const defaultBootstrapSamples = 10000;
|
|
41
36
|
|
|
42
37
|
/** @return statistical comparison between baseline and current samples */
|
|
43
38
|
export function compareWithBaseline(
|
|
44
39
|
baseline: number[],
|
|
45
40
|
current: number[],
|
|
46
41
|
): ComparisonResult {
|
|
47
|
-
const baselineMedian =
|
|
48
|
-
const currentMedian =
|
|
42
|
+
const baselineMedian = median(baseline);
|
|
43
|
+
const currentMedian = median(current);
|
|
49
44
|
const baselineMean = average(baseline);
|
|
50
45
|
const currentMean = average(current);
|
|
51
46
|
|
|
52
|
-
const median = (s: number[]) => percentile(s, 0.5);
|
|
53
47
|
const medianPValue = bootstrapDifferenceTest(baseline, current, median);
|
|
54
48
|
const meanPValue = bootstrapDifferenceTest(baseline, current, average);
|
|
55
49
|
|
|
@@ -74,12 +68,12 @@ function bootstrapDifferenceTest(
|
|
|
74
68
|
const n1 = sample1.length;
|
|
75
69
|
|
|
76
70
|
let moreExtreme = 0;
|
|
77
|
-
for (let i = 0; i <
|
|
71
|
+
for (let i = 0; i < bootstrapSamples; i++) {
|
|
78
72
|
const { resample1, resample2 } = shuffleAndSplit(combined, n1);
|
|
79
73
|
const diff = statistic(resample2) - statistic(resample1);
|
|
80
74
|
if (Math.abs(diff) >= Math.abs(observedDiff)) moreExtreme++;
|
|
81
75
|
}
|
|
82
|
-
return moreExtreme /
|
|
76
|
+
return moreExtreme / bootstrapSamples;
|
|
83
77
|
}
|
|
84
78
|
|
|
85
79
|
/** @return change statistics for a current vs baseline comparison */
|
|
@@ -100,10 +94,7 @@ function shuffleAndSplit(combined: number[], n1: number) {
|
|
|
100
94
|
const j = Math.floor(Math.random() * (i + 1));
|
|
101
95
|
[shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
|
|
102
96
|
}
|
|
103
|
-
return {
|
|
104
|
-
resample1: shuffled.slice(0, n1),
|
|
105
|
-
resample2: shuffled.slice(n1),
|
|
106
|
-
};
|
|
97
|
+
return { resample1: shuffled.slice(0, n1), resample2: shuffled.slice(n1) };
|
|
107
98
|
}
|
|
108
99
|
|
|
109
100
|
/** @return significance level based on p-value thresholds */
|