benchforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +432 -0
- package/bin/benchforge +3 -0
- package/dist/bin/benchforge.mjs +9 -0
- package/dist/bin/benchforge.mjs.map +1 -0
- package/dist/browser/index.js +914 -0
- package/dist/index.mjs +3 -0
- package/dist/src-CGuaC3Wo.mjs +3676 -0
- package/dist/src-CGuaC3Wo.mjs.map +1 -0
- package/package.json +49 -0
- package/src/BenchMatrix.ts +380 -0
- package/src/Benchmark.ts +33 -0
- package/src/BenchmarkReport.ts +156 -0
- package/src/GitUtils.ts +79 -0
- package/src/HtmlDataPrep.ts +148 -0
- package/src/MeasuredResults.ts +127 -0
- package/src/NodeGC.ts +48 -0
- package/src/PermutationTest.ts +115 -0
- package/src/StandardSections.ts +268 -0
- package/src/StatisticalUtils.ts +176 -0
- package/src/TypeUtil.ts +8 -0
- package/src/bin/benchforge.ts +4 -0
- package/src/browser/BrowserGcStats.ts +44 -0
- package/src/browser/BrowserHeapSampler.ts +248 -0
- package/src/cli/CliArgs.ts +64 -0
- package/src/cli/FilterBenchmarks.ts +68 -0
- package/src/cli/RunBenchCLI.ts +856 -0
- package/src/export/JsonExport.ts +103 -0
- package/src/export/JsonFormat.ts +91 -0
- package/src/export/PerfettoExport.ts +203 -0
- package/src/heap-sample/HeapSampleReport.ts +196 -0
- package/src/heap-sample/HeapSampler.ts +78 -0
- package/src/html/HtmlReport.ts +131 -0
- package/src/html/HtmlTemplate.ts +284 -0
- package/src/html/Types.ts +88 -0
- package/src/html/browser/CIPlot.ts +287 -0
- package/src/html/browser/HistogramKde.ts +118 -0
- package/src/html/browser/LegendUtils.ts +163 -0
- package/src/html/browser/RenderPlots.ts +263 -0
- package/src/html/browser/SampleTimeSeries.ts +389 -0
- package/src/html/browser/Types.ts +96 -0
- package/src/html/browser/index.ts +1 -0
- package/src/html/index.ts +17 -0
- package/src/index.ts +92 -0
- package/src/matrix/CaseLoader.ts +36 -0
- package/src/matrix/MatrixFilter.ts +103 -0
- package/src/matrix/MatrixReport.ts +290 -0
- package/src/matrix/VariantLoader.ts +46 -0
- package/src/runners/AdaptiveWrapper.ts +391 -0
- package/src/runners/BasicRunner.ts +368 -0
- package/src/runners/BenchRunner.ts +60 -0
- package/src/runners/CreateRunner.ts +11 -0
- package/src/runners/GcStats.ts +107 -0
- package/src/runners/RunnerOrchestrator.ts +374 -0
- package/src/runners/RunnerUtils.ts +2 -0
- package/src/runners/TimingUtils.ts +13 -0
- package/src/runners/WorkerScript.ts +256 -0
- package/src/table-util/ConvergenceFormatters.ts +19 -0
- package/src/table-util/Formatters.ts +152 -0
- package/src/table-util/README.md +70 -0
- package/src/table-util/TableReport.ts +293 -0
- package/src/table-util/test/TableReport.test.ts +105 -0
- package/src/table-util/test/TableValueExtractor.test.ts +41 -0
- package/src/table-util/test/TableValueExtractor.ts +100 -0
- package/src/test/AdaptiveRunner.test.ts +185 -0
- package/src/test/AdaptiveStatistics.integration.ts +119 -0
- package/src/test/BenchmarkReport.test.ts +82 -0
- package/src/test/BrowserBench.e2e.test.ts +44 -0
- package/src/test/BrowserBench.test.ts +79 -0
- package/src/test/GcStats.test.ts +94 -0
- package/src/test/PermutationTest.test.ts +121 -0
- package/src/test/RunBenchCLI.test.ts +166 -0
- package/src/test/RunnerOrchestrator.test.ts +102 -0
- package/src/test/StatisticalUtils.test.ts +112 -0
- package/src/test/TestUtils.ts +93 -0
- package/src/test/fixtures/test-bench-script.ts +30 -0
- package/src/tests/AdaptiveConvergence.test.ts +177 -0
- package/src/tests/AdaptiveSampling.test.ts +240 -0
- package/src/tests/BenchMatrix.test.ts +366 -0
- package/src/tests/MatrixFilter.test.ts +117 -0
- package/src/tests/MatrixReport.test.ts +139 -0
- package/src/tests/RealDataValidation.test.ts +177 -0
- package/src/tests/fixtures/baseline/impl.ts +4 -0
- package/src/tests/fixtures/bevy30-samples.ts +158 -0
- package/src/tests/fixtures/cases/asyncCases.ts +7 -0
- package/src/tests/fixtures/cases/cases.ts +8 -0
- package/src/tests/fixtures/cases/variants/product.ts +2 -0
- package/src/tests/fixtures/cases/variants/sum.ts +2 -0
- package/src/tests/fixtures/discover/fast.ts +1 -0
- package/src/tests/fixtures/discover/slow.ts +4 -0
- package/src/tests/fixtures/invalid/bad.ts +1 -0
- package/src/tests/fixtures/loader/fast.ts +1 -0
- package/src/tests/fixtures/loader/slow.ts +4 -0
- package/src/tests/fixtures/loader/stateful.ts +2 -0
- package/src/tests/fixtures/stateful/stateful.ts +2 -0
- package/src/tests/fixtures/variants/extra.ts +1 -0
- package/src/tests/fixtures/variants/impl.ts +1 -0
- package/src/tests/fixtures/worker/fast.ts +1 -0
- package/src/tests/fixtures/worker/slow.ts +4 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { expect, test } from "vitest";
|
|
2
|
+
import { compareWithBaseline } from "../PermutationTest.ts";
|
|
3
|
+
import { assertValid, getSampleData } from "./TestUtils.ts";
|
|
4
|
+
|
|
5
|
+
// Scaling every baseline sample by 0.8 is expected to be reported as a
// significant ~20% speed-up for both the median and the mean.
test("detects 20% performance improvement", () => {
  const baseline = getSampleData(0, 100);
  const faster = baseline.map(sample => sample * 0.8);
  const comparison = compareWithBaseline(baseline, faster);
  const { medianChange, meanChange } = comparison;
  const convincing = ["good", "strong"];

  // Direction and size of the shift
  expect(comparison.currentMedian).toBeLessThan(comparison.baselineMedian);
  expect(comparison.currentMean).toBeLessThan(comparison.baselineMean);
  expect(medianChange.percent).toBeCloseTo(-20, 0);
  expect(meanChange.percent).toBeCloseTo(-20, 0);

  // Statistical classification of the shift
  expect(medianChange.significant).toBe(true);
  expect(convincing).toContain(medianChange.significance);
  expect(meanChange.significant).toBe(true);
  expect(convincing).toContain(meanChange.significance);
});
|
|
19
|
+
|
|
20
|
+
// Scaling every baseline sample by 1.3 is expected to be reported as a
// significant ~30% slow-down for both the median and the mean.
test("detects 30% performance regression", () => {
  const baseline = getSampleData(100, 200);
  const slower = baseline.map(sample => sample * 1.3);
  const comparison = compareWithBaseline(baseline, slower);
  const { medianChange, meanChange } = comparison;

  expect(comparison.currentMedian).toBeGreaterThan(comparison.baselineMedian);
  expect(comparison.currentMean).toBeGreaterThan(comparison.baselineMean);
  expect(medianChange.percent).toBeCloseTo(30, 0);
  expect(meanChange.percent).toBeCloseTo(30, 0);
  expect(medianChange.significant).toBe(true);
  expect(meanChange.significant).toBe(true);
});
|
|
32
|
+
|
|
33
|
+
// Adding small symmetric noise (within ±1) to each baseline sample must not
// be classified as a performance change.
test("detects no change with noise", () => {
  // Seeded PRNG (mulberry32) instead of Math.random(): the noisy input is
  // now identical on every run, so a failure can be reproduced.
  // NOTE(review): compareWithBaseline may still use its own randomness.
  let state = 0x5eed1234;
  const nextRandom = (): number => {
    state = (state + 0x6d2b79f5) | 0;
    let t = Math.imul(state ^ (state >>> 15), state | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296; // uniform in [0, 1)
  };

  const baseline = getSampleData(200, 300);
  const noisy = baseline.map(v => v + (nextRandom() - 0.5) * 2);
  const result = compareWithBaseline(baseline, noisy);

  expect(Math.abs(result.medianChange.percent)).toBeLessThan(5);
  expect(Math.abs(result.meanChange.percent)).toBeLessThan(5);
  expect(result.medianChange.significant).toBe(false);
  expect(result.medianChange.significance).toBe("none");
  expect(result.meanChange.significant).toBe(false);
  expect(result.meanChange.significance).toBe("none");
});
|
|
45
|
+
|
|
46
|
+
// Compares two disjoint slices of the sample data; only sanity-checks the
// summary statistics and that the reported p-values are valid.
test("compares early vs late benchmark runs", () => {
  const earlyRuns = getSampleData(0, 50);
  const lateRuns = getSampleData(560, 610);
  const comparison = compareWithBaseline(earlyRuns, lateRuns);

  const centers = [
    comparison.baselineMedian,
    comparison.currentMedian,
    comparison.baselineMean,
    comparison.currentMean,
  ];
  for (const center of centers) {
    expect(center).toBeGreaterThan(40);
  }

  assertValid.pValue(comparison.medianChange.pValue);
  assertValid.pValue(comparison.meanChange.pValue);
});
|
|
58
|
+
|
|
59
|
+
// Comparing a data set with an exact copy of itself should yield p-values
// above 0.5 for both statistics.
test("produces high p-values for identical data", () => {
  const samples = getSampleData(300, 350);
  const copy = samples.slice();
  const { medianChange, meanChange } = compareWithBaseline(samples, copy);

  expect(medianChange.pValue).toBeGreaterThan(0.5);
  expect(meanChange.pValue).toBeGreaterThan(0.5);
});
|
|
67
|
+
|
|
68
|
+
// One slice halved versus another slice doubled gives roughly a 4x gap,
// which should produce p-values below 0.01.
test("produces low p-values for 4x performance difference", () => {
  const halved = getSampleData(400, 450).map(sample => sample * 0.5);
  const doubled = getSampleData(450, 500).map(sample => sample * 2.0);
  const { medianChange, meanChange } = compareWithBaseline(halved, doubled);

  expect(medianChange.pValue).toBeLessThan(0.01);
  expect(meanChange.pValue).toBeLessThan(0.01);
});
|
|
76
|
+
|
|
77
|
+
// Degenerate input: a one-element baseline against five equal values.
// Every statistic should be 50, with zero change and no significance.
test("handles single value vs identical values", () => {
  const single = [50];
  const identical = new Array<number>(5).fill(50);
  const comparison = compareWithBaseline(single, identical);
  const { medianChange, meanChange } = comparison;

  expect(comparison.baselineMedian).toBe(50);
  expect(comparison.currentMedian).toBe(50);
  expect(comparison.baselineMean).toBe(50);
  expect(comparison.currentMean).toBe(50);
  expect(medianChange.percent).toBe(0);
  expect(meanChange.percent).toBe(0);
  expect(medianChange.significant).toBe(false);
  expect(meanChange.significant).toBe(false);
});
|
|
91
|
+
|
|
92
|
+
// Scales the baseline by 5%, 15% and 50%, checks each reported significance
// is a known level, and that the 50% case ranks at least as high as the 5% case.
test("categorizes change significance by magnitude", () => {
  const baseline = getSampleData(0, 100);
  const scaledBy = (factor: number): number[] => baseline.map(v => v * factor);

  const slight = scaledBy(1.05);
  const moderate = scaledBy(1.15);
  const large = scaledBy(1.5);

  const slightLevel = compareWithBaseline(baseline, slight).medianChange
    .significance;
  const moderateLevel = compareWithBaseline(baseline, moderate).medianChange
    .significance;
  const largeLevel = compareWithBaseline(baseline, large).medianChange
    .significance;

  assertValid.significance(slightLevel);
  assertValid.significance(moderateLevel);
  assertValid.significance(largeLevel);

  // Levels listed weakest to strongest, so a larger index means stronger.
  const ranking = ["none", "weak", "good", "strong"];
  const slightRank = ranking.indexOf(slightLevel);
  const largeRank = ranking.indexOf(largeLevel);
  expect(largeRank).toBeGreaterThanOrEqual(slightRank);
});
|
|
112
|
+
|
|
113
|
+
// The first 20 samples are expected to be slower than samples 100-120,
// so the change from warm-up to stable should be negative.
test("compares warmup vs stable performance", () => {
  const warmupRuns = getSampleData(0, 20);
  const stableRuns = getSampleData(100, 120);
  const comparison = compareWithBaseline(warmupRuns, stableRuns);
  const { medianChange } = comparison;

  expect(comparison.baselineMedian).toBeGreaterThan(comparison.currentMedian);
  expect(medianChange.percent).toBeLessThan(0);
  expect(medianChange.absolute).toBeLessThan(0);
});
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { execSync } from "node:child_process";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { expect, test } from "vitest";
|
|
4
|
+
import type { BenchSuite } from "../Benchmark.ts";
|
|
5
|
+
import { filterBenchmarks } from "../cli/FilterBenchmarks.ts";
|
|
6
|
+
import { runBenchCLITest } from "./TestUtils.ts";
|
|
7
|
+
|
|
8
|
+
/** Two-group suite of trivial benchmarks, used by the CLI and filter tests. */
const testSuite: BenchSuite = {
  name: "Test Suite",
  groups: [
    {
      name: "String Operations",
      benchmarks: [
        {
          name: "concatenation",
          fn: () => "a" + "b",
        },
        {
          name: "template literal",
          fn: () => `a${"b"}`,
        },
      ],
    },
    {
      name: "Math Operations",
      benchmarks: [
        {
          name: "addition",
          fn: () => 1 + 1,
        },
        {
          name: "multiplication",
          fn: () => 2 * 2,
        },
      ],
    },
  ],
};
|
|
27
|
+
|
|
28
|
+
/**
 * Single-group suite whose `setup` builds two 100-element arrays that the
 * benchmark functions receive as their argument.
 */
const suiteWithSetup: BenchSuite = {
  name: "Array Suite",
  groups: [
    {
      name: "Array Operations",
      setup: () => {
        // numbers: 0..99, strings: "item0".."item99"
        return {
          numbers: Array.from({ length: 100 }, (_, i) => i),
          strings: Array.from({ length: 100 }, (_, i) => `item${i}`),
        };
      },
      benchmarks: [
        {
          name: "sum numbers",
          fn: ({ numbers }: any) =>
            numbers.reduce((a: number, b: number) => a + b, 0),
        },
        {
          name: "join strings",
          fn: ({ strings }: any) => strings.join(","),
        },
      ],
    },
  ],
};
|
|
51
|
+
|
|
52
|
+
// Without a filter, the report should mention every benchmark name plus the
// "mean" and "runs" column labels.
test("runs all benchmarks", { timeout: 30000 }, async () => {
  const report = await runBenchCLITest(testSuite, "--time 0.1");

  const expectedFragments = [
    "concatenation",
    "template literal",
    "addition",
    "multiplication",
    "mean",
    "runs",
  ];
  for (const fragment of expectedFragments) {
    expect(report).toContain(fragment);
  }
});
|
|
62
|
+
|
|
63
|
+
// `--filter concat` should keep "concatenation" and drop "addition".
test("filters by substring", { timeout: 15000 }, async () => {
  const cliArgs = "--filter concat --time 0.1";
  const report = await runBenchCLITest(testSuite, cliArgs);

  expect(report).toContain("concatenation");
  expect(report).not.toContain("addition");
});
|
|
69
|
+
|
|
70
|
+
// An anchored pattern (`^template`) should keep "template literal" and drop
// "addition".
test("filters by regex", { timeout: 15000 }, async () => {
  const cliArgs = "--filter ^template --time 0.1";
  const report = await runBenchCLITest(testSuite, cliArgs);

  expect(report).toContain("template literal");
  expect(report).not.toContain("addition");
});
|
|
78
|
+
|
|
79
|
+
// Filtering keeps the suite name and both groups; a group with no matching
// benchmark is kept with an empty benchmark list.
test("filter preserves suite structure", () => {
  const filtered = filterBenchmarks(testSuite, "concatenation", false);

  expect(filtered.name).toBe("Test Suite");
  expect(filtered.groups).toHaveLength(2);

  const [stringGroup, mathGroup] = filtered.groups;
  expect(stringGroup.name).toBe("String Operations");
  expect(stringGroup.benchmarks).toHaveLength(1);
  expect(stringGroup.benchmarks[0].name).toBe("concatenation");
  expect(mathGroup.benchmarks).toHaveLength(0);
});
|
|
89
|
+
|
|
90
|
+
/**
 * Run the `fixtures/test-bench-script.ts` fixture in a child Node process and
 * return what it wrote to stdout.
 *
 * @param args extra CLI flags appended verbatim to the command line; the
 *   command is parsed by a shell, so callers may use shell quoting
 * @returns the child's stdout decoded as UTF-8
 * @throws whatever `execSync` throws when the child process fails
 */
function executeTestScript(args = ""): string {
  const script = path.join(
    import.meta.dirname!,
    "fixtures/test-bench-script.ts",
  );
  // Quote the script path: an unquoted checkout path containing spaces would
  // be split by the shell into several arguments.
  const command = `node --expose-gc --allow-natives-syntax "${script}" ${args}`;
  return execSync(command, { encoding: "utf8" });
}
|
|
100
|
+
|
|
101
|
+
// Runs the fixture script as a separate process and checks that the printed
// report contains the benchmark names, the column labels, and at least one
// line each mentioning "name" and "plus".
test("e2e: runs user script", { timeout: 30000 }, () => {
  const report = executeTestScript("--time 0.1");

  for (const fragment of ["plus", "multiply", "mean", "runs"]) {
    expect(report).toContain(fragment);
  }

  const reportLines = report.split("\n");
  const hasLineWith = (text: string) =>
    reportLines.find(line => line.includes(text));

  expect(hasLineWith("name")).toBeTruthy();
  expect(hasLineWith("plus")).toBeTruthy();
});
|
|
116
|
+
|
|
117
|
+
// Passing --filter "plus" to the fixture script should drop "multiply"
// from the report.
test("e2e: filter flag", { timeout: 30000 }, () => {
  const cliArgs = '--filter "plus" --time 0.1';
  const report = executeTestScript(cliArgs);

  expect(report).toContain("plus");
  expect(report).not.toContain("multiply");
});
|
|
123
|
+
|
|
124
|
+
// A group with a `setup` function should still run and report both of its
// benchmarks.
test("runs benchmarks with setup function", { timeout: 30000 }, async () => {
  const report = await runBenchCLITest(suiteWithSetup, "--time 0.1");

  for (const fragment of ["sum numbers", "join strings", "mean", "runs"]) {
    expect(report).toContain(fragment);
  }
});
|
|
132
|
+
|
|
133
|
+
// A group that declares a `baseline` should report both entries and add a
// "Δ%" diff column.
test(
  "runs benchmarks with baseline comparison",
  { timeout: 30000 },
  async () => {
    // Ten random numbers, regenerated on each setup call.
    const makeData = () => ({
      data: Array.from({ length: 10 }, () => Math.random()),
    });

    const suiteWithBaseline: BenchSuite = {
      name: "Baseline Test",
      groups: [
        {
          name: "Sort Comparison",
          setup: makeData,
          baseline: {
            name: "baseline sort",
            fn: ({ data }: any) => [...data].sort(),
          },
          benchmarks: [
            {
              name: "optimized sort",
              fn: ({ data }: any) => [...data].sort((a, b) => a - b),
            },
          ],
        },
      ],
    };

    const report = await runBenchCLITest(suiteWithBaseline, "--time 0.01");

    expect(report).toContain("baseline sort");
    expect(report).toContain("optimized sort");
    expect(report).toContain("Δ%"); // Diff column should appear
    expect(report).toContain("mean");
  },
);
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { expect, test } from "vitest";
|
|
2
|
+
import type { BenchmarkSpec } from "../Benchmark.ts";
|
|
3
|
+
import { runBenchmark } from "../runners/RunnerOrchestrator.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Cheap workload for the runner tests.
 * @returns the sum of Math.sqrt(i) for i = 0..999
 */
function simpleTestFunction(): number {
  let total = 0;
  let n = 0;
  while (n < 1000) {
    total += Math.sqrt(n);
    n += 1;
  }
  return total;
}
|
|
11
|
+
|
|
12
|
+
// Runs the workload through the "basic" runner with useWorker enabled and
// sanity-checks the result's name, sample count and timing statistics.
test("BasicRunner runs benchmark in worker mode", async () => {
  const spec: BenchmarkSpec = {
    name: "basic-worker-test",
    fn: simpleTestFunction,
  };
  const options = { maxTime: 5, maxIterations: 50 };

  const results = await runBenchmark({
    spec,
    runner: "basic",
    options,
    useWorker: true,
  });

  expect(results).toHaveLength(1);
  const [result] = results;

  expect(result.name).toBe("basic-worker-test");

  const sampleCount = result.samples.length;
  expect(sampleCount).toBeGreaterThan(0);
  expect(sampleCount).toBeLessThanOrEqual(500);

  const timing = result.time;
  expect(timing.min).toBeGreaterThan(0);
  expect(timing.max).toBeGreaterThanOrEqual(timing.min);
  expect(timing.avg).toBeGreaterThan(0);
  expect(timing.p50).toBeGreaterThan(0);
  expect(timing.p99).toBeGreaterThan(0);
});
|
|
40
|
+
|
|
41
|
+
// Same workload with useWorker disabled; checks that one named result with
// samples and a positive p50 comes back.
test("BasicRunner runs benchmark in non-worker mode", async () => {
  const spec: BenchmarkSpec = {
    name: "basic-test",
    fn: simpleTestFunction,
  };
  const options = { maxTime: 5, maxIterations: 50 };

  const results = await runBenchmark({
    spec,
    runner: "basic",
    options,
    useWorker: false,
  });

  expect(results).toHaveLength(1);
  const [result] = results;

  expect(result.name).toBe("basic-test");
  expect(result.samples.length).toBeGreaterThan(0);
  expect(result.time.p50).toBeGreaterThan(0);
});
|
|
64
|
+
|
|
65
|
+
// A benchmark whose function takes a numeric parameter, supplied through
// the `params` option, should produce one result carrying the spec's name.
test("BasicRunner with parameterized benchmark", async () => {
  const sumBelow = (n: number) => {
    let total = 0;
    let i = 0;
    while (i < n) {
      total += i;
      i++;
    }
    return total;
  };
  const spec: BenchmarkSpec<number> = {
    name: "parameterized-test",
    fn: sumBelow,
  };

  const results = await runBenchmark({
    spec,
    runner: "basic",
    options: {
      maxTime: 5,
      maxIterations: 20,
    },
    useWorker: false,
    params: 100,
  });

  expect(results).toHaveLength(1);
  expect(results[0].name).toBe("parameterized-test");
});
|
|
86
|
+
|
|
87
|
+
// An exception thrown by the benchmark function in worker mode should
// reject the promise returned by runBenchmark with the same message.
test("RunnerOrchestrator propagates errors from worker", async () => {
  const failing = () => {
    throw new Error("Test error from benchmark");
  };
  const spec: BenchmarkSpec = { name: "error-test", fn: failing };

  const pending = runBenchmark({
    spec,
    runner: "basic",
    options: {
      maxTime: 1,
      maxIterations: 1,
    },
    useWorker: true,
  });

  await expect(pending).rejects.toThrow("Test error from benchmark");
});
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { expect, test } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
average,
|
|
4
|
+
bootstrapDifferenceCI,
|
|
5
|
+
bootstrapMedian,
|
|
6
|
+
coefficientOfVariation,
|
|
7
|
+
findOutliers,
|
|
8
|
+
medianAbsoluteDeviation,
|
|
9
|
+
percentile,
|
|
10
|
+
standardDeviation,
|
|
11
|
+
} from "../StatisticalUtils.ts";
|
|
12
|
+
import { assertValid, getSampleData } from "./TestUtils.ts";
|
|
13
|
+
|
|
14
|
+
// average() should agree with a hand-computed mean and handle the
// one-element and zero-sum cases exactly.
test("calculates mean correctly", () => {
  const subset = getSampleData(0, 10);

  let total = 0;
  for (const value of subset) total += value;
  const expected = total / subset.length;

  expect(average(subset)).toBeCloseTo(expected, 5);
  expect(average([10])).toBe(10);
  expect(average([-5, 5])).toBe(0);
});
|
|
21
|
+
|
|
22
|
+
// Real samples should give a spread strictly between 0 and 10; constant and
// single-element inputs should give exactly 0.
test("calculates standard deviation", () => {
  const spread = standardDeviation(getSampleData(50, 100));

  expect(spread).toBeGreaterThan(0);
  expect(spread).toBeLessThan(10);
  expect(standardDeviation([5, 5, 5])).toBe(0);
  expect(standardDeviation([5])).toBe(0);
});
|
|
30
|
+
|
|
31
|
+
// Percentiles should be non-decreasing in the requested quantile, the median
// of this slice should fall between 40 and 60, and a single value is its
// own percentile.
test("calculates percentiles in order", () => {
  const subset = getSampleData(100, 200);
  const quantile = (q: number): number => percentile(subset, q);

  const p25 = quantile(0.25);
  const p50 = quantile(0.5);
  const p75 = quantile(0.75);
  const p99 = quantile(0.99);

  assertValid.percentileOrder(p25, p50, p75, p99);
  expect(p50).toBeGreaterThan(40);
  expect(p50).toBeLessThan(60);
  expect(percentile([42], 0.5)).toBe(42);
});
|
|
43
|
+
|
|
44
|
+
// The coefficient of variation of this slice should lie between 0 and 0.2;
// zero-mean and constant inputs should both report 0.
test("calculates coefficient of variation", () => {
  const variation = coefficientOfVariation(getSampleData(200, 300));

  expect(variation).toBeGreaterThan(0);
  expect(variation).toBeLessThan(0.2);
  expect(coefficientOfVariation([-1, 0, 1])).toBe(0);
  expect(coefficientOfVariation([5, 5, 5])).toBe(0);
});
|
|
52
|
+
|
|
53
|
+
// The MAD of the first 30 samples should be positive and below 15; for
// [1, 2, 3, 4, 5] it should be exactly 1.
test("calculates median absolute deviation", () => {
  const deviation = medianAbsoluteDeviation(getSampleData(0, 30));

  expect(deviation).toBeGreaterThan(0);
  expect(deviation).toBeLessThan(15);
  expect(medianAbsoluteDeviation([1, 2, 3, 4, 5])).toBe(1);
});
|
|
60
|
+
|
|
61
|
+
// Appends the values 200 and 5 to 50 real samples, so they land at indices
// 50 and 51, and expects both indices to be flagged.
test("identifies outliers in mixed data", () => {
  const mixed = [...getSampleData(0, 50), 200, 5];
  const { rate, indices } = findOutliers(mixed);

  expect(rate).toBeGreaterThan(0);
  expect(indices).toContain(50);
  expect(indices).toContain(51);
});
|
|
70
|
+
|
|
71
|
+
// The bootstrap estimate should match the sample median, lie inside a
// confidence interval narrower than 5, and keep one value per resample.
test("bootstrap estimates median with confidence intervals", () => {
  const stable = getSampleData(400, 450);
  const sampleMedian = percentile(stable, 0.5);
  const { estimate, ci, samples } = bootstrapMedian(stable, {
    resamples: 1000,
  });

  expect(estimate).toBeCloseTo(sampleMedian, 1);
  expect(ci[0]).toBeLessThanOrEqual(estimate);
  expect(ci[1]).toBeGreaterThanOrEqual(estimate);
  expect(ci[1] - ci[0]).toBeLessThan(5);
  expect(samples).toHaveLength(1000);
});
|
|
82
|
+
|
|
83
|
+
// A 0.8x scaling should give about -20%, an interval entirely below zero,
// and the "faster" direction.
test("bootstrapDifferenceCI detects improvement", () => {
  const baseline = getSampleData(0, 100);
  const faster = baseline.map(sample => sample * 0.8);
  const { percent, ci, direction } = bootstrapDifferenceCI(baseline, faster, {
    resamples: 1000,
  });

  expect(percent).toBeCloseTo(-20, 0);
  expect(ci[1]).toBeLessThan(0);
  expect(direction).toBe("faster");
});
|
|
92
|
+
|
|
93
|
+
// A 1.2x scaling should give about +20%, an interval entirely above zero,
// and the "slower" direction.
test("bootstrapDifferenceCI detects regression", () => {
  const baseline = getSampleData(0, 100);
  const regressed = baseline.map(sample => sample * 1.2);
  const { percent, ci, direction } = bootstrapDifferenceCI(
    baseline,
    regressed,
    { resamples: 1000 },
  );

  expect(percent).toBeCloseTo(20, 0);
  expect(ci[0]).toBeGreaterThan(0);
  expect(direction).toBe("slower");
});
|
|
102
|
+
|
|
103
|
+
// Small symmetric noise (within ±1) around the baseline should leave the
// confidence interval straddling zero with an "uncertain" direction.
test("bootstrapDifferenceCI shows uncertainty for noise", () => {
  // Seeded PRNG (mulberry32) instead of Math.random(): the noisy input is
  // now identical on every run, so a failure can be reproduced.
  // NOTE(review): bootstrapDifferenceCI may still use its own randomness.
  let state = 0x0b00757a;
  const nextRandom = (): number => {
    state = (state + 0x6d2b79f5) | 0;
    let t = Math.imul(state ^ (state >>> 15), state | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296; // uniform in [0, 1)
  };

  const baseline = getSampleData(0, 100);
  const noisy = baseline.map(v => v + (nextRandom() - 0.5) * 2);
  const result = bootstrapDifferenceCI(baseline, noisy, { resamples: 1000 });

  // CI should span zero for no real change
  expect(result.ci[0]).toBeLessThanOrEqual(0);
  expect(result.ci[1]).toBeGreaterThanOrEqual(0);
  expect(result.direction).toBe("uncertain");
});
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import type { BenchSuite } from "../Benchmark.ts";
|
|
2
|
+
import type { BenchmarkReport } from "../BenchmarkReport.ts";
|
|
3
|
+
import type { Configure, DefaultCliArgs } from "../cli/CliArgs.ts";
|
|
4
|
+
import { parseCliArgs } from "../cli/CliArgs.ts";
|
|
5
|
+
import { defaultReport, runBenchmarks } from "../cli/RunBenchCLI.ts";
|
|
6
|
+
import type { MeasuredResults } from "../MeasuredResults.ts";
|
|
7
|
+
import { average, percentile } from "../StatisticalUtils.ts";
|
|
8
|
+
import { bevy30SamplesMs } from "../tests/fixtures/bevy30-samples.ts";
|
|
9
|
+
|
|
10
|
+
/**
 * Run `suite` through the CLI pipeline in-process: split `args` on
 * whitespace, parse the tokens, run the benchmarks and render the default
 * report. Splitting is whitespace-only; shell-style quoting is not interpreted.
 *
 * @param suite benchmark suite to run
 * @param args whitespace-separated CLI flags, e.g. "--filter concat --time 0.1"
 * @param configureArgs optional value forwarded to parseCliArgs
 * @return formatted benchmark output for CLI testing
 */
export async function runBenchCLITest<T = DefaultCliArgs>(
  suite: BenchSuite,
  args: string,
  configureArgs?: Configure<T>,
): Promise<string> {
  const tokens: string[] = [];
  for (const token of args.split(/\s+/)) {
    // Leading or trailing whitespace yields empty tokens; skip them.
    if (token.length > 0) tokens.push(token);
  }

  const cliArgs = parseCliArgs(tokens, configureArgs) as T & DefaultCliArgs;
  const measured = await runBenchmarks(suite, cliArgs);
  return defaultReport(measured, cliArgs);
}
|
|
21
|
+
|
|
22
|
+
/**
 * @param start index of the first sample to include
 * @param end index one past the last sample to include
 * @return copy of `bevy30SamplesMs[start, end)` for consistent test data
 */
export function getSampleData(start: number, end: number): number[] {
  const window = bevy30SamplesMs.slice(start, end);
  return window;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build a MeasuredResults fixture named "test" from a slice of the bevy30
 * samples. Timing statistics are computed from the slice; the GC numbers are
 * fixed placeholder values.
 *
 * @param sampleRange `[start, end)` indices passed to getSampleData
 * @param overrides fields spread last, replacing the generated ones
 * @return test MeasuredResults from bevy30 samples
 */
export function createMeasuredResults(
  sampleRange: [number, number],
  overrides?: Partial<MeasuredResults>,
): MeasuredResults {
  const [start, end] = sampleRange;
  const samples = getSampleData(start, end);

  // Sort a copy so `samples` keeps its original order.
  const ascending = samples.slice().sort((a, b) => a - b);

  const time = {
    min: ascending[0],
    max: ascending[ascending.length - 1],
    avg: average(samples),
    p50: percentile(samples, 0.5),
    p75: percentile(samples, 0.75),
    p99: percentile(samples, 0.99),
    p999: percentile(samples, 0.999),
  };

  const nodeGcTime = {
    inRun: 0.09,
    before: 0.01,
    after: 0.02,
    total: 0.12,
    collects: 3,
    events: [],
  };

  return { name: "test", samples, time, nodeGcTime, ...overrides };
}
|
|
58
|
+
|
|
59
|
+
/**
 * Wrap createMeasuredResults output in a named report.
 *
 * @param name report name
 * @param sampleRange `[start, end)` indices forwarded to createMeasuredResults
 * @param overrides MeasuredResults fields forwarded to createMeasuredResults
 * @return test BenchmarkReport from bevy30 samples
 */
export function createBenchmarkReport(
  name: string,
  sampleRange: [number, number],
  overrides?: Partial<MeasuredResults>,
): BenchmarkReport {
  const measuredResults = createMeasuredResults(sampleRange, overrides);
  return { name, measuredResults };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Validation helpers for statistical tests. Each helper returns nothing on
 * success and throws an `Error` describing the offending value(s) otherwise.
 */
export const assertValid = {
  /** Throws unless `value` lies in the closed interval [0, 1]; NaN is rejected. */
  pValue: (value: number) => {
    // Negated range check: NaN fails every comparison, so `value < 0 ||
    // value > 1` would have let it through.
    if (!(value >= 0 && value <= 1)) {
      throw new Error(`Expected p-value between 0 and 1, got ${value}`);
    }
  },

  /** Throws unless p25 <= p50 <= p75 <= p99 (any NaN fails the check). */
  percentileOrder: (p25: number, p50: number, p75: number, p99: number) => {
    if (!(p25 <= p50 && p50 <= p75 && p75 <= p99)) {
      throw new Error(
        `Percentiles not ordered: p25=${p25}, p50=${p50}, p75=${p75}, p99=${p99}`,
      );
    }
  },

  /** Throws unless `level` is one of "none", "weak", "good" or "strong". */
  significance: (level: string) => {
    const valid = ["none", "weak", "good", "strong"];
    if (!valid.includes(level)) {
      throw new Error(`Invalid significance level: ${level}`);
    }
  },
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env -S node --expose-gc --allow-natives-syntax
|
|
2
|
+
import { type BenchSuite, runDefaultBench } from "../../index.ts";
|
|
3
|
+
|
|
4
|
+
// Fixture suite for the end-to-end CLI tests: one group of plain arithmetic
// benchmarks and one group whose benchmark receives the object built by
// `setup`.
const suite: BenchSuite = {
  name: "Test",
  groups: [
    {
      name: "Math",
      benchmarks: [
        {
          name: "plus",
          fn: () => 1 + 1,
        },
        {
          name: "multiply",
          fn: () => 2 * 2,
        },
      ],
    },
    {
      name: "Array Math",
      setup: () => {
        return { nums: [1, 2, 3, 4, 5] };
      },
      benchmarks: [
        {
          name: "array sum",
          fn: ({ nums }: any) =>
            nums.reduce((a: number, b: number) => a + b, 0),
        },
      ],
    },
  ],
};

// Hand the suite to the library's default entry point.
await runDefaultBench(suite);
|