benchforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +432 -0
- package/bin/benchforge +3 -0
- package/dist/bin/benchforge.mjs +9 -0
- package/dist/bin/benchforge.mjs.map +1 -0
- package/dist/browser/index.js +914 -0
- package/dist/index.mjs +3 -0
- package/dist/src-CGuaC3Wo.mjs +3676 -0
- package/dist/src-CGuaC3Wo.mjs.map +1 -0
- package/package.json +49 -0
- package/src/BenchMatrix.ts +380 -0
- package/src/Benchmark.ts +33 -0
- package/src/BenchmarkReport.ts +156 -0
- package/src/GitUtils.ts +79 -0
- package/src/HtmlDataPrep.ts +148 -0
- package/src/MeasuredResults.ts +127 -0
- package/src/NodeGC.ts +48 -0
- package/src/PermutationTest.ts +115 -0
- package/src/StandardSections.ts +268 -0
- package/src/StatisticalUtils.ts +176 -0
- package/src/TypeUtil.ts +8 -0
- package/src/bin/benchforge.ts +4 -0
- package/src/browser/BrowserGcStats.ts +44 -0
- package/src/browser/BrowserHeapSampler.ts +248 -0
- package/src/cli/CliArgs.ts +64 -0
- package/src/cli/FilterBenchmarks.ts +68 -0
- package/src/cli/RunBenchCLI.ts +856 -0
- package/src/export/JsonExport.ts +103 -0
- package/src/export/JsonFormat.ts +91 -0
- package/src/export/PerfettoExport.ts +203 -0
- package/src/heap-sample/HeapSampleReport.ts +196 -0
- package/src/heap-sample/HeapSampler.ts +78 -0
- package/src/html/HtmlReport.ts +131 -0
- package/src/html/HtmlTemplate.ts +284 -0
- package/src/html/Types.ts +88 -0
- package/src/html/browser/CIPlot.ts +287 -0
- package/src/html/browser/HistogramKde.ts +118 -0
- package/src/html/browser/LegendUtils.ts +163 -0
- package/src/html/browser/RenderPlots.ts +263 -0
- package/src/html/browser/SampleTimeSeries.ts +389 -0
- package/src/html/browser/Types.ts +96 -0
- package/src/html/browser/index.ts +1 -0
- package/src/html/index.ts +17 -0
- package/src/index.ts +92 -0
- package/src/matrix/CaseLoader.ts +36 -0
- package/src/matrix/MatrixFilter.ts +103 -0
- package/src/matrix/MatrixReport.ts +290 -0
- package/src/matrix/VariantLoader.ts +46 -0
- package/src/runners/AdaptiveWrapper.ts +391 -0
- package/src/runners/BasicRunner.ts +368 -0
- package/src/runners/BenchRunner.ts +60 -0
- package/src/runners/CreateRunner.ts +11 -0
- package/src/runners/GcStats.ts +107 -0
- package/src/runners/RunnerOrchestrator.ts +374 -0
- package/src/runners/RunnerUtils.ts +2 -0
- package/src/runners/TimingUtils.ts +13 -0
- package/src/runners/WorkerScript.ts +256 -0
- package/src/table-util/ConvergenceFormatters.ts +19 -0
- package/src/table-util/Formatters.ts +152 -0
- package/src/table-util/README.md +70 -0
- package/src/table-util/TableReport.ts +293 -0
- package/src/table-util/test/TableReport.test.ts +105 -0
- package/src/table-util/test/TableValueExtractor.test.ts +41 -0
- package/src/table-util/test/TableValueExtractor.ts +100 -0
- package/src/test/AdaptiveRunner.test.ts +185 -0
- package/src/test/AdaptiveStatistics.integration.ts +119 -0
- package/src/test/BenchmarkReport.test.ts +82 -0
- package/src/test/BrowserBench.e2e.test.ts +44 -0
- package/src/test/BrowserBench.test.ts +79 -0
- package/src/test/GcStats.test.ts +94 -0
- package/src/test/PermutationTest.test.ts +121 -0
- package/src/test/RunBenchCLI.test.ts +166 -0
- package/src/test/RunnerOrchestrator.test.ts +102 -0
- package/src/test/StatisticalUtils.test.ts +112 -0
- package/src/test/TestUtils.ts +93 -0
- package/src/test/fixtures/test-bench-script.ts +30 -0
- package/src/tests/AdaptiveConvergence.test.ts +177 -0
- package/src/tests/AdaptiveSampling.test.ts +240 -0
- package/src/tests/BenchMatrix.test.ts +366 -0
- package/src/tests/MatrixFilter.test.ts +117 -0
- package/src/tests/MatrixReport.test.ts +139 -0
- package/src/tests/RealDataValidation.test.ts +177 -0
- package/src/tests/fixtures/baseline/impl.ts +4 -0
- package/src/tests/fixtures/bevy30-samples.ts +158 -0
- package/src/tests/fixtures/cases/asyncCases.ts +7 -0
- package/src/tests/fixtures/cases/cases.ts +8 -0
- package/src/tests/fixtures/cases/variants/product.ts +2 -0
- package/src/tests/fixtures/cases/variants/sum.ts +2 -0
- package/src/tests/fixtures/discover/fast.ts +1 -0
- package/src/tests/fixtures/discover/slow.ts +4 -0
- package/src/tests/fixtures/invalid/bad.ts +1 -0
- package/src/tests/fixtures/loader/fast.ts +1 -0
- package/src/tests/fixtures/loader/slow.ts +4 -0
- package/src/tests/fixtures/loader/stateful.ts +2 -0
- package/src/tests/fixtures/stateful/stateful.ts +2 -0
- package/src/tests/fixtures/variants/extra.ts +1 -0
- package/src/tests/fixtures/variants/impl.ts +1 -0
- package/src/tests/fixtures/worker/fast.ts +1 -0
- package/src/tests/fixtures/worker/slow.ts +4 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ReportColumnGroup,
|
|
3
|
+
ReportGroup,
|
|
4
|
+
ResultsMapper,
|
|
5
|
+
} from "./BenchmarkReport.ts";
|
|
6
|
+
import type { GitVersion } from "./GitUtils.ts";
|
|
7
|
+
import type {
|
|
8
|
+
BenchmarkData,
|
|
9
|
+
DifferenceCI,
|
|
10
|
+
FormattedStat,
|
|
11
|
+
GroupData,
|
|
12
|
+
ReportData,
|
|
13
|
+
} from "./html/index.ts";
|
|
14
|
+
import { bootstrapDifferenceCI } from "./StatisticalUtils.ts";
|
|
15
|
+
|
|
16
|
+
/** Options controlling how benchmark results are turned into HTML report data */
export interface PrepareHtmlOptions {
  /** CLI arguments recorded in report metadata; `"gc-stats"` also drives gcTrackingEnabled */
  cliArgs?: Record<string, unknown>;
  /** Report sections used to format per-benchmark stats and pick the comparison direction */
  sections?: ResultsMapper[];
  /** Git version of the current run, recorded in metadata */
  currentVersion?: GitVersion;
  /** Git version of the baseline run, recorded in metadata */
  baselineVersion?: GitVersion;
}
|
|
22
|
+
|
|
23
|
+
/** Find higherIsBetter from first comparable column in sections */
|
|
24
|
+
function findHigherIsBetter(sections?: ResultsMapper[]): boolean {
|
|
25
|
+
const cols = sections?.flatMap(s => s.columns().flatMap(g => g.columns));
|
|
26
|
+
return cols?.find(c => c.comparable)?.higherIsBetter ?? false;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/** Flip CI percent for metrics where higher is better (e.g., lines/sec) */
|
|
30
|
+
function flipCI(ci: DifferenceCI): DifferenceCI {
|
|
31
|
+
return {
|
|
32
|
+
percent: -ci.percent,
|
|
33
|
+
ci: [-ci.ci[1], -ci.ci[0]],
|
|
34
|
+
direction: ci.direction,
|
|
35
|
+
histogram: ci.histogram?.map(bin => ({ x: -bin.x, count: bin.count })),
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/** Prepare ReportData from benchmark results for HTML rendering */
|
|
40
|
+
export function prepareHtmlData(
|
|
41
|
+
groups: ReportGroup[],
|
|
42
|
+
options: PrepareHtmlOptions,
|
|
43
|
+
): ReportData {
|
|
44
|
+
const { cliArgs, sections, currentVersion, baselineVersion } = options;
|
|
45
|
+
const higherIsBetter = findHigherIsBetter(sections);
|
|
46
|
+
return {
|
|
47
|
+
groups: groups.map(group =>
|
|
48
|
+
prepareGroupData(group, sections, higherIsBetter),
|
|
49
|
+
),
|
|
50
|
+
metadata: {
|
|
51
|
+
timestamp: new Date().toISOString(),
|
|
52
|
+
bencherVersion: process.env.npm_package_version || "unknown",
|
|
53
|
+
cliArgs,
|
|
54
|
+
gcTrackingEnabled: cliArgs?.["gc-stats"] === true,
|
|
55
|
+
currentVersion,
|
|
56
|
+
baselineVersion,
|
|
57
|
+
},
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** @return group data with bootstrap CI comparisons against baseline */
|
|
62
|
+
function prepareGroupData(
|
|
63
|
+
group: ReportGroup,
|
|
64
|
+
sections?: ResultsMapper[],
|
|
65
|
+
higherIsBetter?: boolean,
|
|
66
|
+
): GroupData {
|
|
67
|
+
const baselineSamples = group.baseline?.measuredResults.samples;
|
|
68
|
+
return {
|
|
69
|
+
name: group.name,
|
|
70
|
+
baseline: group.baseline
|
|
71
|
+
? prepareBenchmarkData(group.baseline, sections)
|
|
72
|
+
: undefined,
|
|
73
|
+
benchmarks: group.reports.map(report => {
|
|
74
|
+
const samples = report.measuredResults.samples;
|
|
75
|
+
const rawCI =
|
|
76
|
+
baselineSamples && samples
|
|
77
|
+
? bootstrapDifferenceCI(baselineSamples, samples)
|
|
78
|
+
: undefined;
|
|
79
|
+
const comparisonCI = rawCI && higherIsBetter ? flipCI(rawCI) : rawCI;
|
|
80
|
+
return { ...prepareBenchmarkData(report, sections), comparisonCI };
|
|
81
|
+
}),
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/** @return benchmark data with samples, stats, and formatted section values */
|
|
86
|
+
function prepareBenchmarkData(
|
|
87
|
+
report: {
|
|
88
|
+
name: string;
|
|
89
|
+
measuredResults: any;
|
|
90
|
+
metadata?: Record<string, unknown>;
|
|
91
|
+
},
|
|
92
|
+
sections?: ResultsMapper[],
|
|
93
|
+
): Omit<BenchmarkData, "comparisonCI"> {
|
|
94
|
+
const { measuredResults } = report;
|
|
95
|
+
return {
|
|
96
|
+
name: report.name,
|
|
97
|
+
samples: measuredResults.samples,
|
|
98
|
+
warmupSamples: measuredResults.warmupSamples,
|
|
99
|
+
allocationSamples: measuredResults.allocationSamples,
|
|
100
|
+
heapSamples: measuredResults.heapSamples,
|
|
101
|
+
gcEvents: measuredResults.nodeGcTime?.events,
|
|
102
|
+
optSamples: measuredResults.optSamples,
|
|
103
|
+
pausePoints: measuredResults.pausePoints,
|
|
104
|
+
stats: measuredResults.time,
|
|
105
|
+
heapSize: measuredResults.heapSize,
|
|
106
|
+
sectionStats: sections ? extractSectionStats(report, sections) : undefined,
|
|
107
|
+
};
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/** @return formatted stats from all sections for tooltip display */
|
|
111
|
+
function extractSectionStats(
|
|
112
|
+
report: { measuredResults: any; metadata?: Record<string, unknown> },
|
|
113
|
+
sections: ResultsMapper[],
|
|
114
|
+
): FormattedStat[] {
|
|
115
|
+
return sections.flatMap(section => {
|
|
116
|
+
const vals = section.extract(report.measuredResults, report.metadata);
|
|
117
|
+
return section.columns().flatMap(g => formatGroupStats(vals, g));
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/** @return formatted stats for one column group, skipping undefined values */
|
|
122
|
+
function formatGroupStats(
|
|
123
|
+
values: Record<string, unknown>,
|
|
124
|
+
group: ReportColumnGroup<Record<string, unknown>>,
|
|
125
|
+
): FormattedStat[] {
|
|
126
|
+
return group.columns
|
|
127
|
+
.map(c => formatColumnStat(values, c, group.groupTitle))
|
|
128
|
+
.filter((s): s is FormattedStat => s !== undefined);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/** Minimal structural view of a report column: a key into the extracted values plus display info */
type ColumnLike = {
  key: string;
  title: string;
  /** Formats the raw value for display; a null or empty result means "nothing to show" */
  formatter?: (v: unknown) => string | null;
};
|
|
136
|
+
|
|
137
|
+
/** @return formatted stat for a single column, or undefined if empty/placeholder */
|
|
138
|
+
function formatColumnStat(
|
|
139
|
+
values: Record<string, unknown>,
|
|
140
|
+
col: ColumnLike,
|
|
141
|
+
groupTitle?: string,
|
|
142
|
+
): FormattedStat | undefined {
|
|
143
|
+
const raw = values[col.key];
|
|
144
|
+
if (raw === undefined) return undefined;
|
|
145
|
+
const formatted = col.formatter ? col.formatter(raw) : String(raw);
|
|
146
|
+
if (!formatted || formatted === "—" || formatted === "") return undefined;
|
|
147
|
+
return { label: col.title, value: formatted, groupTitle };
|
|
148
|
+
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import type { HeapProfile } from "./heap-sample/HeapSampler.ts";
|
|
2
|
+
import type { NodeGCTime } from "./NodeGC.ts";
|
|
3
|
+
import type { GcStats } from "./runners/GcStats.ts";
|
|
4
|
+
|
|
5
|
+
/** CPU performance counter stats */
export interface CpuCounts {
  /** Instruction count */
  instructions?: number;
  /** CPU cycle count */
  cycles?: number;
  /** Branch misprediction count */
  branchMisses?: number;
}

/** Benchmark results: times in milliseconds, sizes in kilobytes */
export interface MeasuredResults {
  /** Benchmark name, used as the report row label */
  name: string;

  /** Raw execution time samples for custom statistics */
  samples: number[];

  /** Warmup iteration timings (ms) - captured before gc/settle */
  warmupSamples?: number[];

  /** Raw allocation samples per iteration (KB) */
  allocationSamples?: number[];

  /** Heap size per sample (bytes) - used for charts */
  heapSamples?: number[];

  /** Wall-clock timestamps per sample (μs since process start) - for Perfetto export */
  timestamps?: number[];

  /** Execution time in milliseconds (measurement overhead excluded by mitata) */
  time: {
    min: number;
    max: number;
    avg: number;
    /** 25th percentile */
    p25?: number;
    /** Median */
    p50: number;
    /** 75th percentile */
    p75: number;
    /** 95th percentile */
    p95?: number;
    /** 99th percentile */
    p99: number;
    /** 99.9th percentile */
    p999: number;
    /** Coefficient of variation */
    cv?: number;
    /** Median absolute deviation */
    mad?: number;
    // NOTE(review): outlier classification rule lives in the runner — confirm there
    outlierRate?: number;
  };

  /** Heap size increase during test run (kilobytes) */
  heapSize?: {
    avg: number;
    min: number;
    max: number;
  };

  /**
   * Time for explicit gc() call after test execution (milliseconds).
   * Does not include GC time during test execution.
   * Only reported by mitata runner.
   */
  gcTime?: {
    avg: number;
    min: number;
    max: number;
  };

  /** CPU counter stats from @mitata/counters (requires root access) */
  cpu?: CpuCounts;

  /** L1 cache miss rate */
  cpuCacheMiss?: number;

  /** CPU stall rate (macOS only) */
  cpuStall?: number;

  /**
   * Stop-the-world GC time blocking main thread (milliseconds).
   * Measured via Node's performance hooks when nodeObserveGC is true.
   * Excludes parallel thread collection time and indirect slowdowns.
   */
  nodeGcTime?: NodeGCTime;

  /** Total time spent collecting samples (seconds) */
  totalTime?: number;

  /** Convergence information for adaptive mode */
  convergence?: {
    /** Whether the adaptive runner decided the samples converged */
    converged: boolean;
    /** Convergence confidence; shown as "conv%" in reports */
    confidence: number;
    /** Human-readable explanation of why sampling stopped */
    reason: string;
  };

  /** V8 optimization tier tracking (requires --allow-natives-syntax) */
  optStatus?: OptStatusInfo;

  /** Per-sample V8 optimization status codes (for chart visualization) */
  optSamples?: number[];

  /** Points where pauses occurred for V8 optimization */
  pausePoints?: PausePoint[];

  /** GC stats from V8's --trace-gc-nvp (requires --gc-stats and worker mode) */
  gcStats?: GcStats;

  /** Heap sampling allocation profile (requires --heap-sample and worker mode) */
  heapProfile?: HeapProfile;
}

/** A pause point during sample collection for V8 optimization */
export interface PausePoint {
  /** Sample index where pause occurred (after this iteration) */
  sampleIndex: number;
  /** Pause duration in milliseconds */
  durationMs: number;
}

/** V8 optimization tier distribution */
export interface OptTierInfo {
  /** Number of samples that ran in this tier */
  count: number;
  /** Median sample time (ms) while in this tier */
  medianMs: number;
}

/** V8 optimization status summary */
export interface OptStatusInfo {
  /** Samples by tier name (e.g., "turbofan", "sparkplug") */
  byTier: Record<string, OptTierInfo>;
  /** Number of samples with deopt flag set */
  deoptCount: number;
}
|
package/src/NodeGC.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { PerformanceEntry } from "node:perf_hooks";
|
|
2
|
+
|
|
3
|
+
/** Individual GC event for visualization */
export interface GcEvent {
  /** Offset from collection start (ms) - can be negative for warmup GCs */
  offset: number;
  /** Duration of GC pause (ms) */
  duration: number;
}

/** GC time measured by Node's performance hooks */
export interface NodeGCTime {
  /** Total GC time (ms) that started inside the benchmark window */
  inRun: number;
  /** Total GC time (ms) that started before the benchmark window */
  before: number;
  /** Total GC time (ms) that started after the benchmark window */
  after: number;
  /** Sum of inRun + before + after (ms) */
  total: number;
  /** Number of GC events that started inside the benchmark window */
  collects: number;
  /** Individual GC events during sample collection (for visualization) */
  events: GcEvent[];
}
|
|
21
|
+
|
|
22
|
+
/** Correlate GC events with benchmark timing */
|
|
23
|
+
export function analyzeGCEntries(
|
|
24
|
+
gcRecords: PerformanceEntry[],
|
|
25
|
+
benchTime: [number, number],
|
|
26
|
+
): NodeGCTime {
|
|
27
|
+
const [start, end] = benchTime;
|
|
28
|
+
let inRun = 0;
|
|
29
|
+
let before = 0;
|
|
30
|
+
let after = 0;
|
|
31
|
+
let collects = 0;
|
|
32
|
+
const events: GcEvent[] = [];
|
|
33
|
+
|
|
34
|
+
gcRecords.forEach(record => {
|
|
35
|
+
const { duration, startTime } = record;
|
|
36
|
+
if (startTime < start) {
|
|
37
|
+
before += duration;
|
|
38
|
+
} else if (startTime > end) {
|
|
39
|
+
after += duration;
|
|
40
|
+
} else {
|
|
41
|
+
inRun += duration;
|
|
42
|
+
collects++;
|
|
43
|
+
events.push({ offset: startTime - start, duration });
|
|
44
|
+
}
|
|
45
|
+
});
|
|
46
|
+
const total = inRun + before + after;
|
|
47
|
+
return { inRun, before, after, total, collects, events };
|
|
48
|
+
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Permutation-based hypothesis testing for benchmark comparisons.
|
|
3
|
+
*
|
|
4
|
+
* Currently unused - the main reporting pipeline uses bootstrapDifferenceCI()
|
|
5
|
+
* from StatisticalUtils.ts instead, which provides confidence intervals on
|
|
6
|
+
* the difference rather than p-values.
|
|
7
|
+
*
|
|
8
|
+
* Kept for potential future use cases where p-values are needed.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { average, percentile } from "./StatisticalUtils.ts";
|
|
12
|
+
|
|
13
|
+
// p-value below which a change is reported as significant at all
const significanceThreshold = 0.05;
// p-value thresholds for grading significance strength
const strongSignificance = 0.001;
const goodSignificance = 0.01;
// Number of random permutations drawn per test
const defaultBootstrapSamples = 10000;

/** Statistical comparison between baseline and current benchmark samples */
export interface ComparisonResult {
  baselineMedian: number;
  currentMedian: number;
  baselineMean: number;
  currentMean: number;

  /** Median shift (current - baseline) with permutation-test significance */
  medianChange: {
    absolute: number;
    /** (current - baseline) / baseline * 100 */
    percent: number;
    pValue: number;
    /** True when pValue < 0.05 */
    significant: boolean;
    significance: "strong" | "good" | "weak" | "none";
  };

  /** Mean shift (current - baseline) with permutation-test significance */
  meanChange: {
    absolute: number;
    /** (current - baseline) / baseline * 100 */
    percent: number;
    pValue: number;
    /** True when pValue < 0.05 */
    significant: boolean;
    significance: "strong" | "good" | "weak" | "none";
  };
}
|
|
41
|
+
|
|
42
|
+
/** @return statistical comparison between baseline and current samples */
|
|
43
|
+
export function compareWithBaseline(
|
|
44
|
+
baseline: number[],
|
|
45
|
+
current: number[],
|
|
46
|
+
): ComparisonResult {
|
|
47
|
+
const baselineMedian = percentile(baseline, 0.5);
|
|
48
|
+
const currentMedian = percentile(current, 0.5);
|
|
49
|
+
const baselineMean = average(baseline);
|
|
50
|
+
const currentMean = average(current);
|
|
51
|
+
|
|
52
|
+
const median = (s: number[]) => percentile(s, 0.5);
|
|
53
|
+
const medianPValue = bootstrapDifferenceTest(baseline, current, median);
|
|
54
|
+
const meanPValue = bootstrapDifferenceTest(baseline, current, average);
|
|
55
|
+
|
|
56
|
+
return {
|
|
57
|
+
baselineMedian,
|
|
58
|
+
currentMedian,
|
|
59
|
+
baselineMean,
|
|
60
|
+
currentMean,
|
|
61
|
+
medianChange: changeStats(currentMedian, baselineMedian, medianPValue),
|
|
62
|
+
meanChange: changeStats(currentMean, baselineMean, meanPValue),
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/** @return change statistics for a current vs baseline comparison */
|
|
67
|
+
function changeStats(current: number, base: number, pValue: number) {
|
|
68
|
+
return {
|
|
69
|
+
absolute: current - base,
|
|
70
|
+
percent: ((current - base) / base) * 100,
|
|
71
|
+
pValue,
|
|
72
|
+
significant: pValue < significanceThreshold,
|
|
73
|
+
significance: getSignificance(pValue),
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/** @return significance level based on p-value thresholds */
|
|
78
|
+
function getSignificance(pValue: number): "strong" | "good" | "weak" | "none" {
|
|
79
|
+
if (pValue < strongSignificance) return "strong";
|
|
80
|
+
if (pValue < goodSignificance) return "good";
|
|
81
|
+
if (pValue < significanceThreshold) return "weak";
|
|
82
|
+
return "none";
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/** @return p-value from permutation test for difference in statistics */
|
|
86
|
+
function bootstrapDifferenceTest(
|
|
87
|
+
sample1: number[],
|
|
88
|
+
sample2: number[],
|
|
89
|
+
statistic: (samples: number[]) => number,
|
|
90
|
+
): number {
|
|
91
|
+
const observedDiff = statistic(sample2) - statistic(sample1);
|
|
92
|
+
const combined = [...sample1, ...sample2];
|
|
93
|
+
const n1 = sample1.length;
|
|
94
|
+
|
|
95
|
+
let moreExtreme = 0;
|
|
96
|
+
for (let i = 0; i < defaultBootstrapSamples; i++) {
|
|
97
|
+
const { resample1, resample2 } = shuffleAndSplit(combined, n1);
|
|
98
|
+
const diff = statistic(resample2) - statistic(resample1);
|
|
99
|
+
if (Math.abs(diff) >= Math.abs(observedDiff)) moreExtreme++;
|
|
100
|
+
}
|
|
101
|
+
return moreExtreme / defaultBootstrapSamples;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/** @return randomly shuffled samples split at n1 (Fisher-Yates shuffle) */
|
|
105
|
+
function shuffleAndSplit(combined: number[], n1: number) {
|
|
106
|
+
const shuffled = [...combined];
|
|
107
|
+
for (let i = shuffled.length - 1; i > 0; i--) {
|
|
108
|
+
const j = Math.floor(Math.random() * (i + 1));
|
|
109
|
+
[shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
|
|
110
|
+
}
|
|
111
|
+
return {
|
|
112
|
+
resample1: shuffled.slice(0, n1),
|
|
113
|
+
resample2: shuffled.slice(n1),
|
|
114
|
+
};
|
|
115
|
+
}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import type { ReportColumnGroup, ResultsMapper } from "./BenchmarkReport.ts";
|
|
2
|
+
import type { MeasuredResults } from "./MeasuredResults.ts";
|
|
3
|
+
import { formatConvergence } from "./table-util/ConvergenceFormatters.ts";
|
|
4
|
+
import {
|
|
5
|
+
formatBytes,
|
|
6
|
+
integer,
|
|
7
|
+
percent,
|
|
8
|
+
percentPrecision,
|
|
9
|
+
timeMs,
|
|
10
|
+
} from "./table-util/Formatters.ts";
|
|
11
|
+
|
|
12
|
+
export interface TimeStats {
|
|
13
|
+
mean?: number;
|
|
14
|
+
p50?: number;
|
|
15
|
+
p99?: number;
|
|
16
|
+
convergence?: number;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/** Section: mean, p50, p99 timing with convergence */
|
|
20
|
+
export const timeSection: ResultsMapper<TimeStats> = {
|
|
21
|
+
extract: (results: MeasuredResults) => ({
|
|
22
|
+
mean: results.time?.avg,
|
|
23
|
+
p50: results.time?.p50,
|
|
24
|
+
p99: results.time?.p99,
|
|
25
|
+
convergence: results.convergence?.confidence,
|
|
26
|
+
}),
|
|
27
|
+
columns: (): ReportColumnGroup<TimeStats>[] => [
|
|
28
|
+
{
|
|
29
|
+
groupTitle: "time",
|
|
30
|
+
columns: [
|
|
31
|
+
{ key: "mean", title: "mean", formatter: timeMs, comparable: true },
|
|
32
|
+
{ key: "p50", title: "p50", formatter: timeMs, comparable: true },
|
|
33
|
+
{ key: "p99", title: "p99", formatter: timeMs, comparable: true },
|
|
34
|
+
],
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
columns: [
|
|
38
|
+
{ key: "convergence", title: "conv%", formatter: formatConvergence },
|
|
39
|
+
],
|
|
40
|
+
},
|
|
41
|
+
],
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
export interface GcSectionStats {
|
|
45
|
+
gc?: number; // GC time as fraction of total bench time
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/** Section: GC time as fraction of total benchmark time (Node performance hooks) */
|
|
49
|
+
export const gcSection: ResultsMapper<GcSectionStats> = {
|
|
50
|
+
extract: (results: MeasuredResults) => {
|
|
51
|
+
const { nodeGcTime, time, samples } = results;
|
|
52
|
+
if (!nodeGcTime || !time?.avg) return { gc: undefined };
|
|
53
|
+
const totalBenchTime = time.avg * samples.length;
|
|
54
|
+
if (totalBenchTime <= 0) return { gc: undefined };
|
|
55
|
+
const gcTime = nodeGcTime.inRun / totalBenchTime;
|
|
56
|
+
// GC time can't exceed total time
|
|
57
|
+
return { gc: gcTime <= 1 ? gcTime : undefined };
|
|
58
|
+
},
|
|
59
|
+
columns: (): ReportColumnGroup<GcSectionStats>[] => [
|
|
60
|
+
{
|
|
61
|
+
groupTitle: "gc",
|
|
62
|
+
columns: [
|
|
63
|
+
{ key: "gc", title: "mean", formatter: percent, comparable: true },
|
|
64
|
+
],
|
|
65
|
+
},
|
|
66
|
+
],
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
export interface GcStatsInfo {
|
|
70
|
+
allocPerIter?: number;
|
|
71
|
+
collected?: number;
|
|
72
|
+
scavenges?: number;
|
|
73
|
+
fullGCs?: number;
|
|
74
|
+
promoPercent?: number;
|
|
75
|
+
pausePerIter?: number;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/** Section: detailed GC stats from --trace-gc-nvp (allocation, promotion, pauses) */
|
|
79
|
+
export const gcStatsSection: ResultsMapper<GcStatsInfo> = {
|
|
80
|
+
extract: (results: MeasuredResults) => {
|
|
81
|
+
const { gcStats, samples } = results;
|
|
82
|
+
if (!gcStats) return {};
|
|
83
|
+
const iterations = samples.length || 1;
|
|
84
|
+
const { totalAllocated, totalPromoted } = gcStats;
|
|
85
|
+
const hasAlloc = totalAllocated && totalAllocated > 0;
|
|
86
|
+
const promoPercent = hasAlloc
|
|
87
|
+
? (totalPromoted ?? 0) / totalAllocated
|
|
88
|
+
: undefined;
|
|
89
|
+
return {
|
|
90
|
+
allocPerIter:
|
|
91
|
+
totalAllocated != null ? totalAllocated / iterations : undefined,
|
|
92
|
+
collected: gcStats.totalCollected || undefined,
|
|
93
|
+
scavenges: gcStats.scavenges,
|
|
94
|
+
fullGCs: gcStats.markCompacts,
|
|
95
|
+
promoPercent,
|
|
96
|
+
pausePerIter: gcStats.gcPauseTime / iterations,
|
|
97
|
+
};
|
|
98
|
+
},
|
|
99
|
+
columns: (): ReportColumnGroup<GcStatsInfo>[] => [
|
|
100
|
+
{
|
|
101
|
+
groupTitle: "gc",
|
|
102
|
+
columns: [
|
|
103
|
+
{ key: "allocPerIter", title: "alloc/iter", formatter: formatBytes },
|
|
104
|
+
{ key: "collected", title: "collected", formatter: formatBytes },
|
|
105
|
+
{ key: "scavenges", title: "scav", formatter: integer },
|
|
106
|
+
{ key: "fullGCs", title: "full", formatter: integer },
|
|
107
|
+
{ key: "promoPercent", title: "promo%", formatter: percent },
|
|
108
|
+
{ key: "pausePerIter", title: "pause/iter", formatter: timeMs },
|
|
109
|
+
],
|
|
110
|
+
},
|
|
111
|
+
],
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
/** Browser GC section: only fields available from CDP tracing */
|
|
115
|
+
export const browserGcStatsSection: ResultsMapper<GcStatsInfo> = {
|
|
116
|
+
extract: gcStatsSection.extract,
|
|
117
|
+
columns: (): ReportColumnGroup<GcStatsInfo>[] => [
|
|
118
|
+
{
|
|
119
|
+
groupTitle: "gc",
|
|
120
|
+
columns: [
|
|
121
|
+
{ key: "collected", title: "collected", formatter: formatBytes },
|
|
122
|
+
{ key: "scavenges", title: "scav", formatter: integer },
|
|
123
|
+
{ key: "fullGCs", title: "full", formatter: integer },
|
|
124
|
+
{ key: "pausePerIter", title: "pause", formatter: timeMs },
|
|
125
|
+
],
|
|
126
|
+
},
|
|
127
|
+
],
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
export interface CpuStats {
|
|
131
|
+
cpuCacheMiss?: number;
|
|
132
|
+
cpuStall?: number;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/** Section: CPU L1 cache miss rate and stall rate (requires @mitata/counters) */
|
|
136
|
+
export const cpuSection: ResultsMapper<CpuStats> = {
|
|
137
|
+
extract: (results: MeasuredResults) => ({
|
|
138
|
+
cpuCacheMiss: results.cpuCacheMiss,
|
|
139
|
+
cpuStall: results.cpuStall,
|
|
140
|
+
}),
|
|
141
|
+
columns: (): ReportColumnGroup<CpuStats>[] => [
|
|
142
|
+
{
|
|
143
|
+
groupTitle: "cpu",
|
|
144
|
+
columns: [
|
|
145
|
+
{ key: "cpuCacheMiss", title: "L1 miss", formatter: percent },
|
|
146
|
+
{ key: "cpuStall", title: "stalls", formatter: percentPrecision(2) },
|
|
147
|
+
],
|
|
148
|
+
},
|
|
149
|
+
],
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
export interface RunStats {
|
|
153
|
+
runs?: number;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/** Section: number of sample iterations */
|
|
157
|
+
export const runsSection: ResultsMapper<RunStats> = {
|
|
158
|
+
extract: (results: MeasuredResults) => ({
|
|
159
|
+
runs: results.samples.length,
|
|
160
|
+
}),
|
|
161
|
+
columns: (): ReportColumnGroup<RunStats>[] => [
|
|
162
|
+
{ columns: [{ key: "runs", title: "runs", formatter: integer }] },
|
|
163
|
+
],
|
|
164
|
+
};
|
|
165
|
+
|
|
166
|
+
/** Section: total sampling duration in seconds (brackets if >= 30s) */
|
|
167
|
+
export const totalTimeSection: ResultsMapper<{ totalTime?: number }> = {
|
|
168
|
+
extract: (results: MeasuredResults) => ({
|
|
169
|
+
totalTime: results.totalTime,
|
|
170
|
+
}),
|
|
171
|
+
columns: (): ReportColumnGroup<{ totalTime?: number }>[] => [
|
|
172
|
+
{
|
|
173
|
+
columns: [
|
|
174
|
+
{
|
|
175
|
+
key: "totalTime",
|
|
176
|
+
title: "time",
|
|
177
|
+
formatter: v => {
|
|
178
|
+
if (typeof v !== "number") return "";
|
|
179
|
+
return v >= 30 ? `[${v.toFixed(1)}s]` : `${v.toFixed(1)}s`;
|
|
180
|
+
},
|
|
181
|
+
},
|
|
182
|
+
],
|
|
183
|
+
},
|
|
184
|
+
],
|
|
185
|
+
};
|
|
186
|
+
|
|
187
|
+
export interface AdaptiveStats {
|
|
188
|
+
median?: number;
|
|
189
|
+
mean?: number;
|
|
190
|
+
p99?: number;
|
|
191
|
+
convergence?: number;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/** Section: median, mean, p99, and convergence for adaptive mode */
|
|
195
|
+
export const adaptiveSection: ResultsMapper<AdaptiveStats> = {
|
|
196
|
+
extract: (results: MeasuredResults) => ({
|
|
197
|
+
median: results.time?.p50,
|
|
198
|
+
mean: results.time?.avg,
|
|
199
|
+
p99: results.time?.p99,
|
|
200
|
+
convergence: results.convergence?.confidence,
|
|
201
|
+
}),
|
|
202
|
+
columns: (): ReportColumnGroup<AdaptiveStats>[] => [
|
|
203
|
+
{
|
|
204
|
+
groupTitle: "time",
|
|
205
|
+
columns: [
|
|
206
|
+
{ key: "median", title: "median", formatter: timeMs, comparable: true },
|
|
207
|
+
{ key: "mean", title: "mean", formatter: timeMs, comparable: true },
|
|
208
|
+
{ key: "p99", title: "p99", formatter: timeMs },
|
|
209
|
+
],
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
columns: [
|
|
213
|
+
{ key: "convergence", title: "conv%", formatter: formatConvergence },
|
|
214
|
+
],
|
|
215
|
+
},
|
|
216
|
+
],
|
|
217
|
+
};
|
|
218
|
+
|
|
219
|
+
/** Build generic sections based on CLI flags */
|
|
220
|
+
export function buildGenericSections(args: {
|
|
221
|
+
"gc-stats"?: boolean;
|
|
222
|
+
"heap-sample"?: boolean;
|
|
223
|
+
}): ResultsMapper[] {
|
|
224
|
+
const sections: ResultsMapper[] = [];
|
|
225
|
+
if (args["gc-stats"]) sections.push(gcStatsSection);
|
|
226
|
+
sections.push(runsSection);
|
|
227
|
+
return sections;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
export interface OptStats {
|
|
231
|
+
tiers?: string; // tier distribution summary
|
|
232
|
+
deopt?: number; // deopt count
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
/** Section: V8 optimization tier distribution and deopt count */
|
|
236
|
+
export const optSection: ResultsMapper<OptStats> = {
|
|
237
|
+
extract: (results: MeasuredResults) => {
|
|
238
|
+
const opt = results.optStatus;
|
|
239
|
+
if (!opt) return {};
|
|
240
|
+
|
|
241
|
+
const total = Object.values(opt.byTier).reduce((s, t) => s + t.count, 0);
|
|
242
|
+
const tierParts = Object.entries(opt.byTier)
|
|
243
|
+
.sort((a, b) => b[1].count - a[1].count)
|
|
244
|
+
.map(([name, t]) => `${name}:${((t.count / total) * 100).toFixed(0)}%`);
|
|
245
|
+
|
|
246
|
+
return {
|
|
247
|
+
tiers: tierParts.join(" "),
|
|
248
|
+
deopt: opt.deoptCount > 0 ? opt.deoptCount : undefined,
|
|
249
|
+
};
|
|
250
|
+
},
|
|
251
|
+
columns: (): ReportColumnGroup<OptStats>[] => [
|
|
252
|
+
{
|
|
253
|
+
groupTitle: "v8 opt",
|
|
254
|
+
columns: [
|
|
255
|
+
{
|
|
256
|
+
key: "tiers",
|
|
257
|
+
title: "tiers",
|
|
258
|
+
formatter: v => (typeof v === "string" ? v : ""),
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
key: "deopt",
|
|
262
|
+
title: "deopt",
|
|
263
|
+
formatter: v => (typeof v === "number" ? String(v) : ""),
|
|
264
|
+
},
|
|
265
|
+
],
|
|
266
|
+
},
|
|
267
|
+
],
|
|
268
|
+
};
|