perfshield 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli.js +58 -1
- package/lib/config.js +19 -2
- package/lib/report/console.js +22 -2
- package/lib/runner.js +180 -59
- package/lib/stats.js +32 -0
- package/package.json +1 -1
package/lib/cli.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { WriteStream } from "node:tty";
|
|
2
3
|
import { ensureBaseline, saveBaseline } from "./artifacts.js";
|
|
3
4
|
import { runBuild } from "./build.js";
|
|
4
5
|
import { ConfigError, formatConfigError, loadConfig } from "./config.js";
|
|
@@ -20,17 +21,73 @@ const runPrepare = async config => {
|
|
|
20
21
|
const baselinePath = await saveBaseline(config, outputPath);
|
|
21
22
|
console.log(`Baseline saved to ${baselinePath}`);
|
|
22
23
|
};
|
|
24
|
+
/**
 * Reports whether the process's stderr stream is attached to a terminal.
 *
 * Reads the stream's documented `isTTY` flag instead of testing the stream's
 * class, which is the check Node.js recommends for detecting a TTY context.
 *
 * @returns {boolean} `true` when stderr is a TTY, otherwise `false`.
 */
const isStderrTTY = () => process.stderr.isTTY === true;
|
|
27
|
+
/**
 * Creates a stderr progress reporter for one engine's benchmark run.
 *
 * On a TTY each update is drawn over the previous one on the same line (via a
 * carriage return), and updates arriving less than 100 ms after the last drawn
 * update are dropped. Otherwise each update is written on its own line and an
 * update identical to the previously written one is skipped.
 *
 * @param {string} engineName - Engine label appended to every progress message.
 * @returns {{finish: () => void, report: (event: object) => void}} `report`
 *   formats and prints one progress event; `finish` terminates the in-place
 *   TTY line with a newline if anything was drawn.
 */
const createProgressReporter = engineName => {
  const minLineWidth = 80;
  const minRedrawIntervalMs = 100;
  const isTTY = isStderrTTY();
  let lastWrite = 0;
  let lastLine = "";

  const writeLine = line => {
    if (isTTY) {
      // Pad to the wider of the default width and the line being overwritten.
      // Padding to a fixed 80 columns left the tail of a longer previous
      // message visible once a shorter one was drawn over it.
      const width = Math.max(minLineWidth, lastLine.length);
      process.stderr.write(`\r${line.padEnd(width, " ")}`);
    } else if (line !== lastLine) {
      process.stderr.write(`${line}\n`);
    }
    lastLine = line;
  };

  // Turns a progress event into its display text. Any phase other than
  // "warmup" or "samples" is rendered as an auto-sampling update.
  const formatMessage = event => {
    if (event.phase === "warmup") {
      const current = (event.benchmarkIndex ?? 0) + 1;
      const total = event.benchmarkCount ?? 0;
      const name = event.benchmarkName == null ? "" : ` ${event.benchmarkName}`;
      return `Warmup ${current}/${total}${name} (${engineName})`;
    }
    if (event.phase === "samples") {
      const completed = event.completed ?? 0;
      const total = event.total ?? 0;
      // Guard the division so an unknown total shows 0% instead of NaN/Infinity.
      const percent = total > 0 ? Math.round(completed / total * 100) : 0;
      return `Sampling ${completed}/${total} (${percent}%) (${engineName})`;
    }
    const completed = event.completed ?? 0;
    const elapsed = event.elapsedMs != null ? Math.round(event.elapsedMs / 1000) : 0;
    return `Auto-sampling ${completed} samples (${elapsed}s) (${engineName})`;
  };

  const report = event => {
    const now = Date.now();
    // Throttle only in-place TTY redraws; line-per-update output is never dropped here.
    if (isTTY && now - lastWrite < minRedrawIntervalMs) {
      return;
    }
    lastWrite = now;
    writeLine(formatMessage(event));
  };

  const finish = () => {
    // Move past the in-place progress line so later output starts on a fresh line.
    if (isTTY && lastLine) {
      process.stderr.write("\n");
    }
  };

  return {
    finish,
    report
  };
};
|
|
23
74
|
const runCompare = async config => {
|
|
24
75
|
const outputPath = await runBuild(config.build);
|
|
25
76
|
const baselinePath = await ensureBaseline(config);
|
|
26
77
|
const results = [];
|
|
78
|
+
const showProgress = config.report.formats.includes("console") || isStderrTTY();
|
|
27
79
|
for (const engine of config.engines) {
|
|
80
|
+
const progress = showProgress ? createProgressReporter(engine.name) : null;
|
|
28
81
|
results.push(await runEngineComparison({
|
|
29
82
|
baselinePath,
|
|
30
83
|
config,
|
|
31
84
|
currentPath: outputPath,
|
|
32
|
-
engine
|
|
85
|
+
engine,
|
|
86
|
+
progress: progress ? progress.report : undefined
|
|
33
87
|
}));
|
|
88
|
+
if (progress) {
|
|
89
|
+
progress.finish();
|
|
90
|
+
}
|
|
34
91
|
}
|
|
35
92
|
const outputs = renderReports(results, config.report.formats);
|
|
36
93
|
for (const output of outputs) {
|
package/lib/config.js
CHANGED
|
@@ -163,7 +163,7 @@ const parseSamplingConfig = (value, issues) => {
|
|
|
163
163
|
if (!sampling) {
|
|
164
164
|
return null;
|
|
165
165
|
}
|
|
166
|
-
validateKeys(sampling, ["conditions", "minSamples", "timeoutMs"], "config.sampling", issues);
|
|
166
|
+
validateKeys(sampling, ["conditions", "delayMs", "maxRelativeMargin", "minSamples", "minTimeMs", "timeoutMs"], "config.sampling", issues);
|
|
167
167
|
const minSamples = asNumber(sampling.minSamples, "config.sampling.minSamples", issues, {
|
|
168
168
|
integer: true,
|
|
169
169
|
min: 1
|
|
@@ -172,15 +172,32 @@ const parseSamplingConfig = (value, issues) => {
|
|
|
172
172
|
integer: true,
|
|
173
173
|
min: 1
|
|
174
174
|
});
|
|
175
|
+
const minTimeMs = sampling.minTimeMs == null ? undefined : asNumber(sampling.minTimeMs, "config.sampling.minTimeMs", issues, {
|
|
176
|
+
integer: true,
|
|
177
|
+
min: 0
|
|
178
|
+
});
|
|
179
|
+
const delayMs = sampling.delayMs == null ? undefined : asNumber(sampling.delayMs, "config.sampling.delayMs", issues, {
|
|
180
|
+
integer: true,
|
|
181
|
+
min: 0
|
|
182
|
+
});
|
|
183
|
+
const maxRelativeMargin = sampling.maxRelativeMargin == null ? undefined : asNumber(sampling.maxRelativeMargin, "config.sampling.maxRelativeMargin", issues, {
|
|
184
|
+
min: 0
|
|
185
|
+
});
|
|
186
|
+
if (maxRelativeMargin != null && maxRelativeMargin > 1) {
|
|
187
|
+
addIssue(issues, "config.sampling.maxRelativeMargin must be at most 1.");
|
|
188
|
+
}
|
|
175
189
|
const conditions = asNumberArray(sampling.conditions, "config.sampling.conditions", issues, {
|
|
176
190
|
minLength: 1
|
|
177
191
|
});
|
|
178
|
-
if (minSamples == null || timeoutMs == null || !conditions) {
|
|
192
|
+
if (minSamples == null || timeoutMs == null || !conditions || minTimeMs === null || delayMs === null || maxRelativeMargin === null) {
|
|
179
193
|
return null;
|
|
180
194
|
}
|
|
181
195
|
return {
|
|
182
196
|
conditions,
|
|
197
|
+
delayMs,
|
|
198
|
+
maxRelativeMargin,
|
|
183
199
|
minSamples,
|
|
200
|
+
minTimeMs,
|
|
184
201
|
timeoutMs
|
|
185
202
|
};
|
|
186
203
|
};
|
package/lib/report/console.js
CHANGED
|
@@ -10,16 +10,36 @@ const formatRelativeInterval = (interval, decimals) => formatInterval({
|
|
|
10
10
|
low: interval.low * 100
|
|
11
11
|
}, decimals, "%");
|
|
12
12
|
const formatRelativeValue = (value, decimals) => `${formatNumber(value * 100, decimals)}%`;
|
|
13
|
+
/**
 * Labels a confidence interval by which side of zero it lies on.
 *
 * @param {{low: number, high: number}} interval - Interval bounds.
 * @returns {string} "regression" when both bounds are above zero,
 *   "improvement" when both are below zero, otherwise "no significant change".
 */
const classifyDifference = interval => {
  const { low, high } = interval;
  const entirelyAboveZero = low > 0 && high > 0;
  if (entirelyAboveZero) {
    return "regression";
  }
  const entirelyBelowZero = low < 0 && high < 0;
  return entirelyBelowZero ? "improvement" : "no significant change";
};
|
|
13
22
|
/**
 * Renders comparison results as plain text for console output.
 *
 * For each engine result it emits an "Engine:" header, a group of lines per
 * benchmark (name, classification with relative mean/CI, baseline stats,
 * current stats, relative difference), and a summary line counting how many
 * benchmarks were classified as regressions, improvements, or neutral.
 *
 * @param {Array<object>} results - Engine results, each with `engine.name`
 *   and a `benchmarks` array of entries carrying `benchmark`, `stats`, and
 *   `difference`.
 * @returns {string} The report text with trailing whitespace trimmed.
 */
export const renderConsoleReport = results => {
  const output = [];
  for (const result of results) {
    // Every status other than "regression"/"improvement" is counted as neutral.
    const tally = {
      improvement: 0,
      neutral: 0,
      regression: 0
    };
    output.push(`Engine: ${result.engine.name}`);
    for (const entry of result.benchmarks) {
      const unit = entry.benchmark.unit != null ? ` ${entry.benchmark.unit}` : "";
      const status = classifyDifference(entry.difference.relative.ci);
      const bucket = status === "regression" || status === "improvement" ? status : "neutral";
      tally[bucket] += 1;
      const describeSide = (label, side) => {
        const mean = formatNumber(side.mean, 4);
        const ci = formatInterval(side.meanCI, 4);
        const sd = formatNumber(side.standardDeviation, 4);
        return ` ${label} mean=${mean}${unit} ci=${ci} sd=${sd}`;
      };
      const describeRelative = () => {
        const mean = formatRelativeValue(entry.difference.relative.mean, 2);
        const ci = formatRelativeInterval(entry.difference.relative.ci, 2);
        return `mean=${mean} ci=${ci}`;
      };
      output.push(` Benchmark: ${entry.benchmark.name}`);
      output.push(` Result: ${status} (${describeRelative()})`);
      output.push(describeSide("baseline", entry.stats.baseline));
      output.push(describeSide("current", entry.stats.current));
      output.push(` diff rel ${describeRelative()}`);
    }
    output.push(` Summary: regressions=${tally.regression} improvements=${tally.improvement} neutral=${tally.neutral}`);
    // Blank separator between engines; the final one is removed by trimEnd below.
    output.push("");
  }
  return output.join("\n").trimEnd();
};
|
package/lib/runner.js
CHANGED
|
@@ -4,10 +4,12 @@ import { join, resolve } from "node:path";
|
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
5
|
import { transformFileAsync } from "@babel/core";
|
|
6
6
|
import { createNodeHarness } from "./engines/node.js";
|
|
7
|
-
import {
|
|
7
|
+
import { computeRelativeDifferenceFromSamples, summaryStats } from "./stats.js";
|
|
8
8
|
const versions = ["baseline", "current"];
|
|
9
9
|
const autoSampleBatchSize = 10;
|
|
10
10
|
const harnessTempPrefix = "perfshield-harness-";
|
|
11
|
+
const defaultMinTimeMs = 20;
|
|
12
|
+
const defaultMaxRelativeMargin = 0.05;
|
|
11
13
|
const getVersionOrder = seed => {
|
|
12
14
|
if (seed % 2 === 0) {
|
|
13
15
|
return versions;
|
|
@@ -59,91 +61,193 @@ const buildHarnessIfNeeded = async sourcePath => {
|
|
|
59
61
|
path: harnessPath
|
|
60
62
|
};
|
|
61
63
|
};
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
64
|
+
/**
 * Waits for the given number of milliseconds.
 *
 * @param {number} delayMs - Delay in milliseconds; a value of zero or less
 *   returns without scheduling a timer.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
const sleep = async delayMs => {
  const skipTimer = delayMs <= 0;
  if (!skipTimer) {
    await new Promise(done => {
      setTimeout(done, delayMs);
    });
  }
};
|
|
72
|
+
/**
 * Runs every benchmark once per version and records the measured durations.
 *
 * The version order for each benchmark comes from `getVersionOrder`, seeded
 * with the benchmark's index. After each benchmark a "warmup" progress event
 * is emitted (when a callback is given) and the function sleeps for `delayMs`.
 *
 * @param {object} harness - Object exposing `runSample(payload)`, which
 *   resolves to a result with a `durationMs` field.
 * @param {Array<object>} benchmarks - Descriptors; `iterations` and `name` are read.
 * @param {number} delayMs - Pause after each benchmark, in milliseconds.
 * @param {Function} [progress] - Optional progress callback.
 * @returns {Promise<Array<{baseline: number, current: number}>>} One duration
 *   pair per benchmark, in benchmark order.
 * @throws {Error} If a benchmark ends up without both a baseline and a current duration.
 */
const warmupBenchmarks = async (harness, benchmarks, delayMs, progress) => {
  const warmups = [];
  const benchmarkCount = benchmarks.length;
  for (let index = 0; index < benchmarks.length; index += 1) {
    const descriptor = benchmarks[index];
    const durations = {
      baseline: undefined,
      current: undefined
    };
    for (const version of getVersionOrder(index)) {
      const { durationMs } = await harness.runSample({
        index,
        iterations: descriptor.iterations,
        version
      });
      // Anything that is not "baseline" is recorded as the current version.
      const slot = version === "baseline" ? "baseline" : "current";
      durations[slot] = durationMs;
    }
    if (durations.baseline == null || durations.current == null) {
      throw new Error("Warmup did not collect baseline/current samples.");
    }
    warmups.push({
      baseline: durations.baseline,
      current: durations.current
    });
    if (progress) {
      progress({
        benchmarkCount,
        benchmarkIndex: index,
        benchmarkName: descriptor.name,
        phase: "warmup"
      });
    }
    await sleep(delayMs);
  }
  return warmups;
};
|
|
70
|
-
const
|
|
71
|
-
|
|
110
|
+
/**
 * Derives a per-benchmark iteration count from warmup timings so that a
 * sample of the slower version takes at least `minTimeMs`.
 *
 * @param {Array<object>} benchmarks - Descriptors; `iterations` defaults to 1.
 * @param {Array<{baseline: number, current: number}>} warmups - Warmup
 *   durations, index-aligned with `benchmarks`.
 * @param {number} minTimeMs - Target minimum sample time; zero or less
 *   disables overrides.
 * @returns {Array<number|undefined>} One entry per benchmark: `undefined`
 *   when overrides are disabled, the declared iteration count when the warmup
 *   timing is not a positive finite number, otherwise the larger of the
 *   declared count and the count needed to reach `minTimeMs`.
 */
const computeIterationOverrides = (benchmarks, warmups, minTimeMs) => {
  const overridesDisabled = minTimeMs <= 0;
  return benchmarks.map((benchmark, position) => {
    if (overridesDisabled) {
      return undefined;
    }
    const declared = benchmark.iterations ?? 1;
    const { baseline, current } = warmups[position];
    // Size the count from the slower of the two versions.
    const slowestPerIteration = Math.max(baseline / declared, current / declared);
    const usable = Number.isFinite(slowestPerIteration) && slowestPerIteration > 0;
    return usable ? Math.max(declared, Math.ceil(minTimeMs / slowestPerIteration)) : declared;
  });
};
|
|
126
|
+
/**
 * Recomputes the iteration count for a benchmark from its latest sample pair.
 *
 * @param {number} currentIterations - Iteration count used for the sample.
 * @param {number} baselineSample - Measured baseline duration.
 * @param {number} currentSample - Measured current duration.
 * @param {number} minTimeMs - Target minimum sample time; zero or less leaves
 *   the count unchanged.
 * @param {number} minimumIterations - Lower bound for the returned count.
 * @returns {number} `currentIterations` when adjustment is disabled or the
 *   per-iteration time is not a positive finite number; otherwise the larger
 *   of `minimumIterations` and the count needed to reach `minTimeMs`.
 */
const updateIterations = (currentIterations, baselineSample, currentSample, minTimeMs, minimumIterations) => {
  const slowestSample = Math.max(baselineSample, currentSample);
  const perIteration = slowestSample / currentIterations;
  const keepCurrent = minTimeMs <= 0 || !Number.isFinite(perIteration) || perIteration <= 0;
  if (keepCurrent) {
    return currentIterations;
  }
  const needed = Math.ceil(minTimeMs / perIteration);
  return Math.max(minimumIterations, needed);
};
|
|
136
|
+
/**
 * Computes the upper confidence margin of a relative difference as a
 * fraction of its mean.
 *
 * @param {{relative: {mean: number, ci: {high: number}}}} difference -
 *   Difference whose `relative.mean` and `relative.ci.high` are read.
 * @returns {number} `|ci.high - mean| / |mean|`, or positive infinity when
 *   the mean is exactly zero.
 */
const relativeMarginFromDifference = difference => {
  const { mean, ci } = difference.relative;
  return mean === 0
    ? Number.POSITIVE_INFINITY
    : Math.abs(ci.high - mean) / Math.abs(mean);
};
|
|
144
|
+
/**
 * Builds the payload for a single sample request.
 *
 * @param {number} index - Benchmark index.
 * @param {string} version - Version label to run.
 * @param {number} [iterations] - Iteration count; the key is left off the
 *   payload when this is null or undefined.
 * @returns {{index: number, version: string, iterations?: number}} The payload.
 */
const buildSamplePayload = (index, version, iterations) => {
  const base = {
    index,
    version
  };
  return iterations == null ? base : { ...base, iterations };
};
|
|
154
|
+
/**
 * Runs one sample for each version in `order` and returns the two durations.
 *
 * @param {object} harness - Object exposing `runSample(payload)`, which
 *   resolves to a result with a `durationMs` field.
 * @param {number} index - Benchmark index.
 * @param {number} [iterations] - Iteration count passed to `buildSamplePayload`.
 * @param {Iterable<string>} order - Versions to run, in execution order.
 * @returns {Promise<{baseline: number, current: number}>} The measured durations.
 * @throws {Error} If either the baseline or the current duration is missing
 *   after running every version in `order`.
 */
const runSamplePair = async (harness, index, iterations, order) => {
  const durations = {
    baseline: undefined,
    current: undefined
  };
  for (const version of order) {
    const payload = buildSamplePayload(index, version, iterations);
    const { durationMs } = await harness.runSample(payload);
    // Anything that is not "baseline" is recorded as the current version.
    const slot = version === "baseline" ? "baseline" : "current";
    durations[slot] = durationMs;
  }
  const { baseline, current } = durations;
  if (baseline == null || current == null) {
    throw new Error("Sample pair did not include baseline/current.");
  }
  return {
    baseline,
    current
  };
};
|
|
94
|
-
const collectSamples = async (
|
|
95
|
-
const
|
|
173
|
+
/**
 * Collects a fixed number of sample pairs for every benchmark.
 *
 * Each round runs every benchmark once, using a version order obtained from
 * `getVersionOrder` seeded with the round number. After each pair the
 * benchmark's entry in `iterationOverrides` is updated in place, a "samples"
 * progress event is emitted (when a callback is given), and the function
 * sleeps for `delayMs`.
 *
 * @param {object} harness - Harness passed through to `runSamplePair`.
 * @param {Array<object>} benchmarks - Descriptors; `iterations` defaults to 1.
 * @param {number} minSamples - Number of rounds to run.
 * @param {Array<number|undefined>} iterationOverrides - Per-benchmark
 *   iteration counts; mutated as samples come in.
 * @param {number} delayMs - Pause after each sample pair, in milliseconds.
 * @param {number} minTimeMs - Target minimum sample time for `updateIterations`.
 * @param {Array<{baseline: number[], current: number[]}>} [samples] -
 *   Existing buckets to append to; fresh empty buckets are created when omitted.
 * @param {Function} [progress] - Optional progress callback.
 * @returns {Promise<Array<{baseline: number[], current: number[]}>>} The
 *   buckets that were appended to.
 */
const collectSamples = async (harness, benchmarks, minSamples, iterationOverrides, delayMs, minTimeMs, samples, progress) => {
  const createEmptyBucket = () => ({
    baseline: [],
    current: []
  });
  const buckets = samples ?? benchmarks.map(createEmptyBucket);
  const total = minSamples * benchmarks.length;
  let completed = 0;
  for (let round = 0; round < minSamples; round += 1) {
    const order = getVersionOrder(round);
    for (let index = 0; index < benchmarks.length; index += 1) {
      const iterations = iterationOverrides[index];
      const minimumIterations = benchmarks[index].iterations ?? 1;
      const pair = await runSamplePair(harness, index, iterations, order);
      const bucket = buckets[index];
      bucket.baseline.push(pair.baseline);
      bucket.current.push(pair.current);
      // Feed the fresh timings back so the next sample uses the updated count.
      iterationOverrides[index] = updateIterations(iterations ?? minimumIterations, pair.baseline, pair.current, minTimeMs, minimumIterations);
      completed += 1;
      if (progress) {
        progress({
          completed,
          phase: "samples",
          total
        });
      }
      await sleep(delayMs);
    }
  }
  return buckets;
};
|
|
116
203
|
const intervalContains = (interval, value) => interval.low <= value && value <= interval.high;
|
|
117
|
-
const autoSampleResolved = (samples, conditions) => samples.every(bucket => {
|
|
118
|
-
const
|
|
119
|
-
const
|
|
120
|
-
|
|
204
|
+
/**
 * Decides whether every benchmark has enough samples for auto-sampling to stop.
 *
 * A bucket counts as resolved when either:
 *  - every condition is 0 and both bounds of the relative confidence interval
 *    are within `maxRelativeMargin` of zero; or
 *  - the interval contains none of the conditions and the relative margin
 *    reported by `relativeMarginFromDifference` does not exceed
 *    `maxRelativeMargin`.
 *
 * @param {Array<{baseline: number[], current: number[]}>} samples - Sample
 *   buckets, one per benchmark.
 * @param {number[]} conditions - Relative-difference values to test the
 *   interval against.
 * @param {number} maxRelativeMargin - Largest acceptable relative margin.
 * @returns {boolean} `true` when every bucket is resolved.
 */
const autoSampleResolved = (samples, conditions, maxRelativeMargin) => {
  // Depends only on `conditions`, so evaluate it once rather than once per bucket.
  const onlyZeroConditions = conditions.every(condition => condition === 0);
  return samples.every(bucket => {
    const diff = computeRelativeDifferenceFromSamples(bucket.baseline, bucket.current);
    if (onlyZeroConditions) {
      const maxAbsBound = Math.max(Math.abs(diff.relative.ci.low), Math.abs(diff.relative.ci.high));
      if (maxAbsBound <= maxRelativeMargin) {
        return true;
      }
    }
    for (const condition of conditions) {
      if (intervalContains(diff.relative.ci, condition)) {
        return false;
      }
    }
    if (relativeMarginFromDifference(diff) > maxRelativeMargin) {
      return false;
    }
    return true;
  });
};
|
|
128
|
-
const autoSample = async (
|
|
223
|
+
const autoSample = async (harness, benchmarks, samples, conditions, maxRelativeMargin, iterationOverrides, delayMs, minTimeMs, progress, timeoutMs) => {
|
|
129
224
|
const startTime = Date.now();
|
|
130
225
|
let roundRobinSeed = 0;
|
|
226
|
+
let completed = 0;
|
|
131
227
|
while (Date.now() - startTime < timeoutMs) {
|
|
132
|
-
if (autoSampleResolved(samples, conditions)) {
|
|
228
|
+
if (autoSampleResolved(samples, conditions, maxRelativeMargin)) {
|
|
133
229
|
return;
|
|
134
230
|
}
|
|
135
231
|
for (let batch = 0; batch < autoSampleBatchSize; batch += 1) {
|
|
232
|
+
const order = getVersionOrder(roundRobinSeed);
|
|
233
|
+
roundRobinSeed += 1;
|
|
136
234
|
for (let index = 0; index < benchmarks.length; index += 1) {
|
|
137
|
-
const
|
|
138
|
-
const
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
235
|
+
const iterations = iterationOverrides[index];
|
|
236
|
+
const minimumIterations = benchmarks[index].iterations ?? 1;
|
|
237
|
+
const result = await runSamplePair(harness, index, iterations, order);
|
|
238
|
+
samples[index].baseline.push(result.baseline);
|
|
239
|
+
samples[index].current.push(result.current);
|
|
240
|
+
const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
|
|
241
|
+
iterationOverrides[index] = nextIterations;
|
|
242
|
+
completed += 1;
|
|
243
|
+
if (progress) {
|
|
244
|
+
progress({
|
|
245
|
+
completed,
|
|
246
|
+
elapsedMs: Date.now() - startTime,
|
|
247
|
+
phase: "autosample"
|
|
248
|
+
});
|
|
146
249
|
}
|
|
250
|
+
await sleep(delayMs);
|
|
147
251
|
}
|
|
148
252
|
}
|
|
149
253
|
}
|
|
@@ -158,17 +262,33 @@ export const runEngineComparison = async options => {
|
|
|
158
262
|
const harnessArtifact = await buildHarnessIfNeeded(getHarnessPath());
|
|
159
263
|
const resolvedBaseline = resolve(baselinePath);
|
|
160
264
|
const resolvedCurrent = resolve(currentPath);
|
|
265
|
+
const harness = await createNodeHarness(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent);
|
|
161
266
|
try {
|
|
162
|
-
const benchmarks = await
|
|
163
|
-
|
|
164
|
-
const
|
|
165
|
-
|
|
267
|
+
const benchmarks = await harness.listBenchmarks();
|
|
268
|
+
const minTimeMs = config.sampling.minTimeMs ?? defaultMinTimeMs;
|
|
269
|
+
const minSamples = Math.max(1, config.sampling.minSamples);
|
|
270
|
+
const sampleScale = Math.sqrt(minSamples / 20);
|
|
271
|
+
const benchmarkScale = Math.sqrt(Math.max(1, benchmarks.length));
|
|
272
|
+
const effectiveMinTimeMs = minTimeMs / Math.max(1, sampleScale * benchmarkScale);
|
|
273
|
+
const delayMs = config.sampling.delayMs ?? 0;
|
|
274
|
+
const maxRelativeMargin = config.sampling.maxRelativeMargin ?? defaultMaxRelativeMargin;
|
|
275
|
+
const warmups = await warmupBenchmarks(harness, benchmarks, delayMs, options.progress);
|
|
276
|
+
const iterationOverrides = computeIterationOverrides(benchmarks, warmups, effectiveMinTimeMs);
|
|
277
|
+
const samples = warmups.map(warmup => ({
|
|
278
|
+
baseline: [warmup.baseline],
|
|
279
|
+
current: [warmup.current]
|
|
280
|
+
}));
|
|
281
|
+
const remainingSamples = Math.max(0, config.sampling.minSamples - 1);
|
|
282
|
+
if (remainingSamples > 0) {
|
|
283
|
+
await collectSamples(harness, benchmarks, remainingSamples, iterationOverrides, delayMs, effectiveMinTimeMs, samples, options.progress);
|
|
284
|
+
}
|
|
285
|
+
await autoSample(harness, benchmarks, samples, config.sampling.conditions, maxRelativeMargin, iterationOverrides, delayMs, effectiveMinTimeMs, options.progress, config.sampling.timeoutMs);
|
|
166
286
|
const benchmarkResults = benchmarks.map((benchmark, index) => {
|
|
167
287
|
const baselineSamples = samples[index].baseline;
|
|
168
288
|
const currentSamples = samples[index].current;
|
|
169
289
|
const baselineStats = summaryStats(baselineSamples);
|
|
170
290
|
const currentStats = summaryStats(currentSamples);
|
|
171
|
-
const difference =
|
|
291
|
+
const difference = computeRelativeDifferenceFromSamples(baselineSamples, currentSamples);
|
|
172
292
|
return {
|
|
173
293
|
benchmark,
|
|
174
294
|
difference,
|
|
@@ -187,6 +307,7 @@ export const runEngineComparison = async options => {
|
|
|
187
307
|
engine
|
|
188
308
|
};
|
|
189
309
|
} finally {
|
|
310
|
+
await harness.close();
|
|
190
311
|
if (harnessArtifact.cleanup) {
|
|
191
312
|
await harnessArtifact.cleanup();
|
|
192
313
|
}
|
package/lib/stats.js
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import jstat from "jstat";
|
|
2
|
+
/**
 * Computes the upper confidence margin of a mean as a fraction of that mean.
 *
 * @param {{mean: number, meanCI: {high: number}}} stats - Summary statistics
 *   whose `mean` and `meanCI.high` are read.
 * @returns {number} `|meanCI.high - mean| / |mean|`, or positive infinity
 *   when the mean is exactly zero.
 */
export const relativeMarginOfError = stats => {
  const { mean } = stats;
  if (mean === 0) {
    return Number.POSITIVE_INFINITY;
  }
  const upperMargin = Math.abs(stats.meanCI.high - mean);
  return upperMargin / Math.abs(mean);
};
|
|
2
9
|
const sumOf = values => values.reduce((total, value) => total + value, 0);
|
|
3
10
|
const squareResiduals = (values, mean) => values.map(value => {
|
|
4
11
|
const diff = value - mean;
|
|
@@ -70,6 +77,31 @@ export const computeDifference = (baseline, current) => {
|
|
|
70
77
|
}
|
|
71
78
|
};
|
|
72
79
|
};
|
|
80
|
+
/**
 * Computes the mean and confidence interval of per-pair relative differences.
 *
 * Samples are paired by index; when the two arrays differ in length only the
 * first `min(length)` pairs are used. Each pair contributes
 * `(current - baseline) / baseline`.
 *
 * @param {number[]} baselineSamples - Baseline measurements.
 * @param {number[]} currentSamples - Current measurements.
 * @returns {{ci: object, mean: number}} Mean of the relative differences and
 *   the interval returned by `confidenceInterval95`.
 * @throws {Error} If there are no pairs, or if a paired baseline sample is 0.
 */
const computePairedRelativeStats = (baselineSamples, currentSamples) => {
  const pairCount = Math.min(baselineSamples.length, currentSamples.length);
  if (pairCount === 0) {
    throw new Error("Cannot compute differences with empty sample sets.");
  }
  const relativeDiffs = Array.from({ length: pairCount }, (_, position) => {
    const baseline = baselineSamples[position];
    if (baseline === 0) {
      throw new Error("Cannot compute relative difference with baseline sample 0.");
    }
    return (currentSamples[position] - baseline) / baseline;
  });
  const { mean, size, variance } = summaryStats(relativeDiffs);
  const meanDistribution = samplingDistributionOfTheMean({
    mean,
    variance
  }, size);
  return {
    ci: confidenceInterval95(meanDistribution, size),
    mean
  };
};
|
|
102
|
+
/**
 * Wraps the paired relative statistics of two sample sets in a
 * `{ relative }` object.
 *
 * @param {number[]} baselineSamples - Baseline measurements.
 * @param {number[]} currentSamples - Current measurements.
 * @returns {{relative: {ci: object, mean: number}}} The result of
 *   `computePairedRelativeStats` under the `relative` key.
 */
export const computeRelativeDifferenceFromSamples = (baselineSamples, currentSamples) => {
  const relative = computePairedRelativeStats(baselineSamples, currentSamples);
  return { relative };
};
|
|
73
105
|
export const computeDifferences = stats => stats.map(result => ({
|
|
74
106
|
...result,
|
|
75
107
|
differences: stats.map(other => other === result ? null : computeDifference(other.stats, result.stats))
|