perfshield 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/runner.js +93 -95
- package/lib/stats.js +98 -16
- package/package.json +1 -1
package/lib/runner.js
CHANGED
|
@@ -37,43 +37,37 @@ const sleep = async delayMs => {
|
|
|
37
37
|
setTimeout(resolve, delayMs);
|
|
38
38
|
});
|
|
39
39
|
};
|
|
40
|
-
const
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
index,
|
|
50
|
-
iterations: descriptor.iterations,
|
|
51
|
-
version
|
|
52
|
-
});
|
|
53
|
-
if (version === "baseline") {
|
|
54
|
-
baselineSample = result.durationMs;
|
|
55
|
-
} else {
|
|
56
|
-
currentSample = result.durationMs;
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
if (baselineSample == null || currentSample == null) {
|
|
60
|
-
throw new Error("Warmup did not collect baseline/current samples.");
|
|
61
|
-
}
|
|
62
|
-
warmups.push({
|
|
63
|
-
baseline: baselineSample,
|
|
64
|
-
current: currentSample
|
|
40
|
+
/**
 * Runs a single warm-up pass for one benchmark: one sample per version, in the
 * order returned by `getVersionOrder(index)`.
 *
 * @param {object} harness - Object exposing `runSample({ index, iterations, version })`,
 *   which resolves to an object with a `durationMs` field.
 * @param {object} benchmark - Benchmark descriptor; `iterations` and `name` are read.
 * @param {number} index - Position of the benchmark; forwarded to the harness and
 *   used to pick the version order.
 * @param {number} delayMs - Pause applied after the warm-up pass.
 * @param {Function} [progress] - Optional callback invoked once with a
 *   `{ benchmarkIndex, benchmarkName, phase: "warmup" }` event.
 * @returns {Promise<{baseline: number, current: number}>} The warm-up durations.
 * @throws {Error} When either the baseline or the current sample was not collected.
 */
const warmupBenchmark = async (harness, benchmark, index, delayMs, progress) => {
  const order = getVersionOrder(index);
  let baselineSample;
  let currentSample;
  for (const version of order) {
    const result = await harness.runSample({
      index,
      iterations: benchmark.iterations,
      version
    });
    // Any version other than "baseline" is treated as the current build.
    if (version === "baseline") {
      baselineSample = result.durationMs;
    } else {
      currentSample = result.durationMs;
    }
  }
  // `== null` deliberately matches both `undefined` (never assigned) and `null`.
  if (baselineSample == null || currentSample == null) {
    throw new Error("Warmup did not collect baseline/current samples.");
  }
  if (progress) {
    progress({
      benchmarkIndex: index,
      benchmarkName: benchmark.name,
      phase: "warmup"
    });
  }
  await sleep(delayMs);
  return {
    baseline: baselineSample,
    current: currentSample
  };
};
|
|
78
72
|
const computeIterationOverrides = (benchmarks, warmups, minTimeMs) => {
|
|
79
73
|
if (minTimeMs <= 0) {
|
|
@@ -138,36 +132,26 @@ const runSamplePair = async (harness, index, iterations, order) => {
|
|
|
138
132
|
current: currentSample
|
|
139
133
|
};
|
|
140
134
|
};
|
|
141
|
-
const
|
|
142
|
-
const buckets = samples ?? benchmarks.map(() => ({
|
|
143
|
-
baseline: [],
|
|
144
|
-
current: []
|
|
145
|
-
}));
|
|
146
|
-
let completed = 0;
|
|
147
|
-
const total = minSamples * benchmarks.length;
|
|
135
|
+
/**
 * Collects `minSamples` baseline/current sample pairs for one benchmark and
 * appends them to `bucket`, re-tuning the iteration count after every pair.
 *
 * @param {object} harness - Forwarded to `runSamplePair`.
 * @param {object} benchmark - Benchmark descriptor; `iterations` is the floor for
 *   the iteration count (defaults to 1 when nullish).
 * @param {number} index - Benchmark index; selects the slot in `iterationOverrides`.
 * @param {number} minSamples - Number of sample pairs to collect.
 * @param {Array<number|undefined>} iterationOverrides - Mutated in place:
 *   `iterationOverrides[index]` is replaced with the value from `updateIterations`.
 * @param {number} delayMs - Pause applied after each sample pair.
 * @param {number} minTimeMs - Forwarded to `updateIterations`.
 * @param {{baseline: number[], current: number[]}} bucket - Mutated in place.
 * @param {Function} [progress] - Optional callback receiving
 *   `{ completed, phase: "samples", total }` events.
 * @param {{completed: number, total: number}} [progressState] - Shared counter;
 *   only touched when `progress` is also provided.
 * @returns {Promise<void>}
 */
const collectSamplesForBenchmark = async (harness, benchmark, index, minSamples, iterationOverrides, delayMs, minTimeMs, bucket, progress, progressState) => {
  // Loop-invariant: the descriptor's iteration floor does not change per sample.
  const minimumIterations = benchmark.iterations ?? 1;
  for (let iteration = 0; iteration < minSamples; iteration += 1) {
    const order = getVersionOrder(iteration);
    const iterations = iterationOverrides[index];
    const result = await runSamplePair(harness, index, iterations, order);
    bucket.baseline.push(result.baseline);
    bucket.current.push(result.current);
    // Feed the just-measured durations back so the next pair uses a tuned count.
    iterationOverrides[index] = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
    if (progress && progressState) {
      progressState.completed += 1;
      progress({
        completed: progressState.completed,
        phase: "samples",
        total: progressState.total
      });
    }
    await sleep(delayMs);
  }
};
|
|
172
156
|
/**
 * Reports whether `value` lies inside the closed range described by `interval`.
 *
 * @param {{low: number, high: number}} interval - Range bounds; both ends inclusive.
 * @param {number} value - Candidate to test.
 * @returns {boolean} `true` when `interval.low <= value <= interval.high`.
 */
const intervalContains = (interval, value) => {
  // Written as a negated `>=` so NaN operands fall out as `false`.
  if (!(value >= interval.low)) {
    return false;
  }
  return interval.high >= value;
};
|
|
173
157
|
const autoSampleResolved = (samples, conditions, maxRelativeMargin) => samples.every(bucket => {
|
|
@@ -189,36 +173,33 @@ const autoSampleResolved = (samples, conditions, maxRelativeMargin) => samples.e
|
|
|
189
173
|
}
|
|
190
174
|
return true;
|
|
191
175
|
});
|
|
192
|
-
const
|
|
176
|
+
/**
 * Keeps collecting sample pairs for one benchmark until `autoSampleResolved`
 * accepts the bucket or `timeoutMs` has elapsed, whichever comes first.
 *
 * Samples are taken in batches of `autoSampleBatchSize`; the resolution check
 * runs once per batch, while the deadline is checked before every sample so a
 * batch cannot overrun the time budget.
 *
 * @param {object} harness - Forwarded to `runSamplePair`.
 * @param {object} benchmark - Benchmark descriptor; `iterations` is the floor for
 *   the iteration count (defaults to 1 when nullish).
 * @param {number} index - Benchmark index; selects the slot in `iterationOverrides`.
 * @param {{baseline: number[], current: number[]}} bucket - Mutated in place.
 * @param {*} conditions - Forwarded to `autoSampleResolved`.
 * @param {number} maxRelativeMargin - Forwarded to `autoSampleResolved`.
 * @param {Array<number|undefined>} iterationOverrides - Mutated in place.
 * @param {number} delayMs - Pause applied after each sample pair.
 * @param {number} minTimeMs - Forwarded to `updateIterations`.
 * @param {Function} [progress] - Optional callback receiving
 *   `{ completed, elapsedMs, phase: "autosample" }` events.
 * @param {number} timeoutMs - Wall-clock budget measured from function entry.
 * @returns {Promise<void>}
 */
const autoSampleForBenchmark = async (harness, benchmark, index, bucket, conditions, maxRelativeMargin, iterationOverrides, delayMs, minTimeMs, progress, timeoutMs) => {
  const startTime = Date.now();
  // Loop-invariant: the descriptor's iteration floor does not change per sample.
  const minimumIterations = benchmark.iterations ?? 1;
  let roundRobinSeed = 0;
  let completed = 0;
  while (Date.now() - startTime < timeoutMs) {
    if (autoSampleResolved([bucket], conditions, maxRelativeMargin)) {
      return;
    }
    for (let batch = 0; batch < autoSampleBatchSize; batch += 1) {
      // Re-check the deadline per sample; otherwise a whole batch could run
      // past `timeoutMs` before the outer loop notices.
      if (Date.now() - startTime >= timeoutMs) {
        break;
      }
      const order = getVersionOrder(roundRobinSeed);
      roundRobinSeed += 1;
      const iterations = iterationOverrides[index];
      const result = await runSamplePair(harness, index, iterations, order);
      bucket.baseline.push(result.baseline);
      bucket.current.push(result.current);
      iterationOverrides[index] = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
      completed += 1;
      if (progress) {
        progress({
          completed,
          elapsedMs: Date.now() - startTime,
          phase: "autosample"
        });
      }
      await sleep(delayMs);
    }
  }
};
|
|
@@ -242,24 +223,41 @@ export const runEngineComparison = async options => {
|
|
|
242
223
|
const effectiveMinTimeMs = minTimeMs / Math.max(1, sampleScale * benchmarkScale);
|
|
243
224
|
const delayMs = config.sampling.delayMs ?? 0;
|
|
244
225
|
const maxRelativeMargin = config.sampling.maxRelativeMargin ?? defaultMaxRelativeMargin;
|
|
245
|
-
const warmups = await warmupBenchmarks(harness, benchmarks, delayMs, options.progress);
|
|
246
|
-
const iterationOverrides = computeIterationOverrides(benchmarks, warmups, effectiveMinTimeMs);
|
|
247
|
-
const samples = warmups.map(warmup => ({
|
|
248
|
-
baseline: [warmup.baseline],
|
|
249
|
-
current: [warmup.current]
|
|
250
|
-
}));
|
|
251
226
|
const remainingSamples = Math.max(0, config.sampling.minSamples - 1);
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
const benchmarkResults = benchmarks.
|
|
257
|
-
|
|
258
|
-
|
|
227
|
+
const progressState = {
|
|
228
|
+
completed: 0,
|
|
229
|
+
total: remainingSamples * benchmarks.length
|
|
230
|
+
};
|
|
231
|
+
const benchmarkResults = new Array(benchmarks.length);
|
|
232
|
+
const iterationOverrides = benchmarks.map(() => undefined);
|
|
233
|
+
const benchmarkOrder = buildIndexOrder(benchmarks.length, 0);
|
|
234
|
+
const autoSampleDeadline = Date.now() + config.sampling.timeoutMs;
|
|
235
|
+
for (const index of benchmarkOrder) {
|
|
236
|
+
const benchmark = benchmarks[index];
|
|
237
|
+
const progress = options.progress;
|
|
238
|
+
const warmupSample = await warmupBenchmark(harness, benchmark, index, delayMs, progress ? event => progress({
|
|
239
|
+
...event,
|
|
240
|
+
benchmarkCount: benchmarks.length
|
|
241
|
+
}) : undefined);
|
|
242
|
+
const iterationOverride = computeIterationOverrides([benchmark], [warmupSample], effectiveMinTimeMs)[0];
|
|
243
|
+
iterationOverrides[index] = iterationOverride;
|
|
244
|
+
const bucket = {
|
|
245
|
+
baseline: [warmupSample.baseline],
|
|
246
|
+
current: [warmupSample.current]
|
|
247
|
+
};
|
|
248
|
+
if (remainingSamples > 0) {
|
|
249
|
+
await collectSamplesForBenchmark(harness, benchmark, index, remainingSamples, iterationOverrides, delayMs, effectiveMinTimeMs, bucket, progress, progressState);
|
|
250
|
+
}
|
|
251
|
+
const remainingTimeoutMs = Math.max(0, autoSampleDeadline - Date.now());
|
|
252
|
+
if (remainingTimeoutMs > 0) {
|
|
253
|
+
await autoSampleForBenchmark(harness, benchmark, index, bucket, config.sampling.conditions, maxRelativeMargin, iterationOverrides, delayMs, effectiveMinTimeMs, progress, remainingTimeoutMs);
|
|
254
|
+
}
|
|
255
|
+
const baselineSamples = bucket.baseline;
|
|
256
|
+
const currentSamples = bucket.current;
|
|
259
257
|
const baselineStats = summaryStats(baselineSamples);
|
|
260
258
|
const currentStats = summaryStats(currentSamples);
|
|
261
259
|
const difference = computeRelativeDifferenceFromSamples(baselineSamples, currentSamples);
|
|
262
|
-
|
|
260
|
+
benchmarkResults[index] = {
|
|
263
261
|
benchmark,
|
|
264
262
|
difference,
|
|
265
263
|
samples: {
|
|
@@ -271,7 +269,7 @@ export const runEngineComparison = async options => {
|
|
|
271
269
|
current: currentStats
|
|
272
270
|
}
|
|
273
271
|
};
|
|
274
|
-
}
|
|
272
|
+
}
|
|
275
273
|
return {
|
|
276
274
|
benchmarks: benchmarkResults,
|
|
277
275
|
engine
|
package/lib/stats.js
CHANGED
|
@@ -7,6 +7,102 @@ export const relativeMarginOfError = stats => {
|
|
|
7
7
|
return Math.abs(margin / stats.mean);
|
|
8
8
|
};
|
|
9
9
|
/**
 * Adds up every element of `values`.
 *
 * @param {number[]} values - Numbers to total.
 * @returns {number} The running total, or `0` for an empty array.
 */
const sumOf = values => {
  let total = 0;
  values.forEach(value => {
    total += value;
  });
  return total;
};
|
|
10
|
+
/**
 * Returns a new array holding `values` in ascending numeric order.
 * The input is left untouched.
 *
 * @param {Iterable<number>} values - Numbers to order.
 * @returns {number[]} A sorted copy.
 */
const sortNumbers = values => {
  const copy = [...values];
  // An explicit comparator is required: the default sort compares as strings.
  copy.sort((left, right) => left - right);
  return copy;
};
|
|
11
|
+
/**
 * Returns the median of an array that is already sorted in ascending order.
 * The array is not sorted or validated here; an unsorted input yields the
 * middle element(s) by position, not the true median.
 *
 * @param {number[]} values - Pre-sorted numbers.
 * @returns {number} The middle element for odd lengths, or the average of the
 *   two middle elements for even lengths.
 * @throws {Error} When `values` is empty.
 */
const medianOfSorted = values => {
  if (values.length === 0) {
    throw new Error("Cannot compute median of an empty sample set.");
  }
  const mid = Math.floor(values.length / 2);
  if (values.length % 2 === 1) {
    return values[mid];
  }
  return (values[mid - 1] + values[mid]) / 2;
};
|
|
21
|
+
/**
 * Builds the list of pairwise averages `(values[i] + values[j]) / 2` for every
 * index pair with `i <= j` (each element is also paired with itself).
 *
 * For `n` inputs the result holds `n * (n + 1) / 2` entries, so memory and time
 * grow quadratically with the sample count.
 *
 * @param {number[]} values - Input samples.
 * @returns {number[]} Pairwise averages, in row-major pair order (not sorted).
 */
const walshAverages = values => {
  const averages = [];
  for (let i = 0; i < values.length; i += 1) {
    const base = values[i];
    // Start at `i` so the self-pair is included and no pair is counted twice.
    for (let j = i; j < values.length; j += 1) {
      averages.push((base + values[j]) / 2);
    }
  }
  return averages;
};
|
|
31
|
+
/**
 * Approximates the quantile function (inverse CDF) of the standard normal
 * distribution using a piecewise rational approximation: one rational function
 * for the central region and another for each tail.
 *
 * NOTE(review): the coefficients match Peter Acklam's published inverse-normal
 * algorithm — confirm against the reference if they are ever edited.
 *
 * @param {number} prob - Probability strictly between 0 and 1.
 * @returns {number} The z-value whose cumulative probability is approximately `prob`.
 * @throws {Error} When `prob` is not strictly inside (0, 1), including NaN.
 */
const normalQuantile = prob => {
  // Negated form so that NaN is rejected instead of silently producing NaN.
  if (!(prob > 0 && prob < 1)) {
    throw new Error("Probability must be between 0 and 1.");
  }
  // Central-region numerator (a*) and denominator (b*) coefficients.
  const a1 = -39.69683028665376;
  const a2 = 220.9460984245205;
  const a3 = -275.9285104469687;
  const a4 = 138.357751867269;
  const a5 = -30.66479806614716;
  const a6 = 2.506628277459239;
  const b1 = -54.47609879822406;
  const b2 = 161.5858368580409;
  const b3 = -155.6989798598866;
  const b4 = 66.80131188771972;
  const b5 = -13.28068155288572;
  // Tail numerator (c*) and denominator (d*) coefficients.
  const c1 = -0.007784894002430293;
  const c2 = -0.3223964580411365;
  const c3 = -2.400758277161838;
  const c4 = -2.549732539343734;
  const c5 = 4.374664141464968;
  const c6 = 2.938163982698783;
  const d1 = 0.007784695709041462;
  const d2 = 0.3224671290700398;
  const d3 = 2.445134137142996;
  const d4 = 3.754408661907416;
  // Break-points between the tail and central approximations.
  const plow = 0.02425;
  const phigh = 1 - plow;
  if (prob < plow) {
    // Lower tail.
    const q = Math.sqrt(-2 * Math.log(prob));
    return (((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1);
  }
  if (prob > phigh) {
    // Upper tail: mirror of the lower tail evaluated at 1 - prob.
    const q = Math.sqrt(-2 * Math.log(1 - prob));
    return -(((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1);
  }
  // Central region.
  const q = prob - 0.5;
  const r = q * q;
  return (((((a1 * r + a2) * r + a3) * r + a4) * r + a5) * r + a6) * q / (((((b1 * r + b2) * r + b3) * r + b4) * r + b5) * r + 1);
};
|
|
70
|
+
/**
 * Picks a confidence interval out of a sorted list of Walsh averages by
 * selecting two order statistics symmetric around the middle of the list.
 *
 * The index offset is derived from a normal approximation using
 * `size * (size + 1) / 4` as the mean and `size * (size + 1) * (2 * size + 1) / 24`
 * as the variance. NOTE(review): these are the null-distribution moments of the
 * Wilcoxon signed-rank statistic — confirm this is the intended construction.
 *
 * @param {number[]} sortedWalsh - Walsh averages in ascending order.
 * @param {number} size - Number of original samples the averages were built from.
 * @param {number} [alpha=0.05] - Two-sided significance level; the interval
 *   targets `1 - alpha` coverage. Must lie strictly between 0 and 2 so that
 *   `1 - alpha / 2` is a valid probability for `normalQuantile`.
 * @returns {{high: number, low: number}} Interval bounds. For `size <= 1` both
 *   bounds collapse to the median of `sortedWalsh`.
 * @throws {Error} When `sortedWalsh` is empty.
 */
const hodgesLehmannConfidenceInterval = (sortedWalsh, size, alpha = 0.05) => {
  if (sortedWalsh.length === 0) {
    throw new Error("Cannot compute confidence interval for empty samples.");
  }
  if (size <= 1) {
    // A single sample carries no spread information: return a degenerate interval.
    const estimate = medianOfSorted(sortedWalsh);
    return {
      high: estimate,
      low: estimate
    };
  }
  const walshCount = sortedWalsh.length;
  const meanRank = size * (size + 1) / 4;
  const varianceRank = size * (size + 1) * (2 * size + 1) / 24;
  const stdDevRank = Math.sqrt(varianceRank);
  const z = normalQuantile(1 - alpha / 2);
  // May be negative for small `size`; the clamps below then select the full range.
  const critical = Math.floor(meanRank - z * stdDevRank);
  const lowIndex = Math.max(0, Math.min(walshCount - 1, critical));
  // Mirror of `lowIndex` from the top end, never allowed to fall below it.
  const highIndex = Math.max(lowIndex, Math.min(walshCount - 1, walshCount - critical - 1));
  return {
    high: sortedWalsh[highIndex],
    low: sortedWalsh[lowIndex]
  };
};
|
|
95
|
+
/**
 * Summarises a sample set via its Walsh averages: the point estimate is the
 * median of the sorted Walsh averages and the interval comes from
 * `hodgesLehmannConfidenceInterval`.
 *
 * The point estimate is returned under the key `mean` even though it is a
 * median of pairwise averages rather than an arithmetic mean.
 *
 * @param {number[]} values - Input samples.
 * @returns {{ci: {high: number, low: number}, mean: number}} Interval and point estimate.
 * @throws {Error} When `values` is empty.
 */
const hodgesLehmannStats = values => {
  if (values.length === 0) {
    throw new Error("Cannot compute stats for an empty sample set.");
  }
  const walsh = walshAverages(values);
  const sortedWalsh = sortNumbers(walsh);
  return {
    ci: hodgesLehmannConfidenceInterval(sortedWalsh, values.length),
    mean: medianOfSorted(sortedWalsh)
  };
};
|
|
10
106
|
const squareResiduals = (values, mean) => values.map(value => {
|
|
11
107
|
const diff = value - mean;
|
|
12
108
|
return diff * diff;
|
|
@@ -99,14 +195,7 @@ const computePairedRelativeStats = (baselineSamples, currentSamples) => {
|
|
|
99
195
|
}
|
|
100
196
|
diffs.push((currentSamples[index] - baseline) / baseline);
|
|
101
197
|
}
|
|
102
|
-
|
|
103
|
-
return {
|
|
104
|
-
ci: confidenceInterval95(samplingDistributionOfTheMean({
|
|
105
|
-
mean: diffStats.mean,
|
|
106
|
-
variance: diffStats.variance
|
|
107
|
-
}, diffStats.size), diffStats.size),
|
|
108
|
-
mean: diffStats.mean
|
|
109
|
-
};
|
|
198
|
+
return hodgesLehmannStats(diffs);
|
|
110
199
|
};
|
|
111
200
|
const computePairedAbsoluteStats = (baselineSamples, currentSamples) => {
|
|
112
201
|
const size = Math.min(baselineSamples.length, currentSamples.length);
|
|
@@ -117,14 +206,7 @@ const computePairedAbsoluteStats = (baselineSamples, currentSamples) => {
|
|
|
117
206
|
for (let index = 0; index < size; index += 1) {
|
|
118
207
|
diffs.push(currentSamples[index] - baselineSamples[index]);
|
|
119
208
|
}
|
|
120
|
-
|
|
121
|
-
return {
|
|
122
|
-
ci: confidenceInterval95(samplingDistributionOfTheMean({
|
|
123
|
-
mean: diffStats.mean,
|
|
124
|
-
variance: diffStats.variance
|
|
125
|
-
}, diffStats.size), diffStats.size),
|
|
126
|
-
mean: diffStats.mean
|
|
127
|
-
};
|
|
209
|
+
return hodgesLehmannStats(diffs);
|
|
128
210
|
};
|
|
129
211
|
export const computeRelativeDifferenceFromSamples = (baselineSamples, currentSamples) => ({
|
|
130
212
|
absolute: computePairedAbsoluteStats(baselineSamples, currentSamples),
|