perfshield 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/lib/runner.js +93 -95
  2. package/lib/stats.js +98 -16
  3. package/package.json +1 -1
package/lib/runner.js CHANGED
@@ -37,43 +37,37 @@ const sleep = async delayMs => {
37
37
  setTimeout(resolve, delayMs);
38
38
  });
39
39
  };
40
- const warmupBenchmarks = async (harness, benchmarks, delayMs, progress) => {
41
- const warmups = [];
42
- for (let index = 0; index < benchmarks.length; index += 1) {
43
- const descriptor = benchmarks[index];
44
- const order = getVersionOrder(index);
45
- let baselineSample;
46
- let currentSample;
47
- for (const version of order) {
48
- const result = await harness.runSample({
49
- index,
50
- iterations: descriptor.iterations,
51
- version
52
- });
53
- if (version === "baseline") {
54
- baselineSample = result.durationMs;
55
- } else {
56
- currentSample = result.durationMs;
57
- }
58
- }
59
- if (baselineSample == null || currentSample == null) {
60
- throw new Error("Warmup did not collect baseline/current samples.");
61
- }
62
- warmups.push({
63
- baseline: baselineSample,
64
- current: currentSample
40
+ const warmupBenchmark = async (harness, benchmark, index, delayMs, progress) => {
41
+ const order = getVersionOrder(index);
42
+ let baselineSample;
43
+ let currentSample;
44
+ for (const version of order) {
45
+ const result = await harness.runSample({
46
+ index,
47
+ iterations: benchmark.iterations,
48
+ version
65
49
  });
66
- if (progress) {
67
- progress({
68
- benchmarkCount: benchmarks.length,
69
- benchmarkIndex: index,
70
- benchmarkName: descriptor.name,
71
- phase: "warmup"
72
- });
50
+ if (version === "baseline") {
51
+ baselineSample = result.durationMs;
52
+ } else {
53
+ currentSample = result.durationMs;
73
54
  }
74
- await sleep(delayMs);
75
55
  }
76
- return warmups;
56
+ if (baselineSample == null || currentSample == null) {
57
+ throw new Error("Warmup did not collect baseline/current samples.");
58
+ }
59
+ if (progress) {
60
+ progress({
61
+ benchmarkIndex: index,
62
+ benchmarkName: benchmark.name,
63
+ phase: "warmup"
64
+ });
65
+ }
66
+ await sleep(delayMs);
67
+ return {
68
+ baseline: baselineSample,
69
+ current: currentSample
70
+ };
77
71
  };
78
72
  const computeIterationOverrides = (benchmarks, warmups, minTimeMs) => {
79
73
  if (minTimeMs <= 0) {
@@ -138,36 +132,26 @@ const runSamplePair = async (harness, index, iterations, order) => {
138
132
  current: currentSample
139
133
  };
140
134
  };
141
- const collectSamples = async (harness, benchmarks, minSamples, iterationOverrides, delayMs, minTimeMs, samples, progress) => {
142
- const buckets = samples ?? benchmarks.map(() => ({
143
- baseline: [],
144
- current: []
145
- }));
146
- let completed = 0;
147
- const total = minSamples * benchmarks.length;
135
+ const collectSamplesForBenchmark = async (harness, benchmark, index, minSamples, iterationOverrides, delayMs, minTimeMs, bucket, progress, progressState) => {
148
136
  for (let iteration = 0; iteration < minSamples; iteration += 1) {
149
137
  const order = getVersionOrder(iteration);
150
- const indexOrder = buildIndexOrder(benchmarks.length, iteration);
151
- for (const index of indexOrder) {
152
- const iterations = iterationOverrides[index];
153
- const minimumIterations = benchmarks[index].iterations ?? 1;
154
- const result = await runSamplePair(harness, index, iterations, order);
155
- buckets[index].baseline.push(result.baseline);
156
- buckets[index].current.push(result.current);
157
- const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
158
- iterationOverrides[index] = nextIterations;
159
- completed += 1;
160
- if (progress) {
161
- progress({
162
- completed,
163
- phase: "samples",
164
- total
165
- });
166
- }
167
- await sleep(delayMs);
138
+ const iterations = iterationOverrides[index];
139
+ const minimumIterations = benchmark.iterations ?? 1;
140
+ const result = await runSamplePair(harness, index, iterations, order);
141
+ bucket.baseline.push(result.baseline);
142
+ bucket.current.push(result.current);
143
+ const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
144
+ iterationOverrides[index] = nextIterations;
145
+ if (progress && progressState) {
146
+ progressState.completed += 1;
147
+ progress({
148
+ completed: progressState.completed,
149
+ phase: "samples",
150
+ total: progressState.total
151
+ });
168
152
  }
153
+ await sleep(delayMs);
169
154
  }
170
- return buckets;
171
155
  };
172
156
  const intervalContains = (interval, value) => interval.low <= value && value <= interval.high;
173
157
  const autoSampleResolved = (samples, conditions, maxRelativeMargin) => samples.every(bucket => {
@@ -189,36 +173,33 @@ const autoSampleResolved = (samples, conditions, maxRelativeMargin) => samples.e
189
173
  }
190
174
  return true;
191
175
  });
192
- const autoSample = async (harness, benchmarks, samples, conditions, maxRelativeMargin, iterationOverrides, delayMs, minTimeMs, progress, timeoutMs) => {
176
+ const autoSampleForBenchmark = async (harness, benchmark, index, bucket, conditions, maxRelativeMargin, iterationOverrides, delayMs, minTimeMs, progress, timeoutMs) => {
193
177
  const startTime = Date.now();
194
178
  let roundRobinSeed = 0;
195
179
  let completed = 0;
196
180
  while (Date.now() - startTime < timeoutMs) {
197
- if (autoSampleResolved(samples, conditions, maxRelativeMargin)) {
181
+ if (autoSampleResolved([bucket], conditions, maxRelativeMargin)) {
198
182
  return;
199
183
  }
200
184
  for (let batch = 0; batch < autoSampleBatchSize; batch += 1) {
201
185
  const order = getVersionOrder(roundRobinSeed);
202
- const indexOrder = buildIndexOrder(benchmarks.length, roundRobinSeed);
203
186
  roundRobinSeed += 1;
204
- for (const index of indexOrder) {
205
- const iterations = iterationOverrides[index];
206
- const minimumIterations = benchmarks[index].iterations ?? 1;
207
- const result = await runSamplePair(harness, index, iterations, order);
208
- samples[index].baseline.push(result.baseline);
209
- samples[index].current.push(result.current);
210
- const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
211
- iterationOverrides[index] = nextIterations;
212
- completed += 1;
213
- if (progress) {
214
- progress({
215
- completed,
216
- elapsedMs: Date.now() - startTime,
217
- phase: "autosample"
218
- });
219
- }
220
- await sleep(delayMs);
187
+ const iterations = iterationOverrides[index];
188
+ const minimumIterations = benchmark.iterations ?? 1;
189
+ const result = await runSamplePair(harness, index, iterations, order);
190
+ bucket.baseline.push(result.baseline);
191
+ bucket.current.push(result.current);
192
+ const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
193
+ iterationOverrides[index] = nextIterations;
194
+ completed += 1;
195
+ if (progress) {
196
+ progress({
197
+ completed,
198
+ elapsedMs: Date.now() - startTime,
199
+ phase: "autosample"
200
+ });
221
201
  }
202
+ await sleep(delayMs);
222
203
  }
223
204
  }
224
205
  };
@@ -242,24 +223,41 @@ export const runEngineComparison = async options => {
242
223
  const effectiveMinTimeMs = minTimeMs / Math.max(1, sampleScale * benchmarkScale);
243
224
  const delayMs = config.sampling.delayMs ?? 0;
244
225
  const maxRelativeMargin = config.sampling.maxRelativeMargin ?? defaultMaxRelativeMargin;
245
- const warmups = await warmupBenchmarks(harness, benchmarks, delayMs, options.progress);
246
- const iterationOverrides = computeIterationOverrides(benchmarks, warmups, effectiveMinTimeMs);
247
- const samples = warmups.map(warmup => ({
248
- baseline: [warmup.baseline],
249
- current: [warmup.current]
250
- }));
251
226
  const remainingSamples = Math.max(0, config.sampling.minSamples - 1);
252
- if (remainingSamples > 0) {
253
- await collectSamples(harness, benchmarks, remainingSamples, iterationOverrides, delayMs, effectiveMinTimeMs, samples, options.progress);
254
- }
255
- await autoSample(harness, benchmarks, samples, config.sampling.conditions, maxRelativeMargin, iterationOverrides, delayMs, effectiveMinTimeMs, options.progress, config.sampling.timeoutMs);
256
- const benchmarkResults = benchmarks.map((benchmark, index) => {
257
- const baselineSamples = samples[index].baseline;
258
- const currentSamples = samples[index].current;
227
+ const progressState = {
228
+ completed: 0,
229
+ total: remainingSamples * benchmarks.length
230
+ };
231
+ const benchmarkResults = new Array(benchmarks.length);
232
+ const iterationOverrides = benchmarks.map(() => undefined);
233
+ const benchmarkOrder = buildIndexOrder(benchmarks.length, 0);
234
+ const autoSampleDeadline = Date.now() + config.sampling.timeoutMs;
235
+ for (const index of benchmarkOrder) {
236
+ const benchmark = benchmarks[index];
237
+ const progress = options.progress;
238
+ const warmupSample = await warmupBenchmark(harness, benchmark, index, delayMs, progress ? event => progress({
239
+ ...event,
240
+ benchmarkCount: benchmarks.length
241
+ }) : undefined);
242
+ const iterationOverride = computeIterationOverrides([benchmark], [warmupSample], effectiveMinTimeMs)[0];
243
+ iterationOverrides[index] = iterationOverride;
244
+ const bucket = {
245
+ baseline: [warmupSample.baseline],
246
+ current: [warmupSample.current]
247
+ };
248
+ if (remainingSamples > 0) {
249
+ await collectSamplesForBenchmark(harness, benchmark, index, remainingSamples, iterationOverrides, delayMs, effectiveMinTimeMs, bucket, progress, progressState);
250
+ }
251
+ const remainingTimeoutMs = Math.max(0, autoSampleDeadline - Date.now());
252
+ if (remainingTimeoutMs > 0) {
253
+ await autoSampleForBenchmark(harness, benchmark, index, bucket, config.sampling.conditions, maxRelativeMargin, iterationOverrides, delayMs, effectiveMinTimeMs, progress, remainingTimeoutMs);
254
+ }
255
+ const baselineSamples = bucket.baseline;
256
+ const currentSamples = bucket.current;
259
257
  const baselineStats = summaryStats(baselineSamples);
260
258
  const currentStats = summaryStats(currentSamples);
261
259
  const difference = computeRelativeDifferenceFromSamples(baselineSamples, currentSamples);
262
- return {
260
+ benchmarkResults[index] = {
263
261
  benchmark,
264
262
  difference,
265
263
  samples: {
@@ -271,7 +269,7 @@ export const runEngineComparison = async options => {
271
269
  current: currentStats
272
270
  }
273
271
  };
274
- });
272
+ }
275
273
  return {
276
274
  benchmarks: benchmarkResults,
277
275
  engine
package/lib/stats.js CHANGED
@@ -7,6 +7,102 @@ export const relativeMarginOfError = stats => {
7
7
  return Math.abs(margin / stats.mean);
8
8
  };
9
9
  const sumOf = values => values.reduce((total, value) => total + value, 0);
10
+ const sortNumbers = values => [...values].sort((a, b) => a - b);
11
+ const medianOfSorted = values => {
12
+ if (values.length === 0) {
13
+ throw new Error("Cannot compute median of an empty sample set.");
14
+ }
15
+ const mid = Math.floor(values.length / 2);
16
+ if (values.length % 2 === 1) {
17
+ return values[mid];
18
+ }
19
+ return (values[mid - 1] + values[mid]) / 2;
20
+ };
21
+ const walshAverages = values => {
22
+ const averages = [];
23
+ for (let i = 0; i < values.length; i += 1) {
24
+ const base = values[i];
25
+ for (let j = i; j < values.length; j += 1) {
26
+ averages.push((base + values[j]) / 2);
27
+ }
28
+ }
29
+ return averages;
30
+ };
31
+ const normalQuantile = prob => {
32
+ if (prob <= 0 || prob >= 1) {
33
+ throw new Error("Probability must be between 0 and 1.");
34
+ }
35
+ const a1 = Number.parseFloat("-39.69683028665376");
36
+ const a2 = Number.parseFloat("220.9460984245205");
37
+ const a3 = Number.parseFloat("-275.9285104469687");
38
+ const a4 = Number.parseFloat("138.357751867269");
39
+ const a5 = Number.parseFloat("-30.66479806614716");
40
+ const a6 = Number.parseFloat("2.506628277459239");
41
+ const b1 = Number.parseFloat("-54.47609879822406");
42
+ const b2 = Number.parseFloat("161.5858368580409");
43
+ const b3 = Number.parseFloat("-155.6989798598866");
44
+ const b4 = Number.parseFloat("66.80131188771972");
45
+ const b5 = Number.parseFloat("-13.28068155288572");
46
+ const c1 = Number.parseFloat("-0.007784894002430293");
47
+ const c2 = Number.parseFloat("-0.3223964580411365");
48
+ const c3 = Number.parseFloat("-2.400758277161838");
49
+ const c4 = Number.parseFloat("-2.549732539343734");
50
+ const c5 = Number.parseFloat("4.374664141464968");
51
+ const c6 = Number.parseFloat("2.938163982698783");
52
+ const d1 = Number.parseFloat("0.007784695709041462");
53
+ const d2 = Number.parseFloat("0.3224671290700398");
54
+ const d3 = Number.parseFloat("2.445134137142996");
55
+ const d4 = Number.parseFloat("3.754408661907416");
56
+ const plow = Number.parseFloat("0.02425");
57
+ const phigh = 1 - plow;
58
+ if (prob < plow) {
59
+ const q = Math.sqrt(-2 * Math.log(prob));
60
+ return (((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1);
61
+ }
62
+ if (prob > phigh) {
63
+ const q = Math.sqrt(-2 * Math.log(1 - prob));
64
+ return -(((((c1 * q + c2) * q + c3) * q + c4) * q + c5) * q + c6) / ((((d1 * q + d2) * q + d3) * q + d4) * q + 1);
65
+ }
66
+ const q = prob - 0.5;
67
+ const r = q * q;
68
+ return (((((a1 * r + a2) * r + a3) * r + a4) * r + a5) * r + a6) * q / (((((b1 * r + b2) * r + b3) * r + b4) * r + b5) * r + 1);
69
+ };
70
+ const hodgesLehmannConfidenceInterval = (sortedWalsh, size) => {
71
+ if (sortedWalsh.length === 0) {
72
+ throw new Error("Cannot compute confidence interval for empty samples.");
73
+ }
74
+ if (size <= 1) {
75
+ const estimate = medianOfSorted(sortedWalsh);
76
+ return {
77
+ high: estimate,
78
+ low: estimate
79
+ };
80
+ }
81
+ const walshCount = sortedWalsh.length;
82
+ const alpha = 0.05;
83
+ const meanRank = size * (size + 1) / 4;
84
+ const varianceRank = size * (size + 1) * (2 * size + 1) / 24;
85
+ const stdDevRank = Math.sqrt(varianceRank);
86
+ const z = normalQuantile(1 - alpha / 2);
87
+ const critical = Math.floor(meanRank - z * stdDevRank);
88
+ const lowIndex = Math.max(0, Math.min(walshCount - 1, critical));
89
+ const highIndex = Math.max(lowIndex, Math.min(walshCount - 1, walshCount - critical - 1));
90
+ return {
91
+ high: sortedWalsh[highIndex],
92
+ low: sortedWalsh[lowIndex]
93
+ };
94
+ };
95
+ const hodgesLehmannStats = values => {
96
+ if (values.length === 0) {
97
+ throw new Error("Cannot compute stats for an empty sample set.");
98
+ }
99
+ const walsh = walshAverages(values);
100
+ const sortedWalsh = sortNumbers(walsh);
101
+ return {
102
+ ci: hodgesLehmannConfidenceInterval(sortedWalsh, values.length),
103
+ mean: medianOfSorted(sortedWalsh)
104
+ };
105
+ };
10
106
  const squareResiduals = (values, mean) => values.map(value => {
11
107
  const diff = value - mean;
12
108
  return diff * diff;
@@ -99,14 +195,7 @@ const computePairedRelativeStats = (baselineSamples, currentSamples) => {
99
195
  }
100
196
  diffs.push((currentSamples[index] - baseline) / baseline);
101
197
  }
102
- const diffStats = summaryStats(diffs);
103
- return {
104
- ci: confidenceInterval95(samplingDistributionOfTheMean({
105
- mean: diffStats.mean,
106
- variance: diffStats.variance
107
- }, diffStats.size), diffStats.size),
108
- mean: diffStats.mean
109
- };
198
+ return hodgesLehmannStats(diffs);
110
199
  };
111
200
  const computePairedAbsoluteStats = (baselineSamples, currentSamples) => {
112
201
  const size = Math.min(baselineSamples.length, currentSamples.length);
@@ -117,14 +206,7 @@ const computePairedAbsoluteStats = (baselineSamples, currentSamples) => {
117
206
  for (let index = 0; index < size; index += 1) {
118
207
  diffs.push(currentSamples[index] - baselineSamples[index]);
119
208
  }
120
- const diffStats = summaryStats(diffs);
121
- return {
122
- ci: confidenceInterval95(samplingDistributionOfTheMean({
123
- mean: diffStats.mean,
124
- variance: diffStats.variance
125
- }, diffStats.size), diffStats.size),
126
- mean: diffStats.mean
127
- };
209
+ return hodgesLehmannStats(diffs);
128
210
  };
129
211
  export const computeRelativeDifferenceFromSamples = (baselineSamples, currentSamples) => ({
130
212
  absolute: computePairedAbsoluteStats(baselineSamples, currentSamples),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "perfshield",
3
- "version": "0.0.8",
3
+ "version": "0.0.10",
4
4
  "description": "A tool for doing web benchmarking across multiple JS engines and with statistical significance",
5
5
  "license": "MIT",
6
6
  "type": "module",