perfshield 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cli.js CHANGED
@@ -1,4 +1,5 @@
 #!/usr/bin/env node
+import { WriteStream } from "node:tty";
 import { ensureBaseline, saveBaseline } from "./artifacts.js";
 import { runBuild } from "./build.js";
 import { ConfigError, formatConfigError, loadConfig } from "./config.js";
@@ -20,17 +21,73 @@ const runPrepare = async config => {
   const baselinePath = await saveBaseline(config, outputPath);
   console.log(`Baseline saved to ${baselinePath}`);
 };
+const isStderrTTY = () => {
+  return process.stderr instanceof WriteStream;
+};
+const createProgressReporter = engineName => {
+  let lastWrite = 0;
+  let lastLine = "";
+  const isTTY = isStderrTTY();
+  const writeLine = line => {
+    if (isTTY) {
+      const padded = line.padEnd(80, " ");
+      process.stderr.write(`\r${padded}`);
+    } else if (line !== lastLine) {
+      process.stderr.write(`${line}\n`);
+    }
+    lastLine = line;
+  };
+  const report = event => {
+    const now = Date.now();
+    if (isTTY && now - lastWrite < 100) {
+      return;
+    }
+    lastWrite = now;
+    let message = "";
+    if (event.phase === "warmup") {
+      const current = (event.benchmarkIndex ?? 0) + 1;
+      const total = event.benchmarkCount ?? 0;
+      const name = event.benchmarkName == null ? "" : ` ${event.benchmarkName}`;
+      message = `Warmup ${current}/${total}${name} (${engineName})`;
+    } else if (event.phase === "samples") {
+      const completed = event.completed ?? 0;
+      const total = event.total ?? 0;
+      const percent = total > 0 ? Math.round(completed / total * 100) : 0;
+      message = `Sampling ${completed}/${total} (${percent}%) (${engineName})`;
+    } else {
+      const completed = event.completed ?? 0;
+      const elapsed = event.elapsedMs != null ? Math.round(event.elapsedMs / 1000) : 0;
+      message = `Auto-sampling ${completed} samples (${elapsed}s) (${engineName})`;
+    }
+    writeLine(message);
+  };
+  const finish = () => {
+    if (isTTY && lastLine) {
+      process.stderr.write("\n");
+    }
+  };
+  return {
+    finish,
+    report
+  };
+};
 const runCompare = async config => {
   const outputPath = await runBuild(config.build);
   const baselinePath = await ensureBaseline(config);
   const results = [];
+  const showProgress = config.report.formats.includes("console") || isStderrTTY();
   for (const engine of config.engines) {
+    const progress = showProgress ? createProgressReporter(engine.name) : null;
     results.push(await runEngineComparison({
       baselinePath,
       config,
       currentPath: outputPath,
-      engine
+      engine,
+      progress: progress ? progress.report : undefined
     }));
+    if (progress) {
+      progress.finish();
+    }
   }
   const outputs = renderReports(results, config.report.formats);
   for (const output of outputs) {
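The reporter throttles TTY redraws to one carriage-return rewrite per 100 ms and falls back to deduplicated, newline-delimited output when stderr is not a TTY. A minimal sketch of how `runCompare` drives it, with invented event values:

```js
// Sketch: driving the progress reporter above (event values invented).
const progress = createProgressReporter("node");
progress.report({ phase: "warmup", benchmarkIndex: 0, benchmarkCount: 3, benchmarkName: "parse" });
progress.report({ phase: "samples", completed: 12, total: 60 });
progress.report({ phase: "autosample", completed: 87, elapsedMs: 4200 });
progress.finish(); // on a TTY, terminates the in-place line with "\n"
```

On a TTY the middle calls may be dropped by the 100 ms throttle; on a plain pipe each distinct line prints exactly once.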
package/lib/config.js CHANGED
@@ -163,7 +163,7 @@ const parseSamplingConfig = (value, issues) => {
   if (!sampling) {
     return null;
   }
-  validateKeys(sampling, ["conditions", "minSamples", "timeoutMs"], "config.sampling", issues);
+  validateKeys(sampling, ["conditions", "delayMs", "maxRelativeMargin", "minSamples", "minTimeMs", "timeoutMs"], "config.sampling", issues);
   const minSamples = asNumber(sampling.minSamples, "config.sampling.minSamples", issues, {
     integer: true,
     min: 1
@@ -172,15 +172,32 @@
     integer: true,
     min: 1
   });
+  const minTimeMs = sampling.minTimeMs == null ? undefined : asNumber(sampling.minTimeMs, "config.sampling.minTimeMs", issues, {
+    integer: true,
+    min: 0
+  });
+  const delayMs = sampling.delayMs == null ? undefined : asNumber(sampling.delayMs, "config.sampling.delayMs", issues, {
+    integer: true,
+    min: 0
+  });
+  const maxRelativeMargin = sampling.maxRelativeMargin == null ? undefined : asNumber(sampling.maxRelativeMargin, "config.sampling.maxRelativeMargin", issues, {
+    min: 0
+  });
+  if (maxRelativeMargin != null && maxRelativeMargin > 1) {
+    addIssue(issues, "config.sampling.maxRelativeMargin must be at most 1.");
+  }
   const conditions = asNumberArray(sampling.conditions, "config.sampling.conditions", issues, {
     minLength: 1
   });
-  if (minSamples == null || timeoutMs == null || !conditions) {
+  if (minSamples == null || timeoutMs == null || !conditions || minTimeMs === null || delayMs === null || maxRelativeMargin === null) {
     return null;
   }
   return {
     conditions,
+    delayMs,
+    maxRelativeMargin,
     minSamples,
+    minTimeMs,
     timeoutMs
   };
 };
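For reference, a `config.sampling` block exercising every key the validator now accepts might look like the sketch below; the keys and bounds come from the parser above, while the surrounding config shape (a default-exported object) is an assumption.

```js
// Hypothetical config exercising the expanded sampling schema.
export default {
  // build, engines, report, and benchmark settings elided...
  sampling: {
    conditions: [0],         // required: thresholds the relative-difference CI must exclude
    minSamples: 20,          // required: integer >= 1
    timeoutMs: 60000,        // required: integer >= 1
    minTimeMs: 20,           // optional: integer >= 0, per-sample duration target
    delayMs: 5,              // optional: integer >= 0, pause between samples
    maxRelativeMargin: 0.05  // optional: number in [0, 1]
  }
};
```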
@@ -10,16 +10,36 @@ const formatRelativeInterval = (interval, decimals) => formatInterval({
   low: interval.low * 100
 }, decimals, "%");
 const formatRelativeValue = (value, decimals) => `${formatNumber(value * 100, decimals)}%`;
+const classifyDifference = interval => {
+  if (interval.low > 0 && interval.high > 0) {
+    return "regression";
+  }
+  if (interval.low < 0 && interval.high < 0) {
+    return "improvement";
+  }
+  return "no significant change";
+};
 export const renderConsoleReport = results => {
   const lines = [];
   for (const result of results) {
+    let regressions = 0;
+    let improvements = 0;
+    let neutral = 0;
     lines.push(`Engine: ${result.engine.name}`);
     for (const entry of result.benchmarks) {
       const unit = entry.benchmark.unit != null ? ` ${entry.benchmark.unit}` : "";
-      const benchmarkLines = [` Benchmark: ${entry.benchmark.name}`, ` baseline mean=${formatNumber(entry.stats.baseline.mean, 4)}${unit} ci=${formatInterval(entry.stats.baseline.meanCI, 4)} sd=${formatNumber(entry.stats.baseline.standardDeviation, 4)}`, ` current mean=${formatNumber(entry.stats.current.mean, 4)}${unit} ci=${formatInterval(entry.stats.current.meanCI, 4)} sd=${formatNumber(entry.stats.current.standardDeviation, 4)}`, ` diff rel mean=${formatRelativeValue(entry.difference.relative.mean, 2)} ci=${formatRelativeInterval(entry.difference.relative.ci, 2)}`];
+      const status = classifyDifference(entry.difference.relative.ci);
+      if (status === "regression") {
+        regressions += 1;
+      } else if (status === "improvement") {
+        improvements += 1;
+      } else {
+        neutral += 1;
+      }
+      const benchmarkLines = [` Benchmark: ${entry.benchmark.name}`, ` Result: ${status} (mean=${formatRelativeValue(entry.difference.relative.mean, 2)} ci=${formatRelativeInterval(entry.difference.relative.ci, 2)})`, ` baseline mean=${formatNumber(entry.stats.baseline.mean, 4)}${unit} ci=${formatInterval(entry.stats.baseline.meanCI, 4)} sd=${formatNumber(entry.stats.baseline.standardDeviation, 4)}`, ` current mean=${formatNumber(entry.stats.current.mean, 4)}${unit} ci=${formatInterval(entry.stats.current.meanCI, 4)} sd=${formatNumber(entry.stats.current.standardDeviation, 4)}`, ` diff rel mean=${formatRelativeValue(entry.difference.relative.mean, 2)} ci=${formatRelativeInterval(entry.difference.relative.ci, 2)}`];
       lines.push(...benchmarkLines);
     }
-    lines.push("");
+    lines.push(` Summary: regressions=${regressions} improvements=${improvements} neutral=${neutral}`, "");
   }
   return lines.join("\n").trimEnd();
 };
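The verdict on each benchmark line is driven purely by whether the 95% confidence interval on the relative difference excludes zero, as a quick illustration (interval values invented):

```js
// classifyDifference keys off the sign of the CI bounds.
classifyDifference({ low: 0.012, high: 0.034 });  // "regression": CI entirely above 0
classifyDifference({ low: -0.05, high: -0.01 });  // "improvement": CI entirely below 0
classifyDifference({ low: -0.01, high: 0.02 });   // "no significant change": CI straddles 0
```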
package/lib/runner.js CHANGED
@@ -4,10 +4,12 @@ import { join, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 import { transformFileAsync } from "@babel/core";
 import { createNodeHarness } from "./engines/node.js";
-import { computeDifference, summaryStats } from "./stats.js";
+import { computeRelativeDifferenceFromSamples, summaryStats } from "./stats.js";
 const versions = ["baseline", "current"];
 const autoSampleBatchSize = 10;
 const harnessTempPrefix = "perfshield-harness-";
+const defaultMinTimeMs = 20;
+const defaultMaxRelativeMargin = 0.05;
 const getVersionOrder = seed => {
   if (seed % 2 === 0) {
     return versions;
@@ -59,81 +61,193 @@ const buildHarnessIfNeeded = async sourcePath => {
     path: harnessPath
   };
 };
-const warmupBenchmarks = async (harness, benchmarks) => {
+const sleep = async delayMs => {
+  if (delayMs <= 0) {
+    return;
+  }
+  await new Promise(resolve => {
+    setTimeout(resolve, delayMs);
+  });
+};
+const warmupBenchmarks = async (harness, benchmarks, delayMs, progress) => {
+  const warmups = [];
   for (let index = 0; index < benchmarks.length; index += 1) {
     const descriptor = benchmarks[index];
-    for (const version of versions) {
-      await harness.runSample({
+    const order = getVersionOrder(index);
+    let baselineSample;
+    let currentSample;
+    for (const version of order) {
+      const result = await harness.runSample({
         index,
         iterations: descriptor.iterations,
         version
       });
+      if (version === "baseline") {
+        baselineSample = result.durationMs;
+      } else {
+        currentSample = result.durationMs;
+      }
+    }
+    if (baselineSample == null || currentSample == null) {
+      throw new Error("Warmup did not collect baseline/current samples.");
+    }
+    warmups.push({
+      baseline: baselineSample,
+      current: currentSample
+    });
+    if (progress) {
+      progress({
+        benchmarkCount: benchmarks.length,
+        benchmarkIndex: index,
+        benchmarkName: descriptor.name,
+        phase: "warmup"
+      });
     }
+    await sleep(delayMs);
   }
+  return warmups;
 };
-const collectSamples = async (harness, benchmarks, minSamples) => {
-  const samples = benchmarks.map(() => ({
+const computeIterationOverrides = (benchmarks, warmups, minTimeMs) => {
+  if (minTimeMs <= 0) {
+    return benchmarks.map(() => undefined);
+  }
+  return benchmarks.map((benchmark, index) => {
+    const iterationsBase = benchmark.iterations ?? 1;
+    const warmup = warmups[index];
+    const baselinePerIter = warmup.baseline / iterationsBase;
+    const currentPerIter = warmup.current / iterationsBase;
+    const maxPerIter = Math.max(baselinePerIter, currentPerIter);
+    if (!Number.isFinite(maxPerIter) || maxPerIter <= 0) {
+      return iterationsBase;
+    }
+    return Math.max(iterationsBase, Math.ceil(minTimeMs / maxPerIter));
+  });
+};
+const updateIterations = (currentIterations, baselineSample, currentSample, minTimeMs, minimumIterations) => {
+  if (minTimeMs <= 0) {
+    return currentIterations;
+  }
+  const perIter = Math.max(baselineSample, currentSample) / currentIterations;
+  if (!Number.isFinite(perIter) || perIter <= 0) {
+    return currentIterations;
+  }
+  return Math.max(minimumIterations, Math.ceil(minTimeMs / perIter));
+};
+const relativeMarginFromDifference = difference => {
+  const mean = difference.relative.mean;
+  if (mean === 0) {
+    return Number.POSITIVE_INFINITY;
+  }
+  const margin = Math.abs(difference.relative.ci.high - mean);
+  return Math.abs(margin / mean);
+};
+const buildSamplePayload = (index, version, iterations) => {
+  const payload = {
+    index,
+    version
+  };
+  if (iterations != null) {
+    payload.iterations = iterations;
+  }
+  return payload;
+};
+const runSamplePair = async (harness, index, iterations, order) => {
+  let baselineSample;
+  let currentSample;
+  for (const version of order) {
+    const result = await harness.runSample(buildSamplePayload(index, version, iterations));
+    if (version === "baseline") {
+      baselineSample = result.durationMs;
+    } else {
+      currentSample = result.durationMs;
+    }
+  }
+  if (baselineSample == null || currentSample == null) {
+    throw new Error("Sample pair did not include baseline/current.");
+  }
+  return {
+    baseline: baselineSample,
+    current: currentSample
+  };
+};
+const collectSamples = async (harness, benchmarks, minSamples, iterationOverrides, delayMs, minTimeMs, samples, progress) => {
+  const buckets = samples ?? benchmarks.map(() => ({
     baseline: [],
     current: []
   }));
-  let roundRobinSeed = 0;
+  let completed = 0;
+  const total = minSamples * benchmarks.length;
   for (let iteration = 0; iteration < minSamples; iteration += 1) {
+    const order = getVersionOrder(iteration);
     for (let index = 0; index < benchmarks.length; index += 1) {
-      const descriptor = benchmarks[index];
-      const order = getVersionOrder(roundRobinSeed);
-      roundRobinSeed += 1;
-      for (const version of order) {
-        const result = await harness.runSample({
-          index,
-          iterations: descriptor.iterations,
-          version
+      const iterations = iterationOverrides[index];
+      const minimumIterations = benchmarks[index].iterations ?? 1;
+      const result = await runSamplePair(harness, index, iterations, order);
+      buckets[index].baseline.push(result.baseline);
+      buckets[index].current.push(result.current);
+      const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
+      iterationOverrides[index] = nextIterations;
+      completed += 1;
+      if (progress) {
+        progress({
+          completed,
+          phase: "samples",
+          total
         });
-        if (version === "baseline") {
-          samples[index].baseline.push(result.durationMs);
-        } else {
-          samples[index].current.push(result.durationMs);
-        }
       }
+      await sleep(delayMs);
     }
   }
-  return samples;
+  return buckets;
 };
 const intervalContains = (interval, value) => interval.low <= value && value <= interval.high;
-const autoSampleResolved = (samples, conditions) => samples.every(bucket => {
-  const baselineStats = summaryStats(bucket.baseline);
-  const currentStats = summaryStats(bucket.current);
-  const diff = computeDifference(baselineStats, currentStats);
+const autoSampleResolved = (samples, conditions, maxRelativeMargin) => samples.every(bucket => {
+  const diff = computeRelativeDifferenceFromSamples(bucket.baseline, bucket.current);
+  const onlyZeroConditions = conditions.every(condition => condition === 0);
+  if (onlyZeroConditions) {
+    const maxAbsBound = Math.max(Math.abs(diff.relative.ci.low), Math.abs(diff.relative.ci.high));
+    if (maxAbsBound <= maxRelativeMargin) {
+      return true;
+    }
+  }
   for (const condition of conditions) {
     if (intervalContains(diff.relative.ci, condition)) {
       return false;
     }
   }
+  if (relativeMarginFromDifference(diff) > maxRelativeMargin) {
+    return false;
+  }
   return true;
 });
-const autoSample = async (harness, benchmarks, samples, conditions, timeoutMs) => {
+const autoSample = async (harness, benchmarks, samples, conditions, maxRelativeMargin, iterationOverrides, delayMs, minTimeMs, progress, timeoutMs) => {
   const startTime = Date.now();
   let roundRobinSeed = 0;
+  let completed = 0;
   while (Date.now() - startTime < timeoutMs) {
-    if (autoSampleResolved(samples, conditions)) {
+    if (autoSampleResolved(samples, conditions, maxRelativeMargin)) {
       return;
     }
     for (let batch = 0; batch < autoSampleBatchSize; batch += 1) {
+      const order = getVersionOrder(roundRobinSeed);
+      roundRobinSeed += 1;
      for (let index = 0; index < benchmarks.length; index += 1) {
-        const descriptor = benchmarks[index];
-        const order = getVersionOrder(roundRobinSeed);
-        roundRobinSeed += 1;
-        for (const version of order) {
-          const result = await harness.runSample({
-            index,
-            iterations: descriptor.iterations,
-            version
+        const iterations = iterationOverrides[index];
+        const minimumIterations = benchmarks[index].iterations ?? 1;
+        const result = await runSamplePair(harness, index, iterations, order);
+        samples[index].baseline.push(result.baseline);
+        samples[index].current.push(result.current);
+        const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
+        iterationOverrides[index] = nextIterations;
+        completed += 1;
+        if (progress) {
+          progress({
+            completed,
+            elapsedMs: Date.now() - startTime,
+            phase: "autosample"
          });
-          if (version === "baseline") {
-            samples[index].baseline.push(result.durationMs);
-          } else {
-            samples[index].current.push(result.durationMs);
-          }
        }
+        await sleep(delayMs);
      }
    }
  }
@@ -146,18 +260,35 @@ export const runEngineComparison = async options => {
     engine
   } = options;
   const harnessArtifact = await buildHarnessIfNeeded(getHarnessPath());
-  const harness = await createNodeHarness(engine, harnessArtifact.path, resolve(baselinePath), resolve(currentPath));
+  const resolvedBaseline = resolve(baselinePath);
+  const resolvedCurrent = resolve(currentPath);
+  const harness = await createNodeHarness(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent);
   try {
     const benchmarks = await harness.listBenchmarks();
-    await warmupBenchmarks(harness, benchmarks);
-    const samples = await collectSamples(harness, benchmarks, config.sampling.minSamples);
-    await autoSample(harness, benchmarks, samples, config.sampling.conditions, config.sampling.timeoutMs);
+    const minTimeMs = config.sampling.minTimeMs ?? defaultMinTimeMs;
+    const minSamples = Math.max(1, config.sampling.minSamples);
+    const sampleScale = Math.sqrt(minSamples / 20);
+    const benchmarkScale = Math.sqrt(Math.max(1, benchmarks.length));
+    const effectiveMinTimeMs = minTimeMs / Math.max(1, sampleScale * benchmarkScale);
+    const delayMs = config.sampling.delayMs ?? 0;
+    const maxRelativeMargin = config.sampling.maxRelativeMargin ?? defaultMaxRelativeMargin;
+    const warmups = await warmupBenchmarks(harness, benchmarks, delayMs, options.progress);
+    const iterationOverrides = computeIterationOverrides(benchmarks, warmups, effectiveMinTimeMs);
+    const samples = warmups.map(warmup => ({
+      baseline: [warmup.baseline],
+      current: [warmup.current]
+    }));
+    const remainingSamples = Math.max(0, config.sampling.minSamples - 1);
+    if (remainingSamples > 0) {
+      await collectSamples(harness, benchmarks, remainingSamples, iterationOverrides, delayMs, effectiveMinTimeMs, samples, options.progress);
+    }
+    await autoSample(harness, benchmarks, samples, config.sampling.conditions, maxRelativeMargin, iterationOverrides, delayMs, effectiveMinTimeMs, options.progress, config.sampling.timeoutMs);
     const benchmarkResults = benchmarks.map((benchmark, index) => {
       const baselineSamples = samples[index].baseline;
       const currentSamples = samples[index].current;
       const baselineStats = summaryStats(baselineSamples);
       const currentStats = summaryStats(currentSamples);
-      const difference = computeDifference(baselineStats, currentStats);
+      const difference = computeRelativeDifferenceFromSamples(baselineSamples, currentSamples);
       return {
         benchmark,
         difference,
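The warmup pass now doubles as calibration: each benchmark's iteration count is raised until a single sample runs for roughly `effectiveMinTimeMs`, and that target is damped by the square roots of the sample budget and the benchmark count so total wall time stays bounded. A worked example with invented timings:

```js
// Worked example of the calibration in runEngineComparison (numbers invented).
const minTimeMs = 20;                                   // config.sampling.minTimeMs ?? defaultMinTimeMs
const sampleScale = Math.sqrt(20 / 20);                 // minSamples = 20 -> 1
const benchmarkScale = Math.sqrt(Math.max(1, 4));       // 4 benchmarks -> 2
const effectiveMinTimeMs = minTimeMs / Math.max(1, sampleScale * benchmarkScale); // 10
// A benchmark declaring iterations: 5 whose warmup pair took 2 ms (baseline)
// and 3 ms (current) costs Math.max(2, 3) / 5 = 0.6 ms per iteration, so
// computeIterationOverrides returns:
const iterations = Math.max(5, Math.ceil(effectiveMinTimeMs / 0.6)); // 17
// updateIterations then re-derives the same target from each fresh sample pair.
```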
package/lib/stats.js CHANGED
@@ -1,4 +1,11 @@
 import jstat from "jstat";
+export const relativeMarginOfError = stats => {
+  if (stats.mean === 0) {
+    return Number.POSITIVE_INFINITY;
+  }
+  const margin = Math.abs(stats.meanCI.high - stats.mean);
+  return Math.abs(margin / stats.mean);
+};
 const sumOf = values => values.reduce((total, value) => total + value, 0);
 const squareResiduals = (values, mean) => values.map(value => {
   const diff = value - mean;
@@ -70,6 +77,31 @@ export const computeDifference = (baseline, current) => {
     }
   };
 };
+const computePairedRelativeStats = (baselineSamples, currentSamples) => {
+  const size = Math.min(baselineSamples.length, currentSamples.length);
+  if (size === 0) {
+    throw new Error("Cannot compute differences with empty sample sets.");
+  }
+  const diffs = [];
+  for (let index = 0; index < size; index += 1) {
+    const baseline = baselineSamples[index];
+    if (baseline === 0) {
+      throw new Error("Cannot compute relative difference with baseline sample 0.");
+    }
+    diffs.push((currentSamples[index] - baseline) / baseline);
+  }
+  const diffStats = summaryStats(diffs);
+  return {
+    ci: confidenceInterval95(samplingDistributionOfTheMean({
+      mean: diffStats.mean,
+      variance: diffStats.variance
+    }, diffStats.size), diffStats.size),
+    mean: diffStats.mean
+  };
+};
+export const computeRelativeDifferenceFromSamples = (baselineSamples, currentSamples) => ({
+  relative: computePairedRelativeStats(baselineSamples, currentSamples)
+});
 export const computeDifferences = stats => stats.map(result => ({
   ...result,
   differences: stats.map(other => other === result ? null : computeDifference(other.stats, result.stats))
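The new difference statistic pairs samples by index and builds its interval from per-pair relative deltas rather than from the two marginal distributions, so noise that hits both halves of an interleaved baseline/current pair tends to cancel. A worked example with invented samples:

```js
// Worked example of computeRelativeDifferenceFromSamples (samples invented).
const baseline = [10, 12, 11];
const current = [11, 12.6, 11.55];
// Per-pair deltas, (current - baseline) / baseline:
//   (11 - 10) / 10     = 0.10
//   (12.6 - 12) / 12   = 0.05
//   (11.55 - 11) / 11  = 0.05
const diff = computeRelativeDifferenceFromSamples(baseline, current);
// diff.relative.mean is the mean delta (~0.0667, i.e. ~6.7% slower), and
// diff.relative.ci is confidenceInterval95 applied to the sampling
// distribution of that mean over the three deltas.
```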
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "perfshield",
-  "version": "0.0.3",
+  "version": "0.0.5",
   "description": "A tool for doing web benchmarking across multiple JS engines and with statistical signifigance",
   "license": "MIT",
   "type": "module",