perfshield 0.0.4 → 0.0.7

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -11,7 +11,7 @@ runtime JS engines (Node/V8 today).
  - Saves a baseline bundle (`prepare`).
  - Builds the current bundle and compares it to the baseline (`compare`).
  - Reports results in console and/or JSON.
- - Exits with code 1 when a regression is detected (CI excludes 0 in the slower direction).
+ - Exits with code 1 when a regression is detected (both relative and absolute CIs exclude 0 in the slower direction).

  ## Requirements

@@ -77,6 +77,15 @@ perfshield prepare --config perfshield.config.json
  perfshield compare --config perfshield.config.json
  ```

+ 4. (Optional) Calibrate sampling defaults based on the prepared baseline:
+
+ ```
+ perfshield calibrate --config perfshield.config.json
+ ```
+
+ This prints a JSON snippet with recommended `sampling` values you can paste into
+ your config.
+
  ## Benchmark bundle contract

  The build output must be a single ESM file that exports:
@@ -115,7 +124,7 @@ Supported formats:
  - `console`: human‑readable summary.
  - `json`: machine‑readable report.

- If any benchmark shows a regression (CI excludes 0 in the slower direction),
+ If any benchmark shows a regression (both relative and absolute CIs exclude 0 in the slower direction),
  the process exits with code 1.

  ## Examples
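For orientation before the new source files below: `calibrate` prints the object returned by `calibrateSampling` through `JSON.stringify(..., null, 2)` (see `lib/cli.js` below), so the snippet the README mentions should look roughly like the following. The key set comes from the new `lib/calibrate.js`; the numbers here are purely illustrative:

```
{
  "sampling": {
    "conditions": [0],
    "maxRelativeMargin": 0.05,
    "minSamples": 12,
    "minTimeMs": 5,
    "timeoutMs": 45000
  }
}
```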
package/lib/calibrate.js ADDED
@@ -0,0 +1,145 @@
+ import { resolve } from "node:path";
+ import jstat from "jstat";
+ import { createNodeHarness } from "./engines/node.js";
+ import { buildHarnessIfNeeded, getHarnessPath } from "./harness.js";
+ import { summaryStats } from "./stats.js";
+ const calibrationSampleCount = 20;
+ const calibrationMinSampleMs = 2;
+ const targetMinSampleMs = 5;
+ const minRecommendedSamples = 5;
+ const minTimeoutMs = 1000;
+ const timeoutSafetyFactor = 1.5;
+ const confidenceLevel = 0.95;
+ const defaultMaxRelativeMargin = 0.05;
+ const computeIterationsForTarget = (iterationsBase, warmupMs, targetMs) => {
+   if (targetMs <= 0) {
+     return iterationsBase;
+   }
+   const perIter = warmupMs / iterationsBase;
+   if (!Number.isFinite(perIter) || perIter <= 0) {
+     return iterationsBase;
+   }
+   return Math.max(iterationsBase, Math.ceil(targetMs / perIter));
+ };
+ const collectWarmups = async (harness, benchmarks) => {
+   const warmups = [];
+   for (let index = 0; index < benchmarks.length; index += 1) {
+     const descriptor = benchmarks[index];
+     const result = await harness.runSample({
+       index,
+       iterations: descriptor.iterations,
+       version: "baseline"
+     });
+     warmups.push(result.durationMs);
+   }
+   return warmups;
+ };
+ const collectCalibrationSamples = async (harness, benchmarks, iterationOverrides) => {
+   const samples = benchmarks.map(() => []);
+   for (let iteration = 0; iteration < calibrationSampleCount; iteration += 1) {
+     for (let index = 0; index < benchmarks.length; index += 1) {
+       const result = await harness.runSample({
+         index,
+         iterations: iterationOverrides[index],
+         version: "baseline"
+       });
+       samples[index].push(result.durationMs);
+     }
+   }
+   return samples;
+ };
+ const computeRequiredSamples = (stats, targetMargin, tValue) => {
+   if (targetMargin <= 0) {
+     return minRecommendedSamples;
+   }
+   const rsd = stats.relativeStandardDeviation;
+   if (!Number.isFinite(rsd) || rsd <= 0) {
+     return minRecommendedSamples;
+   }
+   const required = Math.ceil((tValue * rsd / targetMargin) ** 2);
+   if (!Number.isFinite(required) || required <= 0) {
+     return minRecommendedSamples;
+   }
+   return Math.max(minRecommendedSamples, required);
+ };
+ const computeMinTimeMs = warmups => {
+   const finiteWarmups = warmups.filter(value => Number.isFinite(value));
+   if (finiteWarmups.length === 0) {
+     return targetMinSampleMs;
+   }
+   const fastest = Math.min(...finiteWarmups);
+   if (fastest < targetMinSampleMs) {
+     return targetMinSampleMs;
+   }
+   return 0;
+ };
+ const computeTimeoutMs = (warmups, minSamples, minTimeMs, delayMs) => {
+   let perRound = 0;
+   for (const warmup of warmups) {
+     const base = Number.isFinite(warmup) && warmup > 0 ? warmup : minTimeMs;
+     perRound += Math.max(base, minTimeMs);
+   }
+   const sampleCount = Math.max(1, minSamples);
+   const totalSamples = sampleCount * warmups.length;
+   const estimated = perRound * sampleCount + delayMs * Math.max(0, totalSamples - 1);
+   const timeout = Math.ceil(estimated * timeoutSafetyFactor);
+   if (!Number.isFinite(timeout) || timeout <= 0) {
+     return minTimeoutMs;
+   }
+   return Math.max(minTimeoutMs, timeout);
+ };
+ const calibrateEngine = async (engine, baselinePath, maxRelativeMargin, delayMs, harnessPath) => {
+   const resolvedBaseline = resolve(baselinePath);
+   const harness = await createNodeHarness(engine, harnessPath, resolvedBaseline, resolvedBaseline);
+   try {
+     const benchmarks = await harness.listBenchmarks();
+     const warmups = await collectWarmups(harness, benchmarks);
+     const iterationOverrides = benchmarks.map((benchmark, index) => computeIterationsForTarget(benchmark.iterations ?? 1, warmups[index], calibrationMinSampleMs));
+     const samples = await collectCalibrationSamples(harness, benchmarks, iterationOverrides);
+     const benchmarkStats = benchmarks.map((benchmark, index) => ({
+       benchmark,
+       stats: summaryStats(samples[index]),
+       warmupMs: warmups[index]
+     }));
+     const tValue = jstat.studentt.inv(1 - (1 - confidenceLevel) / 2, Math.max(1, calibrationSampleCount - 1));
+     const minSamples = benchmarkStats.reduce((max, entry) => {
+       const required = computeRequiredSamples(entry.stats, maxRelativeMargin, tValue);
+       return Math.max(max, required);
+     }, minRecommendedSamples);
+     const minTimeMs = computeMinTimeMs(warmups);
+     const timeoutMs = computeTimeoutMs(warmups, minSamples, minTimeMs, delayMs);
+     return {
+       engine,
+       minSamples,
+       minTimeMs,
+       timeoutMs
+     };
+   } finally {
+     await harness.close();
+   }
+ };
+ export const calibrateSampling = async (config, baselinePath) => {
+   const maxRelativeMargin = config.sampling.maxRelativeMargin ?? defaultMaxRelativeMargin;
+   const delayMs = config.sampling.delayMs ?? 0;
+   const harnessArtifact = await buildHarnessIfNeeded(getHarnessPath());
+   try {
+     const engineResults = [];
+     for (const engine of config.engines) {
+       engineResults.push(await calibrateEngine(engine, baselinePath, maxRelativeMargin, delayMs, harnessArtifact.path));
+     }
+     const minSamples = engineResults.reduce((max, entry) => Math.max(max, entry.minSamples), minRecommendedSamples);
+     const minTimeMs = engineResults.reduce((max, entry) => Math.max(max, entry.minTimeMs), 0);
+     const timeoutMs = engineResults.reduce((max, entry) => Math.max(max, entry.timeoutMs), minTimeoutMs);
+     return {
+       conditions: config.sampling.conditions,
+       maxRelativeMargin,
+       minSamples,
+       minTimeMs,
+       timeoutMs
+     };
+   } finally {
+     if (harnessArtifact.cleanup) {
+       await harnessArtifact.cleanup();
+     }
+   }
+ };
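The recommendation in `computeRequiredSamples` above is the standard sample-size bound obtained by inverting the t-based confidence-interval half-width, margin = t · RSD / √n, for n. A self-contained sketch with illustrative numbers (not taken from any real run):

```
// margin = tValue * rsd / sqrt(n)  =>  n = ceil((tValue * rsd / margin) ** 2)
const tValue = 2.093;  // two-sided 95% Student t for 19 df (calibrationSampleCount - 1)
const rsd = 0.08;      // illustrative relative standard deviation from calibration
const margin = 0.05;   // defaultMaxRelativeMargin
const required = Math.ceil((tValue * rsd / margin) ** 2);
console.log(required); // 12, clamped below by minRecommendedSamples = 5
```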
package/lib/cli.js CHANGED
@@ -1,12 +1,14 @@
  #!/usr/bin/env node
+ import { WriteStream } from "node:tty";
  import { ensureBaseline, saveBaseline } from "./artifacts.js";
+ import { calibrateSampling } from "./calibrate.js";
  import { runBuild } from "./build.js";
  import { ConfigError, formatConfigError, loadConfig } from "./config.js";
  import { getRegressions } from "./regression.js";
  import { renderReports } from "./report/index.js";
  import { runEngineComparison } from "./runner.js";
  const usage = () => {
-   console.error("Usage: perfshield <prepare|compare> [--config path]");
+   console.error("Usage: perfshield <prepare|compare|calibrate> [--config path]");
  };
  const getFlagValue = (args, flag) => {
    const index = args.indexOf(flag);
@@ -20,17 +22,73 @@ const runPrepare = async config => {
    const baselinePath = await saveBaseline(config, outputPath);
    console.log(`Baseline saved to ${baselinePath}`);
  };
+ const isStderrTTY = () => {
+   return process.stderr instanceof WriteStream;
+ };
+ const createProgressReporter = engineName => {
+   let lastWrite = 0;
+   let lastLine = "";
+   const isTTY = isStderrTTY();
+   const writeLine = line => {
+     if (isTTY) {
+       const padded = line.padEnd(80, " ");
+       process.stderr.write(`\r${padded}`);
+     } else if (line !== lastLine) {
+       process.stderr.write(`${line}\n`);
+     }
+     lastLine = line;
+   };
+   const report = event => {
+     const now = Date.now();
+     if (isTTY && now - lastWrite < 100) {
+       return;
+     }
+     lastWrite = now;
+     let message = "";
+     if (event.phase === "warmup") {
+       const current = (event.benchmarkIndex ?? 0) + 1;
+       const total = event.benchmarkCount ?? 0;
+       const name = event.benchmarkName == null ? "" : ` ${event.benchmarkName}`;
+       message = `Warmup ${current}/${total}${name} (${engineName})`;
+     } else if (event.phase === "samples") {
+       const completed = event.completed ?? 0;
+       const total = event.total ?? 0;
+       const percent = total > 0 ? Math.round(completed / total * 100) : 0;
+       message = `Sampling ${completed}/${total} (${percent}%) (${engineName})`;
+     } else {
+       const completed = event.completed ?? 0;
+       const elapsed = event.elapsedMs != null ? Math.round(event.elapsedMs / 1000) : 0;
+       message = `Auto-sampling ${completed} samples (${elapsed}s) (${engineName})`;
+     }
+     writeLine(message);
+   };
+   const finish = () => {
+     if (isTTY && lastLine) {
+       process.stderr.write("\n");
+     }
+   };
+   return {
+     finish,
+     report
+   };
+ };
  const runCompare = async config => {
    const outputPath = await runBuild(config.build);
    const baselinePath = await ensureBaseline(config);
    const results = [];
+   const showProgress = config.report.formats.includes("console") || isStderrTTY();
    for (const engine of config.engines) {
+     const progress = showProgress ? createProgressReporter(engine.name) : null;
      results.push(await runEngineComparison({
        baselinePath,
        config,
        currentPath: outputPath,
-       engine
+       engine,
+       progress: progress ? progress.report : undefined
      }));
+     if (progress) {
+       progress.finish();
+     }
    }
    const outputs = renderReports(results, config.report.formats);
    for (const output of outputs) {
@@ -42,10 +100,17 @@ const runCompare = async config => {
      process.exitCode = 1;
    }
  };
+ const runCalibrate = async config => {
+   const baselinePath = await ensureBaseline(config);
+   const sampling = await calibrateSampling(config, baselinePath);
+   console.log(JSON.stringify({
+     sampling
+   }, null, 2));
+ };
  const main = async () => {
    const args = process.argv.slice(2);
    const command = args[0];
-   if (command !== "prepare" && command !== "compare") {
+   if (command !== "prepare" && command !== "compare" && command !== "calibrate") {
      usage();
      process.exitCode = 1;
      return;
@@ -66,6 +131,10 @@ const main = async () => {
      await runPrepare(config);
      return;
    }
+   if (command === "calibrate") {
+     await runCalibrate(config);
+     return;
+   }
    await runCompare(config);
  };
  main().catch(error => {
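For reference, the `progress` callback threaded into `runEngineComparison` above receives plain event objects; reading the three branches of `report` together with the emit sites in `lib/runner.js` below, the shapes are approximately:

```
// phase "warmup":                { phase, benchmarkIndex, benchmarkCount, benchmarkName }
// phase "samples":               { phase, completed, total }
// any other phase (autosample):  { phase, completed, elapsedMs }
```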
package/lib/config.js CHANGED
@@ -163,7 +163,7 @@ const parseSamplingConfig = (value, issues) => {
    if (!sampling) {
      return null;
    }
-   validateKeys(sampling, ["conditions", "minSamples", "timeoutMs"], "config.sampling", issues);
+   validateKeys(sampling, ["conditions", "delayMs", "maxRelativeMargin", "minSamples", "minTimeMs", "timeoutMs"], "config.sampling", issues);
    const minSamples = asNumber(sampling.minSamples, "config.sampling.minSamples", issues, {
      integer: true,
      min: 1
@@ -172,15 +172,32 @@ const parseSamplingConfig = (value, issues) => {
      integer: true,
      min: 1
    });
+   const minTimeMs = sampling.minTimeMs == null ? undefined : asNumber(sampling.minTimeMs, "config.sampling.minTimeMs", issues, {
+     integer: true,
+     min: 0
+   });
+   const delayMs = sampling.delayMs == null ? undefined : asNumber(sampling.delayMs, "config.sampling.delayMs", issues, {
+     integer: true,
+     min: 0
+   });
+   const maxRelativeMargin = sampling.maxRelativeMargin == null ? undefined : asNumber(sampling.maxRelativeMargin, "config.sampling.maxRelativeMargin", issues, {
+     min: 0
+   });
+   if (maxRelativeMargin != null && maxRelativeMargin > 1) {
+     addIssue(issues, "config.sampling.maxRelativeMargin must be at most 1.");
+   }
    const conditions = asNumberArray(sampling.conditions, "config.sampling.conditions", issues, {
      minLength: 1
    });
-   if (minSamples == null || timeoutMs == null || !conditions) {
+   if (minSamples == null || timeoutMs == null || !conditions || minTimeMs === null || delayMs === null || maxRelativeMargin === null) {
      return null;
    }
    return {
      conditions,
+     delayMs,
+     maxRelativeMargin,
      minSamples,
+     minTimeMs,
      timeoutMs
    };
  };
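Taken together, the widened `validateKeys` list means a `sampling` block may now carry three optional tuning keys alongside the required `conditions`, `minSamples`, and `timeoutMs`. An illustrative config fragment (values are examples, not recommendations):

```
{
  "sampling": {
    "conditions": [0],
    "minSamples": 10,
    "timeoutMs": 60000,
    "minTimeMs": 20,
    "delayMs": 5,
    "maxRelativeMargin": 0.05
  }
}
```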
package/lib/harness.js ADDED
@@ -0,0 +1,51 @@
+ import { access, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
+ import { tmpdir } from "node:os";
+ import { join, resolve } from "node:path";
+ import { fileURLToPath } from "node:url";
+ import { transformFileAsync } from "@babel/core";
+ const harnessTempPrefix = "perfshield-harness-";
+ const hasBabelConfig = async () => {
+   const configPath = resolve(process.cwd(), "babel.config.cjs");
+   try {
+     await access(configPath);
+     return true;
+   } catch {
+     return false;
+   }
+ };
+ export const getHarnessPath = () => {
+   const override = process.env.WEB_BENCHMARKER_HARNESS_PATH;
+   if (override != null) {
+     return override;
+   }
+   return fileURLToPath(new URL("./engines/node-harness.js", import.meta.url).toString());
+ };
+ export const buildHarnessIfNeeded = async sourcePath => {
+   const contents = await readFile(sourcePath, "utf8");
+   if (!contents.includes("import type") && !contents.includes("@flow")) {
+     return {
+       cleanup: null,
+       path: sourcePath
+     };
+   }
+   const usesConfig = await hasBabelConfig();
+   const result = await transformFileAsync(sourcePath, {
+     configFile: usesConfig ? resolve(process.cwd(), "babel.config.cjs") : false,
+     presets: usesConfig ? [] : ["@babel/preset-flow"]
+   });
+   if (!result || !result.code) {
+     throw new Error("Failed to compile node harness.");
+   }
+   const dir = await mkdtemp(join(tmpdir(), harnessTempPrefix));
+   const harnessPath = join(dir, "node-harness.js");
+   await writeFile(harnessPath, result.code, "utf8");
+   return {
+     cleanup: async () => {
+       await rm(dir, {
+         force: true,
+         recursive: true
+       });
+     },
+     path: harnessPath
+   };
+ };
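These extracted helpers are consumed the same way in both `lib/runner.js` and `lib/calibrate.js`; a minimal usage sketch mirroring those call sites:

```
const artifact = await buildHarnessIfNeeded(getHarnessPath());
try {
  // artifact.path is the original harness file, or a Flow-stripped copy in a temp dir
} finally {
  if (artifact.cleanup) {
    await artifact.cleanup(); // only set when a temp copy was created
  }
}
```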
package/lib/regression.js CHANGED
@@ -3,7 +3,7 @@ export const getRegressions = results => {
    const findings = [];
    for (const result of results) {
      for (const entry of result.benchmarks) {
-       if (isPositiveInterval(entry.difference.relative.ci)) {
+       if (isPositiveInterval(entry.difference.relative.ci) && isPositiveInterval(entry.difference.absolute.ci)) {
          findings.push({
            benchmark: entry.benchmark.name,
            engine: result.engine.name,
@@ -10,16 +10,38 @@ const formatRelativeInterval = (interval, decimals) => formatInterval({
    low: interval.low * 100
  }, decimals, "%");
  const formatRelativeValue = (value, decimals) => `${formatNumber(value * 100, decimals)}%`;
+ const isPositiveInterval = interval => interval.low > 0 && interval.high > 0;
+ const isNegativeInterval = interval => interval.low < 0 && interval.high < 0;
+ const classifyDifference = difference => {
+   if (isPositiveInterval(difference.relative.ci) && isPositiveInterval(difference.absolute.ci)) {
+     return "regression";
+   }
+   if (isNegativeInterval(difference.relative.ci) && isNegativeInterval(difference.absolute.ci)) {
+     return "improvement";
+   }
+   return "no significant change";
+ };
  export const renderConsoleReport = results => {
    const lines = [];
    for (const result of results) {
+     let regressions = 0;
+     let improvements = 0;
+     let neutral = 0;
      lines.push(`Engine: ${result.engine.name}`);
      for (const entry of result.benchmarks) {
        const unit = entry.benchmark.unit != null ? ` ${entry.benchmark.unit}` : "";
-       const benchmarkLines = [` Benchmark: ${entry.benchmark.name}`, ` baseline mean=${formatNumber(entry.stats.baseline.mean, 4)}${unit} ci=${formatInterval(entry.stats.baseline.meanCI, 4)} sd=${formatNumber(entry.stats.baseline.standardDeviation, 4)}`, ` current mean=${formatNumber(entry.stats.current.mean, 4)}${unit} ci=${formatInterval(entry.stats.current.meanCI, 4)} sd=${formatNumber(entry.stats.current.standardDeviation, 4)}`, ` diff rel mean=${formatRelativeValue(entry.difference.relative.mean, 2)} ci=${formatRelativeInterval(entry.difference.relative.ci, 2)}`];
+       const status = classifyDifference(entry.difference);
+       if (status === "regression") {
+         regressions += 1;
+       } else if (status === "improvement") {
+         improvements += 1;
+       } else {
+         neutral += 1;
+       }
+       const benchmarkLines = [` Benchmark: ${entry.benchmark.name}`, ` Result: ${status} (mean=${formatRelativeValue(entry.difference.relative.mean, 2)} ci=${formatRelativeInterval(entry.difference.relative.ci, 2)})`, ` baseline mean=${formatNumber(entry.stats.baseline.mean, 4)}${unit} ci=${formatInterval(entry.stats.baseline.meanCI, 4)} sd=${formatNumber(entry.stats.baseline.standardDeviation, 4)}`, ` current mean=${formatNumber(entry.stats.current.mean, 4)}${unit} ci=${formatInterval(entry.stats.current.meanCI, 4)} sd=${formatNumber(entry.stats.current.standardDeviation, 4)}`, ` diff rel mean=${formatRelativeValue(entry.difference.relative.mean, 2)} ci=${formatRelativeInterval(entry.difference.relative.ci, 2)}`, ` diff abs mean=${formatNumber(entry.difference.absolute.mean, 4)}${unit} ci=${formatInterval(entry.difference.absolute.ci, 4)}`];
        lines.push(...benchmarkLines);
      }
-     lines.push("");
+     lines.push(` Summary: regressions=${regressions} improvements=${improvements} neutral=${neutral}`, "");
    }
    return lines.join("\n").trimEnd();
  };
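To make the two-interval rule concrete, here is how `classifyDifference` behaves on hypothetical intervals (made-up numbers):

```
classifyDifference({
  relative: { ci: { low: 0.01, high: 0.04 } }, // +1%..+4% slower
  absolute: { ci: { low: 0.2, high: 0.9 } }    // both bounds above 0
}); // -> "regression"
classifyDifference({
  relative: { ci: { low: 0.01, high: 0.04 } },
  absolute: { ci: { low: -0.1, high: 0.5 } }   // absolute CI straddles 0
}); // -> "no significant change"
```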
package/lib/runner.js CHANGED
@@ -1,149 +1,204 @@
- import { access, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
- import { tmpdir } from "node:os";
- import { join, resolve } from "node:path";
- import { fileURLToPath } from "node:url";
- import { transformFileAsync } from "@babel/core";
+ import { resolve } from "node:path";
  import { createNodeHarness } from "./engines/node.js";
- import { computeDifference, summaryStats } from "./stats.js";
+ import { buildHarnessIfNeeded, getHarnessPath } from "./harness.js";
+ import { computeRelativeDifferenceFromSamples, summaryStats } from "./stats.js";
  const versions = ["baseline", "current"];
  const autoSampleBatchSize = 10;
- const harnessTempPrefix = "perfshield-harness-";
+ const defaultMinTimeMs = 20;
+ const defaultMaxRelativeMargin = 0.05;
  const getVersionOrder = seed => {
    if (seed % 2 === 0) {
      return versions;
    }
    return [versions[1], versions[0]];
  };
- const getHarnessPath = () => {
-   const override = process.env.WEB_BENCHMARKER_HARNESS_PATH;
-   if (override != null) {
-     return override;
+ const sleep = async delayMs => {
+   if (delayMs <= 0) {
+     return;
    }
-   return fileURLToPath(new URL("./engines/node-harness.js", import.meta.url).toString());
+   await new Promise(resolve => {
+     setTimeout(resolve, delayMs);
+   });
  };
- const hasBabelConfig = async () => {
-   const configPath = resolve(process.cwd(), "babel.config.cjs");
-   try {
-     await access(configPath);
-     return true;
-   } catch {
-     return false;
+ const warmupBenchmarks = async (harness, benchmarks, delayMs, progress) => {
+   const warmups = [];
+   for (let index = 0; index < benchmarks.length; index += 1) {
+     const descriptor = benchmarks[index];
+     const order = getVersionOrder(index);
+     let baselineSample;
+     let currentSample;
+     for (const version of order) {
+       const result = await harness.runSample({
+         index,
+         iterations: descriptor.iterations,
+         version
+       });
+       if (version === "baseline") {
+         baselineSample = result.durationMs;
+       } else {
+         currentSample = result.durationMs;
+       }
+     }
+     if (baselineSample == null || currentSample == null) {
+       throw new Error("Warmup did not collect baseline/current samples.");
+     }
+     warmups.push({
+       baseline: baselineSample,
+       current: currentSample
+     });
+     if (progress) {
+       progress({
+         benchmarkCount: benchmarks.length,
+         benchmarkIndex: index,
+         benchmarkName: descriptor.name,
+         phase: "warmup"
+       });
+     }
+     await sleep(delayMs);
    }
+   return warmups;
  };
- const buildHarnessIfNeeded = async sourcePath => {
-   const contents = await readFile(sourcePath, "utf8");
-   if (!contents.includes("import type") && !contents.includes("@flow")) {
-     return {
-       cleanup: null,
-       path: sourcePath
-     };
+ const computeIterationOverrides = (benchmarks, warmups, minTimeMs) => {
+   if (minTimeMs <= 0) {
+     return benchmarks.map(() => undefined);
    }
-   const usesConfig = await hasBabelConfig();
-   const result = await transformFileAsync(sourcePath, {
-     configFile: usesConfig ? resolve(process.cwd(), "babel.config.cjs") : false,
-     presets: usesConfig ? [] : ["@babel/preset-flow"]
+   return benchmarks.map((benchmark, index) => {
+     const iterationsBase = benchmark.iterations ?? 1;
+     const warmup = warmups[index];
+     const baselinePerIter = warmup.baseline / iterationsBase;
+     const currentPerIter = warmup.current / iterationsBase;
+     const maxPerIter = Math.max(baselinePerIter, currentPerIter);
+     if (!Number.isFinite(maxPerIter) || maxPerIter <= 0) {
+       return iterationsBase;
+     }
+     return Math.max(iterationsBase, Math.ceil(minTimeMs / maxPerIter));
    });
-   if (!result || !result.code) {
-     throw new Error("Failed to compile node harness.");
+ };
+ const updateIterations = (currentIterations, baselineSample, currentSample, minTimeMs, minimumIterations) => {
+   if (minTimeMs <= 0) {
+     return currentIterations;
    }
-   const dir = await mkdtemp(join(tmpdir(), harnessTempPrefix));
-   const harnessPath = join(dir, "node-harness.js");
-   await writeFile(harnessPath, result.code, "utf8");
-   return {
-     cleanup: async () => {
-       await rm(dir, {
-         force: true,
-         recursive: true
-       });
-     },
-     path: harnessPath
-   };
+   const perIter = Math.max(baselineSample, currentSample) / currentIterations;
+   if (!Number.isFinite(perIter) || perIter <= 0) {
+     return currentIterations;
+   }
+   return Math.max(minimumIterations, Math.ceil(minTimeMs / perIter));
  };
- const withFreshHarness = async (engine, harnessPath, baselinePath, currentPath, callback) => {
-   const harness = await createNodeHarness(engine, harnessPath, baselinePath, currentPath);
-   try {
-     return await callback(harness);
-   } finally {
-     await harness.close();
+ const relativeMarginFromDifference = difference => {
+   const mean = difference.relative.mean;
+   if (mean === 0) {
+     return Number.POSITIVE_INFINITY;
+   }
+   const margin = Math.abs(difference.relative.ci.high - mean);
+   return Math.abs(margin / mean);
+ };
+ const buildSamplePayload = (index, version, iterations) => {
+   const payload = {
+     index,
+     version
+   };
+   if (iterations != null) {
+     payload.iterations = iterations;
    }
+   return payload;
  };
- const runIterationInFreshHarness = async (engine, harnessPath, baselinePath, currentPath, index, iterations, order) => await withFreshHarness(engine, harnessPath, baselinePath, currentPath, async harness => {
-   const results = {};
+ const runSamplePair = async (harness, index, iterations, order) => {
+   let baselineSample;
+   let currentSample;
    for (const version of order) {
-     const payload = {
-       index,
-       version
-     };
-     if (iterations != null) {
-       payload.iterations = iterations;
+     const result = await harness.runSample(buildSamplePayload(index, version, iterations));
+     if (version === "baseline") {
+       baselineSample = result.durationMs;
+     } else {
+       currentSample = result.durationMs;
      }
-     const result = await harness.runSample(payload);
-     results[version] = result.durationMs;
    }
-   return results;
- });
- const warmupBenchmarks = async (engine, harnessPath, baselinePath, currentPath, benchmarks) => {
-   let roundRobinSeed = 0;
-   for (let index = 0; index < benchmarks.length; index += 1) {
-     const descriptor = benchmarks[index];
-     const order = getVersionOrder(roundRobinSeed);
-     roundRobinSeed += 1;
-     await runIterationInFreshHarness(engine, harnessPath, baselinePath, currentPath, index, descriptor.iterations, order);
+   if (baselineSample == null || currentSample == null) {
+     throw new Error("Sample pair did not include baseline/current.");
    }
+   return {
+     baseline: baselineSample,
+     current: currentSample
+   };
  };
- const collectSamples = async (engine, harnessPath, baselinePath, currentPath, benchmarks, minSamples) => {
-   const samples = benchmarks.map(() => ({
+ const collectSamples = async (harness, benchmarks, minSamples, iterationOverrides, delayMs, minTimeMs, samples, progress) => {
+   const buckets = samples ?? benchmarks.map(() => ({
      baseline: [],
      current: []
    }));
-   let roundRobinSeed = 0;
+   let completed = 0;
+   const total = minSamples * benchmarks.length;
    for (let iteration = 0; iteration < minSamples; iteration += 1) {
+     const order = getVersionOrder(iteration);
      for (let index = 0; index < benchmarks.length; index += 1) {
-       const descriptor = benchmarks[index];
-       const order = getVersionOrder(roundRobinSeed);
-       roundRobinSeed += 1;
-       const result = await runIterationInFreshHarness(engine, harnessPath, baselinePath, currentPath, index, descriptor.iterations, order);
-       if (result.baseline != null) {
-         samples[index].baseline.push(result.baseline);
-       }
-       if (result.current != null) {
-         samples[index].current.push(result.current);
+       const iterations = iterationOverrides[index];
+       const minimumIterations = benchmarks[index].iterations ?? 1;
+       const result = await runSamplePair(harness, index, iterations, order);
+       buckets[index].baseline.push(result.baseline);
+       buckets[index].current.push(result.current);
+       const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
+       iterationOverrides[index] = nextIterations;
+       completed += 1;
+       if (progress) {
+         progress({
+           completed,
+           phase: "samples",
+           total
+         });
        }
+       await sleep(delayMs);
      }
    }
-   return samples;
+   return buckets;
  };
  const intervalContains = (interval, value) => interval.low <= value && value <= interval.high;
- const autoSampleResolved = (samples, conditions) => samples.every(bucket => {
-   const baselineStats = summaryStats(bucket.baseline);
-   const currentStats = summaryStats(bucket.current);
-   const diff = computeDifference(baselineStats, currentStats);
+ const autoSampleResolved = (samples, conditions, maxRelativeMargin) => samples.every(bucket => {
+   const diff = computeRelativeDifferenceFromSamples(bucket.baseline, bucket.current);
+   const onlyZeroConditions = conditions.every(condition => condition === 0);
+   if (onlyZeroConditions) {
+     const maxAbsBound = Math.max(Math.abs(diff.relative.ci.low), Math.abs(diff.relative.ci.high));
+     if (maxAbsBound <= maxRelativeMargin) {
+       return true;
+     }
+   }
    for (const condition of conditions) {
      if (intervalContains(diff.relative.ci, condition)) {
        return false;
      }
    }
+   if (relativeMarginFromDifference(diff) > maxRelativeMargin) {
+     return false;
+   }
    return true;
  });
- const autoSample = async (engine, harnessPath, baselinePath, currentPath, benchmarks, samples, conditions, timeoutMs) => {
+ const autoSample = async (harness, benchmarks, samples, conditions, maxRelativeMargin, iterationOverrides, delayMs, minTimeMs, progress, timeoutMs) => {
    const startTime = Date.now();
    let roundRobinSeed = 0;
+   let completed = 0;
    while (Date.now() - startTime < timeoutMs) {
-     if (autoSampleResolved(samples, conditions)) {
+     if (autoSampleResolved(samples, conditions, maxRelativeMargin)) {
        return;
      }
      for (let batch = 0; batch < autoSampleBatchSize; batch += 1) {
+       const order = getVersionOrder(roundRobinSeed);
+       roundRobinSeed += 1;
        for (let index = 0; index < benchmarks.length; index += 1) {
-         const descriptor = benchmarks[index];
-         const order = getVersionOrder(roundRobinSeed);
-         roundRobinSeed += 1;
-         const result = await runIterationInFreshHarness(engine, harnessPath, baselinePath, currentPath, index, descriptor.iterations, order);
-         if (result.baseline != null) {
-           samples[index].baseline.push(result.baseline);
-         }
-         if (result.current != null) {
-           samples[index].current.push(result.current);
+         const iterations = iterationOverrides[index];
+         const minimumIterations = benchmarks[index].iterations ?? 1;
+         const result = await runSamplePair(harness, index, iterations, order);
+         samples[index].baseline.push(result.baseline);
+         samples[index].current.push(result.current);
+         const nextIterations = updateIterations(iterations ?? minimumIterations, result.baseline, result.current, minTimeMs, minimumIterations);
+         iterationOverrides[index] = nextIterations;
+         completed += 1;
+         if (progress) {
+           progress({
+             completed,
+             elapsedMs: Date.now() - startTime,
+             phase: "autosample"
+           });
          }
+         await sleep(delayMs);
        }
      }
    }
@@ -158,17 +213,33 @@ export const runEngineComparison = async options => {
    const harnessArtifact = await buildHarnessIfNeeded(getHarnessPath());
    const resolvedBaseline = resolve(baselinePath);
    const resolvedCurrent = resolve(currentPath);
+   const harness = await createNodeHarness(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent);
    try {
-     const benchmarks = await withFreshHarness(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent, async harness => await harness.listBenchmarks());
-     await warmupBenchmarks(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent, benchmarks);
-     const samples = await collectSamples(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent, benchmarks, config.sampling.minSamples);
-     await autoSample(engine, harnessArtifact.path, resolvedBaseline, resolvedCurrent, benchmarks, samples, config.sampling.conditions, config.sampling.timeoutMs);
+     const benchmarks = await harness.listBenchmarks();
+     const minTimeMs = config.sampling.minTimeMs ?? defaultMinTimeMs;
+     const minSamples = Math.max(1, config.sampling.minSamples);
+     const sampleScale = Math.sqrt(minSamples / 20);
+     const benchmarkScale = Math.sqrt(Math.max(1, benchmarks.length));
+     const effectiveMinTimeMs = minTimeMs / Math.max(1, sampleScale * benchmarkScale);
+     const delayMs = config.sampling.delayMs ?? 0;
+     const maxRelativeMargin = config.sampling.maxRelativeMargin ?? defaultMaxRelativeMargin;
+     const warmups = await warmupBenchmarks(harness, benchmarks, delayMs, options.progress);
+     const iterationOverrides = computeIterationOverrides(benchmarks, warmups, effectiveMinTimeMs);
+     const samples = warmups.map(warmup => ({
+       baseline: [warmup.baseline],
+       current: [warmup.current]
+     }));
+     const remainingSamples = Math.max(0, config.sampling.minSamples - 1);
+     if (remainingSamples > 0) {
+       await collectSamples(harness, benchmarks, remainingSamples, iterationOverrides, delayMs, effectiveMinTimeMs, samples, options.progress);
+     }
+     await autoSample(harness, benchmarks, samples, config.sampling.conditions, maxRelativeMargin, iterationOverrides, delayMs, effectiveMinTimeMs, options.progress, config.sampling.timeoutMs);
      const benchmarkResults = benchmarks.map((benchmark, index) => {
        const baselineSamples = samples[index].baseline;
        const currentSamples = samples[index].current;
        const baselineStats = summaryStats(baselineSamples);
        const currentStats = summaryStats(currentSamples);
-       const difference = computeDifference(baselineStats, currentStats);
+       const difference = computeRelativeDifferenceFromSamples(baselineSamples, currentSamples);
        return {
          benchmark,
          difference,
@@ -187,6 +258,7 @@ export const runEngineComparison = async options => {
        engine
      };
    } finally {
+     await harness.close();
      if (harnessArtifact.cleanup) {
        await harnessArtifact.cleanup();
      }
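One detail of the new `runEngineComparison` worth spelling out: the configured `minTimeMs` is scaled down as the sample count and benchmark count grow, keeping the overall time budget roughly flat. With the defaults, an illustrative calculation:

```
// minTimeMs = 20 (defaultMinTimeMs), minSamples = 20, benchmarks.length = 4
const sampleScale = Math.sqrt(20 / 20);           // 1
const benchmarkScale = Math.sqrt(Math.max(1, 4)); // 2
const effectiveMinTimeMs = 20 / Math.max(1, sampleScale * benchmarkScale);
console.log(effectiveMinTimeMs); // 10 -> each sample only has to run ~10 ms
```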
package/lib/stats.js CHANGED
@@ -1,4 +1,11 @@
  import jstat from "jstat";
+ export const relativeMarginOfError = stats => {
+   if (stats.mean === 0) {
+     return Number.POSITIVE_INFINITY;
+   }
+   const margin = Math.abs(stats.meanCI.high - stats.mean);
+   return Math.abs(margin / stats.mean);
+ };
  const sumOf = values => values.reduce((total, value) => total + value, 0);
  const squareResiduals = (values, mean) => values.map(value => {
    const diff = value - mean;
@@ -27,6 +34,10 @@ export const samplingDistributionOfRelativeDifferenceOfMeans = (a, b) => ({
    mean: (b.mean - a.mean) / a.mean,
    variance: (a.variance * b.mean * b.mean + b.variance * a.mean * a.mean) / (a.mean * a.mean * a.mean * a.mean)
  });
+ export const samplingDistributionOfDifferenceOfMeans = (a, b) => ({
+   mean: b.mean - a.mean,
+   variance: a.variance + b.variance
+ });
  export const summaryStats = values => {
    if (values.length === 0) {
      throw new Error("Cannot compute stats for an empty sample set.");
@@ -61,15 +72,64 @@ export const computeDifference = (baseline, current) => {
      mean: current.mean,
      variance: current.variance
    }, current.size);
+   const absoluteDist = samplingDistributionOfDifferenceOfMeans(baselineDist, currentDist);
    const relativeDist = samplingDistributionOfRelativeDifferenceOfMeans(baselineDist, currentDist);
    const size = Math.min(baseline.size, current.size);
    return {
+     absolute: {
+       ci: confidenceInterval95(absoluteDist, size),
+       mean: absoluteDist.mean
+     },
      relative: {
        ci: confidenceInterval95(relativeDist, size),
        mean: relativeDist.mean
      }
    };
  };
+ const computePairedRelativeStats = (baselineSamples, currentSamples) => {
+   const size = Math.min(baselineSamples.length, currentSamples.length);
+   if (size === 0) {
+     throw new Error("Cannot compute differences with empty sample sets.");
+   }
+   const diffs = [];
+   for (let index = 0; index < size; index += 1) {
+     const baseline = baselineSamples[index];
+     if (baseline === 0) {
+       throw new Error("Cannot compute relative difference with baseline sample 0.");
+     }
+     diffs.push((currentSamples[index] - baseline) / baseline);
+   }
+   const diffStats = summaryStats(diffs);
+   return {
+     ci: confidenceInterval95(samplingDistributionOfTheMean({
+       mean: diffStats.mean,
+       variance: diffStats.variance
+     }, diffStats.size), diffStats.size),
+     mean: diffStats.mean
+   };
+ };
+ const computePairedAbsoluteStats = (baselineSamples, currentSamples) => {
+   const size = Math.min(baselineSamples.length, currentSamples.length);
+   if (size === 0) {
+     throw new Error("Cannot compute differences with empty sample sets.");
+   }
+   const diffs = [];
+   for (let index = 0; index < size; index += 1) {
+     diffs.push(currentSamples[index] - baselineSamples[index]);
+   }
+   const diffStats = summaryStats(diffs);
+   return {
+     ci: confidenceInterval95(samplingDistributionOfTheMean({
+       mean: diffStats.mean,
+       variance: diffStats.variance
+     }, diffStats.size), diffStats.size),
+     mean: diffStats.mean
+   };
+ };
+ export const computeRelativeDifferenceFromSamples = (baselineSamples, currentSamples) => ({
+   absolute: computePairedAbsoluteStats(baselineSamples, currentSamples),
+   relative: computePairedRelativeStats(baselineSamples, currentSamples)
+ });
  export const computeDifferences = stats => stats.map(result => ({
    ...result,
    differences: stats.map(other => other === result ? null : computeDifference(other.stats, result.stats))
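The statistical core of this release is the switch from `computeDifference`, which compares two independently summarized sample sets, to `computeRelativeDifferenceFromSamples`, which differences the paired samples index by index so that drift affecting both versions of a round tends to cancel. A tiny worked example of the relative side (made-up numbers):

```
// baseline [10, 12, 11] vs current [11, 12.6, 11.55]:
// per-pair relative diffs: (11 - 10) / 10 = 0.10,
//                          (12.6 - 12) / 12 = 0.05,
//                          (11.55 - 11) / 11 = 0.05
// summaryStats([0.10, 0.05, 0.05]) gives mean ≈ 0.0667, and the CI comes from
// samplingDistributionOfTheMean over these paired diffs rather than from the
// two versions' separate variances.
```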
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "perfshield",
-   "version": "0.0.4",
+   "version": "0.0.7",
    "description": "A tool for web benchmarking across multiple JS engines with statistical significance",
    "license": "MIT",
    "type": "module",