benchforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +432 -0
- package/bin/benchforge +3 -0
- package/dist/bin/benchforge.mjs +9 -0
- package/dist/bin/benchforge.mjs.map +1 -0
- package/dist/browser/index.js +914 -0
- package/dist/index.mjs +3 -0
- package/dist/src-CGuaC3Wo.mjs +3676 -0
- package/dist/src-CGuaC3Wo.mjs.map +1 -0
- package/package.json +49 -0
- package/src/BenchMatrix.ts +380 -0
- package/src/Benchmark.ts +33 -0
- package/src/BenchmarkReport.ts +156 -0
- package/src/GitUtils.ts +79 -0
- package/src/HtmlDataPrep.ts +148 -0
- package/src/MeasuredResults.ts +127 -0
- package/src/NodeGC.ts +48 -0
- package/src/PermutationTest.ts +115 -0
- package/src/StandardSections.ts +268 -0
- package/src/StatisticalUtils.ts +176 -0
- package/src/TypeUtil.ts +8 -0
- package/src/bin/benchforge.ts +4 -0
- package/src/browser/BrowserGcStats.ts +44 -0
- package/src/browser/BrowserHeapSampler.ts +248 -0
- package/src/cli/CliArgs.ts +64 -0
- package/src/cli/FilterBenchmarks.ts +68 -0
- package/src/cli/RunBenchCLI.ts +856 -0
- package/src/export/JsonExport.ts +103 -0
- package/src/export/JsonFormat.ts +91 -0
- package/src/export/PerfettoExport.ts +203 -0
- package/src/heap-sample/HeapSampleReport.ts +196 -0
- package/src/heap-sample/HeapSampler.ts +78 -0
- package/src/html/HtmlReport.ts +131 -0
- package/src/html/HtmlTemplate.ts +284 -0
- package/src/html/Types.ts +88 -0
- package/src/html/browser/CIPlot.ts +287 -0
- package/src/html/browser/HistogramKde.ts +118 -0
- package/src/html/browser/LegendUtils.ts +163 -0
- package/src/html/browser/RenderPlots.ts +263 -0
- package/src/html/browser/SampleTimeSeries.ts +389 -0
- package/src/html/browser/Types.ts +96 -0
- package/src/html/browser/index.ts +1 -0
- package/src/html/index.ts +17 -0
- package/src/index.ts +92 -0
- package/src/matrix/CaseLoader.ts +36 -0
- package/src/matrix/MatrixFilter.ts +103 -0
- package/src/matrix/MatrixReport.ts +290 -0
- package/src/matrix/VariantLoader.ts +46 -0
- package/src/runners/AdaptiveWrapper.ts +391 -0
- package/src/runners/BasicRunner.ts +368 -0
- package/src/runners/BenchRunner.ts +60 -0
- package/src/runners/CreateRunner.ts +11 -0
- package/src/runners/GcStats.ts +107 -0
- package/src/runners/RunnerOrchestrator.ts +374 -0
- package/src/runners/RunnerUtils.ts +2 -0
- package/src/runners/TimingUtils.ts +13 -0
- package/src/runners/WorkerScript.ts +256 -0
- package/src/table-util/ConvergenceFormatters.ts +19 -0
- package/src/table-util/Formatters.ts +152 -0
- package/src/table-util/README.md +70 -0
- package/src/table-util/TableReport.ts +293 -0
- package/src/table-util/test/TableReport.test.ts +105 -0
- package/src/table-util/test/TableValueExtractor.test.ts +41 -0
- package/src/table-util/test/TableValueExtractor.ts +100 -0
- package/src/test/AdaptiveRunner.test.ts +185 -0
- package/src/test/AdaptiveStatistics.integration.ts +119 -0
- package/src/test/BenchmarkReport.test.ts +82 -0
- package/src/test/BrowserBench.e2e.test.ts +44 -0
- package/src/test/BrowserBench.test.ts +79 -0
- package/src/test/GcStats.test.ts +94 -0
- package/src/test/PermutationTest.test.ts +121 -0
- package/src/test/RunBenchCLI.test.ts +166 -0
- package/src/test/RunnerOrchestrator.test.ts +102 -0
- package/src/test/StatisticalUtils.test.ts +112 -0
- package/src/test/TestUtils.ts +93 -0
- package/src/test/fixtures/test-bench-script.ts +30 -0
- package/src/tests/AdaptiveConvergence.test.ts +177 -0
- package/src/tests/AdaptiveSampling.test.ts +240 -0
- package/src/tests/BenchMatrix.test.ts +366 -0
- package/src/tests/MatrixFilter.test.ts +117 -0
- package/src/tests/MatrixReport.test.ts +139 -0
- package/src/tests/RealDataValidation.test.ts +177 -0
- package/src/tests/fixtures/baseline/impl.ts +4 -0
- package/src/tests/fixtures/bevy30-samples.ts +158 -0
- package/src/tests/fixtures/cases/asyncCases.ts +7 -0
- package/src/tests/fixtures/cases/cases.ts +8 -0
- package/src/tests/fixtures/cases/variants/product.ts +2 -0
- package/src/tests/fixtures/cases/variants/sum.ts +2 -0
- package/src/tests/fixtures/discover/fast.ts +1 -0
- package/src/tests/fixtures/discover/slow.ts +4 -0
- package/src/tests/fixtures/invalid/bad.ts +1 -0
- package/src/tests/fixtures/loader/fast.ts +1 -0
- package/src/tests/fixtures/loader/slow.ts +4 -0
- package/src/tests/fixtures/loader/stateful.ts +2 -0
- package/src/tests/fixtures/stateful/stateful.ts +2 -0
- package/src/tests/fixtures/variants/extra.ts +1 -0
- package/src/tests/fixtures/variants/impl.ts +1 -0
- package/src/tests/fixtures/worker/fast.ts +1 -0
- package/src/tests/fixtures/worker/slow.ts +4 -0

@@ -0,0 +1,177 @@
import { test } from "vitest";
import { checkConvergence } from "../runners/AdaptiveWrapper.ts";
import { bevy30SamplesNs } from "./fixtures/bevy30-samples.ts";

test("convergence with insufficient samples", () => {
  const samples = [1e6, 2e6, 3e6]; // 3 samples in nanoseconds
  const result = checkConvergence(samples);

  if (result.converged) throw new Error("Should not converge with few samples");
  if (result.confidence >= 10)
    throw new Error("Confidence too high for 3 samples");
  if (!result.reason.includes("Collecting samples")) {
    throw new Error("Wrong reason for non-convergence");
  }
});

test("convergence with stable samples", () => {
  // Create very stable samples (all within 1% of each other)
  const base = 50e6; // 50ms in nanoseconds
  const samples = Array.from(
    { length: 200 },
    () => base + (Math.random() - 0.5) * base * 0.01,
  );
  const result = checkConvergence(samples);

  if (!result.converged) throw new Error("Should converge with stable samples");
  if (result.confidence !== 100) throw new Error("Should have 100% confidence");
  if (!result.reason.includes("Stable")) {
    throw new Error("Wrong reason for convergence");
  }
});

test("convergence with drifting median", () => {
  // Create samples with increasing median over time
  const samples = Array.from(
    { length: 200 },
    (_, i) => 50e6 + i * 0.5e6 + (Math.random() - 0.5) * 1e6,
  );

  const result = checkConvergence(samples);

  if (result.converged)
    throw new Error("Should not converge with drifting median");
  if (result.confidence >= 80)
    throw new Error("Confidence too high for drifting data");
  if (!result.reason.includes("Median drifting")) {
    throw new Error("Should identify median drift");
  }
});

test("convergence with outliers", () => {
  // Create stable samples with occasional outliers every 20 samples
  const base = 50e6;
  const samples = Array.from({ length: 200 }, (_, i) =>
    i % 20 === 0 ? base * 2 : base + (Math.random() - 0.5) * base * 0.01,
  );

  const result = checkConvergence(samples);

  // May or may not converge depending on outlier impact calculation
  if (result.converged && result.confidence !== 100) {
    throw new Error("Should have 100% confidence if converged");
  }
});

test("convergence with real bevy30 data - early samples", () => {
  // Test with first 100 samples (should show initial instability)
  const early = bevy30SamplesNs.slice(0, 100);
  const result = checkConvergence(early);

  // Early samples include warm-up, may not be fully converged
  if (result.confidence > 100 || result.confidence < 0) {
    throw new Error(`Confidence out of range: ${result.confidence}`);
  }

  console.log(
    `Early samples (100): converged=${result.converged}, confidence=${result.confidence}%`,
  );
});

test("convergence with real bevy30 data - middle samples", () => {
  // Test with middle 200 samples (should be more stable)
  const middle = bevy30SamplesNs.slice(200, 400);
  const result = checkConvergence(middle);

  if (result.confidence > 100 || result.confidence < 0) {
    throw new Error(`Confidence out of range: ${result.confidence}`);
  }

  console.log(
    `Middle samples (200): converged=${result.converged}, confidence=${result.confidence}%`,
  );
});

test("convergence with real bevy30 data - all samples", () => {
  const result = checkConvergence(bevy30SamplesNs);

  if (result.confidence > 100 || result.confidence < 0) {
    throw new Error(`Confidence out of range: ${result.confidence}`);
  }

  // With 30 seconds of data, should have high confidence
  if (result.confidence < 80) {
    console.warn(`Low confidence with 30s of data: ${result.confidence}%`);
  }

  console.log(
    `All samples (610): converged=${result.converged}, confidence=${result.confidence}%`,
  );
});

test("convergence progression over time", () => {
  const checkpoints = [50, 100, 150, 200, 300, 400, 500, 610];
  const progressions = checkpoints.map(n => {
    const result = checkConvergence(bevy30SamplesNs.slice(0, n));
    return { samples: n, confidence: result.confidence };
  });

  // Confidence should generally increase with more samples
  console.log("Convergence progression:");
  for (const { samples, confidence } of progressions) {
    console.log(`  ${samples} samples: ${confidence.toFixed(1)}%`);
  }

  const earlyConfidence = progressions[0].confidence;
  const lateConfidence = progressions.at(-1)!.confidence;

  if (lateConfidence < earlyConfidence) {
    console.warn(
      "Confidence decreased over time - may indicate benchmark instability",
    );
  }
});

test("window size adaptation for different execution times", () => {
  // Fast samples (microseconds)
  const fastSamples = Array.from(
    { length: 100 },
    () => 10e3 + Math.random() * 1e3, // 10-11us
  );
  const fastResult = checkConvergence(fastSamples);

  // Slow samples (milliseconds)
  const slowSamples = Array.from(
    { length: 100 },
    () => 50e6 + Math.random() * 1e6, // 50-51ms
  );
  const slowResult = checkConvergence(slowSamples);

  console.log(`Fast samples (10μs): confidence=${fastResult.confidence}%`);
  console.log(`Slow samples (50ms): confidence=${slowResult.confidence}%`);

  if (fastResult.confidence > 100 || slowResult.confidence > 100) {
    throw new Error("Confidence exceeds 100%");
  }
});

test("outlier impact calculation", () => {
  // 95 stable samples + 5 outliers (2x slower)
  const base = 50e6; // 50ms
  const stable = Array.from(
    { length: 95 },
    () => base + (Math.random() - 0.5) * 1e6,
  );
  const samples = [...stable, ...Array(5).fill(base * 2)];

  const result = checkConvergence(samples);

  // With 5% outliers doubling execution time, should impact convergence
  console.log(
    `With 5% outliers: converged=${result.converged}, confidence=${result.confidence}%`,
  );

  if (result.reason.includes("Outlier impact") && result.confidence > 90) {
    throw new Error("Should detect outlier impact or have lower confidence");
  }
});

@@ -0,0 +1,240 @@
import { test } from "vitest";
import type { BenchmarkSpec } from "../Benchmark.ts";
import type { MeasuredResults } from "../MeasuredResults.ts";
import { createAdaptiveWrapper } from "../runners/AdaptiveWrapper.ts";
import type { BenchRunner } from "../runners/BenchRunner.ts";
import { bevy30SamplesMs } from "./fixtures/bevy30-samples.ts";

/** Assert convergence data exists, return the result for further checks. */
function requireConvergence(result: MeasuredResults): MeasuredResults {
  if (!result.convergence) throw new Error("Missing convergence data");
  return result;
}

/** Mock runner that returns pre-recorded samples */
function createMockRunner(samples: number[]): BenchRunner {
  let sampleIndex = 0;

  return {
    async runBench(benchmark, options) {
      const { minTime = 100, maxIterations = 10 } = options;
      const batchSamples: number[] = [];
      const startTime = performance.now();

      while (
        sampleIndex < samples.length &&
        batchSamples.length < (maxIterations ?? 10) &&
        performance.now() - startTime < minTime
      ) {
        batchSamples.push(samples[sampleIndex++]);
        await new Promise(resolve => setTimeout(resolve, 1));
      }

      const sorted = [...batchSamples].sort((a, b) => a - b);
      const avg = batchSamples.reduce((a, b) => a + b, 0) / batchSamples.length;
      const p50 = sorted[Math.floor(sorted.length / 2)];
      const time = { min: sorted[0], max: sorted.at(-1)!, avg, p50 };
      return [
        { name: benchmark.name, samples: batchSamples, time },
      ] as MeasuredResults[];
    },
  };
}

test("adaptive wrapper stops early with stable samples", async () => {
  const stableSamples = Array.from(
    { length: 500 },
    () => 50 + Math.random() * 0.5,
  );
  const mockRunner = createMockRunner(stableSamples);

  const adaptiveRunner = createAdaptiveWrapper(mockRunner, {});
  const bench: BenchmarkSpec = { name: "stable-test", fn: () => {} };

  const startTime = performance.now();
  const results = await adaptiveRunner.runBench(bench, {
    minTime: 500, // 0.5s minimum
    maxTime: 5000, // 5s maximum
  });
  const duration = performance.now() - startTime;

  // Should stop early due to convergence
  if (duration > 3000) {
    console.warn(`Took ${duration}ms - may not have converged early`);
  }

  const result = requireConvergence(results[0]);

  console.log(
    `Stable samples: ${result.samples.length} samples, ${result.convergence!.confidence}% confidence`,
  );

  if (result.convergence!.confidence < 95) {
    throw new Error("Should achieve high confidence with stable samples");
  }
});

test("adaptive wrapper continues with unstable samples", async () => {
  const unstableSamples = Array.from(
    { length: 500 },
    () => 30 + Math.random() * 40,
  );
  const mockRunner = createMockRunner(unstableSamples);

  const adaptiveRunner = createAdaptiveWrapper(mockRunner, {});
  const bench: BenchmarkSpec = { name: "unstable-test", fn: () => {} };
  const results = await adaptiveRunner.runBench(bench, {
    minTime: 100, // 0.1s minimum
    maxTime: 500, // 0.5s maximum
  });

  const result = requireConvergence(results[0]);

  console.log(
    `Unstable samples: ${result.samples.length} samples, ${result.convergence!.confidence}% confidence`,
  );

  if (result.convergence!.confidence > 80) {
    console.warn("Achieved high confidence despite unstable samples");
  }
});

test("adaptive wrapper with real bevy30 data", async () => {
  const bench: BenchmarkSpec = { name: "bevy-test", fn: () => {} };

  const configs = [
    { minTime: 1000, maxTime: 5000, label: "1-5s" },
    { minTime: 2000, maxTime: 10000, label: "2-10s" },
    { minTime: 5000, maxTime: 30000, label: "5-30s" },
  ];

  for (const config of configs) {
    // Reset sample index for each test
    const runner = createMockRunner(bevy30SamplesMs);
    const adaptive = createAdaptiveWrapper(runner, {});

    const results = await adaptive.runBench(bench, config);

    const result = requireConvergence(results[0]);

    console.log(
      `Config ${config.label}: ${result.samples.length} samples, ${result.convergence!.confidence}% confidence`,
    );
  }
});

test("adaptive wrapper respects target confidence", async () => {
  const mockRunner = createMockRunner(bevy30SamplesMs);

  const wrapper = createAdaptiveWrapper(mockRunner, { convergence: 50 });
  const bench: BenchmarkSpec = { name: "low-confidence-test", fn: () => {} };

  const startTime = performance.now();
  const results = await wrapper.runBench(bench, {
    minTime: 500,
    maxTime: 10000,
  });
  const duration = performance.now() - startTime;

  const result = requireConvergence(results[0]);

  console.log(
    `Low target (50%): ${result.samples.length} samples in ${duration}ms, ${result.convergence!.confidence}% confidence`,
  );

  // Should stop relatively quickly with low target
  if (duration > 5000 && result.convergence!.confidence > 50) {
    console.warn("Took longer than expected for low confidence target");
  }
});

test("adaptive wrapper handles warm-up period", async () => {
  // Simulate warm-up: slow samples at start, then stable
  // Decreasing from 100ms to 60ms, then stable at ~50ms
  const warmup = Array.from({ length: 20 }, (_, i) => 100 - i * 2);
  const stable = Array.from({ length: 200 }, () => 50 + Math.random());
  const warmupSamples = [...warmup, ...stable];

  const mockRunner = createMockRunner(warmupSamples);
  const adaptiveRunner = createAdaptiveWrapper(mockRunner, {});

  const bench: BenchmarkSpec = { name: "warmup-test", fn: () => {} };
  const results = await adaptiveRunner.runBench(bench, {
    minTime: 1000,
    maxTime: 5000,
  });

  const result = requireConvergence(results[0]);
  if (!result.time) throw new Error("Missing time stats");

  console.log(
    `Warmup test: median=${result.time.p50?.toFixed(1)}ms, mean=${result.time.avg?.toFixed(1)}ms`,
  );

  // Median should be close to stable value (50ms) despite warm-up
  if (result.time.p50 && Math.abs(result.time.p50 - 50) > 5) {
    console.warn(`Median ${result.time.p50}ms differs from stable 50ms`);
  }
});

test("adaptive wrapper statistics calculation", async () => {
  const samples = bevy30SamplesMs.slice(100, 200);
  const mockRunner = createMockRunner(samples);
  const adaptiveRunner = createAdaptiveWrapper(mockRunner, {});

  const bench: BenchmarkSpec = { name: "stats-test", fn: () => {} };
  const results = await adaptiveRunner.runBench(bench, {
    minTime: 100,
    maxTime: 1000,
  });

  const result = results[0];
  if (!result.time) throw new Error("Missing time statistics");

  const { min, p25, p50, p75, p95, p99, max } = result.time;
  const ordered = [min, p25, p50, p75, p95, p99, max];
  if (ordered.some(v => v == null)) throw new Error("Missing percentile data");
  if (ordered.some((v, i) => i > 0 && v! < ordered[i - 1]!)) {
    throw new Error("Percentiles not in correct order");
  }

  console.log(
    `Statistics: min=${min.toFixed(1)}, p50=${p50.toFixed(1)}, p99=${p99.toFixed(1)}, max=${max.toFixed(1)}`,
  );

  if (result.time.cv === undefined || result.time.mad === undefined) {
    throw new Error("Missing variability metrics (CV or MAD)");
  }

  console.log(
    `Variability: CV=${(result.time.cv * 100).toFixed(1)}%, MAD=${result.time.mad.toFixed(2)}`,
  );
});

test("adaptive wrapper total time tracking", async () => {
  const mockRunner = createMockRunner(bevy30SamplesMs.slice(0, 100));
  const adaptiveRunner = createAdaptiveWrapper(mockRunner, {});

  const bench: BenchmarkSpec = { name: "time-tracking-test", fn: () => {} };

  const startTime = performance.now();
  const results = await adaptiveRunner.runBench(bench, {
    minTime: 200,
    maxTime: 1000,
  });
  const actualDuration = (performance.now() - startTime) / 1000;

  const result = results[0];
  if (!result.totalTime) throw new Error("Missing totalTime");

  console.log(
    `Total time: reported=${result.totalTime.toFixed(2)}s, actual=${actualDuration.toFixed(2)}s`,
  );

  // Total time should be close to actual duration
  if (Math.abs(result.totalTime - actualDuration) > 0.5) {
    console.warn(
      `Time tracking mismatch: ${Math.abs(result.totalTime - actualDuration).toFixed(2)}s difference`,
    );
  }
});