npm - @bitclaw/sqlite - Versions diffs - 1.2.0 → 1.3.0 - Mend

@bitclaw/sqlite 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/scripts/load-test-utils.d.ts +23 -1
package/dist/scripts/load-test-utils.d.ts.map +1 -1
package/dist/scripts/load-test-utils.js +98 -13
package/package.json +1 -1
package/scripts/load-test-utils.test.ts +75 -0
package/scripts/load-test-utils.ts +146 -20

package/dist/scripts/load-test-utils.d.ts CHANGED

@@ -4,7 +4,7 @@
  *
  * Unlike benchmark.ts (which tests raw SQLite pool.exec() calls), these utilities
  * measure end-to-end HTTP performance through the full stack: HTTP server, middleware,
- * ORM (Prisma), SSR rendering, etc.
+ * the ORM/query layer, SSR rendering, etc.
  *
  * Usage:
  *   Import into app-specific load tests:
@@ -24,6 +24,14 @@ export type LoadTestConfig = {
     durationSec: number;
     /** Optional: warm-up requests before timing */
     warmupRequests?: number;
+    /**
+     * Optional: number of times to repeat each scenario (default 1).
+     * When > 1, each (endpoint, concurrency) runs N times and results are
+     * aggregated — medians for throughput/latency, plus a coefficient of
+     * variation so run-to-run dispersion is visible. Defends against the
+     * ~±30% single-run variance seen on virtualized hosts (e.g. WSL2).
+     */
+    repeat?: number;
 };
 export type EndpointConfig = {
     /** Path relative to baseUrl (e.g., '/healthcheck') */
@@ -63,6 +71,20 @@ export type ScenarioResult = {
     statusCodes: Record<number, number>;
     avgBodySize: number;
     via?: 'cdn' | 'direct';
+    /**
+     * Variance fields — only populated when the scenario was run more than
+     * once (LoadTestConfig.repeat > 1). Absent for single-run scenarios, so
+     * the single-run output shape is unchanged.
+     */
+    runs?: number;
+    /** Lowest per-run throughput (req/s) across the N runs. */
+    throughputMin?: number;
+    /** Highest per-run throughput (req/s) across the N runs. */
+    throughputMax?: number;
+    /** Coefficient of variation of throughput across runs, as a percent. */
+    throughputCoV?: number;
+    /** Median of the per-run p95 values (NOT p95 of pooled latencies). */
+    p95Median?: number;
 };
 export type LoadTestResults = {
     baseUrl: string;

package/dist/scripts/load-test-utils.d.ts.map CHANGED

	@@ -1 +1 @@
1	- {"version":3,"file":"load-test-utils.d.ts","sourceRoot":"","sources":["../../scripts/load-test-utils.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;GAaG;AAKH,MAAM,MAAM,cAAc,GAAG;IAC3B,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAC;IAChB,6CAA6C;IAC7C,SAAS,EAAE,cAAc,EAAE,CAAC;IAC5B,iCAAiC;IACjC,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,uCAAuC;IACvC,WAAW,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,cAAc,CAAC,EAAE,MAAM,CAAC;~~CACzB~~,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,sDAAsD;IACtD,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IAEpB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IAEpB,UAAU,EAAE,MAAM,CAAC;IAEnB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IAEZ,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC;IAEpB,GAAG,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;~~CACxB~~,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,cAAc,EAAE,CAAC;CAC7B,CAAC;AAKF,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAS9D;AAKD,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,MAAM,SAAQ,EACd,IAAI,CAAC,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAC/B,OAAO,CAAC,aAAa,CAAC,CA6BxB;~~AA8FD~~,wBAAsB,WAAW,CAC/B,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,eAAe,CAAC,~~CA0B1B~~;AAKD,wBAAgB,aAAa,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,~~CAoE9D~~"}
1	+ {"version":3,"file":"load-test-utils.d.ts","sourceRoot":"","sources":["../../scripts/load-test-utils.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;GAaG;AAKH,MAAM,MAAM,cAAc,GAAG;IAC3B,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAC;IAChB,6CAA6C;IAC7C,SAAS,EAAE,cAAc,EAAE,CAAC;IAC5B,iCAAiC;IACjC,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,uCAAuC;IACvC,WAAW,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,sDAAsD;IACtD,IAAI,EAAE,MAAM,CAAC;IACb,iCAAiC;IACjC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IAEpB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IAEpB,UAAU,EAAE,MAAM,CAAC;IAEnB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IAEZ,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC;IAEpB,GAAG,CAAC,EAAE,KAAK,GAAG,QAAQ,CAAC;IAEvB;;;;OAIG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,2DAA2D;IAC3D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,4DAA4D;IAC5D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,wEAAwE;IACxE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,sEAAsE;IACtE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,cAAc,EAAE,CAAC;CAC7B,CAAC;AAKF,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAS9D;AAKD,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,MAAM,SAAQ,EACd,IAAI,CAAC,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GA
C/B,OAAO,CAAC,aAAa,CAAC,CA6BxB;AA0KD,wBAAsB,WAAW,CAC/B,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,eAAe,CAAC,CA8B1B;AAKD,wBAAgB,aAAa,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,CA2F9D"}

package/dist/scripts/load-test-utils.js CHANGED

@@ -4,7 +4,7 @@
  *
  * Unlike benchmark.ts (which tests raw SQLite pool.exec() calls), these utilities
  * measure end-to-end HTTP performance through the full stack: HTTP server, middleware,
- * ORM (Prisma), SSR rendering, etc.
+ * the ORM/query layer, SSR rendering, etc.
  *
  * Usage:
  *   Import into app-specific load tests:
@@ -119,6 +119,72 @@ async function runScenario(baseUrl, endpoint, concurrency, durationSec, warmupRe
         avgBodySize: results.length > 0 ? totalBodySize / results.length : 0
     };
 }
+/* ------------------------------------------------------------------
+ * Multi-run aggregation
+ * ------------------------------------------------------------------ */
+function median(values) {
+    if (values.length === 0)
+        return 0;
+    const sorted = [...values].sort((a, b) => a - b);
+    const mid = Math.floor(sorted.length / 2);
+    return sorted.length % 2 !== 0
+        ? sorted[mid]
+        : (sorted[mid - 1] + sorted[mid]) / 2;
+}
+function coefficientOfVariation(values) {
+    if (values.length < 2)
+        return 0;
+    const mean = values.reduce((a, b) => a + b, 0) / values.length;
+    if (mean === 0)
+        return 0;
+    const variance = values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length;
+    return (Math.sqrt(variance) / mean) * 100;
+}
+/**
+ * Collapse N per-run ScenarioResults into one. Threshold-checked fields
+ * (throughput, successRate) use the median so a single outlier run does not
+ * flip pass/fail; counts are summed; variance fields expose dispersion.
+ * A single run is returned unchanged (no variance fields → identical shape).
+ */
+function aggregateRuns(runs) {
+    if (runs.length === 1)
+        return runs[0];
+    const first = runs[0];
+    const throughputs = runs.map(r => r.throughput);
+    const statusCodes = {};
+    for (const r of runs) {
+        for (const [code, count] of Object.entries(r.statusCodes)) {
+            statusCodes[Number(code)] = (statusCodes[Number(code)] ?? 0) + count;
+        }
+    }
+    const p95Median = median(runs.map(r => r.p95));
+    return {
+        endpoint: first.endpoint,
+        label: first.label,
+        method: first.method,
+        concurrency: first.concurrency,
+        durationSec: first.durationSec,
+        totalRequests: runs.reduce((s, r) => s + r.totalRequests, 0),
+        successCount: runs.reduce((s, r) => s + r.successCount, 0),
+        failCount: runs.reduce((s, r) => s + r.failCount, 0),
+        successRate: median(runs.map(r => r.successRate)),
+        throughput: median(throughputs),
+        p50: median(runs.map(r => r.p50)),
+        p95: p95Median,
+        p99: median(runs.map(r => r.p99)),
+        min: Math.min(...runs.map(r => r.min)),
+        max: Math.max(...runs.map(r => r.max)),
+        avg: runs.reduce((s, r) => s + r.avg, 0) / runs.length,
+        statusCodes,
+        avgBodySize: runs.reduce((s, r) => s + r.avgBodySize, 0) / runs.length,
+        via: first.via,
+        runs: runs.length,
+        throughputMin: Math.min(...throughputs),
+        throughputMax: Math.max(...throughputs),
+        throughputCoV: coefficientOfVariation(throughputs),
+        p95Median
+    };
+}
 /* ------------------------------------------------------------------
  * Main load test runner
  * ------------------------------------------------------------------ */
@@ -126,11 +192,14 @@ export async function runLoadTest(config) {
     const startedAt = new Date().toISOString();
     const scenarios = [];
     const warmup = config.warmupRequests ?? 5;
+    const repeat = Math.max(1, config.repeat ?? 1);
     for (const endpoint of config.endpoints) {
         for (const concurrency of config.concurrencyLevels) {
-            const _label = endpoint.label ?? endpoint.path;
-            const result = await runScenario(config.baseUrl, endpoint, concurrency, config.durationSec, warmup);
-            scenarios.push(result);
+            const runs = [];
+            for (let i = 0; i < repeat; i++) {
+                runs.push(await runScenario(config.baseUrl, endpoint, concurrency, config.durationSec, warmup));
+            }
+            scenarios.push(aggregateRuns(runs));
         }
     }
     return {
@@ -153,8 +222,11 @@ export function formatResults(results) {
     lines.push(`  Completed: ${results.completedAt}`);
     lines.push('='.repeat(100));
     lines.push('');
+    // Show the variance column only when at least one scenario was repeated.
+    // Single-run output keeps its original columns unchanged.
+    const showCoV = results.scenarios.some(s => (s.runs ?? 1) > 1);
     // Summary table header
-    const header = [
+    const headerCols = [
         'Endpoint'.padEnd(25),
         'Conc'.padStart(5),
         'Req/s'.padStart(8),
@@ -164,11 +236,15 @@ export function formatResults(results) {
         'P99ms'.padStart(8),
         'Success'.padStart(8),
         'AvgBody'.padStart(8)
-    ].join(' | ');
+    ];
+    if (showCoV) {
+        headerCols.push('Runs'.padStart(5), 'CoV%'.padStart(7));
+    }
+    const header = headerCols.join(' | ');
     lines.push(header);
     lines.push('-'.repeat(header.length));
     for (const s of results.scenarios) {
-        const row = [
+        const cols = [
             s.label.padEnd(25).slice(0, 25),
             String(s.concurrency).padStart(5),
             s.throughput.toFixed(0).padStart(8),
@@ -178,15 +254,24 @@ export function formatResults(results) {
             s.p99.toFixed(1).padStart(8),
             `${s.successRate.toFixed(1)}%`.padStart(8),
             formatBytes(s.avgBodySize).padStart(8)
-        ].join(' | ');
-        lines.push(row);
+        ];
+        if (showCoV) {
+            cols.push(String(s.runs ?? 1).padStart(5), (s.throughputCoV !== undefined
+                ? `±${s.throughputCoV.toFixed(0)}%`
+                : '-').padStart(7));
+        }
+        lines.push(cols.join(' | '));
     }
     lines.push('');
-    // Pool-level comparison note
+    // Stack-overhead note
     lines.push('-'.repeat(100));
-    lines.push('  NOTE: Pool-level benchmarks (raw pool.exec) show 6,102-13,781 req/s.');
-    lines.push('  Application-level throughput is lower due to HTTP overhead, middleware,');
-    lines.push('  Prisma ORM, SSR rendering, and serialization.');
+    lines.push('  NOTE: Application-level throughput (full HTTP stack) is lower than raw');
+    lines.push('  pool.exec() benchmarks due to HTTP overhead, middleware, the ORM/query');
+    lines.push('  layer, SSR rendering, and serialization.');
+    if (showCoV) {
+        lines.push('  CoV% = coefficient of variation of throughput across repeated runs');
+        lines.push('  (higher = noisier host; treat deltas below CoV% as noise).');
+    }
     lines.push('-'.repeat(100));
     lines.push('');
     // Status code breakdown

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@bitclaw/sqlite",
-  "version": "1.2.0",
+  "version": "1.3.0",
   "description": "High-performance SQLite worker pool and utilities using bun:sqlite",
   "files": [
     "dist",

package/scripts/load-test-utils.test.ts ADDED

@@ -0,0 +1,75 @@
+import { describe, expect, test } from 'bun:test';
+import {
+  formatResults,
+  type LoadTestResults,
+  type ScenarioResult
+} from './load-test-utils';
+function makeScenario(overrides: Partial<ScenarioResult> = {}): ScenarioResult {
+  return {
+    endpoint: '/dashboard',
+    label: 'Dashboard',
+    method: 'GET',
+    concurrency: 100,
+    durationSec: 10,
+    totalRequests: 12_000,
+    successCount: 12_000,
+    failCount: 0,
+    successRate: 100,
+    throughput: 1200,
+    p50: 60,
+    p95: 150,
+    p99: 240,
+    min: 5,
+    max: 400,
+    avg: 70,
+    statusCodes: { 200: 12_000 },
+    avgBodySize: 6400,
+    ...overrides
+  };
+}
+function makeResults(scenarios: ScenarioResult[]): LoadTestResults {
+  return {
+    baseUrl: 'http://localhost:3000',
+    startedAt: new Date().toISOString(),
+    completedAt: new Date().toISOString(),
+    scenarios
+  };
+}
+describe('formatResults — stack-overhead note', () => {
+  test('given any results, when formatted, then note does not mention Prisma', () => {
+    const report = formatResults(makeResults([makeScenario()]));
+    expect(report).not.toContain('Prisma');
+  });
+  test('given any results, when formatted, then note omits stale hardcoded pool numbers', () => {
+    const report = formatResults(makeResults([makeScenario()]));
+    expect(report).not.toContain('6,102-13,781');
+  });
+  test('given any results, when formatted, then note credits the ORM/query layer generically', () => {
+    const report = formatResults(makeResults([makeScenario()]));
+    expect(report).toContain('ORM/query');
+  });
+});
+describe('formatResults — variance (CoV) column', () => {
+  test('given single-run scenarios, when formatted, then no CoV column is shown', () => {
+    const report = formatResults(makeResults([makeScenario()]));
+    expect(report).not.toContain('CoV%');
+    expect(report).not.toContain('Runs');
+  });
+  test('given a repeated scenario, when formatted, then a CoV column appears', () => {
+    const report = formatResults(
+      makeResults([
+        makeScenario({ runs: 3, throughputCoV: 18, p95Median: 150 })
+      ])
+    );
+    expect(report).toContain('CoV%');
+    expect(report).toContain('±18%');
+    expect(report).toContain('coefficient of variation');
+  });
+});

package/scripts/load-test-utils.ts CHANGED

@@ -4,7 +4,7 @@
  *
  * Unlike benchmark.ts (which tests raw SQLite pool.exec() calls), these utilities
  * measure end-to-end HTTP performance through the full stack: HTTP server, middleware,
- * ORM (Prisma), SSR rendering, etc.
+ * the ORM/query layer, SSR rendering, etc.
  *
  * Usage:
  *   Import into app-specific load tests:
@@ -28,6 +28,14 @@ export type LoadTestConfig = {
   durationSec: number;
   /** Optional: warm-up requests before timing */
   warmupRequests?: number;
+  /**
+   * Optional: number of times to repeat each scenario (default 1).
+   * When > 1, each (endpoint, concurrency) runs N times and results are
+   * aggregated — medians for throughput/latency, plus a coefficient of
+   * variation so run-to-run dispersion is visible. Defends against the
+   * ~±30% single-run variance seen on virtualized hosts (e.g. WSL2).
+   */
+  repeat?: number;
 };
 export type EndpointConfig = {
@@ -75,6 +83,21 @@ export type ScenarioResult = {
   avgBodySize: number;
   via?: 'cdn' | 'direct';
+  /**
+   * Variance fields — only populated when the scenario was run more than
+   * once (LoadTestConfig.repeat > 1). Absent for single-run scenarios, so
+   * the single-run output shape is unchanged.
+   */
+  runs?: number;
+  /** Lowest per-run throughput (req/s) across the N runs. */
+  throughputMin?: number;
+  /** Highest per-run throughput (req/s) across the N runs. */
+  throughputMax?: number;
+  /** Coefficient of variation of throughput across runs, as a percent. */
+  throughputCoV?: number;
+  /** Median of the per-run p95 values (NOT p95 of pooled latencies). */
+  p95Median?: number;
 };
 export type LoadTestResults = {
@@ -226,6 +249,82 @@ async function runScenario(
   };
 }
+/* ------------------------------------------------------------------
+ * Multi-run aggregation
+ * ------------------------------------------------------------------ */
+function median(values: number[]): number {
+  if (values.length === 0) return 0;
+  const sorted = [...values].sort((a, b) => a - b);
+  const mid = Math.floor(sorted.length / 2);
+  return sorted.length % 2 !== 0
+    ? sorted[mid]!
+    : (sorted[mid - 1]! + sorted[mid]!) / 2;
+}
+function coefficientOfVariation(values: number[]): number {
+  if (values.length < 2) return 0;
+  const mean = values.reduce((a, b) => a + b, 0) / values.length;
+  if (mean === 0) return 0;
+  const variance =
+    values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length;
+  return (Math.sqrt(variance) / mean) * 100;
+}
+/**
+ * Collapse N per-run ScenarioResults into one. Threshold-checked fields
+ * (throughput, successRate) use the median so a single outlier run does not
+ * flip pass/fail; counts are summed; variance fields expose dispersion.
+ * A single run is returned unchanged (no variance fields → identical shape).
+ */
+function aggregateRuns(runs: ScenarioResult[]): ScenarioResult {
+  if (runs.length === 1) return runs[0]!;
+  const first = runs[0]!;
+  const throughputs = runs.map(r => r.throughput);
+  const statusCodes: Record<number, number> = {};
+  for (const r of runs) {
+    for (const [code, count] of Object.entries(r.statusCodes)) {
+      statusCodes[Number(code)] = (statusCodes[Number(code)] ?? 0) + count;
+    }
+  }
+  const p95Median = median(runs.map(r => r.p95));
+  return {
+    endpoint: first.endpoint,
+    label: first.label,
+    method: first.method,
+    concurrency: first.concurrency,
+    durationSec: first.durationSec,
+    totalRequests: runs.reduce((s, r) => s + r.totalRequests, 0),
+    successCount: runs.reduce((s, r) => s + r.successCount, 0),
+    failCount: runs.reduce((s, r) => s + r.failCount, 0),
+    successRate: median(runs.map(r => r.successRate)),
+    throughput: median(throughputs),
+    p50: median(runs.map(r => r.p50)),
+    p95: p95Median,
+    p99: median(runs.map(r => r.p99)),
+    min: Math.min(...runs.map(r => r.min)),
+    max: Math.max(...runs.map(r => r.max)),
+    avg: runs.reduce((s, r) => s + r.avg, 0) / runs.length,
+    statusCodes,
+    avgBodySize: runs.reduce((s, r) => s + r.avgBodySize, 0) / runs.length,
+    via: first.via,
+    runs: runs.length,
+    throughputMin: Math.min(...throughputs),
+    throughputMax: Math.max(...throughputs),
+    throughputCoV: coefficientOfVariation(throughputs),
+    p95Median
+  };
+}
 /* ------------------------------------------------------------------
  * Main load test runner
  * ------------------------------------------------------------------ */
@@ -235,19 +334,23 @@ export async function runLoadTest(
   const startedAt = new Date().toISOString();
   const scenarios: ScenarioResult[] = [];
   const warmup = config.warmupRequests ?? 5;
+  const repeat = Math.max(1, config.repeat ?? 1);
   for (const endpoint of config.endpoints) {
     for (const concurrency of config.concurrencyLevels) {
-      const _label = endpoint.label ?? endpoint.path;
-      const result = await runScenario(
-        config.baseUrl,
-        endpoint,
-        concurrency,
-        config.durationSec,
-        warmup
-      );
-      scenarios.push(result);
+      const runs: ScenarioResult[] = [];
+      for (let i = 0; i < repeat; i++) {
+        runs.push(
+          await runScenario(
+            config.baseUrl,
+            endpoint,
+            concurrency,
+            config.durationSec,
+            warmup
+          )
+        );
+      }
+      scenarios.push(aggregateRuns(runs));
     }
   }
@@ -274,8 +377,12 @@ export function formatResults(results: LoadTestResults): string {
   lines.push('='.repeat(100));
   lines.push('');
+  // Show the variance column only when at least one scenario was repeated.
+  // Single-run output keeps its original columns unchanged.
+  const showCoV = results.scenarios.some(s => (s.runs ?? 1) > 1);
   // Summary table header
-  const header = [
+  const headerCols = [
     'Endpoint'.padEnd(25),
     'Conc'.padStart(5),
     'Req/s'.padStart(8),
@@ -285,13 +392,17 @@ export function formatResults(results: LoadTestResults): string {
     'P99ms'.padStart(8),
     'Success'.padStart(8),
     'AvgBody'.padStart(8)
-  ].join(' | ');
+  ];
+  if (showCoV) {
+    headerCols.push('Runs'.padStart(5), 'CoV%'.padStart(7));
+  }
+  const header = headerCols.join(' | ');
   lines.push(header);
   lines.push('-'.repeat(header.length));
   for (const s of results.scenarios) {
-    const row = [
+    const cols = [
       s.label.padEnd(25).slice(0, 25),
       String(s.concurrency).padStart(5),
       s.throughput.toFixed(0).padStart(8),
@@ -301,21 +412,36 @@ export function formatResults(results: LoadTestResults): string {
       s.p99.toFixed(1).padStart(8),
       `${s.successRate.toFixed(1)}%`.padStart(8),
       formatBytes(s.avgBodySize).padStart(8)
-    ].join(' | ');
-    lines.push(row);
+    ];
+    if (showCoV) {
+      cols.push(
+        String(s.runs ?? 1).padStart(5),
+        (s.throughputCoV !== undefined
+          ? `±${s.throughputCoV.toFixed(0)}%`
+          : '-'
+        ).padStart(7)
+      );
+    }
+    lines.push(cols.join(' | '));
   }
   lines.push('');
-  // Pool-level comparison note
+  // Stack-overhead note
   lines.push('-'.repeat(100));
   lines.push(
-    '  NOTE: Pool-level benchmarks (raw pool.exec) show 6,102-13,781 req/s.'
+    '  NOTE: Application-level throughput (full HTTP stack) is lower than raw'
   );
   lines.push(
-    '  Application-level throughput is lower due to HTTP overhead, middleware,'
+    '  pool.exec() benchmarks due to HTTP overhead, middleware, the ORM/query'
   );
-  lines.push('  Prisma ORM, SSR rendering, and serialization.');
+  lines.push('  layer, SSR rendering, and serialization.');
+  if (showCoV) {
+    lines.push(
+      '  CoV% = coefficient of variation of throughput across repeated runs'
+    );
+    lines.push('  (higher = noisier host; treat deltas below CoV% as noise).');
+  }
   lines.push('-'.repeat(100));
   lines.push('');