npm - @tangle-network/agent-eval - Versions diffs - 0.20.12 → 0.22.0 - Mend

@tangle-network/agent-eval 0.20.12 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/CHANGELOG.md +177 -0
package/README.md +43 -1
package/dist/{chunk-KWUAAIHR.js → chunk-4W4NCYM2.js} +182 -1
package/dist/chunk-4W4NCYM2.js.map +1 -0
package/dist/{chunk-PKCVBYTQ.js → chunk-5IIQKMD5.js} +38 -2
package/dist/chunk-5IIQKMD5.js.map +1 -0
package/dist/{chunk-HNJLMAJ2.js → chunk-6KQG5HAH.js} +2 -2
package/dist/chunk-6M774GY6.js +53 -0
package/dist/chunk-6M774GY6.js.map +1 -0
package/dist/{chunk-MCMV7DUL.js → chunk-ARZ6BEV6.js} +2 -2
package/dist/chunk-IOXMGMHQ.js +1226 -0
package/dist/chunk-IOXMGMHQ.js.map +1 -0
package/dist/{chunk-75MCTH7P.js → chunk-KAO3Q65R.js} +198 -3
package/dist/chunk-KAO3Q65R.js.map +1 -0
package/dist/chunk-QUKKGHTZ.js +121 -0
package/dist/chunk-QUKKGHTZ.js.map +1 -0
package/dist/chunk-SQQLHODJ.js +163 -0
package/dist/chunk-SQQLHODJ.js.map +1 -0
package/dist/{chunk-IKFVX537.js → chunk-UAND2LOT.js} +232 -211
package/dist/chunk-UAND2LOT.js.map +1 -0
package/dist/{chunk-HKYRWNHV.js → chunk-USHQBPMH.js} +283 -7
package/dist/chunk-USHQBPMH.js.map +1 -0
package/dist/cli.js +3 -2
package/dist/cli.js.map +1 -1
package/dist/{control-C8NKbF3w.d.ts → control-cxwMOAsy.d.ts} +3 -2
package/dist/control.d.ts +4 -3
package/dist/control.js +2 -2
package/dist/emitter-B2XqDKFU.d.ts +121 -0
package/dist/{feedback-trajectory-BGQ_ANCN.d.ts → feedback-trajectory-CB0A32o3.d.ts} +2 -1
package/dist/index.d.ts +16 -302
package/dist/index.js +70 -62
package/dist/index.js.map +1 -1
package/dist/integrity-K2oVlF57.d.ts +210 -0
package/dist/openapi.json +1 -1
package/dist/optimization-UVDNKaO6.d.ts +574 -0
package/dist/optimization.d.ts +7 -144
package/dist/optimization.js +9 -2
package/dist/reporting-B82RSv9C.d.ts +593 -0
package/dist/reporting.d.ts +5 -426
package/dist/reporting.js +17 -6
package/dist/{emitter-BYO2nSDA.d.ts → store-u47QaJ9G.d.ts} +1 -91
package/dist/{multi-shot-optimization-Bvtz294B.d.ts → summary-report-D4p7RlDu.d.ts} +381 -1
package/dist/traces.d.ts +179 -3
package/dist/traces.js +35 -4
package/dist/wire/index.js +3 -2
package/docs/research-report-methodology.md +170 -0
package/docs/wire-protocol.md +1 -1
package/package.json +11 -13
package/dist/chunk-75MCTH7P.js.map +0 -1
package/dist/chunk-HKYRWNHV.js.map +0 -1
package/dist/chunk-IKFVX537.js.map +0 -1
package/dist/chunk-KWUAAIHR.js.map +0 -1
package/dist/chunk-ODFINDLQ.js +0 -413
package/dist/chunk-ODFINDLQ.js.map +0 -1
package/dist/chunk-PKCVBYTQ.js.map +0 -1
/package/dist/{chunk-HNJLMAJ2.js.map → chunk-6KQG5HAH.js.map} +0 -0
/package/dist/{chunk-MCMV7DUL.js.map → chunk-ARZ6BEV6.js.map} +0 -0

package/dist/{chunk-IKFVX537.js → chunk-UAND2LOT.js} RENAMED Viewed

@@ -1,10 +1,6 @@
 import {
-  benjaminiHochberg,
-  cohensD,
-  confidenceInterval,
-  pairedBootstrap,
-  wilcoxonSignedRank
-} from "./chunk-ODFINDLQ.js";
+  summaryTable
+} from "./chunk-IOXMGMHQ.js";
 // src/release-confidence.ts
 var DEFAULT_THRESHOLDS = {
@@ -289,219 +285,244 @@ function fmt(x) {
   return x.toFixed(4);
 }
-// src/summary-report.ts
-function summaryTable(runs, opts = {}) {
-  const split = opts.split ?? "holdout";
-  const confidence = opts.confidence ?? 0.95;
-  const fdr = opts.fdr ?? 0.05;
-  const comparator = opts.comparator ?? null;
-  const scoreField = split === "holdout" ? "holdoutScore" : "searchScore";
-  const byCandidate = /* @__PURE__ */ new Map();
-  for (const r of runs) {
-    if (r.splitTag !== split) continue;
-    const v = r.outcome[scoreField];
-    if (typeof v !== "number" || !Number.isFinite(v)) continue;
-    const bucket = byCandidate.get(r.candidateId) ?? { runs: [], scores: [] };
-    bucket.runs.push(r);
-    bucket.scores.push(v);
-    byCandidate.set(r.candidateId, bucket);
-  }
-  const candidateIds = [...byCandidate.keys()].sort();
-  const compRuns = comparator ? byCandidate.get(comparator) : void 0;
-  const tentative = [];
-  for (const id of candidateIds) {
-    const bucket = byCandidate.get(id);
-    const ci = confidenceInterval(bucket.scores, confidence);
-    let rawP = Number.NaN;
-    let d = Number.NaN;
-    if (comparator && compRuns && id !== comparator) {
-      const paired = pairScoresByKey(bucket.runs, compRuns.runs, scoreField);
-      if (paired.before.length >= 6) {
-        rawP = wilcoxonSignedRank(paired.before, paired.after).p;
-      }
-      d = cohensD(compRuns.scores, bucket.scores);
+// src/meta-eval/rubric-predictive-validity.ts
+async function rubricPredictiveValidity(input) {
+  const minSamples = input.minSamples ?? 8;
+  const reduction = input.reduction ?? "latest";
+  const resamples = input.bootstrapResamples ?? 500;
+  const rng = makeRng(input.seed);
+  const outcomes = await input.outcomes.list();
+  const outcomesByRun = /* @__PURE__ */ new Map();
+  for (const o of outcomes) {
+    const arr = outcomesByRun.get(o.runId) ?? [];
+    arr.push(o);
+    outcomesByRun.set(o.runId, arr);
+  }
+  const observedRubrics = /* @__PURE__ */ new Set();
+  for (const r of input.runs) {
+    for (const k of Object.keys(r.outcome.raw)) observedRubrics.add(k);
+  }
+  const rubrics = input.rubrics ?? [...observedRubrics];
+  const buckets = [];
+  for (const r of rubrics) {
+    for (const o of input.outcomeMetrics) {
+      buckets.push({ rubric: r, outcome: o, xs: [], ys: [] });
     }
-    tentative.push({
-      candidateId: id,
-      n: bucket.scores.length,
-      mean: ci.mean,
-      ciLow: ci.lower,
-      ciHigh: ci.upper,
-      qValue: rawP,
-      cohensD: d,
-      rawP
-    });
   }
-  if (comparator) {
-    const idxs = [];
-    const ps = [];
-    for (let i = 0; i < tentative.length; i++) {
-      const r = tentative[i];
-      if (r.candidateId === comparator) continue;
-      if (!Number.isFinite(r.rawP)) continue;
-      idxs.push(i);
-      ps.push(r.rawP);
+  let joined = 0;
+  let skipped = 0;
+  for (const run of input.runs) {
+    const os = outcomesByRun.get(run.runId);
+    if (!os || os.length === 0) {
+      skipped++;
+      continue;
     }
-    if (ps.length > 0) {
-      const { qValues } = benjaminiHochberg(ps, fdr);
-      for (let k = 0; k < idxs.length; k++) {
-        tentative[idxs[k]].qValue = qValues[k];
+    let joinedThisRun = false;
+    for (const r of rubrics) {
+      const x = run.outcome.raw[r];
+      if (typeof x !== "number" || !Number.isFinite(x)) continue;
+      for (const o of input.outcomeMetrics) {
+        const values = os.map((row) => row.metrics[o]).filter((v) => typeof v === "number" && Number.isFinite(v));
+        if (values.length === 0) continue;
+        const y = reduce(values, os, o, reduction);
+        if (y === null) continue;
+        const bucket = buckets.find((b) => b.rubric === r && b.outcome === o);
+        bucket.xs.push(x);
+        bucket.ys.push(y);
+        joinedThisRun = true;
       }
     }
-  }
-  const rows = tentative.map(({ rawP: _rawP, ...rest }) => rest);
-  const markdown = renderSummaryTableMarkdown(rows, comparator, split);
-  return { rows, comparator, split, markdown };
-}
-function pairScoresByKey(candidate, baseline, scoreField) {
-  const baseIdx = /* @__PURE__ */ new Map();
-  for (const r of baseline) {
-    const v = r.outcome[scoreField];
-    if (typeof v === "number" && Number.isFinite(v)) {
-      baseIdx.set(`${r.experimentId}::${r.seed}`, v);
-    }
-  }
-  const before = [];
-  const after = [];
-  for (const r of candidate) {
-    const v = r.outcome[scoreField];
-    if (typeof v !== "number" || !Number.isFinite(v)) continue;
-    const key = `${r.experimentId}::${r.seed}`;
-    const b = baseIdx.get(key);
-    if (b === void 0) continue;
-    before.push(b);
-    after.push(v);
-  }
-  return { before, after };
-}
-function renderSummaryTableMarkdown(rows, comparator, split) {
-  const lines = [];
-  const cmpLabel = comparator ? ` (vs ${comparator})` : "";
-  lines.push(`Summary Table \u2014 ${split} split${cmpLabel}`);
-  lines.push("");
-  lines.push("| Candidate | N | Mean | 95% CI | q (BH) | Cohen's d |");
-  lines.push("|---|---:|---:|---|---:|---:|");
-  for (const r of rows) {
-    const ci = `[${fmt2(r.ciLow)}, ${fmt2(r.ciHigh)}]`;
-    const q = Number.isFinite(r.qValue) ? r.qValue.toFixed(4) : "\u2014";
-    const d = Number.isFinite(r.cohensD) ? r.cohensD.toFixed(3) : "\u2014";
-    lines.push(`| ${r.candidateId} | ${r.n} | ${fmt2(r.mean)} | ${ci} | ${q} | ${d} |`);
-  }
-  return lines.join("\n");
-}
-function paretoChart(runs, opts = {}) {
-  const split = opts.split ?? "holdout";
-  const scoreField = split === "holdout" ? "holdoutScore" : "searchScore";
-  const buckets = /* @__PURE__ */ new Map();
-  for (const r of runs) {
-    if (r.splitTag !== split) continue;
-    const v = r.outcome[scoreField];
-    if (typeof v !== "number" || !Number.isFinite(v)) continue;
-    const bucket = buckets.get(r.candidateId) ?? { cost: [], quality: [] };
-    bucket.cost.push(r.costUsd);
-    bucket.quality.push(v);
-    buckets.set(r.candidateId, bucket);
-  }
-  const points = [];
-  for (const [candidateId, bucket] of buckets.entries()) {
-    points.push({
-      candidateId,
-      cost: avg(bucket.cost),
-      quality: avg(bucket.quality),
-      n: bucket.cost.length,
-      onFrontier: false,
-      gate: opts.gateDecisions?.[candidateId] ? gateLabel(opts.gateDecisions[candidateId]) : void 0
+    if (joinedThisRun) joined++;
+  }
+  const pairs = [];
+  for (const b of buckets) {
+    if (b.xs.length < minSamples) continue;
+    const pearson = pearsonR(b.xs, b.ys);
+    const spearman = pearsonR(rankWithTies(b.xs), rankWithTies(b.ys));
+    const ci = bootstrapCi(b.xs, b.ys, resamples, rng);
+    const verdict = Math.abs(spearman) >= 0.7 ? "load_bearing" : Math.abs(spearman) >= 0.4 ? "informative" : "decorative";
+    pairs.push({
+      rubric: b.rubric,
+      outcome: b.outcome,
+      n: b.xs.length,
+      pearson,
+      spearman,
+      ci95: ci,
+      verdict
     });
   }
-  for (const p of points) {
-    p.onFrontier = !points.some((q) => q !== p && dominates(q, p));
+  const byRubric = /* @__PURE__ */ new Map();
+  for (const p of pairs) {
+    const arr = byRubric.get(p.rubric) ?? [];
+    arr.push(p);
+    byRubric.set(p.rubric, arr);
   }
-  return {
-    kind: "pareto-cost-quality",
-    split,
-    axes: { x: "costUsd", y: "score" },
-    points
-  };
-}
-function dominates(a, b) {
-  return a.cost <= b.cost && a.quality >= b.quality && (a.cost < b.cost || a.quality > b.quality);
-}
-function gateLabel(d) {
-  if (d.promote) return "promote";
-  if (d.rejectionCode === "few_runs") return "reject_few_runs";
-  if (d.rejectionCode === "negative_delta") return "reject_negative_delta";
-  if (d.rejectionCode === "overfit_gap") return "reject_overfit_gap";
-  return null;
-}
-function gainHistogram(runs, candidateId, comparator, opts = {}) {
-  const split = opts.split ?? "holdout";
-  const scoreField = split === "holdout" ? "holdoutScore" : "searchScore";
-  const binCount = opts.bins ?? 11;
-  if (binCount < 1) throw new Error("gainHistogram: bins must be \u2265 1");
-  const candidate = runs.filter((r) => r.candidateId === candidateId && r.splitTag === split);
-  const baseline = runs.filter((r) => r.candidateId === comparator && r.splitTag === split);
-  const { before, after } = pairScoresByKey(candidate, baseline, scoreField);
-  const n = before.length;
-  if (n === 0) {
+  const ranked = [...byRubric.entries()].map(([rubric, ps]) => {
+    const best = ps.reduce((a, b) => Math.abs(b.spearman) > Math.abs(a.spearman) ? b : a);
     return {
-      kind: "gain-distribution",
-      candidateId,
-      comparator,
-      split,
-      n: 0,
-      bins: [],
-      median: 0,
-      ci: { low: 0, high: 0 }
+      rubric,
+      bestOutcome: best.outcome,
+      spearman: best.spearman,
+      pearson: best.pearson,
+      n: best.n,
+      verdict: best.verdict
     };
+  }).sort((a, b) => Math.abs(b.spearman) - Math.abs(a.spearman));
+  const rubricsWithoutData = rubrics.filter((r) => !byRubric.has(r));
+  return { pairs, ranked, joinedSamples: joined, skippedRuns: skipped, rubricsWithoutData };
+}
+function reduce(values, outcomes, metric, kind) {
+  if (values.length === 0) return null;
+  if (kind === "mean") return values.reduce((s, v) => s + v, 0) / values.length;
+  if (kind === "max") return Math.max(...values);
+  const sorted = [...outcomes].filter((o) => typeof o.metrics[metric] === "number").sort((a, b) => b.capturedAt - a.capturedAt);
+  return sorted[0]?.metrics[metric] ?? null;
+}
+function pearsonR(a, b) {
+  if (a.length !== b.length || a.length < 2) return Number.NaN;
+  const ma = a.reduce((s, v) => s + v, 0) / a.length;
+  const mb = b.reduce((s, v) => s + v, 0) / b.length;
+  let num2 = 0, da = 0, db = 0;
+  for (let i = 0; i < a.length; i++) {
+    const xa = a[i] - ma;
+    const xb = b[i] - mb;
+    num2 += xa * xb;
+    da += xa * xa;
+    db += xb * xb;
+  }
+  if (da === 0 || db === 0) return da === 0 && db === 0 ? 1 : 0;
+  return num2 / Math.sqrt(da * db);
+}
+function rankWithTies(xs) {
+  const indexed = xs.map((v, i) => ({ v, i })).sort((a, b) => a.v - b.v);
+  const r = new Array(xs.length);
+  for (let i = 0; i < indexed.length; ) {
+    let j = i;
+    while (j + 1 < indexed.length && indexed[j + 1].v === indexed[i].v) j++;
+    const avg = (i + j + 2) / 2;
+    for (let k = i; k <= j; k++) r[indexed[k].i] = avg;
+    i = j + 1;
+  }
+  return r;
+}
+function bootstrapCi(xs, ys, iterations, rng) {
+  const n = xs.length;
+  if (n < 3) return { low: Number.NaN, high: Number.NaN };
+  const samples = [];
+  for (let b = 0; b < iterations; b++) {
+    const rx = new Array(n);
+    const ry = new Array(n);
+    for (let i = 0; i < n; i++) {
+      const idx = Math.floor(rng() * n);
+      rx[i] = xs[idx];
+      ry[i] = ys[idx];
+    }
+    const r = pearsonR(rx, ry);
+    if (Number.isFinite(r)) samples.push(r);
   }
-  const deltas = before.map((b, i) => after[i] - b);
-  const sortedDeltas = [...deltas].sort((a, b) => a - b);
-  const median = medianOfSorted(sortedDeltas);
-  const min = sortedDeltas[0];
-  const max = sortedDeltas[sortedDeltas.length - 1];
-  const bound = Math.max(Math.abs(min), Math.abs(max), 1e-6);
-  const lo = -bound;
-  const hi = bound;
-  const width = (hi - lo) / binCount;
-  const bins = [];
-  for (let i = 0; i < binCount; i++) {
-    bins.push({ lo: lo + i * width, hi: lo + (i + 1) * width, count: 0 });
-  }
-  for (const d of deltas) {
-    let idx = Math.floor((d - lo) / width);
-    if (idx < 0) idx = 0;
-    if (idx >= binCount) idx = binCount - 1;
-    bins[idx].count += 1;
-  }
-  const ci = pairedBootstrap(before, after, {
-    confidence: opts.confidence ?? 0.95,
-    resamples: opts.resamples ?? 2e3,
-    statistic: "median",
-    seed: opts.seed
-  });
+  samples.sort((a, b) => a - b);
+  if (samples.length === 0) return { low: Number.NaN, high: Number.NaN };
   return {
-    kind: "gain-distribution",
-    candidateId,
-    comparator,
-    split,
-    n,
-    bins,
-    median,
-    ci: { low: ci.low, high: ci.high }
+    low: samples[Math.floor(0.025 * samples.length)],
+    high: samples[Math.min(samples.length - 1, Math.floor(0.975 * samples.length))]
   };
 }
-function avg(xs) {
-  if (xs.length === 0) return Number.NaN;
-  return xs.reduce((s, x) => s + x, 0) / xs.length;
-}
-function medianOfSorted(sorted) {
-  if (sorted.length === 0) return 0;
-  const mid = Math.floor(sorted.length / 2);
-  return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
+function makeRng(seed) {
+  if (seed === void 0) return Math.random;
+  let s = seed >>> 0;
+  return () => {
+    s = s + 1831565813 >>> 0;
+    let t = s;
+    t = Math.imul(t ^ t >>> 15, t | 1);
+    t ^= t + Math.imul(t ^ t >>> 7, t | 61);
+    return ((t ^ t >>> 14) >>> 0) / 4294967296;
+  };
 }
-function fmt2(x) {
-  if (!Number.isFinite(x)) return String(x);
-  return x.toFixed(4);
+// src/sequential.ts
+function pairedEvalueSequence(deltas, opts = {}) {
+  const c = opts.bound ?? 1;
+  const alpha = opts.alpha ?? 0.05;
+  const initialShrink = opts.initialBetShrinkage ?? 0.5;
+  const rope = opts.rope ?? null;
+  if (c <= 0) throw new Error("pairedEvalueSequence: bound must be > 0");
+  if (alpha <= 0 || alpha >= 1) throw new Error("pairedEvalueSequence: alpha must be in (0,1)");
+  if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {
+    throw new Error("pairedEvalueSequence: rope must satisfy low \u2264 high");
+  }
+  const steps = [];
+  let clipped = false;
+  let evalue = 1;
+  let decisionFiredAt = null;
+  let sum = 0;
+  let sumSq = 0;
+  let count = 0;
+  for (let i = 0; i < deltas.length; i++) {
+    let d = deltas[i];
+    if (d < -c || d > c) {
+      d = Math.max(-c, Math.min(c, d));
+      clipped = true;
+    }
+    const muHat = count === 0 ? 0 : sum / count;
+    const varHat = count === 0 ? c * c : Math.max(1e-12, sumSq / count - muHat * muHat);
+    const t = i + 1;
+    const shrink = initialShrink * Math.min(1, count / 32);
+    let lambda = muHat / (varHat + c * c) * shrink;
+    const lambdaMax = 0.99 / c;
+    if (lambda > lambdaMax) lambda = lambdaMax;
+    if (lambda < -lambdaMax) lambda = -lambdaMax;
+    evalue = evalue * (1 + lambda * d);
+    if (!Number.isFinite(evalue) || evalue < 0) evalue = 0;
+    sum += d;
+    sumSq += d * d;
+    count += 1;
+    const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300));
+    const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha);
+    let decision = "continue";
+    if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = "equivalent";
+    else if (evalue >= 2 / alpha && muHat > 0) decision = "promote_now";
+    else if (evalue >= 2 / alpha && muHat < 0) decision = "reject_now";
+    else if (rope && cs.high < rope.low) decision = "reject_now";
+    if (decision !== "continue" && decisionFiredAt === null) decisionFiredAt = t;
+    steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision });
+  }
+  const finalDecision = steps.length === 0 ? "continue" : steps[steps.length - 1].decision;
+  return { steps, finalDecision, decisionFiredAt, clipped };
+}
+function evaluateInterimReleaseConfidence(input) {
+  const candidates = input.deltaSeries.map((s) => {
+    const seq = pairedEvalueSequence(s.deltas, {
+      alpha: input.alpha,
+      bound: input.bound,
+      rope: input.rope
+    });
+    const last = seq.steps[seq.steps.length - 1];
+    return {
+      candidateId: s.candidateId,
+      decision: seq.finalDecision,
+      decisionFiredAt: seq.decisionFiredAt,
+      finalEvalue: last?.evalue ?? 1,
+      finalPValue: last?.pValue ?? 1,
+      pairs: seq.steps.length,
+      csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,
+      csHigh: last?.csHigh ?? Number.POSITIVE_INFINITY
+    };
+  });
+  const promote = candidates.find((c) => c.decision === "promote_now");
+  if (promote) return { candidates, recommendation: { decision: "promote_now", candidateId: promote.candidateId } };
+  const live = candidates.find((c) => c.decision === "continue");
+  if (live) return { candidates, recommendation: { decision: "continue", candidateId: null } };
+  const equiv = candidates.find((c) => c.decision === "equivalent");
+  if (equiv) return { candidates, recommendation: { decision: "equivalent", candidateId: equiv.candidateId } };
+  return { candidates, recommendation: { decision: "reject_now", candidateId: null } };
+}
+function empiricalBernsteinCs(sum, sumSq, n, bound, alpha) {
+  if (n === 0) return { low: -bound, high: bound };
+  const mean3 = sum / n;
+  const variance = Math.max(0, sumSq / n - mean3 * mean3);
+  const psi = Math.log(2 / alpha) + 1.7 * Math.log(Math.log(Math.max(Math.E, n)) + 1);
+  const radius = Math.sqrt(2 * variance * psi / n) + 3 * bound * psi / n;
+  return { low: mean3 - radius, high: mean3 + radius };
 }
 // src/release-report.ts
@@ -593,7 +614,7 @@ function num(value) {
 }
 // src/promotion-gate.ts
-function bootstrapCi(baseline, candidate, options = {}) {
+function bootstrapCi2(baseline, candidate, options = {}) {
   const alpha = options.alpha ?? 0.05;
   const iterations = options.iterations ?? 1e3;
   const minTotal = options.minTotalSamples ?? 6;
@@ -677,7 +698,7 @@ async function judgeReplayGate(args) {
   const concurrency = args.judgeConcurrency ?? 4;
   const baselineScores = await scoreAll(args.baselineOutputs, args.judge, concurrency);
   const candidateScores = await scoreAll(args.candidateOutputs, args.judge, concurrency);
-  const ci = bootstrapCi(baselineScores, candidateScores, {
+  const ci = bootstrapCi2(baselineScores, candidateScores, {
     ...args.alpha !== void 0 ? { alpha: args.alpha } : {},
     ...args.iterations !== void 0 ? { iterations: args.iterations } : {},
     ...args.seed !== void 0 ? { seed: args.seed } : {}
@@ -707,11 +728,11 @@ export {
   releaseTraceEvidenceFromMultiShotTrials,
   evaluateReleaseConfidence,
   assertReleaseConfidence,
-  summaryTable,
-  paretoChart,
-  gainHistogram,
+  rubricPredictiveValidity,
+  pairedEvalueSequence,
+  evaluateInterimReleaseConfidence,
   renderReleaseReport,
-  bootstrapCi,
+  bootstrapCi2 as bootstrapCi,
   judgeReplayGate
 };
-//# sourceMappingURL=chunk-IKFVX537.js.map
+//# sourceMappingURL=chunk-UAND2LOT.js.map