npm - @tangle-network/agent-eval - Versions diffs - 0.23.1 → 0.25.0 - Mend

@tangle-network/agent-eval 0.23.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148) hide show

package/CHANGELOG.md +145 -0
package/README.md +212 -79
package/dist/baseline-4R5deP0N.d.ts +108 -0
package/dist/benchmarks/index.d.ts +3 -2
package/dist/benchmarks/index.js +1 -1
package/dist/builder-eval/index.d.ts +249 -0
package/dist/builder-eval/index.js +391 -0
package/dist/builder-eval/index.js.map +1 -0
package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
package/dist/chunk-2A5XJB43.js.map +1 -0
package/dist/chunk-47X6LRCE.js +76 -0
package/dist/chunk-47X6LRCE.js.map +1 -0
package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
package/dist/chunk-4F5DQN55.js.map +1 -0
package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
package/dist/chunk-4S4BM3QQ.js.map +1 -0
package/dist/chunk-5BKGXME7.js +65 -0
package/dist/chunk-5BKGXME7.js.map +1 -0
package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
package/dist/chunk-5LBB5B3Z.js.map +1 -0
package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
package/dist/chunk-6QDKWHLS.js.map +1 -0
package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
package/dist/chunk-EDUKQ5AM.js.map +1 -0
package/dist/chunk-I4MBDTY5.js +272 -0
package/dist/chunk-I4MBDTY5.js.map +1 -0
package/dist/chunk-JLZQWFV3.js +618 -0
package/dist/chunk-JLZQWFV3.js.map +1 -0
package/dist/chunk-K2TPS5LB.js +569 -0
package/dist/chunk-K2TPS5LB.js.map +1 -0
package/dist/chunk-KKHDIONI.js +414 -0
package/dist/chunk-KKHDIONI.js.map +1 -0
package/dist/chunk-KMPRBJK4.js +74 -0
package/dist/chunk-KMPRBJK4.js.map +1 -0
package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
package/dist/chunk-KTGTIOFD.js.map +1 -0
package/dist/chunk-LSH4MMOZ.js +838 -0
package/dist/chunk-LSH4MMOZ.js.map +1 -0
package/dist/chunk-NG236HPC.js +57 -0
package/dist/chunk-NG236HPC.js.map +1 -0
package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
package/dist/chunk-NLMNWKVM.js.map +1 -0
package/dist/chunk-NU65VQ7M.js +99 -0
package/dist/chunk-NU65VQ7M.js.map +1 -0
package/dist/chunk-OWLAAMME.js +250 -0
package/dist/chunk-OWLAAMME.js.map +1 -0
package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
package/dist/chunk-PC4UYEBM.js.map +1 -0
package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
package/dist/chunk-RAF443UI.js.map +1 -0
package/dist/chunk-RZTMDUO7.js +49 -0
package/dist/chunk-RZTMDUO7.js.map +1 -0
package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
package/dist/chunk-SESZDQPX.js.map +1 -0
package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
package/dist/chunk-TVVP3ZZQ.js.map +1 -0
package/dist/chunk-WWYCWKUM.js +196 -0
package/dist/chunk-WWYCWKUM.js.map +1 -0
package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
package/dist/chunk-YRZ4M5GS.js.map +1 -0
package/dist/chunk-ZN274SWR.js +613 -0
package/dist/chunk-ZN274SWR.js.map +1 -0
package/dist/cli.js +10 -6
package/dist/cli.js.map +1 -1
package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
package/dist/control.d.ts +8 -6
package/dist/control.js +10 -7
package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
package/dist/errors-BZ9sTdz7.d.ts +70 -0
package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
package/dist/governance/index.d.ts +5 -0
package/dist/governance/index.js +18 -0
package/dist/governance/index.js.map +1 -0
package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
package/dist/index-Oj9fAPPN.d.ts +270 -0
package/dist/index.d.ts +2018 -3003
package/dist/index.js +7443 -9102
package/dist/index.js.map +1 -1
package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
package/dist/knowledge/index.d.ts +102 -0
package/dist/knowledge/index.js +18 -0
package/dist/knowledge/index.js.map +1 -0
package/dist/meta-eval/index.d.ts +99 -0
package/dist/meta-eval/index.js +324 -0
package/dist/meta-eval/index.js.map +1 -0
package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
package/dist/openapi.json +491 -1
package/dist/optimization.d.ts +11 -8
package/dist/optimization.js +11 -9
package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
package/dist/pipelines/index.d.ts +172 -0
package/dist/pipelines/index.js +345 -0
package/dist/pipelines/index.js.map +1 -0
package/dist/prm/index.d.ts +99 -0
package/dist/prm/index.js +222 -0
package/dist/prm/index.js.map +1 -0
package/dist/query-DODUYdPg.d.ts +30 -0
package/dist/release-report-BNgMdqPF.d.ts +292 -0
package/dist/replay-BL96gCEP.d.ts +226 -0
package/dist/reporting.d.ts +10 -295
package/dist/reporting.js +10 -6
package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
package/dist/rl.d.ts +1762 -8
package/dist/rl.js +2035 -58
package/dist/rl.js.map +1 -1
package/dist/rubric-D5tjHNJQ.d.ts +72 -0
package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
package/dist/sequential-Dgz1n51-.d.ts +139 -0
package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
package/dist/telemetry/file.js +4 -1
package/dist/telemetry/file.js.map +1 -1
package/dist/telemetry/index.js +57 -57
package/dist/telemetry/index.js.map +1 -1
package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
package/dist/traces.d.ts +142 -387
package/dist/traces.js +1302 -40
package/dist/traces.js.map +1 -1
package/dist/trajectory-CnoBo-JY.d.ts +32 -0
package/dist/wire/index.d.ts +369 -25
package/dist/wire/index.js +22 -3
package/package.json +44 -18
package/dist/chunk-42I2QC2L.js.map +0 -1
package/dist/chunk-5IIQKMD5.js.map +0 -1
package/dist/chunk-6KQG5HAH.js.map +0 -1
package/dist/chunk-6M774GY6.js.map +0 -1
package/dist/chunk-7EAUOUQS.js.map +0 -1
package/dist/chunk-AXHNWLIX.js.map +0 -1
package/dist/chunk-EXGR4XEM.js.map +0 -1
package/dist/chunk-IOXMGMHQ.js.map +0 -1
package/dist/chunk-KAO3Q65R.js.map +0 -1
package/dist/chunk-LZKIOBG2.js +0 -2026
package/dist/chunk-LZKIOBG2.js.map +0 -1
package/dist/chunk-QBW3YBTR.js.map +0 -1
package/dist/chunk-QUKKGHTZ.js.map +0 -1
package/dist/chunk-SQQLHODJ.js.map +0 -1
package/dist/chunk-V5QSWN7L.js +0 -1310
package/dist/chunk-V5QSWN7L.js.map +0 -1
package/dist/chunk-VQQSPGSM.js.map +0 -1
package/dist/chunk-XPHOZPOM.js +0 -1947
package/dist/chunk-XPHOZPOM.js.map +0 -1
package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
package/dist/index-ekBXweiQ.d.ts +0 -1894
package/dist/sequential-DgU2mFsE.d.ts +0 -304

package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} RENAMED Viewed

@@ -1,263 +1,12 @@
+import {
+  cohensD,
+  confidenceInterval,
+  wilcoxonSignedRank
+} from "./chunk-I4MBDTY5.js";
 import {
   canonicalize,
   hashJson
-} from "./chunk-6M774GY6.js";
-// src/statistics.ts
-var INVERTED_DIMENSIONS = /* @__PURE__ */ new Set([
-  "hallucination",
-  "false_confidence",
-  "worst_failure"
-]);
-function normalizeScores(scores) {
-  return scores.map((s) => {
-    if (INVERTED_DIMENSIONS.has(s.dimension)) {
-      return s;
-    }
-    return s;
-  });
-}
-function weightedMean(scores) {
-  if (scores.length === 0) return 0;
-  let totalWeight = 0;
-  let weightedSum = 0;
-  for (const { score, weight } of scores) {
-    const w = weight ?? 1;
-    weightedSum += score * w;
-    totalWeight += w;
-  }
-  return totalWeight > 0 ? weightedSum / totalWeight : 0;
-}
-function confidenceInterval(scores, confidence = 0.95) {
-  if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 };
-  if (scores.length === 1) return { mean: scores[0], lower: scores[0], upper: scores[0] };
-  const n = scores.length;
-  const mean = scores.reduce((a, b) => a + b, 0) / n;
-  const B = 1e3;
-  const bootstrapMeans = [];
-  for (let i = 0; i < B; i++) {
-    let sum = 0;
-    for (let j = 0; j < n; j++) {
-      sum += scores[Math.floor(Math.random() * n)];
-    }
-    bootstrapMeans.push(sum / n);
-  }
-  bootstrapMeans.sort((a, b) => a - b);
-  const alpha = 1 - confidence;
-  const lowerIdx = Math.floor(alpha / 2 * B);
-  const upperIdx = Math.floor((1 - alpha / 2) * B) - 1;
-  return {
-    mean,
-    lower: bootstrapMeans[lowerIdx],
-    upper: bootstrapMeans[Math.min(upperIdx, B - 1)]
-  };
-}
-function interRaterReliability(judgeScores) {
-  if (judgeScores.length < 2) return 1;
-  const dimensionMap = /* @__PURE__ */ new Map();
-  for (const judgeSet of judgeScores) {
-    for (const s of judgeSet) {
-      if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, []);
-      const arr = dimensionMap.get(s.dimension);
-      if (arr.length === 0 || arr[arr.length - 1].length >= judgeScores.length) {
-        arr.push([s.score]);
-      } else {
-        arr[arr.length - 1].push(s.score);
-      }
-    }
-  }
-  const allValues = [];
-  const pairDiffs = [];
-  for (const items of dimensionMap.values()) {
-    for (const ratings of items) {
-      if (ratings.length < 2) continue;
-      for (const v of ratings) allValues.push(v);
-      for (let i = 0; i < ratings.length; i++) {
-        for (let j = i + 1; j < ratings.length; j++) {
-          pairDiffs.push((ratings[i] - ratings[j]) ** 2);
-        }
-      }
-    }
-  }
-  if (pairDiffs.length === 0 || allValues.length < 2) return 1;
-  const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length;
-  let expectedDisagreement = 0;
-  let expectedCount = 0;
-  for (let i = 0; i < allValues.length; i++) {
-    for (let j = i + 1; j < allValues.length; j++) {
-      expectedDisagreement += (allValues[i] - allValues[j]) ** 2;
-      expectedCount++;
-    }
-  }
-  expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0;
-  if (expectedDisagreement === 0) return 1;
-  return 1 - observedDisagreement / expectedDisagreement;
-}
-function mannWhitneyU(a, b) {
-  if (a.length === 0 || b.length === 0) return { u: 0, p: 1 };
-  const n1 = a.length;
-  const n2 = b.length;
-  const combined = [
-    ...a.map((v) => ({ v, group: "a" })),
-    ...b.map((v) => ({ v, group: "b" }))
-  ].sort((x, y) => x.v - y.v);
-  const ranks = new Array(combined.length);
-  let i = 0;
-  while (i < combined.length) {
-    let j = i;
-    while (j < combined.length && combined[j].v === combined[i].v) j++;
-    const avgRank = (i + 1 + j) / 2;
-    for (let k = i; k < j; k++) ranks[k] = avgRank;
-    i = j;
-  }
-  let r1 = 0;
-  for (let k = 0; k < combined.length; k++) {
-    if (combined[k].group === "a") r1 += ranks[k];
-  }
-  const u1 = r1 - n1 * (n1 + 1) / 2;
-  const u2 = n1 * n2 - u1;
-  const u = Math.min(u1, u2);
-  const mu = n1 * n2 / 2;
-  const sigma = Math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12);
-  if (sigma === 0) return { u, p: 1 };
-  const z = Math.abs(u - mu) / sigma;
-  const p = 2 * (1 - normalCdf(z));
-  return { u, p };
-}
-function partialCredit(current, target) {
-  if (target <= 0) return 1;
-  return Math.min(1, Math.max(0, current / target));
-}
-function pairedTTest(before, after) {
-  if (before.length !== after.length) {
-    throw new Error(`pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`);
-  }
-  const n = before.length;
-  if (n < 2) return { t: 0, df: 0, p: 1 };
-  const diffs = before.map((b, i) => after[i] - b);
-  const mean = diffs.reduce((a, b) => a + b, 0) / n;
-  const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1);
-  const se = Math.sqrt(variance / n);
-  if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 };
-  const t = mean / se;
-  const df = n - 1;
-  const p = 2 * (1 - studentTCdf(Math.abs(t), df));
-  return { t, df, p };
-}
-function wilcoxonSignedRank(before, after) {
-  if (before.length !== after.length) {
-    throw new Error(`wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`);
-  }
-  const diffs = before.map((b, i2) => after[i2] - b).filter((d) => d !== 0);
-  const n = diffs.length;
-  if (n < 6) return { w: 0, p: 1 };
-  const absRanks = diffs.map((d, i2) => ({ abs: Math.abs(d), sign: Math.sign(d), i: i2 })).sort((a, b) => a.abs - b.abs);
-  const ranks = new Array(n);
-  let i = 0;
-  while (i < n) {
-    let j = i;
-    while (j < n && absRanks[j].abs === absRanks[i].abs) j++;
-    const avg2 = (i + 1 + j) / 2;
-    for (let k = i; k < j; k++) ranks[absRanks[k].i] = avg2;
-    i = j;
-  }
-  let wPlus = 0;
-  for (let k = 0; k < n; k++) if (diffs[k] > 0) wPlus += ranks[k];
-  const mean = n * (n + 1) / 4;
-  const variance = n * (n + 1) * (2 * n + 1) / 24;
-  const z = (wPlus - mean) / Math.sqrt(variance);
-  const p = 2 * (1 - normalCdf(Math.abs(z)));
-  return { w: wPlus, p };
-}
-function cohensD(a, b) {
-  if (a.length < 2 || b.length < 2) return 0;
-  const meanA = a.reduce((x, y) => x + y, 0) / a.length;
-  const meanB = b.reduce((x, y) => x + y, 0) / b.length;
-  const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1);
-  const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1);
-  const pooled = Math.sqrt(
-    ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2)
-  );
-  if (pooled === 0) return 0;
-  return (meanB - meanA) / pooled;
-}
-function studentTCdf(t, df) {
-  if (df <= 0) return 0.5;
-  if (df > 100) return normalCdf(t);
-  const x = df / (df + t * t);
-  const a = df / 2;
-  const b = 0.5;
-  const ib = incompleteBeta(x, a, b);
-  return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib;
-}
-function incompleteBeta(x, a, b) {
-  if (x <= 0) return 0;
-  if (x >= 1) return 1;
-  const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b);
-  const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a;
-  const maxIter = 200;
-  const eps = 3e-7;
-  let c = 1;
-  let d = 1 - (a + b) * x / (a + 1);
-  if (Math.abs(d) < 1e-30) d = 1e-30;
-  d = 1 / d;
-  let f = d;
-  for (let m = 1; m <= maxIter; m++) {
-    const m2 = 2 * m;
-    let num = m * (b - m) * x / ((a + m2 - 1) * (a + m2));
-    d = 1 + num * d;
-    if (Math.abs(d) < 1e-30) d = 1e-30;
-    c = 1 + num / c;
-    if (Math.abs(c) < 1e-30) c = 1e-30;
-    d = 1 / d;
-    f *= d * c;
-    num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
-    d = 1 + num * d;
-    if (Math.abs(d) < 1e-30) d = 1e-30;
-    c = 1 + num / c;
-    if (Math.abs(c) < 1e-30) c = 1e-30;
-    d = 1 / d;
-    const delta = d * c;
-    f *= delta;
-    if (Math.abs(delta - 1) < eps) break;
-  }
-  return front * f;
-}
-function lnGamma(z) {
-  const g = 7;
-  const coefs = [
-    0.9999999999998099,
-    676.5203681218851,
-    -1259.1392167224028,
-    771.3234287776531,
-    -176.6150291621406,
-    12.507343278686905,
-    -0.13857109526572012,
-    9984369578019572e-21,
-    15056327351493116e-23
-  ];
-  if (z < 0.5) {
-    return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z);
-  }
-  z -= 1;
-  let x = coefs[0];
-  for (let i = 1; i < g + 2; i++) x += coefs[i] / (z + i);
-  const t = z + g + 0.5;
-  return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x);
-}
-function normalCdf(x) {
-  const a1 = 0.254829592;
-  const a2 = -0.284496736;
-  const a3 = 1.421413741;
-  const a4 = -1.453152027;
-  const a5 = 1.061405429;
-  const p = 0.3275911;
-  const sign = x < 0 ? -1 : 1;
-  const absX = Math.abs(x);
-  const t = 1 / (1 + p * absX);
-  const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX / 2);
-  return 0.5 * (1 + sign * y);
-}
+} from "./chunk-4F5DQN55.js";
 // src/power-analysis.ts
 function requiredSampleSize(opts) {
@@ -268,7 +17,7 @@ function requiredSampleSize(opts) {
   const twoSided = opts.twoSided ?? true;
   const zAlpha = zQuantile(twoSided ? 1 - alpha / 2 : 1 - alpha);
   const zBeta = zQuantile(power);
-  const n = 2 * Math.pow((zAlpha + zBeta) / effect, 2);
+  const n = 2 * ((zAlpha + zBeta) / effect) ** 2;
   return Math.ceil(n);
 }
 function pairedMde(opts) {
@@ -294,10 +43,11 @@ function benjaminiHochberg(pValues, fdr = 0.05) {
   let minRight = 1;
   for (let k = n - 1; k >= 0; k--) {
     const rank = k + 1;
-    const raw = indexed[k].p * n / rank;
+    const entry = indexed[k];
+    const raw = entry.p * n / rank;
     const bounded = Math.min(minRight, raw);
     minRight = bounded;
-    q[indexed[k].i] = Math.min(1, bounded);
+    q[entry.i] = Math.min(1, bounded);
   }
   const significant = q.map((v) => v < fdr);
   return { qValues: q, significant };
@@ -308,9 +58,29 @@ function zQuantile(p) {
     if (p === 1) return Infinity;
     return NaN;
   }
-  const a = [-39.69683028665376, 220.9460984245205, -275.9285104469687, 138.357751867269, -30.66479806614716, 2.506628277459239];
-  const b = [-54.47609879822406, 161.5858368580409, -155.6989798598866, 66.80131188771972, -13.28068155288572];
-  const c = [-0.007784894002430293, -0.3223964580411365, -2.400758277161838, -2.549732539343734, 4.374664141464968, 2.938163982698783];
+  const a = [
+    -39.69683028665376,
+    220.9460984245205,
+    -275.9285104469687,
+    138.357751867269,
+    -30.66479806614716,
+    2.506628277459239
+  ];
+  const b = [
+    -54.47609879822406,
+    161.5858368580409,
+    -155.6989798598866,
+    66.80131188771972,
+    -13.28068155288572
+  ];
+  const c = [
+    -0.007784894002430293,
+    -0.3223964580411365,
+    -2.400758277161838,
+    -2.549732539343734,
+    4.374664141464968,
+    2.938163982698783
+  ];
   const d = [0.007784695709041462, 0.3224671290700398, 2.445134137142996, 3.754408661907416];
   const pLow = 0.02425;
   const pHigh = 1 - pLow;
@@ -332,9 +102,7 @@ function zQuantile(p) {
 // src/paired-stats.ts
 function pairedBootstrap(before, after, opts = {}) {
   if (before.length !== after.length) {
-    throw new Error(
-      `pairedBootstrap: unequal sample sizes (${before.length} vs ${after.length})`
-    );
+    throw new Error(`pairedBootstrap: unequal sample sizes (${before.length} vs ${after.length})`);
   }
   const confidence = opts.confidence ?? 0.95;
   const resamples = opts.resamples ?? 2e3;
@@ -686,7 +454,9 @@ async function researchReport(runs, opts = {}) {
   const generatedAt = opts.generatedAt ?? (/* @__PURE__ */ new Date()).toISOString();
   const preregistrationHash = opts.preregistrationHash ?? null;
   if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {
-    throw new Error(`researchReport: rope must satisfy low \u2264 high with finite bounds, got ${JSON.stringify(rope)}`);
+    throw new Error(
+      `researchReport: rope must satisfy low \u2264 high with finite bounds, got ${JSON.stringify(rope)}`
+    );
   }
   const summary = summaryTable(runs, {
     comparator: comparator ?? void 0,
@@ -696,24 +466,29 @@ async function researchReport(runs, opts = {}) {
   });
   const pareto = paretoChart(runs, { split, gateDecisions: opts.gateDecisions });
   const candidateIds = opts.candidateIds ?? summary.rows.map((r) => r.candidateId).filter((id) => id !== comparator);
-  const gains = comparator ? candidateIds.map((id) => gainHistogram(runs, id, comparator, {
-    split,
-    confidence,
-    seed: opts.seed
-  })) : [];
+  const gains = comparator ? candidateIds.map(
+    (id) => gainHistogram(runs, id, comparator, {
+      split,
+      confidence,
+      seed: opts.seed
+    })
+  ) : [];
   const gainByCandidate = new Map(gains.map((g) => [g.candidateId, g]));
   const paretoByCandidate = new Map(pareto.points.map((p) => [p.candidateId, p]));
   const posteriorByCandidate = /* @__PURE__ */ new Map();
   if (comparator) {
     for (const id of candidateIds) {
-      posteriorByCandidate.set(id, pairedPosterior(runs, id, comparator, {
-        split,
-        confidence,
-        seed: opts.seed,
-        rope,
-        mdePower,
-        mdeAlpha
-      }));
+      posteriorByCandidate.set(
+        id,
+        pairedPosterior(runs, id, comparator, {
+          split,
+          confidence,
+          seed: opts.seed,
+          rope,
+          mdePower,
+          mdeAlpha
+        })
+      );
     }
   }
   const candidates = summary.rows.map((row) => {
@@ -767,12 +542,23 @@ async function researchReport(runs, opts = {}) {
     failureClusters: opts.failureClusters,
     preregistrationHash
   });
-  const methodology = buildMethodology({ split, comparator, fdr, minPairs, rope, confidence, mdePower, mdeAlpha });
-  const runFingerprint = await hashJson(canonicalize({
-    triples: runs.filter((r) => r.splitTag === split).map((r) => ({ runId: r.runId, candidateId: r.candidateId, splitTag: r.splitTag })).sort((a, b) => a.runId.localeCompare(b.runId)),
+  const methodology = buildMethodology({
+    split,
     comparator,
-    split
-  }));
+    fdr,
+    minPairs,
+    rope,
+    confidence,
+    mdePower,
+    mdeAlpha
+  });
+  const runFingerprint = await hashJson(
+    canonicalize({
+      triples: runs.filter((r) => r.splitTag === split).map((r) => ({ runId: r.runId, candidateId: r.candidateId, splitTag: r.splitTag })).sort((a, b) => a.runId.localeCompare(b.runId)),
+      comparator,
+      split
+    })
+  );
   const markdown = renderResearchMarkdown({
     title,
     generatedAt,
@@ -818,7 +604,9 @@ function buildMethodology(ctx) {
     `Decisions are pre-specified at fdr=${ctx.fdr}, minPairs=${ctx.minPairs}, confidence=${ctx.confidence}; deviating from these post-hoc invalidates the false-discovery control.`
   ];
   if (ctx.rope) {
-    assumptions.push(`The Region of Practical Equivalence ${formatRope(ctx.rope)} is supplied by the domain owner; equivalent verdicts are only meaningful if that range is treated as the standing definition of "no material difference."`);
+    assumptions.push(
+      `The Region of Practical Equivalence ${formatRope(ctx.rope)} is supplied by the domain owner; equivalent verdicts are only meaningful if that range is treated as the standing definition of "no material difference."`
+    );
   }
   if (ctx.comparator === null) {
     assumptions.push("No comparator was configured; this run is descriptive, not causal.");
@@ -884,7 +672,10 @@ function classifyCandidate(row, ctx) {
   const gainPositive = ci.low > 0;
   const gainNegative = ci.high < 0;
   if (gainNegative) {
-    return { decision: "reject", reason: `Paired-delta CI [${fmt(ci.low)}, ${fmt(ci.high)}] lies entirely below zero.` };
+    return {
+      decision: "reject",
+      reason: `Paired-delta CI [${fmt(ci.low)}, ${fmt(ci.high)}] lies entirely below zero.`
+    };
   }
   if (ctx.posterior.n < ctx.minPairs) {
     return {
@@ -916,7 +707,9 @@ function buildRecommendation(candidates, ctx) {
     rationale.push(`${chosen.candidateId}: ${chosen.decisionReason}`);
     if (chosen.gainCi) {
       const probSummary = chosen.prGreaterThanZero !== null ? `, Pr(\u0394>0)=${fmt(chosen.prGreaterThanZero)}` : "";
-      rationale.push(`Median paired gain CI: [${fmt(chosen.gainCi.low)}, ${fmt(chosen.gainCi.high)}]${probSummary}.`);
+      rationale.push(
+        `Median paired gain CI: [${fmt(chosen.gainCi.low)}, ${fmt(chosen.gainCi.high)}]${probSummary}.`
+      );
     }
     if (chosen.mde !== null && Number.isFinite(chosen.mde)) {
       rationale.push(`MDE at current paired N=${chosen.pairedN}: ${fmt(chosen.mde)} score units.`);
@@ -927,22 +720,36 @@ function buildRecommendation(candidates, ctx) {
     nextActions.push("Re-run with a stable comparator candidate for paired inference.");
   }
   if (!ctx.preregistrationHash) {
-    risks.push("No preregistration hash supplied; readers cannot verify the analysis was specified before data inspection.");
-    nextActions.push("Sign a HypothesisManifest before the next sweep and pass `preregistrationHash` so the report cites it.");
+    risks.push(
+      "No preregistration hash supplied; readers cannot verify the analysis was specified before data inspection."
+    );
+    nextActions.push(
+      "Sign a HypothesisManifest before the next sweep and pass `preregistrationHash` so the report cites it."
+    );
   }
   if (ctx.rope === null && nonComparator.length > 0) {
-    risks.push('No ROPE configured; the report cannot distinguish "equivalent" from "inconclusive".');
-    nextActions.push("Define a domain-specific Region of Practical Equivalence and pass it to lock in the equivalence threshold.");
+    risks.push(
+      'No ROPE configured; the report cannot distinguish "equivalent" from "inconclusive".'
+    );
+    nextActions.push(
+      "Define a domain-specific Region of Practical Equivalence and pass it to lock in the equivalence threshold."
+    );
   }
   const inconclusive = nonComparator.filter((c) => c.decision === "needs_more_data");
   if (inconclusive.length > 0) {
     const worst = inconclusive.reduce((a, b) => b.pairedN < a.pairedN ? b : a);
-    risks.push(`${inconclusive.length} candidate(s) below soft floor (${ctx.minPairs} pairs); thinnest is ${worst.candidateId} with ${worst.pairedN}.`);
-    nextActions.push(`Collect at least ${ctx.minPairs - worst.pairedN} more matched holdout runs for ${worst.candidateId}.`);
+    risks.push(
+      `${inconclusive.length} candidate(s) below soft floor (${ctx.minPairs} pairs); thinnest is ${worst.candidateId} with ${worst.pairedN}.`
+    );
+    nextActions.push(
+      `Collect at least ${ctx.minPairs - worst.pairedN} more matched holdout runs for ${worst.candidateId}.`
+    );
   }
   const rejected = nonComparator.filter((c) => c.decision === "reject");
   if (rejected.length > 0) {
-    risks.push(`${rejected.length} candidate(s) failed the paired test or held-out gate; do not ship those variants.`);
+    risks.push(
+      `${rejected.length} candidate(s) failed the paired test or held-out gate; do not ship those variants.`
+    );
   }
   if (ctx.failureClusters && ctx.failureClusters.clusters.length > 0) {
     const top = ctx.failureClusters.clusters[0];
@@ -954,9 +761,13 @@ function buildRecommendation(candidates, ctx) {
   } else if (decision === "hold") {
     nextActions.push("Keep current production candidate while expanding holdout evidence.");
   } else if (decision === "equivalent") {
-    nextActions.push("Either keep the comparator (no quality regression) or promote on cost/latency grounds \u2014 equivalence does not justify either; the choice is a product decision, not a stats one.");
+    nextActions.push(
+      "Either keep the comparator (no quality regression) or promote on cost/latency grounds \u2014 equivalence does not justify either; the choice is a product decision, not a stats one."
+    );
   } else if (decision === "reject") {
-    nextActions.push("Do not promote this sweep; inspect failures and generate a revised candidate.");
+    nextActions.push(
+      "Do not promote this sweep; inspect failures and generate a revised candidate."
+    );
   }
   return {
     decision,
@@ -969,20 +780,30 @@ function buildRecommendation(candidates, ctx) {
 function buildExecutiveSummary(candidates, recommendation, ctx) {
   const lines = [];
   const nonComparator = candidates.filter((c) => c.candidateId !== ctx.comparator);
-  lines.push(`Evaluated ${nonComparator.length} candidate(s) on the ${ctx.split} split${ctx.comparator ? ` against ${ctx.comparator}` : ""}.`);
-  lines.push(`Recommendation: ${recommendation.decision}${recommendation.candidateId ? ` ${recommendation.candidateId}` : ""}.`);
+  lines.push(
+    `Evaluated ${nonComparator.length} candidate(s) on the ${ctx.split} split${ctx.comparator ? ` against ${ctx.comparator}` : ""}.`
+  );
+  lines.push(
+    `Recommendation: ${recommendation.decision}${recommendation.candidateId ? ` ${recommendation.candidateId}` : ""}.`
+  );
   const promoted = nonComparator.filter((c) => c.decision === "promote").length;
   const held = nonComparator.filter((c) => c.decision === "hold").length;
   const equivalent = nonComparator.filter((c) => c.decision === "equivalent").length;
   const rejected = nonComparator.filter((c) => c.decision === "reject").length;
   const more = nonComparator.filter((c) => c.decision === "needs_more_data").length;
-  lines.push(`Decision mix: ${promoted} promote, ${equivalent} equivalent, ${held} hold, ${rejected} reject, ${more} need more data.`);
+  lines.push(
+    `Decision mix: ${promoted} promote, ${equivalent} equivalent, ${held} hold, ${rejected} reject, ${more} need more data.`
+  );
   const frontier = nonComparator.filter((c) => c.onParetoFrontier).map((c) => c.candidateId);
   if (frontier.length > 0) lines.push(`Pareto-frontier candidates: ${frontier.join(", ")}.`);
   if (ctx.failureClusters) {
-    lines.push(`Failure clustering found ${ctx.failureClusters.totalFailures}/${ctx.failureClusters.totalRuns} failed runs across ${ctx.failureClusters.clusters.length} reportable cluster(s).`);
+    lines.push(
+      `Failure clustering found ${ctx.failureClusters.totalFailures}/${ctx.failureClusters.totalRuns} failed runs across ${ctx.failureClusters.clusters.length} reportable cluster(s).`
+    );
   }
-  lines.push(ctx.preregistrationHash ? `Preregistered analysis: ${ctx.preregistrationHash.slice(0, 12)}\u2026` : "Analysis is post-hoc \u2014 no preregistration hash supplied.");
+  lines.push(
+    ctx.preregistrationHash ? `Preregistered analysis: ${ctx.preregistrationHash.slice(0, 12)}\u2026` : "Analysis is post-hoc \u2014 no preregistration hash supplied."
+  );
   return lines;
 }
 function renderResearchMarkdown(report) {
@@ -994,7 +815,9 @@ function renderResearchMarkdown(report) {
   lines.push(`**Comparator:** ${report.comparator ?? "not configured"}`);
   lines.push(`**ROPE:** ${report.rope ? formatRope(report.rope) : "not configured"}`);
   lines.push(`**Run fingerprint:** \`${report.runFingerprint}\``);
-  lines.push(`**Preregistration:** ${report.preregistrationHash ? `\`${report.preregistrationHash}\`` : "none"}`);
+  lines.push(
+    `**Preregistration:** ${report.preregistrationHash ? `\`${report.preregistrationHash}\`` : "none"}`
+  );
   lines.push("");
   lines.push("## Executive Summary");
   lines.push("");
@@ -1021,7 +844,9 @@ function renderResearchMarkdown(report) {
   lines.push("");
   lines.push("## Candidate Decision Table");
   lines.push("");
-  lines.push("| Candidate | Decision | Mean | \u0394\u0304 | Pr(\u0394>0) | q | d | Paired N | Median Gain CI | MDE | Pareto | Gate |");
+  lines.push(
+    "| Candidate | Decision | Mean | \u0394\u0304 | Pr(\u0394>0) | q | d | Paired N | Median Gain CI | MDE | Pareto | Gate |"
+  );
   lines.push("|---|---|---:|---:|---:|---:|---:|---:|---|---:|---|---|");
   for (const c of report.candidates) {
     const delta = c.meanDeltaVsComparator === null ? "-" : signed(c.meanDeltaVsComparator);
@@ -1030,7 +855,9 @@ function renderResearchMarkdown(report) {
     const d = Number.isFinite(c.cohensD) ? c.cohensD.toFixed(3) : "-";
     const gain = c.gainCi ? `[${fmt(c.gainCi.low)}, ${fmt(c.gainCi.high)}]` : "-";
     const mde = c.mde === null || !Number.isFinite(c.mde) ? "-" : fmt(c.mde);
-    lines.push(`| ${c.candidateId} | ${c.decision} | ${fmt(c.mean)} | ${delta} | ${prGt} | ${q} | ${d} | ${c.pairedN} | ${gain} | ${mde} | ${c.onParetoFrontier ? "yes" : "no"} | ${c.gate ?? "-"} |`);
+    lines.push(
+      `| ${c.candidateId} | ${c.decision} | ${fmt(c.mean)} | ${delta} | ${prGt} | ${q} | ${d} | ${c.pairedN} | ${gain} | ${mde} | ${c.onParetoFrontier ? "yes" : "no"} | ${c.gate ?? "-"} |`
+    );
   }
   lines.push("");
   lines.push("## Statistical Summary");
@@ -1061,7 +888,9 @@ function renderResearchMarkdown(report) {
   lines.push("");
   lines.push("## Chart Specs");
   lines.push("");
-  lines.push("The report carries JSON chart specs for Pareto cost/quality and paired gain histograms.");
+  lines.push(
+    "The report carries JSON chart specs for Pareto cost/quality and paired gain histograms."
+  );
   lines.push("");
   lines.push("```json");
   lines.push(JSON.stringify({ pareto: report.pareto, gains: report.gains }, null, 2));
@@ -1073,7 +902,9 @@ function renderResearchMarkdown(report) {
     lines.push("| Failure Class | Runs | Scenarios | Tool | Example |");
     lines.push("|---|---:|---:|---|---|");
     for (const c of report.failureClusters.clusters.slice(0, 10)) {
-      lines.push(`| ${c.failureClass} | ${c.runCount} | ${c.scenarioIds.length} | ${c.toolName ?? "-"} | ${escapePipes(c.exampleError ?? c.exampleRunId)} |`);
+      lines.push(
+        `| ${c.failureClass} | ${c.runCount} | ${c.scenarioIds.length} | ${c.toolName ?? "-"} | ${escapePipes(c.exampleError ?? c.exampleRunId)} |`
+      );
     }
   }
   return lines.join("\n");
@@ -1161,7 +992,9 @@ function markdownToHtml(markdown) {
   return html.join("\n");
 }
 function renderMarkdownTable(lines) {
-  const rows = lines.filter((line) => !/^\|[-:\s|]+\|$/.test(line)).map((line) => line.slice(1, -1).split("|").map((cell) => inlineMarkdown(cell.trim())));
+  const rows = lines.filter((line) => !/^\|[-:\s|]+\|$/.test(line)).map(
+    (line) => line.slice(1, -1).split("|").map((cell) => inlineMarkdown(cell.trim()))
+  );
   if (rows.length === 0) return "";
   const [head, ...body] = rows;
   const th = head.map((cell) => `<th>${cell}</th>`).join("");
@@ -1202,15 +1035,6 @@ function fmt(x) {
 }
 export {
-  normalizeScores,
-  weightedMean,
-  confidenceInterval,
-  interRaterReliability,
-  mannWhitneyU,
-  partialCredit,
-  pairedTTest,
-  wilcoxonSignedRank,
-  cohensD,
   requiredSampleSize,
   bonferroni,
   benjaminiHochberg,
@@ -1223,4 +1047,4 @@ export {
   RESEARCH_REPORT_HARD_PAIR_FLOOR,
   researchReport
 };
-//# sourceMappingURL=chunk-IOXMGMHQ.js.map
+//# sourceMappingURL=chunk-2A5XJB43.js.map