@tangle-network/agent-eval 0.20.10 → 0.20.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +129 -126
  2. package/dist/benchmarks/index.d.ts +2 -1
  3. package/dist/{chunk-JAOLXRIA.js → chunk-75MCTH7P.js} +8 -2
  4. package/dist/chunk-75MCTH7P.js.map +1 -0
  5. package/dist/chunk-HKYRWNHV.js +1354 -0
  6. package/dist/chunk-HKYRWNHV.js.map +1 -0
  7. package/dist/{chunk-LSR4IAYN.js → chunk-HNJLMAJ2.js} +2 -2
  8. package/dist/chunk-IKFVX537.js +717 -0
  9. package/dist/chunk-IKFVX537.js.map +1 -0
  10. package/dist/chunk-KWUAAIHR.js +1764 -0
  11. package/dist/chunk-KWUAAIHR.js.map +1 -0
  12. package/dist/chunk-MCMV7DUL.js +1310 -0
  13. package/dist/chunk-MCMV7DUL.js.map +1 -0
  14. package/dist/chunk-ODFINDLQ.js +413 -0
  15. package/dist/chunk-ODFINDLQ.js.map +1 -0
  16. package/dist/chunk-PKCVBYTQ.js +200 -0
  17. package/dist/chunk-PKCVBYTQ.js.map +1 -0
  18. package/dist/chunk-YUFXO3TU.js +148 -0
  19. package/dist/chunk-YUFXO3TU.js.map +1 -0
  20. package/dist/cli.js +2 -2
  21. package/dist/control-C8NKbF3w.d.ts +258 -0
  22. package/dist/control.d.ts +5 -0
  23. package/dist/control.js +30 -0
  24. package/dist/control.js.map +1 -0
  25. package/dist/dataset-B9qvlm_o.d.ts +112 -0
  26. package/dist/emitter-BYO2nSDA.d.ts +387 -0
  27. package/dist/feedback-trajectory-BGQ_ANCN.d.ts +345 -0
  28. package/dist/{index-1PZOtZFr.d.ts → index-c5saLbKD.d.ts} +2 -133
  29. package/dist/index.d.ts +115 -2870
  30. package/dist/index.js +1049 -6156
  31. package/dist/index.js.map +1 -1
  32. package/dist/multi-shot-optimization-Bvtz294B.d.ts +598 -0
  33. package/dist/openapi.json +1 -1
  34. package/dist/optimization.d.ts +145 -0
  35. package/dist/optimization.js +60 -0
  36. package/dist/optimization.js.map +1 -0
  37. package/dist/reporting.d.ts +426 -0
  38. package/dist/reporting.js +32 -0
  39. package/dist/reporting.js.map +1 -0
  40. package/dist/run-record-CX_jcAyr.d.ts +134 -0
  41. package/dist/traces.d.ts +658 -0
  42. package/dist/traces.js +100 -0
  43. package/dist/traces.js.map +1 -0
  44. package/dist/wire/index.js +2 -2
  45. package/docs/concepts.md +16 -11
  46. package/docs/feature-guide.md +10 -17
  47. package/docs/integration-launch-gates.md +77 -0
  48. package/docs/product-eval-adoption.md +221 -0
  49. package/docs/trace-analysis.md +75 -0
  50. package/package.json +21 -1
  51. package/dist/chunk-JAOLXRIA.js.map +0 -1
  52. /package/dist/{chunk-LSR4IAYN.js.map → chunk-HNJLMAJ2.js.map} +0 -0
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/statistics.ts","../src/power-analysis.ts","../src/paired-stats.ts"],"sourcesContent":["import type { JudgeScore } from './types'\n\n/** Dimensions where lower raw score = better outcome (inverted semantics) */\nconst INVERTED_DIMENSIONS = new Set([\n 'hallucination',\n 'false_confidence',\n 'worst_failure',\n])\n\n/**\n * Normalize scores so all dimensions follow \"higher = better\".\n * Inverted dimensions (hallucination, false_confidence, worst_failure)\n * already use inverted scoring in the prompt (10 = no hallucination),\n * but this function ensures consistency if raw scores leak through.\n */\nexport function normalizeScores(scores: JudgeScore[]): JudgeScore[] {\n return scores.map((s) => {\n if (INVERTED_DIMENSIONS.has(s.dimension)) {\n return s\n }\n return s\n })\n}\n\n/** Weighted mean — falls back to uniform weights when omitted */\nexport function weightedMean(scores: { score: number; weight?: number }[]): number {\n if (scores.length === 0) return 0\n let totalWeight = 0\n let weightedSum = 0\n for (const { score, weight } of scores) {\n const w = weight ?? 1\n weightedSum += score * w\n totalWeight += w\n }\n return totalWeight > 0 ? weightedSum / totalWeight : 0\n}\n\n/** Bootstrap confidence interval */\nexport function confidenceInterval(\n scores: number[],\n confidence = 0.95,\n): { mean: number; lower: number; upper: number } {\n if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 }\n if (scores.length === 1) return { mean: scores[0], lower: scores[0], upper: scores[0] }\n\n const n = scores.length\n const mean = scores.reduce((a, b) => a + b, 0) / n\n\n const B = 1000\n const bootstrapMeans: number[] = []\n\n for (let i = 0; i < B; i++) {\n let sum = 0\n for (let j = 0; j < n; j++) {\n sum += scores[Math.floor(Math.random() * n)]\n }\n bootstrapMeans.push(sum / n)\n }\n\n bootstrapMeans.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowerIdx = Math.floor((alpha / 2) * B)\n const upperIdx = Math.floor((1 - alpha / 2) * B) - 1\n\n return {\n mean,\n lower: bootstrapMeans[lowerIdx],\n upper: bootstrapMeans[Math.min(upperIdx, B - 1)],\n }\n}\n\n/**\n * Inter-rater reliability — simplified Krippendorff's alpha.\n *\n * Each inner array is one judge's scores for all items.\n * All arrays must have the same length (same items scored).\n */\nexport function interRaterReliability(judgeScores: JudgeScore[][]): number {\n if (judgeScores.length < 2) return 1\n\n // Group scores by dimension across judges\n const dimensionMap = new Map<string, number[][]>()\n for (const judgeSet of judgeScores) {\n for (const s of judgeSet) {\n if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, [])\n const arr = dimensionMap.get(s.dimension)!\n if (arr.length === 0 || arr[arr.length - 1].length >= judgeScores.length) {\n arr.push([s.score])\n } else {\n arr[arr.length - 1].push(s.score)\n }\n }\n }\n\n // Collect all paired ratings\n const allValues: number[] = []\n const pairDiffs: number[] = []\n\n for (const items of dimensionMap.values()) {\n for (const ratings of items) {\n if (ratings.length < 2) continue\n for (const v of ratings) allValues.push(v)\n for (let i = 0; i < ratings.length; i++) {\n for (let j = i + 1; j < ratings.length; j++) {\n pairDiffs.push((ratings[i] - ratings[j]) ** 2)\n }\n }\n }\n }\n\n if (pairDiffs.length === 0 || allValues.length < 2) return 1\n\n const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length\n\n // Expected disagreement from all possible pairings of values\n let expectedDisagreement = 0\n let expectedCount = 0\n for (let i = 0; i < allValues.length; i++) {\n for (let j = i + 1; j < allValues.length; j++) {\n expectedDisagreement += (allValues[i] - allValues[j]) ** 2\n expectedCount++\n }\n }\n expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0\n\n if (expectedDisagreement === 0) return 1\n return 1 - observedDisagreement / expectedDisagreement\n}\n\n/**\n * Mann-Whitney U test for comparing two independent groups.\n * Returns U statistic and approximate p-value (normal approximation).\n */\nexport function mannWhitneyU(a: number[], b: number[]): { u: number; p: number } {\n if (a.length === 0 || b.length === 0) return { u: 0, p: 1 }\n\n const n1 = a.length\n const n2 = b.length\n\n // Rank all values together\n const combined = [\n ...a.map((v) => ({ v, group: 'a' as const })),\n ...b.map((v) => ({ v, group: 'b' as const })),\n ].sort((x, y) => x.v - y.v)\n\n // Assign ranks with tie handling\n const ranks: number[] = new Array(combined.length)\n let i = 0\n while (i < combined.length) {\n let j = i\n while (j < combined.length && combined[j].v === combined[i].v) j++\n const avgRank = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[k] = avgRank\n i = j\n }\n\n // Sum ranks for group a\n let r1 = 0\n for (let k = 0; k < combined.length; k++) {\n if (combined[k].group === 'a') r1 += ranks[k]\n }\n\n const u1 = r1 - (n1 * (n1 + 1)) / 2\n const u2 = n1 * n2 - u1\n const u = Math.min(u1, u2)\n\n // Normal approximation for p-value\n const mu = (n1 * n2) / 2\n const sigma = Math.sqrt((n1 * n2 * (n1 + n2 + 1)) / 12)\n\n if (sigma === 0) return { u, p: 1 }\n\n const z = Math.abs(u - mu) / sigma\n // Two-tailed p-value from z-score (approximation)\n const p = 2 * (1 - normalCdf(z))\n\n return { u, p }\n}\n\n/** Partial credit: returns 0-1 ratio of current toward target */\nexport function partialCredit(current: number, target: number): number {\n if (target <= 0) return 1\n return Math.min(1, Math.max(0, current / target))\n}\n\n/**\n * Paired t-test — before/after measurements on the SAME items.\n * Pairing removes inter-item variance, giving tighter significance than\n * an unpaired test when comparing prompt v1 vs prompt v2 on identical\n * scenarios.\n */\nexport function pairedTTest(before: number[], after: number[]): { t: number; df: number; p: number } {\n if (before.length !== after.length) {\n throw new Error(`pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`)\n }\n const n = before.length\n if (n < 2) return { t: 0, df: 0, p: 1 }\n\n const diffs = before.map((b, i) => after[i] - b)\n const mean = diffs.reduce((a, b) => a + b, 0) / n\n const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1)\n const se = Math.sqrt(variance / n)\n if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 }\n\n const t = mean / se\n const df = n - 1\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\n/**\n * Wilcoxon signed-rank test — paired non-parametric alternative.\n * Use when the differences aren't normally distributed.\n */\nexport function wilcoxonSignedRank(before: number[], after: number[]): { w: number; p: number } {\n if (before.length !== after.length) {\n throw new Error(`wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`)\n }\n const diffs = before.map((b, i) => after[i] - b).filter((d) => d !== 0)\n const n = diffs.length\n if (n < 6) return { w: 0, p: 1 }\n\n const absRanks = diffs\n .map((d, i) => ({ abs: Math.abs(d), sign: Math.sign(d), i }))\n .sort((a, b) => a.abs - b.abs)\n const ranks: number[] = new Array(n)\n let i = 0\n while (i < n) {\n let j = i\n while (j < n && absRanks[j].abs === absRanks[i].abs) j++\n const avg = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[absRanks[k].i] = avg\n i = j\n }\n let wPlus = 0\n for (let k = 0; k < n; k++) if (diffs[k] > 0) wPlus += ranks[k]\n\n const mean = (n * (n + 1)) / 4\n const variance = (n * (n + 1) * (2 * n + 1)) / 24\n const z = (wPlus - mean) / Math.sqrt(variance)\n const p = 2 * (1 - normalCdf(Math.abs(z)))\n return { w: wPlus, p }\n}\n\n/**\n * Cohen's d — standardized effect size for two independent groups.\n * Positive d means group b has higher mean than group a.\n * Rule of thumb: |d| < 0.2 negligible, 0.2–0.5 small, 0.5–0.8 medium, > 0.8 large.\n */\nexport function cohensD(a: number[], b: number[]): number {\n if (a.length < 2 || b.length < 2) return 0\n const meanA = a.reduce((x, y) => x + y, 0) / a.length\n const meanB = b.reduce((x, y) => x + y, 0) / b.length\n const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1)\n const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1)\n const pooled = Math.sqrt(\n ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2),\n )\n if (pooled === 0) return 0\n return (meanB - meanA) / pooled\n}\n\n/** Student-t CDF approximation via Abramowitz-Stegun series. */\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const a = df / 2\n const b = 0.5\n const ib = incompleteBeta(x, a, b)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\n/** Regularized incomplete beta function via continued fraction (Lentz). */\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n const maxIter = 200\n const eps = 3e-7\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= maxIter; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < eps) break\n }\n return front * f\n}\n\n/** Lanczos approximation to ln Γ(z). */\nfunction lnGamma(z: number): number {\n const g = 7\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028,\n 771.32342877765313, -176.61502916214059, 12.507343278686905,\n -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,\n ]\n if (z < 0.5) {\n return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n }\n z -= 1\n let x = coefs[0]\n for (let i = 1; i < g + 2; i++) x += coefs[i] / (z + i)\n const t = z + g + 0.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\n// Standard normal CDF approximation (Abramowitz and Stegun)\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX / 2)\n\n return 0.5 * (1 + sign * y)\n}\n","/**\n * Power analysis + multiple-comparison correction.\n *\n * Two jobs:\n * 1. Before running: `requiredSampleSize({ effect, alpha, power })`\n * returns the N per arm needed to detect a given effect size.\n * 2. After running: `benjaminiHochberg(pValues, fdr)` and\n * `bonferroni(pValues, alpha)` correct for multiple pairwise tests\n * so pairwise variant comparisons stay statistically honest.\n *\n * Fixes the correctness bug in 0.2's pairwise optimizer which applied\n * alpha directly across n*(n-1)/2 pairwise tests without correction —\n * dramatically inflating false-positive rate when variants ≥ 3.\n */\n\n/**\n * Required N per arm for a two-sample comparison at target effect size,\n * alpha, and power. Uses the normal-approximation formula:\n *\n * n = 2 * ( (z_{1-α/2} + z_{1-β}) / d )^2\n *\n * where d is Cohen's d. Returns Infinity for effect ≤ 0.\n */\nexport function requiredSampleSize(opts: { effect: number; alpha?: number; power?: number; twoSided?: boolean }): number {\n const effect = opts.effect\n if (!Number.isFinite(effect) || effect <= 0) return Infinity\n const alpha = opts.alpha ?? 0.05\n const power = opts.power ?? 0.8\n const twoSided = opts.twoSided ?? true\n const zAlpha = zQuantile(twoSided ? 1 - alpha / 2 : 1 - alpha)\n const zBeta = zQuantile(power)\n const n = 2 * Math.pow((zAlpha + zBeta) / effect, 2)\n return Math.ceil(n)\n}\n\n/** Bonferroni adjustment: multiply every p-value by the number of tests, clamp at 1. */\nexport function bonferroni(pValues: number[], alpha = 0.05): { adjusted: number[]; significant: boolean[] } {\n const k = pValues.length\n const adjusted = pValues.map((p) => Math.min(1, p * k))\n const significant = adjusted.map((p) => p < alpha)\n return { adjusted, significant }\n}\n\n/**\n * Benjamini–Hochberg false discovery rate. Returns adjusted q-values and\n * significance at the target FDR. Properly handles ties and preserves\n * monotonicity of q-values.\n */\nexport function benjaminiHochberg(pValues: number[], fdr = 0.05): { qValues: number[]; significant: boolean[] } {\n const n = pValues.length\n if (n === 0) return { qValues: [], significant: [] }\n const indexed = pValues.map((p, i) => ({ p, i })).sort((a, b) => a.p - b.p)\n const q = new Array<number>(n)\n // Ranks are 1-based; q_i = p_i * n / rank_i\n let minRight = 1\n for (let k = n - 1; k >= 0; k--) {\n const rank = k + 1\n const raw = indexed[k].p * n / rank\n const bounded = Math.min(minRight, raw)\n minRight = bounded\n q[indexed[k].i] = Math.min(1, bounded)\n }\n const significant = q.map((v) => v < fdr)\n return { qValues: q, significant }\n}\n\n/** Standard-normal inverse CDF (Acklam approximation). */\nfunction zQuantile(p: number): number {\n if (p <= 0 || p >= 1) {\n if (p === 0) return -Infinity\n if (p === 1) return Infinity\n return NaN\n }\n const a = [-3.969683028665376e1, 2.209460984245205e2, -2.759285104469687e2, 1.383577518672690e2, -3.066479806614716e1, 2.506628277459239]\n const b = [-5.447609879822406e1, 1.615858368580409e2, -1.556989798598866e2, 6.680131188771972e1, -1.328068155288572e1]\n const c = [-7.784894002430293e-3, -3.223964580411365e-1, -2.400758277161838, -2.549732539343734, 4.374664141464968, 2.938163982698783]\n const d = [7.784695709041462e-3, 3.224671290700398e-1, 2.445134137142996, 3.754408661907416]\n const pLow = 0.02425\n const pHigh = 1 - pLow\n let q: number\n let r: number\n if (p < pLow) {\n q = Math.sqrt(-2 * Math.log(p))\n return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /\n ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)\n }\n if (p <= pHigh) {\n q = p - 0.5\n r = q * q\n return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q /\n (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1)\n }\n q = Math.sqrt(-2 * Math.log(1 - p))\n return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /\n ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)\n}\n","/**\n * Paper-grade paired statistics for held-out promotion gates.\n *\n * The promotion gate (`HeldOutGate`) needs three things:\n *\n * 1. A bootstrap confidence interval on the per-item paired delta\n * (`pairedBootstrap`). Median delta is the headline number; the\n * CI lower bound is what the gate checks against `pairedDeltaThreshold`.\n * 2. A non-parametric significance test on the paired deltas\n * (`pairedWilcoxon` — re-export of `wilcoxonSignedRank` under the\n * paper-style name).\n * 3. False-discovery-rate correction across simultaneously-tested\n * candidate variants (`bhAdjust` — re-export of `benjaminiHochberg`).\n *\n * Why a separate file: every existing primitive lives in `statistics.ts`\n * (general) or `power-analysis.ts` (correction). Paired-bootstrap is\n * paired-only, paper-grade, and load-bearing for the promotion gate.\n * Putting it next to `statistics.ts` would require editing that file;\n * the brief forbids that. New file, new exports, no surface change.\n */\n\nimport { wilcoxonSignedRank } from './statistics'\nimport { benjaminiHochberg } from './power-analysis'\n\nexport interface PairedBootstrapResult {\n /** Number of paired observations (after dropping unequal lengths is rejected). */\n n: number\n /** Median of paired deltas (after − before). */\n median: number\n /** Mean of paired deltas. */\n mean: number\n /** Lower bound of the bootstrap CI on the median delta. */\n low: number\n /** Upper bound of the bootstrap CI on the median delta. */\n high: number\n /** Confidence level used (e.g. 0.95). */\n confidence: number\n /** Number of bootstrap resamples used. */\n resamples: number\n}\n\nexport interface PairedBootstrapOptions {\n /** Confidence level. Default 0.95. */\n confidence?: number\n /** Bootstrap resample count. Default 2000. */\n resamples?: number\n /** Statistic to bootstrap. Default 'median'. */\n statistic?: 'median' | 'mean'\n /** Deterministic seed. If omitted, uses Math.random(). */\n seed?: number\n}\n\n/**\n * Paired bootstrap on (after - before) deltas. Returns a CI on the\n * chosen statistic (median by default). Pairs are resampled with\n * replacement. The lower bound is what the promotion gate checks: if\n * `low > pairedDeltaThreshold`, the gain is real at the chosen\n * confidence level.\n *\n * Throws on unequal sample sizes — caller must align pairs upstream.\n */\nexport function pairedBootstrap(\n before: number[],\n after: number[],\n opts: PairedBootstrapOptions = {},\n): PairedBootstrapResult {\n if (before.length !== after.length) {\n throw new Error(\n `pairedBootstrap: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const confidence = opts.confidence ?? 0.95\n const resamples = opts.resamples ?? 2000\n const statistic = opts.statistic ?? 'median'\n if (confidence <= 0 || confidence >= 1) {\n throw new Error(`pairedBootstrap: confidence must be in (0,1), got ${confidence}`)\n }\n\n const n = before.length\n const deltas = before.map((b, i) => after[i]! - b)\n if (n === 0) {\n return { n: 0, median: 0, mean: 0, low: 0, high: 0, confidence, resamples }\n }\n if (n === 1) {\n const d = deltas[0]!\n return { n: 1, median: d, mean: d, low: d, high: d, confidence, resamples }\n }\n\n const rng = makeRng(opts.seed)\n const samples = new Array<number>(resamples)\n for (let b = 0; b < resamples; b++) {\n let acc: number[] | null = null\n if (statistic === 'mean') {\n let sum = 0\n for (let k = 0; k < n; k++) {\n sum += deltas[Math.floor(rng() * n)]!\n }\n samples[b] = sum / n\n } else {\n acc = new Array<number>(n)\n for (let k = 0; k < n; k++) {\n acc[k] = deltas[Math.floor(rng() * n)]!\n }\n samples[b] = medianInPlace(acc)\n }\n }\n samples.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowIdx = Math.floor((alpha / 2) * resamples)\n const highIdx = Math.min(resamples - 1, Math.ceil((1 - alpha / 2) * resamples) - 1)\n\n return {\n n,\n median: medianInPlace([...deltas]),\n mean: deltas.reduce((s, x) => s + x, 0) / n,\n low: samples[lowIdx]!,\n high: samples[Math.max(highIdx, lowIdx)]!,\n confidence,\n resamples,\n }\n}\n\n/**\n * Paper-style alias for `wilcoxonSignedRank`. The signed-rank test on\n * paired deltas is the standard non-parametric significance test for\n * \"candidate beats baseline on matched items.\" Use alongside the\n * bootstrap CI: bootstrap gives effect size, Wilcoxon gives p.\n */\nexport function pairedWilcoxon(before: number[], after: number[]): { w: number; p: number } {\n return wilcoxonSignedRank(before, after)\n}\n\n/**\n * Paper-style alias for `benjaminiHochberg`. Use to correct p-values\n * across multiple candidate-vs-baseline comparisons run in the same\n * promotion sweep. Returns BH-adjusted q-values and significance at\n * the requested FDR (default 0.05).\n */\nexport function bhAdjust(pValues: number[], fdr = 0.05): { qValues: number[]; significant: boolean[] } {\n return benjaminiHochberg(pValues, fdr)\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction medianInPlace(xs: number[]): number {\n if (xs.length === 0) return 0\n xs.sort((a, b) => a - b)\n const mid = Math.floor(xs.length / 2)\n return xs.length % 2 === 0 ? (xs[mid - 1]! + xs[mid]!) / 2 : xs[mid]!\n}\n\n/**\n * Tiny seedable PRNG (mulberry32). Deterministic given a seed; falls\n * back to Math.random when seed is omitted. Adequate for bootstrap\n * resampling — not cryptographic.\n */\nfunction makeRng(seed: number | undefined): () => number {\n if (seed === undefined) return Math.random\n let s = (seed | 0) || 0x9e3779b9\n return () => {\n s = (s + 0x6d2b79f5) | 0\n let t = s\n t = Math.imul(t ^ (t >>> 15), t | 1)\n t ^= t + Math.imul(t ^ (t >>> 7), t | 61)\n return ((t ^ (t >>> 14)) >>> 0) / 4294967296\n }\n}\n"],"mappings":";AAGA,IAAM,sBAAsB,oBAAI,IAAI;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAQM,SAAS,gBAAgB,QAAoC;AAClE,SAAO,OAAO,IAAI,CAAC,MAAM;AACvB,QAAI,oBAAoB,IAAI,EAAE,SAAS,GAAG;AACxC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAGO,SAAS,aAAa,QAAsD;AACjF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,aAAW,EAAE,OAAO,OAAO,KAAK,QAAQ;AACtC,UAAM,IAAI,UAAU;AACpB,mBAAe,QAAQ;AACvB,mBAAe;AAAA,EACjB;AACA,SAAO,cAAc,IAAI,cAAc,cAAc;AACvD;AAGO,SAAS,mBACd,QACA,aAAa,MACmC;AAChD,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,OAAO,EAAE;AAC9D,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,OAAO,CAAC,GAAG,OAAO,OAAO,CAAC,GAAG,OAAO,OAAO,CAAC,EAAE;AAEtF,QAAM,IAAI,OAAO;AACjB,QAAM,OAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAEjD,QAAM,IAAI;AACV,QAAM,iBAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,aAAO,OAAO,KAAK,MAAM,KAAK,OAAO,IAAI,CAAC,CAAC;AAAA,IAC7C;AACA,mBAAe,KAAK,MAAM,CAAC;AAAA,EAC7B;AAEA,iBAAe,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAEnC,QAAM,QAAQ,IAAI;AAClB,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,CAAC;AAC3C,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,CAAC,IAAI;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,OAAO,eAAe,QAAQ;AAAA,IAC9B,OAAO,eAAe,KAAK,IAAI,UAAU,IAAI,CAAC,CAAC;AAAA,EACjD;AACF;AAQO,SAAS,sBAAsB,aAAqC;AACzE,MAAI,YAAY,SAAS,EAAG,QAAO;AAGnC,QAAM,eAAe,oBAAI,IAAwB;AACjD,aAAW,YAAY,aAAa;AAClC,eAAW,KAAK,UAAU;AACxB,UAAI,CAAC,aAAa,IAAI,EAAE,SAAS,EAAG,cAAa,IAAI,EAAE,WAAW,CAAC,CAAC;AACpE,YAAM,MAAM,aAAa,IAAI,EAAE,SAAS;AACxC,UAAI,IAAI,WAAW,KAAK,IAAI,IAAI,SAAS,CAAC,EAAE,UAAU,YAAY,QAAQ;AACxE,YAAI,KAAK,CAAC,EAAE,KAAK,CAAC;AAAA,MACpB,OAAO;AACL,YAAI,IAAI,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK;AAAA,MAClC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,YAAsB,CAAC;AAC7B,QAAM,YAAsB,CAAC;AAE7B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,WAAW,OAAO;AAC3B,UAAI,QAAQ,SAAS,EAAG;AACxB,iBAAW,KAAK,QAAS,WAAU,KAAK,CAAC;AACzC,eAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,iBAAS,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AAC3C,oBAAU,MAAM,QAAQ,CAAC,IAAI,QAAQ,CAAC,MAAM,CAAC;AAAA,QAC/C;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,MAAI,UAAU,WAAW,KAAK,UAAU,SAAS,EAAG,QAAO;AAE3D,QAAM,uBAAuB,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU;AAG9E,MAAI,uBAAuB;AAC3B,MAAI,gBAAgB;AACpB,WAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,aAAS,IAAI,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AAC7C,+BAAyB,UAAU,CAAC,IAAI,UAAU,CAAC,MAAM;AACzD;AAAA,IACF;AAAA,EACF;AACA,yBAAuB,gBAAgB,IAAI,uBAAuB,gBAAgB;AAElF,MAAI,yBAAyB,EAAG,QAAO;AACvC,SAAO,IAAI,uBAAuB;AACpC;AAMO,SAAS,aAAa,GAAa,GAAuC;AAC/E,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE1D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AAGb,QAAM,WAAW;AAAA,IACf,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,IAC5C,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,EAC9C,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAG1B,QAAM,QAAkB,IAAI,MAAM,SAAS,MAAM;AACjD,MAAI,IAAI;AACR,SAAO,IAAI,SAAS,QAAQ;AAC1B,QAAI,IAAI;AACR,WAAO,IAAI,SAAS,UAAU,SAAS,CAAC,EAAE,MAAM,SAAS,CAAC,EAAE,EAAG;AAC/D,UAAM,WAAW,IAAI,IAAI,KAAK;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI;AACvC,QAAI;AAAA,EACN;AAGA,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,SAAS,CAAC,EAAE,UAAU,IAAK,OAAM,MAAM,CAAC;AAAA,EAC9C;AAEA,QAAM,KAAK,KAAM,MAAM,KAAK,KAAM;AAClC,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE;AAGzB,QAAM,KAAM,KAAK,KAAM;AACvB,QAAM,QAAQ,KAAK,KAAM,KAAK,MAAM,KAAK,KAAK,KAAM,EAAE;AAEtD,MAAI,UAAU,EAAG,QAAO,EAAE,GAAG,GAAG,EAAE;AAElC,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE,IAAI;AAE7B,QAAM,IAAI,KAAK,IAAI,UAAU,CAAC;AAE9B,SAAO,EAAE,GAAG,EAAE;AAChB;AAGO,SAAS,cAAc,SAAiB,QAAwB;AACrE,MAAI,UAAU,EAAG,QAAO;AACxB,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,UAAU,MAAM,CAAC;AAClD;AAQO,SAAS,YAAY,QAAkB,OAAuD;AACnG,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI,MAAM,sCAAsC,OAAO,MAAM,OAAO,MAAM,MAAM,GAAG;AAAA,EAC3F;AACA,QAAM,IAAI,OAAO;AACjB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAEtC,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC;AAC/C,QAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAChD,QAAM,WAAW,MAAM,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,SAAS,GAAG,CAAC,KAAK,IAAI;AAC3E,QAAM,KAAK,KAAK,KAAK,WAAW,CAAC;AACjC,MAAI,OAAO,EAAG,QAAO,EAAE,GAAG,SAAS,IAAI,IAAI,UAAU,IAAI,IAAI,GAAG,GAAG,SAAS,IAAI,IAAI,EAAE;AAEtF,QAAM,IAAI,OAAO;AACjB,QAAM,KAAK,IAAI;AACf,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAMO,SAAS,mBAAmB,QAAkB,OAA2C;AAC9F,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI,MAAM,6CAA6C,OAAO,MAAM,OAAO,MAAM,MAAM,GAAG;AAAA,EAClG;AACA,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAGA,OAAM,MAAMA,EAAC,IAAI,CAAC,EAAE,OAAO,CAAC,MAAM,MAAM,CAAC;AACtE,QAAM,IAAI,MAAM;AAChB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE/B,QAAM,WAAW,MACd,IAAI,CAAC,GAAGA,QAAO,EAAE,KAAK,KAAK,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,GAAG,GAAAA,GAAE,EAAE,EAC3D,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAC/B,QAAM,QAAkB,IAAI,MAAM,CAAC;AACnC,MAAI,IAAI;AACR,SAAO,IAAI,GAAG;AACZ,QAAI,IAAI;AACR,WAAO,IAAI,KAAK,SAAS,CAAC,EAAE,QAAQ,SAAS,CAAC,EAAE,IAAK;AACrD,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,SAAS,CAAC,EAAE,CAAC,IAAI;AACnD,QAAI;AAAA,EACN;AACA,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,KAAI,MAAM,CAAC,IAAI,EAAG,UAAS,MAAM,CAAC;AAE9D,QAAM,OAAQ,KAAK,IAAI,KAAM;AAC7B,QAAM,WAAY,KAAK,IAAI,MAAM,IAAI,IAAI,KAAM;AAC/C,QAAM,KAAK,QAAQ,QAAQ,KAAK,KAAK,QAAQ;AAC7C,QAAM,IAAI,KAAK,IAAI,UAAU,KAAK,IAAI,CAAC,CAAC;AACxC,SAAO,EAAE,GAAG,OAAO,EAAE;AACvB;AAOO,SAAS,QAAQ,GAAa,GAAqB;AACxD,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO;AACzC,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,SAAS,KAAK;AAAA,MAChB,EAAE,SAAS,KAAK,QAAQ,EAAE,SAAS,KAAK,SAAS,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3E;AACA,MAAI,WAAW,EAAG,QAAO;AACzB,UAAQ,QAAQ,SAAS;AAC3B;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,IAAI,KAAK;AACf,QAAM,IAAI;AACV,QAAM,KAAK,eAAe,GAAG,GAAG,CAAC;AACjC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAGA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,QAAM,UAAU;AAChB,QAAM,MAAM;AACZ,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,SAAS,KAAK;AACjC,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,IAAK;AAAA,EACjC;AACA,SAAO,QAAQ;AACjB;AAGA,SAAS,QAAQ,GAAmB;AAClC,QAAM,IAAI;AACV,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IACxC;AAAA,IAAoB;AAAA,IAAqB;AAAA,IACzC;AAAA,IAAsB;AAAA,IAAuB;AAAA,EAC/C;AACA,MAAI,IAAI,KAAK;AACX,WAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAAA,EAClE;AACA,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAK,IAAI;AACrD,QAAM,IAAI,IAAI,IAAI;AAClB,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAGA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AAEV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAI,CAAC,OAAO,OAAO,CAAC;AAE5F,SAAO,OAAO,IAAI,OAAO;AAC3B;;;AC/TO,SAAS,mBAAmB,MAAsF;AACvH,QAAM,SAAS,KAAK;AACpB,MAAI,CAAC,OAAO,SAAS,MAAM,KAAK,UAAU,EAAG,QAAO;AACpD,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,SAAS,UAAU,WAAW,IAAI,QAAQ,IAAI,IAAI,KAAK;AAC7D,QAAM,QAAQ,UAAU,KAAK;AAC7B,QAAM,IAAI,IAAI,KAAK,KAAK,SAAS,SAAS,QAAQ,CAAC;AACnD,SAAO,KAAK,KAAK,CAAC;AACpB;AAGO,SAAS,WAAW,SAAmB,QAAQ,MAAsD;AAC1G,QAAM,IAAI,QAAQ;AAClB,QAAM,WAAW,QAAQ,IAAI,CAAC,MAAM,KAAK,IAAI,GAAG,IAAI,CAAC,CAAC;AACtD,QAAM,cAAc,SAAS,IAAI,CAAC,MAAM,IAAI,KAAK;AACjD,SAAO,EAAE,UAAU,YAAY;AACjC;AAOO,SAAS,kBAAkB,SAAmB,MAAM,MAAqD;AAC9G,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,SAAS,CAAC,GAAG,aAAa,CAAC,EAAE;AACnD,QAAM,UAAU,QAAQ,IAAI,CAAC,GAAG,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAC1E,QAAM,IAAI,IAAI,MAAc,CAAC;AAE7B,MAAI,WAAW;AACf,WAAS,IAAI,IAAI,GAAG,KAAK,GAAG,KAAK;AAC/B,UAAM,OAAO,IAAI;AACjB,UAAM,MAAM,QAAQ,CAAC,EAAE,IAAI,IAAI;AAC/B,UAAM,UAAU,KAAK,IAAI,UAAU,GAAG;AACtC,eAAW;AACX,MAAE,QAAQ,CAAC,EAAE,CAAC,IAAI,KAAK,IAAI,GAAG,OAAO;AAAA,EACvC;AACA,QAAM,cAAc,EAAE,IAAI,CAAC,MAAM,IAAI,GAAG;AACxC,SAAO,EAAE,SAAS,GAAG,YAAY;AACnC;AAGA,SAAS,UAAU,GAAmB;AACpC,MAAI,KAAK,KAAK,KAAK,GAAG;AACpB,QAAI,MAAM,EAAG,QAAO;AACpB,QAAI,MAAM,EAAG,QAAO;AACpB,WAAO;AAAA,EACT;AACA,QAAM,IAAI,CAAC,oBAAsB,mBAAqB,oBAAsB,kBAAqB,oBAAsB,iBAAiB;AACxI,QAAM,IAAI,CAAC,oBAAsB,mBAAqB,oBAAsB,mBAAqB,kBAAoB;AACrH,QAAM,IAAI,CAAC,uBAAuB,qBAAuB,oBAAoB,oBAAoB,mBAAmB,iBAAiB;AACrI,QAAM,IAAI,CAAC,sBAAsB,oBAAsB,mBAAmB,iBAAiB;AAC3F,QAAM,OAAO;AACb,QAAM,QAAQ,IAAI;AAClB,MAAI;AACJ,MAAI;AACJ,MAAI,IAAI,MAAM;AACZ,QAAI,KAAK,KAAK,KAAK,KAAK,IAAI,CAAC,CAAC;AAC9B,gBAAY,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,SAC/D,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI;AAAA,EAC5D;AACA,MAAI,KAAK,OAAO;AACd,QAAI,IAAI;AACR,QAAI,IAAI;AACR,gBAAY,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,SACnE,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI;AAAA,EACzE;AACA,MAAI,KAAK,KAAK,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC;AAClC,SAAO,MAAM,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,SAC/D,EAAE,CAAC,IAAI,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,IAAI;AAC7D;;;AClCO,SAAS,gBACd,QACA,OACA,OAA+B,CAAC,GACT;AACvB,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,0CAA0C,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IAC5E;AAAA,EACF;AACA,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,YAAY,KAAK,aAAa;AACpC,MAAI,cAAc,KAAK,cAAc,GAAG;AACtC,UAAM,IAAI,MAAM,qDAAqD,UAAU,EAAE;AAAA,EACnF;AAEA,QAAM,IAAI,OAAO;AACjB,QAAM,SAAS,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AACjD,MAAI,MAAM,GAAG;AACX,WAAO,EAAE,GAAG,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,YAAY,UAAU;AAAA,EAC5E;AACA,MAAI,MAAM,GAAG;AACX,UAAM,IAAI,OAAO,CAAC;AAClB,WAAO,EAAE,GAAG,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,YAAY,UAAU;AAAA,EAC5E;AAEA,QAAM,MAAM,QAAQ,KAAK,IAAI;AAC7B,QAAM,UAAU,IAAI,MAAc,SAAS;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,QAAI,MAAuB;AAC3B,QAAI,cAAc,QAAQ;AACxB,UAAI,MAAM;AACV,eAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,eAAO,OAAO,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC;AAAA,MACrC;AACA,cAAQ,CAAC,IAAI,MAAM;AAAA,IACrB,OAAO;AACL,YAAM,IAAI,MAAc,CAAC;AACzB,eAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAI,CAAC,IAAI,OAAO,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC;AAAA,MACvC;AACA,cAAQ,CAAC,IAAI,cAAc,GAAG;AAAA,IAChC;AAAA,EACF;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAE5B,QAAM,QAAQ,IAAI;AAClB,QAAM,SAAS,KAAK,MAAO,QAAQ,IAAK,SAAS;AACjD,QAAM,UAAU,KAAK,IAAI,YAAY,GAAG,KAAK,MAAM,IAAI,QAAQ,KAAK,SAAS,IAAI,CAAC;AAElF,SAAO;AAAA,IACL;AAAA,IACA,QAAQ,cAAc,CAAC,GAAG,MAAM,CAAC;AAAA,IACjC,MAAM,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAAA,IAC1C,KAAK,QAAQ,MAAM;AAAA,IACnB,MAAM,QAAQ,KAAK,IAAI,SAAS,MAAM,CAAC;AAAA,IACvC;AAAA,IACA;AAAA,EACF;AACF;AAQO,SAAS,eAAe,QAAkB,OAA2C;AAC1F,SAAO,mBAAmB,QAAQ,KAAK;AACzC;AAQO,SAAS,SAAS,SAAmB,MAAM,MAAqD;AACrG,SAAO,kBAAkB,SAAS,GAAG;AACvC;AAIA,SAAS,cAAc,IAAsB;AAC3C,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,KAAG,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AACvB,QAAM,MAAM,KAAK,MAAM,GAAG,SAAS,CAAC;AACpC,SAAO,GAAG,SAAS,MAAM,KAAK,GAAG,MAAM,CAAC,IAAK,GAAG,GAAG,KAAM,IAAI,GAAG,GAAG;AACrE;AAOA,SAAS,QAAQ,MAAwC;AACvD,MAAI,SAAS,OAAW,QAAO,KAAK;AACpC,MAAI,IAAK,OAAO,KAAM;AACtB,SAAO,MAAM;AACX,QAAK,IAAI,aAAc;AACvB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;","names":["i"]}
@@ -0,0 +1,200 @@
1
+ // src/trace/emitter.ts
2
+ var TraceEmitter = class {
3
+ store;
4
+ stack = [];
5
+ _runId;
6
+ now;
7
+ id;
8
+ constructor(store, options = {}) {
9
+ this.store = store;
10
+ this.now = options.now ?? (() => Date.now());
11
+ this.id = options.id ?? (() => cryptoRandomId());
12
+ this._runId = options.runId ?? this.id();
13
+ }
14
+ get runId() {
15
+ return this._runId;
16
+ }
17
+ // ── Run lifecycle ──────────────────────────────────────────────────
18
+ /**
19
+ * Begin a Run.
20
+ *
21
+ * `scenarioId` is required on the persisted Run shape — every Run downstream
22
+ * gets a non-empty scenarioId so filters and aggregations stay simple — but
23
+ * the INPUT here accepts it as optional. When omitted, startRun substitutes
24
+ * a sensible default (`run.layer ?? run.tags?.['kind'] ?? 'runtime'`) so
25
+ * runtime / operator / meta-eval runs that have no curated-scenario corpus
26
+ * to anchor to don't have to invent placeholder strings at the call site.
27
+ */
28
+ async startRun(run) {
29
+ const scenarioId = run.scenarioId ?? run.layer ?? run.tags?.["kind"] ?? "runtime";
30
+ const full = {
31
+ ...run,
32
+ scenarioId,
33
+ runId: this._runId,
34
+ startedAt: this.now(),
35
+ status: "running"
36
+ };
37
+ await this.store.appendRun(full);
38
+ return full;
39
+ }
40
+ async endRun(outcome) {
41
+ const status = outcome?.pass === false ? "failed" : "completed";
42
+ await this.store.updateRun(this._runId, { endedAt: this.now(), status, outcome });
43
+ }
44
+ async abortRun(reason) {
45
+ await this.store.updateRun(this._runId, {
46
+ endedAt: this.now(),
47
+ status: "aborted",
48
+ outcome: { pass: false, notes: reason }
49
+ });
50
+ }
51
+ // ── Generic span ───────────────────────────────────────────────────
52
+ async span(init) {
53
+ const spanId = this.id();
54
+ const parent = init.parentSpanId ?? this.stack[this.stack.length - 1];
55
+ const span = {
56
+ spanId,
57
+ parentSpanId: parent,
58
+ runId: this._runId,
59
+ startedAt: this.now(),
60
+ ...init
61
+ };
62
+ await this.store.appendSpan(span);
63
+ this.stack.push(spanId);
64
+ return this.handle(span);
65
+ }
66
+ handle(span) {
67
+ return {
68
+ span,
69
+ end: async (patch) => {
70
+ const endedAt = this.now();
71
+ await this.store.updateSpan(span.spanId, { endedAt, status: "ok", ...patch });
72
+ this.pop(span.spanId);
73
+ },
74
+ fail: async (error, patch) => {
75
+ const endedAt = this.now();
76
+ const errStr = error instanceof Error ? error.message : error;
77
+ await this.store.updateSpan(span.spanId, {
78
+ endedAt,
79
+ status: "error",
80
+ error: errStr,
81
+ ...patch
82
+ });
83
+ this.pop(span.spanId);
84
+ }
85
+ };
86
+ }
87
+ pop(spanId) {
88
+ const idx = this.stack.lastIndexOf(spanId);
89
+ if (idx >= 0) this.stack.splice(idx, 1);
90
+ }
91
+ // ── Typed span conveniences ────────────────────────────────────────
92
+ llm(init) {
93
+ return this.span({ kind: "llm", ...init });
94
+ }
95
+ tool(init) {
96
+ return this.span({ kind: "tool", ...init });
97
+ }
98
+ retrieval(init) {
99
+ return this.span({ kind: "retrieval", ...init });
100
+ }
101
+ async recordJudge(verdict) {
102
+ const spanId = this.id();
103
+ const now = this.now();
104
+ const full = {
105
+ spanId,
106
+ runId: this._runId,
107
+ kind: "judge",
108
+ startedAt: now,
109
+ endedAt: now,
110
+ status: "ok",
111
+ ...verdict
112
+ };
113
+ await this.store.appendSpan(full);
114
+ return full;
115
+ }
116
+ sandbox(init) {
117
+ return this.span({ kind: "sandbox", ...init });
118
+ }
119
+ // ── Events ─────────────────────────────────────────────────────────
120
+ async emit(event) {
121
+ const full = {
122
+ eventId: this.id(),
123
+ runId: this._runId,
124
+ spanId: event.spanId ?? this.stack[this.stack.length - 1],
125
+ kind: event.kind,
126
+ timestamp: this.now(),
127
+ payload: event.payload ?? {}
128
+ };
129
+ await this.store.appendEvent(full);
130
+ return full;
131
+ }
132
+ // ── Budget ledger ──────────────────────────────────────────────────
133
+ async recordBudget(entry) {
134
+ const full = {
135
+ runId: this._runId,
136
+ timestamp: entry.timestamp ?? this.now(),
137
+ dimension: entry.dimension,
138
+ limit: entry.limit,
139
+ consumed: entry.consumed,
140
+ remaining: entry.remaining,
141
+ breached: entry.breached,
142
+ spanId: entry.spanId ?? this.stack[this.stack.length - 1]
143
+ };
144
+ await this.store.appendBudgetEntry(full);
145
+ if (full.breached) {
146
+ await this.emit({
147
+ kind: "budget_breach",
148
+ spanId: full.spanId,
149
+ payload: { dimension: full.dimension, limit: full.limit, consumed: full.consumed }
150
+ });
151
+ }
152
+ return full;
153
+ }
154
+ // ── Artifacts ──────────────────────────────────────────────────────
155
+ async recordArtifact(artifact) {
156
+ const full = { artifactId: this.id(), runId: this._runId, ...artifact };
157
+ await this.store.appendArtifact(full);
158
+ return full;
159
+ }
160
+ // ── Nested composition ─────────────────────────────────────────────
161
+ /**
162
+ * Runs `fn` inside a span; auto-ends on success, auto-fails on throw.
163
+ * Returns the fn's return value. Use this for the 95% case.
164
+ */
165
+ async within(init, fn) {
166
+ const handle = await this.span(init);
167
+ try {
168
+ const result = await fn(handle);
169
+ await handle.end();
170
+ return result;
171
+ } catch (err) {
172
+ await handle.fail(err instanceof Error ? err : String(err));
173
+ throw err;
174
+ }
175
+ }
176
+ };
177
+ function cryptoRandomId() {
178
+ if (typeof globalThis.crypto?.randomUUID === "function") return globalThis.crypto.randomUUID();
179
+ return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
180
+ }
181
+ function llmSpanFromProvider(args) {
182
+ return {
183
+ name: args.name ?? args.model,
184
+ model: args.model,
185
+ messages: args.messages,
186
+ output: args.output,
187
+ inputTokens: args.usage?.inputTokens,
188
+ outputTokens: args.usage?.outputTokens,
189
+ cachedTokens: args.usage?.cachedTokens,
190
+ reasoningTokens: args.usage?.reasoningTokens,
191
+ costUsd: args.costUsd,
192
+ finishReason: args.finishReason
193
+ };
194
+ }
195
+
196
+ export {
197
+ TraceEmitter,
198
+ llmSpanFromProvider
199
+ };
200
+ //# sourceMappingURL=chunk-PKCVBYTQ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/emitter.ts"],"sourcesContent":["/**\n * TraceEmitter — hierarchical span builder that auto-parents using an\n * internal stack. One emitter per Run; emitters do NOT share state.\n *\n * Convenience methods (`llm`, `tool`, `retrieval`, `judge`, `sandbox`)\n * return a `SpanHandle` with `.end()` / `.fail()` so callers don't\n * have to thread spanIds manually. For async workflows that can't use\n * the stack (e.g. fan-out parallel calls), pass `parentSpanId`\n * explicitly.\n */\n\nimport type {\n Artifact,\n BudgetLedgerEntry,\n EventKind,\n JudgeSpan,\n LlmSpan,\n Message,\n RetrievalSpan,\n Run,\n RunOutcome,\n SandboxSpan,\n Span,\n SpanKind,\n ToolSpan,\n TraceEvent,\n} from './schema'\nimport type { TraceStore } from './store'\n\nexport interface SpanHandle<S extends Span = Span> {\n span: S\n end(patch?: Partial<S>): Promise<void>\n fail(error: string | Error, patch?: Partial<S>): Promise<void>\n}\n\nexport interface TraceEmitterOptions {\n runId?: string\n /** Inject a clock for deterministic tests. */\n now?: () => number\n /** Inject an id generator for deterministic tests. */\n id?: () => string\n}\n\nexport class TraceEmitter {\n private store: TraceStore\n private stack: string[] = []\n private _runId: string\n private now: () => number\n private id: () => string\n\n constructor(store: TraceStore, options: TraceEmitterOptions = {}) {\n this.store = store\n this.now = options.now ?? (() => Date.now())\n this.id = options.id ?? (() => cryptoRandomId())\n this._runId = options.runId ?? this.id()\n }\n\n get runId(): string { return this._runId }\n\n // ── Run lifecycle ──────────────────────────────────────────────────\n\n /**\n * Begin a Run.\n *\n * `scenarioId` is required on the persisted Run shape — every Run downstream\n * gets a non-empty scenarioId so filters and aggregations stay simple — but\n * the INPUT here accepts it as optional. When omitted, startRun substitutes\n * a sensible default (`run.layer ?? run.tags?.['kind'] ?? 'runtime'`) so\n * runtime / operator / meta-eval runs that have no curated-scenario corpus\n * to anchor to don't have to invent placeholder strings at the call site.\n */\n async startRun(\n run: Omit<Run, 'runId' | 'scenarioId' | 'startedAt' | 'status'> & { scenarioId?: string },\n ): Promise<Run> {\n const scenarioId =\n run.scenarioId ??\n run.layer ??\n run.tags?.['kind'] ??\n 'runtime'\n const full: Run = {\n ...run,\n scenarioId,\n runId: this._runId,\n startedAt: this.now(),\n status: 'running',\n }\n await this.store.appendRun(full)\n return full\n }\n\n async endRun(outcome?: RunOutcome): Promise<void> {\n const status = outcome?.pass === false ? 'failed' : 'completed'\n await this.store.updateRun(this._runId, { endedAt: this.now(), status, outcome })\n }\n\n async abortRun(reason: string): Promise<void> {\n await this.store.updateRun(this._runId, {\n endedAt: this.now(),\n status: 'aborted',\n outcome: { pass: false, notes: reason },\n })\n }\n\n // ── Generic span ───────────────────────────────────────────────────\n\n async span<S extends Span = Span>(init: {\n kind: SpanKind\n name: string\n parentSpanId?: string\n attributes?: Record<string, unknown>\n } & Partial<Omit<S, 'spanId' | 'runId' | 'startedAt' | 'kind' | 'name'>>): Promise<SpanHandle<S>> {\n const spanId = this.id()\n const parent = init.parentSpanId ?? this.stack[this.stack.length - 1]\n const span = {\n spanId,\n parentSpanId: parent,\n runId: this._runId,\n startedAt: this.now(),\n ...init,\n } as unknown as S\n await this.store.appendSpan(span)\n this.stack.push(spanId)\n return this.handle<S>(span)\n }\n\n private handle<S extends Span>(span: S): SpanHandle<S> {\n return {\n span,\n end: async (patch?: Partial<S>) => {\n const endedAt = this.now()\n await this.store.updateSpan(span.spanId, { endedAt, status: 'ok', ...patch } as Partial<Span>)\n this.pop(span.spanId)\n },\n fail: async (error: string | Error, patch?: Partial<S>) => {\n const endedAt = this.now()\n const errStr = error instanceof Error ? error.message : error\n await this.store.updateSpan(span.spanId, {\n endedAt,\n status: 'error',\n error: errStr,\n ...patch,\n } as Partial<Span>)\n this.pop(span.spanId)\n },\n }\n }\n\n private pop(spanId: string): void {\n const idx = this.stack.lastIndexOf(spanId)\n if (idx >= 0) this.stack.splice(idx, 1)\n }\n\n // ── Typed span conveniences ────────────────────────────────────────\n\n llm(init: Omit<LlmSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<LlmSpan>> {\n return this.span<LlmSpan>({ kind: 'llm', ...init })\n }\n\n tool(init: Omit<ToolSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<ToolSpan>> {\n return this.span<ToolSpan>({ kind: 'tool', ...init })\n }\n\n retrieval(init: Omit<RetrievalSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<RetrievalSpan>> {\n return this.span<RetrievalSpan>({ kind: 'retrieval', ...init })\n }\n\n async recordJudge(verdict: Omit<JudgeSpan, 'spanId' | 'runId' | 'kind' | 'startedAt' | 'endedAt'>): Promise<JudgeSpan> {\n const spanId = this.id()\n const now = this.now()\n const full: JudgeSpan = {\n spanId,\n runId: this._runId,\n kind: 'judge',\n startedAt: now,\n endedAt: now,\n status: 'ok',\n ...verdict,\n }\n await this.store.appendSpan(full)\n return full\n }\n\n sandbox(init: Omit<SandboxSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'>): Promise<SpanHandle<SandboxSpan>> {\n return this.span<SandboxSpan>({ kind: 'sandbox', ...init })\n }\n\n // ── Events ─────────────────────────────────────────────────────────\n\n async emit(event: { kind: EventKind; spanId?: string; payload?: Record<string, unknown> }): Promise<TraceEvent> {\n const full: TraceEvent = {\n eventId: this.id(),\n runId: this._runId,\n spanId: event.spanId ?? this.stack[this.stack.length - 1],\n kind: event.kind,\n timestamp: this.now(),\n payload: event.payload ?? {},\n }\n await this.store.appendEvent(full)\n return full\n }\n\n // ── Budget ledger ──────────────────────────────────────────────────\n\n async recordBudget(entry: Omit<BudgetLedgerEntry, 'runId' | 'timestamp'> & { timestamp?: number }): Promise<BudgetLedgerEntry> {\n const full: BudgetLedgerEntry = {\n runId: this._runId,\n timestamp: entry.timestamp ?? this.now(),\n dimension: entry.dimension,\n limit: entry.limit,\n consumed: entry.consumed,\n remaining: entry.remaining,\n breached: entry.breached,\n spanId: entry.spanId ?? this.stack[this.stack.length - 1],\n }\n await this.store.appendBudgetEntry(full)\n if (full.breached) {\n await this.emit({\n kind: 'budget_breach',\n spanId: full.spanId,\n payload: { dimension: full.dimension, limit: full.limit, consumed: full.consumed },\n })\n }\n return full\n }\n\n // ── Artifacts ──────────────────────────────────────────────────────\n\n async recordArtifact(artifact: Omit<Artifact, 'artifactId' | 'runId'>): Promise<Artifact> {\n const full: Artifact = { artifactId: this.id(), runId: this._runId, ...artifact }\n await this.store.appendArtifact(full)\n return full\n }\n\n // ── Nested composition ─────────────────────────────────────────────\n\n /**\n * Runs `fn` inside a span; auto-ends on success, auto-fails on throw.\n * Returns the fn's return value. Use this for the 95% case.\n */\n async within<T>(\n init: Parameters<TraceEmitter['span']>[0],\n fn: (handle: SpanHandle) => Promise<T>,\n ): Promise<T> {\n const handle = await this.span(init)\n try {\n const result = await fn(handle)\n await handle.end()\n return result\n } catch (err) {\n await handle.fail(err instanceof Error ? err : String(err))\n throw err\n }\n }\n}\n\n// Helpers -------------------------------------------------------------\n\nfunction cryptoRandomId(): string {\n if (typeof globalThis.crypto?.randomUUID === 'function') return globalThis.crypto.randomUUID()\n return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`\n}\n\n/** Helper to build an LLM span handle args object from a provider-shaped response. */\nexport function llmSpanFromProvider(args: {\n name?: string\n model: string\n messages: Message[]\n output: string\n usage?: { inputTokens?: number; outputTokens?: number; cachedTokens?: number; reasoningTokens?: number }\n costUsd?: number\n finishReason?: string\n}): Omit<LlmSpan, 'spanId' | 'runId' | 'kind' | 'startedAt'> {\n return {\n name: args.name ?? args.model,\n model: args.model,\n messages: args.messages,\n output: args.output,\n inputTokens: args.usage?.inputTokens,\n outputTokens: args.usage?.outputTokens,\n cachedTokens: args.usage?.cachedTokens,\n reasoningTokens: args.usage?.reasoningTokens,\n costUsd: args.costUsd,\n finishReason: args.finishReason,\n }\n}\n"],"mappings":";AA2CO,IAAM,eAAN,MAAmB;AAAA,EAChB;AAAA,EACA,QAAkB,CAAC;AAAA,EACnB;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,OAAmB,UAA+B,CAAC,GAAG;AAChE,SAAK,QAAQ;AACb,SAAK,MAAM,QAAQ,QAAQ,MAAM,KAAK,IAAI;AAC1C,SAAK,KAAK,QAAQ,OAAO,MAAM,eAAe;AAC9C,SAAK,SAAS,QAAQ,SAAS,KAAK,GAAG;AAAA,EACzC;AAAA,EAEA,IAAI,QAAgB;AAAE,WAAO,KAAK;AAAA,EAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAczC,MAAM,SACJ,KACc;AACd,UAAM,aACJ,IAAI,cACJ,IAAI,SACJ,IAAI,OAAO,MAAM,KACjB;AACF,UAAM,OAAY;AAAA,MAChB,GAAG;AAAA,MACH;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,QAAQ;AAAA,IACV;AACA,UAAM,KAAK,MAAM,UAAU,IAAI;AAC/B,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,OAAO,SAAqC;AAChD,UAAM,SAAS,SAAS,SAAS,QAAQ,WAAW;AACpD,UAAM,KAAK,MAAM,UAAU,KAAK,QAAQ,EAAE,SAAS,KAAK,IAAI,GAAG,QAAQ,QAAQ,CAAC;AAAA,EAClF;AAAA,EAEA,MAAM,SAAS,QAA+B;AAC5C,UAAM,KAAK,MAAM,UAAU,KAAK,QAAQ;AAAA,MACtC,SAAS,KAAK,IAAI;AAAA,MAClB,QAAQ;AAAA,MACR,SAAS,EAAE,MAAM,OAAO,OAAO,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA;AAAA,EAIA,MAAM,KAA4B,MAKgE;AAChG,UAAM,SAAS,KAAK,GAAG;AACvB,UAAM,SAAS,KAAK,gBAAgB,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AACpE,UAAM,OAAO;AAAA,MACX;AAAA,MACA,cAAc;AAAA,MACd,OAAO,KAAK;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,GAAG;AAAA,IACL;AACA,UAAM,KAAK,MAAM,WAAW,IAAI;AAChC,SAAK,MAAM,KAAK,MAAM;AACtB,WAAO,KAAK,OAAU,IAAI;AAAA,EAC5B;AAAA,EAEQ,OAAuB,MAAwB;AACrD,WAAO;AAAA,MACL;AAAA,MACA,KAAK,OAAO,UAAuB;AACjC,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,KAAK,MAAM,WAAW,KAAK,QAAQ,EAAE,SAAS,QAAQ,MAAM,GAAG,MAAM,CAAkB;AAC7F,aAAK,IAAI,KAAK,MAAM;AAAA,MACtB;AAAA,MACA,MAAM,OAAO,OAAuB,UAAuB;AACzD,cAAM,UAAU,KAAK,IAAI;AACzB,cAAM,SAAS,iBAAiB,QAAQ,MAAM,UAAU;AACxD,cAAM,KAAK,MAAM,WAAW,KAAK,QAAQ;AAAA,UACvC;AAAA,UACA,QAAQ;AAAA,UACR,OAAO;AAAA,UACP,GAAG;AAAA,QACL,CAAkB;AAClB,aAAK,IAAI,KAAK,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,IAAI,QAAsB;AAChC,UAAM,MAAM,KAAK,MAAM,YAAY,MAAM;AACzC,QAAI,OAAO,EAAG,MAAK,MAAM,OAAO,KAAK,CAAC;AAAA,EACxC;AAAA;AAAA,EAIA,IAAI,MAA8F;AAChG,WAAO,KAAK,KAAc,EAAE,MAAM,OAAO,GAAG,KAAK,CAAC;AAAA,EACpD;AAAA,EAEA,KAAK,MAAgG;AACnG,WAAO,KAAK,KAAe,EAAE,MAAM,QAAQ,GAAG,KAAK,CAAC;AAAA,EACtD;AAAA,EAEA,UAAU,MAA0G;AAClH,WAAO,KAAK,KAAoB,EAAE,MAAM,aAAa,GAAG,KAAK,CAAC;AAAA,EAChE;AAAA,EAEA,MAAM,YAAY,SAAqG;AACrH,UAAM,SAAS,KAAK,GAAG;AACvB,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,OAAkB;AAAA,MACtB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,WAAW;AAAA,MACX,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AACA,UAAM,KAAK,MAAM,WAAW,IAAI;AAChC,WAAO;AAAA,EACT;AAAA,EAEA,QAAQ,MAAsG;AAC5G,WAAO,KAAK,KAAkB,EAAE,MAAM,WAAW,GAAG,KAAK,CAAC;AAAA,EAC5D;AAAA;AAAA,EAIA,MAAM,KAAK,OAAqG;AAC9G,UAAM,OAAmB;AAAA,MACvB,SAAS,KAAK,GAAG;AAAA,MACjB,OAAO,KAAK;AAAA,MACZ,QAAQ,MAAM,UAAU,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AAAA,MACxD,MAAM,MAAM;AAAA,MACZ,WAAW,KAAK,IAAI;AAAA,MACpB,SAAS,MAAM,WAAW,CAAC;AAAA,IAC7B;AACA,UAAM,KAAK,MAAM,YAAY,IAAI;AACjC,WAAO;AAAA,EACT;AAAA;AAAA,EAIA,MAAM,aAAa,OAA4G;AAC7H,UAAM,OAA0B;AAAA,MAC9B,OAAO,KAAK;AAAA,MACZ,WAAW,MAAM,aAAa,KAAK,IAAI;AAAA,MACvC,WAAW,MAAM;AAAA,MACjB,OAAO,MAAM;AAAA,MACb,UAAU,MAAM;AAAA,MAChB,WAAW,MAAM;AAAA,MACjB,UAAU,MAAM;AAAA,MAChB,QAAQ,MAAM,UAAU,KAAK,MAAM,KAAK,MAAM,SAAS,CAAC;AAAA,IAC1D;AACA,UAAM,KAAK,MAAM,kBAAkB,IAAI;AACvC,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,KAAK;AAAA,QACd,MAAM;AAAA,QACN,QAAQ,KAAK;AAAA,QACb,SAAS,EAAE,WAAW,KAAK,WAAW,OAAO,KAAK,OAAO,UAAU,KAAK,SAAS;AAAA,MACnF,CAAC;AAAA,IACH;AACA,WAAO;AAAA,EACT;AAAA;AAAA,EAIA,MAAM,eAAe,UAAqE;AACxF,UAAM,OAAiB,EAAE,YAAY,KAAK,GAAG,GAAG,OAAO,KAAK,QAAQ,GAAG,SAAS;AAChF,UAAM,KAAK,MAAM,eAAe,IAAI;AACpC,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,OACJ,MACA,IACY;AACZ,UAAM,SAAS,MAAM,KAAK,KAAK,IAAI;AACnC,QAAI;AACF,YAAM,SAAS,MAAM,GAAG,MAAM;AAC9B,YAAM,OAAO,IAAI;AACjB,aAAO;AAAA,IACT,SAAS,KAAK;AACZ,YAAM,OAAO,KAAK,eAAe,QAAQ,MAAM,OAAO,GAAG,CAAC;AAC1D,YAAM;AAAA,IACR;AAAA,EACF;AACF;AAIA,SAAS,iBAAyB;AAChC,MAAI,OAAO,WAAW,QAAQ,eAAe,WAAY,QAAO,WAAW,OAAO,WAAW;AAC7F,SAAO,GAAG,KAAK,IAAI,EAAE,SAAS,EAAE,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE,CAAC;AAC9E;AAGO,SAAS,oBAAoB,MAQyB;AAC3D,SAAO;AAAA,IACL,MAAM,KAAK,QAAQ,KAAK;AAAA,IACxB,OAAO,KAAK;AAAA,IACZ,UAAU,KAAK;AAAA,IACf,QAAQ,KAAK;AAAA,IACb,aAAa,KAAK,OAAO;AAAA,IACzB,cAAc,KAAK,OAAO;AAAA,IAC1B,cAAc,KAAK,OAAO;AAAA,IAC1B,iBAAiB,KAAK,OAAO;AAAA,IAC7B,SAAS,KAAK;AAAA,IACd,cAAc,KAAK;AAAA,EACrB;AACF;","names":[]}
@@ -0,0 +1,148 @@
1
+ // src/run-record.ts
2
+ var MANDATORY_TOP_LEVEL = [
3
+ "runId",
4
+ "experimentId",
5
+ "candidateId",
6
+ "seed",
7
+ "model",
8
+ "promptHash",
9
+ "configHash",
10
+ "commitSha",
11
+ "wallMs",
12
+ "costUsd",
13
+ "tokenUsage",
14
+ "outcome",
15
+ "splitTag"
16
+ ];
17
+ var SPLIT_TAGS = ["search", "dev", "holdout"];
18
+ var RunRecordValidationError = class extends Error {
19
+ path;
20
+ constructor(message, path = "") {
21
+ super(path ? `${message} (at ${path})` : message);
22
+ this.name = "RunRecordValidationError";
23
+ this.path = path;
24
+ }
25
+ };
26
+ function validateRunRecord(input) {
27
+ if (input === null || typeof input !== "object") {
28
+ throw new RunRecordValidationError("expected object");
29
+ }
30
+ const obj = input;
31
+ for (const key of MANDATORY_TOP_LEVEL) {
32
+ if (!(key in obj)) {
33
+ throw new RunRecordValidationError(`missing mandatory field "${key}"`);
34
+ }
35
+ }
36
+ expectString(obj.runId, "runId");
37
+ expectString(obj.experimentId, "experimentId");
38
+ expectString(obj.candidateId, "candidateId");
39
+ expectFiniteNumber(obj.seed, "seed");
40
+ expectString(obj.model, "model");
41
+ expectString(obj.promptHash, "promptHash");
42
+ expectString(obj.configHash, "configHash");
43
+ expectString(obj.commitSha, "commitSha");
44
+ expectFiniteNumber(obj.wallMs, "wallMs");
45
+ if (obj.queueMs !== void 0) expectFiniteNumber(obj.queueMs, "queueMs");
46
+ expectFiniteNumber(obj.costUsd, "costUsd");
47
+ if (!modelHasSnapshot(obj.model)) {
48
+ throw new RunRecordValidationError(
49
+ `model "${obj.model}" lacks a snapshot version (use 'name@YYYY-MM-DD' or 'name-YYYYMMDD')`,
50
+ "model"
51
+ );
52
+ }
53
+ const tu = obj.tokenUsage;
54
+ if (tu === null || typeof tu !== "object") {
55
+ throw new RunRecordValidationError("tokenUsage must be an object", "tokenUsage");
56
+ }
57
+ const tuRec = tu;
58
+ expectFiniteNumber(tuRec.input, "tokenUsage.input");
59
+ expectFiniteNumber(tuRec.output, "tokenUsage.output");
60
+ if (tuRec.cached !== void 0) expectFiniteNumber(tuRec.cached, "tokenUsage.cached");
61
+ if (obj.judgeMetadata !== void 0) {
62
+ const jm = obj.judgeMetadata;
63
+ if (jm === null || typeof jm !== "object") {
64
+ throw new RunRecordValidationError("judgeMetadata must be an object", "judgeMetadata");
65
+ }
66
+ const jmRec = jm;
67
+ expectString(jmRec.model, "judgeMetadata.model");
68
+ expectString(jmRec.promptVersion, "judgeMetadata.promptVersion");
69
+ expectFiniteNumber(jmRec.confidence, "judgeMetadata.confidence");
70
+ if (typeof jmRec.fallback !== "boolean") {
71
+ throw new RunRecordValidationError("judgeMetadata.fallback must be boolean", "judgeMetadata.fallback");
72
+ }
73
+ }
74
+ const out = obj.outcome;
75
+ if (out === null || typeof out !== "object") {
76
+ throw new RunRecordValidationError("outcome must be an object", "outcome");
77
+ }
78
+ const outRec = out;
79
+ if (outRec.searchScore !== void 0) expectFiniteNumber(outRec.searchScore, "outcome.searchScore");
80
+ if (outRec.holdoutScore !== void 0) expectFiniteNumber(outRec.holdoutScore, "outcome.holdoutScore");
81
+ if (outRec.searchScore === void 0 && outRec.holdoutScore === void 0) {
82
+ throw new RunRecordValidationError(
83
+ "outcome must define searchScore or holdoutScore (or both)",
84
+ "outcome"
85
+ );
86
+ }
87
+ const raw = outRec.raw;
88
+ if (raw === null || typeof raw !== "object") {
89
+ throw new RunRecordValidationError("outcome.raw must be an object", "outcome.raw");
90
+ }
91
+ for (const [k, v] of Object.entries(raw)) {
92
+ expectFiniteNumber(v, `outcome.raw.${k}`);
93
+ }
94
+ if (obj.failureMode !== void 0) expectString(obj.failureMode, "failureMode");
95
+ if (typeof obj.splitTag !== "string" || !SPLIT_TAGS.includes(obj.splitTag)) {
96
+ throw new RunRecordValidationError(
97
+ `splitTag must be one of ${SPLIT_TAGS.join(", ")}, got ${String(obj.splitTag)}`,
98
+ "splitTag"
99
+ );
100
+ }
101
+ return input;
102
+ }
103
+ function isRunRecord(input) {
104
+ try {
105
+ validateRunRecord(input);
106
+ return true;
107
+ } catch {
108
+ return false;
109
+ }
110
+ }
111
+ function parseRunRecordSafe(input) {
112
+ try {
113
+ return { ok: true, value: validateRunRecord(input) };
114
+ } catch (e) {
115
+ if (e instanceof RunRecordValidationError) return { ok: false, error: e };
116
+ throw e;
117
+ }
118
+ }
119
+ function roundTripRunRecord(record) {
120
+ const json = JSON.stringify(record);
121
+ return validateRunRecord(JSON.parse(json));
122
+ }
123
+ function expectString(value, path) {
124
+ if (typeof value !== "string" || value.length === 0) {
125
+ throw new RunRecordValidationError(`expected non-empty string`, path);
126
+ }
127
+ }
128
+ function expectFiniteNumber(value, path) {
129
+ if (typeof value !== "number" || !Number.isFinite(value)) {
130
+ throw new RunRecordValidationError(`expected finite number`, path);
131
+ }
132
+ }
133
+ function modelHasSnapshot(model) {
134
+ if (model.includes("@")) return true;
135
+ if (/-\d{8}$/.test(model)) return true;
136
+ if (/-\d{4}-\d{2}-\d{2}$/.test(model)) return true;
137
+ if (/:date-/.test(model)) return true;
138
+ return false;
139
+ }
140
+
141
+ export {
142
+ RunRecordValidationError,
143
+ validateRunRecord,
144
+ isRunRecord,
145
+ parseRunRecordSafe,
146
+ roundTripRunRecord
147
+ };
148
+ //# sourceMappingURL=chunk-YUFXO3TU.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/run-record.ts"],"sourcesContent":["/**\n * Paper-grade RunRecord schema + runtime validator.\n *\n * Every run that participates in a promotion gate, paper table, or\n * researcher loop SHOULD be recorded as a `RunRecord`. The mandatory\n * fields are exactly those the paper \"Two Loops, Three Roles\" requires\n * for reproducibility: who/what/when/cost/seed/hash, plus the search vs\n * holdout split tag and either a `searchScore` or a `holdoutScore`.\n *\n * This is intentionally NOT a replacement for the rich `Run` /\n * `ProposeReviewReport` / `ScenarioResult` types already in the\n * package. Those are runtime structures with full provenance. A\n * `RunRecord` is the analysis-time projection — the JSON-friendly\n * row you'd put in a parquet file or paste into a notebook.\n *\n * Validate at the boundary:\n *\n * const rec = validateRunRecord(rawJson) // throws on missing\n * const ok = isRunRecord(rawJson) // boolean check\n * const rec = parseRunRecordSafe(rawJson) // { ok, value | error }\n *\n * The validator runs in pure TS — zod is intentionally NOT a\n * dependency. Round-trip tested in `tests/run-record.test.ts`.\n */\n\n/** Search/dev/holdout split tag. 'search' is the paper-grade alias for the\n * combined train+test pool that the optimizer is allowed to read. */\nexport type RunSplitTag = 'search' | 'dev' | 'holdout'\n\nexport interface RunTokenUsage {\n input: number\n output: number\n cached?: number\n}\n\nexport interface RunJudgeMetadata {\n model: string\n promptVersion: string\n /** [0,1] confidence the judge declared. Constant judge confidence\n * across many runs is a fallback signal (see `canary.ts`). */\n confidence: number\n /** True if the judge degraded to a fallback path (rules-only,\n * prior-call cache, etc.). The canary uses this to alert. */\n fallback: boolean\n}\n\nexport interface RunOutcome {\n /** Score on the search/optimization split. Optional because a\n * holdout-only evaluation only fills `holdoutScore`. */\n searchScore?: number\n /** Score on the held-out split. Optional because a search-only run\n * only fills `searchScore`. At least one must be present. */\n holdoutScore?: number\n /** Bag of any other metric the run produced — judge dimensions,\n * pass/fail counters, latency stats, etc. Numeric only — keeps\n * reporters honest. */\n raw: Record<string, number>\n}\n\n/**\n * Mandatory paper-grade fields for a single evaluation run. Optional\n * fields are extension points; mandatory fields throw if missing.\n *\n * Hash discipline:\n * - `promptHash` is the sha256 of the EFFECTIVE prompt sent to the\n * model (after any steering bundle merge).\n * - `configHash` is the sha256 of the effective run config (model,\n * temperature, tools, judges, splits). The pair (promptHash,\n * configHash) uniquely identifies an experimental cell.\n *\n * Model snapshot discipline:\n * - `model` MUST encode a snapshot version. Bare aliases like\n * `claude-sonnet-4` or `gpt-4o` are banned — they remap silently.\n * Use `claude-sonnet-4-6@2025-04-15` or `gpt-4o-2024-11-20`.\n */\nexport interface RunRecord {\n /** UUID for the run. */\n runId: string\n /** Logical experiment grouping (a treatment vs a baseline within\n * the same sweep should share `experimentId`). */\n experimentId: string\n /** Stable identifier for the candidate (variant) being run. The\n * promotion gate compares two `candidateId`s on matched items. */\n candidateId: string\n /** RNG seed for the run. Always recorded — silent re-seeding is\n * the most common cause of non-reproducible numbers. */\n seed: number\n /** Model identifier WITH snapshot version. */\n model: string\n /** sha256 of the effective prompt (post-steering). */\n promptHash: string\n /** sha256 of the effective config. */\n configHash: string\n /** Git SHA the harness was run from. */\n commitSha: string\n /** End-to-end wall-clock duration in milliseconds. */\n wallMs: number\n /** Time spent queued before execution started, if known. */\n queueMs?: number\n /** Total USD cost. Mandatory — runs without a cost number are\n * unbounded by definition and must not be admitted into the gate. */\n costUsd: number\n /** Token usage breakdown. */\n tokenUsage: RunTokenUsage\n /** Judge-side metadata, if a judge was used. */\n judgeMetadata?: RunJudgeMetadata\n /** Per-split scores + raw bag. */\n outcome: RunOutcome\n /** Categorical failure tag, when the run failed and the harness\n * classified it. Free-form string; standard tags live in\n * `failure-taxonomy.ts`. */\n failureMode?: string\n /** Which split this run was drawn from. */\n splitTag: RunSplitTag\n}\n\n// ── Validation ───────────────────────────────────────────────────────\n\nconst MANDATORY_TOP_LEVEL = [\n 'runId',\n 'experimentId',\n 'candidateId',\n 'seed',\n 'model',\n 'promptHash',\n 'configHash',\n 'commitSha',\n 'wallMs',\n 'costUsd',\n 'tokenUsage',\n 'outcome',\n 'splitTag',\n] as const\n\nconst SPLIT_TAGS: ReadonlyArray<RunSplitTag> = ['search', 'dev', 'holdout']\n\nexport class RunRecordValidationError extends Error {\n readonly path: string\n constructor(message: string, path = '') {\n super(path ? `${message} (at ${path})` : message)\n this.name = 'RunRecordValidationError'\n this.path = path\n }\n}\n\n/**\n * Strict validator. Throws `RunRecordValidationError` on the first\n * missing or wrongly-typed field. Returns the input cast to\n * `RunRecord` on success — the validator does not coerce.\n */\nexport function validateRunRecord(input: unknown): RunRecord {\n if (input === null || typeof input !== 'object') {\n throw new RunRecordValidationError('expected object')\n }\n const obj = input as Record<string, unknown>\n\n for (const key of MANDATORY_TOP_LEVEL) {\n if (!(key in obj)) {\n throw new RunRecordValidationError(`missing mandatory field \"${key}\"`)\n }\n }\n\n expectString(obj.runId, 'runId')\n expectString(obj.experimentId, 'experimentId')\n expectString(obj.candidateId, 'candidateId')\n expectFiniteNumber(obj.seed, 'seed')\n expectString(obj.model, 'model')\n expectString(obj.promptHash, 'promptHash')\n expectString(obj.configHash, 'configHash')\n expectString(obj.commitSha, 'commitSha')\n expectFiniteNumber(obj.wallMs, 'wallMs')\n if (obj.queueMs !== undefined) expectFiniteNumber(obj.queueMs, 'queueMs')\n expectFiniteNumber(obj.costUsd, 'costUsd')\n\n // Snapshot discipline: bare model aliases are not paper-grade.\n if (!modelHasSnapshot(obj.model as string)) {\n throw new RunRecordValidationError(\n `model \"${obj.model}\" lacks a snapshot version (use 'name@YYYY-MM-DD' or 'name-YYYYMMDD')`,\n 'model',\n )\n }\n\n // Token usage.\n const tu = obj.tokenUsage\n if (tu === null || typeof tu !== 'object') {\n throw new RunRecordValidationError('tokenUsage must be an object', 'tokenUsage')\n }\n const tuRec = tu as Record<string, unknown>\n expectFiniteNumber(tuRec.input, 'tokenUsage.input')\n expectFiniteNumber(tuRec.output, 'tokenUsage.output')\n if (tuRec.cached !== undefined) expectFiniteNumber(tuRec.cached, 'tokenUsage.cached')\n\n // Judge metadata, optional.\n if (obj.judgeMetadata !== undefined) {\n const jm = obj.judgeMetadata\n if (jm === null || typeof jm !== 'object') {\n throw new RunRecordValidationError('judgeMetadata must be an object', 'judgeMetadata')\n }\n const jmRec = jm as Record<string, unknown>\n expectString(jmRec.model, 'judgeMetadata.model')\n expectString(jmRec.promptVersion, 'judgeMetadata.promptVersion')\n expectFiniteNumber(jmRec.confidence, 'judgeMetadata.confidence')\n if (typeof jmRec.fallback !== 'boolean') {\n throw new RunRecordValidationError('judgeMetadata.fallback must be boolean', 'judgeMetadata.fallback')\n }\n }\n\n // Outcome.\n const out = obj.outcome\n if (out === null || typeof out !== 'object') {\n throw new RunRecordValidationError('outcome must be an object', 'outcome')\n }\n const outRec = out as Record<string, unknown>\n if (outRec.searchScore !== undefined) expectFiniteNumber(outRec.searchScore, 'outcome.searchScore')\n if (outRec.holdoutScore !== undefined) expectFiniteNumber(outRec.holdoutScore, 'outcome.holdoutScore')\n if (outRec.searchScore === undefined && outRec.holdoutScore === undefined) {\n throw new RunRecordValidationError(\n 'outcome must define searchScore or holdoutScore (or both)',\n 'outcome',\n )\n }\n const raw = outRec.raw\n if (raw === null || typeof raw !== 'object') {\n throw new RunRecordValidationError('outcome.raw must be an object', 'outcome.raw')\n }\n for (const [k, v] of Object.entries(raw as Record<string, unknown>)) {\n expectFiniteNumber(v, `outcome.raw.${k}`)\n }\n\n // Failure mode optional.\n if (obj.failureMode !== undefined) expectString(obj.failureMode, 'failureMode')\n\n // Split tag.\n if (typeof obj.splitTag !== 'string' || !SPLIT_TAGS.includes(obj.splitTag as RunSplitTag)) {\n throw new RunRecordValidationError(\n `splitTag must be one of ${SPLIT_TAGS.join(', ')}, got ${String(obj.splitTag)}`,\n 'splitTag',\n )\n }\n\n return input as RunRecord\n}\n\n/** Boolean validator — convenience for filtering arrays. */\nexport function isRunRecord(input: unknown): input is RunRecord {\n try {\n validateRunRecord(input)\n return true\n } catch {\n return false\n }\n}\n\n/** Non-throwing validator — returns a discriminated union. */\nexport function parseRunRecordSafe(\n input: unknown,\n):\n | { ok: true; value: RunRecord }\n | { ok: false; error: RunRecordValidationError } {\n try {\n return { ok: true, value: validateRunRecord(input) }\n } catch (e) {\n if (e instanceof RunRecordValidationError) return { ok: false, error: e }\n throw e\n }\n}\n\n/** Round-trip helper — `JSON.parse(JSON.stringify(record))` then validate. */\nexport function roundTripRunRecord(record: RunRecord): RunRecord {\n const json = JSON.stringify(record)\n return validateRunRecord(JSON.parse(json))\n}\n\n// ── Internals ────────────────────────────────────────────────────────\n\nfunction expectString(value: unknown, path: string): void {\n if (typeof value !== 'string' || value.length === 0) {\n throw new RunRecordValidationError(`expected non-empty string`, path)\n }\n}\n\nfunction expectFiniteNumber(value: unknown, path: string): void {\n if (typeof value !== 'number' || !Number.isFinite(value)) {\n throw new RunRecordValidationError(`expected finite number`, path)\n }\n}\n\n/**\n * Heuristic snapshot check. Accepts:\n * - `name@YYYY-MM-DD` (Anthropic style: `claude-sonnet-4-6@2025-04-15`)\n * - `name-YYYYMMDD` (OpenAI style: `gpt-4o-2024-11-20`)\n * - `name@<arbitrary-token>` (allow opaque snapshots like `@v3`)\n * - explicit `:date-...` Vertex-style tags\n *\n * Rejects bare aliases like `claude-sonnet-4` or `gpt-4o` that remap\n * silently as providers ship new snapshots.\n */\nfunction modelHasSnapshot(model: string): boolean {\n if (model.includes('@')) return true\n if (/-\\d{8}$/.test(model)) return true\n if (/-\\d{4}-\\d{2}-\\d{2}$/.test(model)) return true\n if (/:date-/.test(model)) return true\n return false\n}\n"],"mappings":";AAsHA,IAAM,sBAAsB;AAAA,EAC1B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,IAAM,aAAyC,CAAC,UAAU,OAAO,SAAS;AAEnE,IAAM,2BAAN,cAAuC,MAAM;AAAA,EACzC;AAAA,EACT,YAAY,SAAiB,OAAO,IAAI;AACtC,UAAM,OAAO,GAAG,OAAO,QAAQ,IAAI,MAAM,OAAO;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AACF;AAOO,SAAS,kBAAkB,OAA2B;AAC3D,MAAI,UAAU,QAAQ,OAAO,UAAU,UAAU;AAC/C,UAAM,IAAI,yBAAyB,iBAAiB;AAAA,EACtD;AACA,QAAM,MAAM;AAEZ,aAAW,OAAO,qBAAqB;AACrC,QAAI,EAAE,OAAO,MAAM;AACjB,YAAM,IAAI,yBAAyB,4BAA4B,GAAG,GAAG;AAAA,IACvE;AAAA,EACF;AAEA,eAAa,IAAI,OAAO,OAAO;AAC/B,eAAa,IAAI,cAAc,cAAc;AAC7C,eAAa,IAAI,aAAa,aAAa;AAC3C,qBAAmB,IAAI,MAAM,MAAM;AACnC,eAAa,IAAI,OAAO,OAAO;AAC/B,eAAa,IAAI,YAAY,YAAY;AACzC,eAAa,IAAI,YAAY,YAAY;AACzC,eAAa,IAAI,WAAW,WAAW;AACvC,qBAAmB,IAAI,QAAQ,QAAQ;AACvC,MAAI,IAAI,YAAY,OAAW,oBAAmB,IAAI,SAAS,SAAS;AACxE,qBAAmB,IAAI,SAAS,SAAS;AAGzC,MAAI,CAAC,iBAAiB,IAAI,KAAe,GAAG;AAC1C,UAAM,IAAI;AAAA,MACR,UAAU,IAAI,KAAK;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AAGA,QAAM,KAAK,IAAI;AACf,MAAI,OAAO,QAAQ,OAAO,OAAO,UAAU;AACzC,UAAM,IAAI,yBAAyB,gCAAgC,YAAY;AAAA,EACjF;AACA,QAAM,QAAQ;AACd,qBAAmB,MAAM,OAAO,kBAAkB;AAClD,qBAAmB,MAAM,QAAQ,mBAAmB;AACpD,MAAI,MAAM,WAAW,OAAW,oBAAmB,MAAM,QAAQ,mBAAmB;AAGpF,MAAI,IAAI,kBAAkB,QAAW;AACnC,UAAM,KAAK,IAAI;AACf,QAAI,OAAO,QAAQ,OAAO,OAAO,UAAU;AACzC,YAAM,IAAI,yBAAyB,mCAAmC,eAAe;AAAA,IACvF;AACA,UAAM,QAAQ;AACd,iBAAa,MAAM,OAAO,qBAAqB;AAC/C,iBAAa,MAAM,eAAe,6BAA6B;AAC/D,uBAAmB,MAAM,YAAY,0BAA0B;AAC/D,QAAI,OAAO,MAAM,aAAa,WAAW;AACvC,YAAM,IAAI,yBAAyB,0CAA0C,wBAAwB;AAAA,IACvG;AAAA,EACF;AAGA,QAAM,MAAM,IAAI;AAChB,MAAI,QAAQ,QAAQ,OAAO,QAAQ,UAAU;AAC3C,UAAM,IAAI,yBAAyB,6BAA6B,SAAS;AAAA,EAC3E;AACA,QAAM,SAAS;AACf,MAAI,OAAO,gBAAgB,OAAW,oBAAmB,OAAO,aAAa,qBAAqB;AAClG,MAAI,OAAO,iBAAiB,OAAW,oBAAmB,OAAO,cAAc,sBAAsB;AACrG,MAAI,OAAO,gBAAgB,UAAa,OAAO,iBAAiB,QAAW;AACzE,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACA,QAAM,MAAM,OAAO;AACnB,MAAI,QAAQ,QAAQ,OAAO,QAAQ,UAAU;AAC3C,UAAM,IAAI,yBAAyB,iCAAiC,aAAa;AAAA,EACnF;AACA,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,GAA8B,GAAG;AACnE,uBAAmB,GAAG,eAAe,CAAC,EAAE;AAAA,EAC1C;AAGA,MAAI,IAAI,gBAAgB,OAAW,cAAa,IAAI,aAAa,aAAa;AAG9E,MAAI,OAAO,IAAI,aAAa,YAAY,CAAC,WAAW,SAAS,IAAI,QAAuB,GAAG;AACzF,UAAM,IAAI;AAAA,MACR,2BAA2B,WAAW,KAAK,IAAI,CAAC,SAAS,OAAO,IAAI,QAAQ,CAAC;AAAA,MAC7E;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,YAAY,OAAoC;AAC9D,MAAI;AACF,sBAAkB,KAAK;AACvB,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,mBACd,OAGiD;AACjD,MAAI;AACF,WAAO,EAAE,IAAI,MAAM,OAAO,kBAAkB,KAAK,EAAE;AAAA,EACrD,SAAS,GAAG;AACV,QAAI,aAAa,yBAA0B,QAAO,EAAE,IAAI,OAAO,OAAO,EAAE;AACxE,UAAM;AAAA,EACR;AACF;AAGO,SAAS,mBAAmB,QAA8B;AAC/D,QAAM,OAAO,KAAK,UAAU,MAAM;AAClC,SAAO,kBAAkB,KAAK,MAAM,IAAI,CAAC;AAC3C;AAIA,SAAS,aAAa,OAAgB,MAAoB;AACxD,MAAI,OAAO,UAAU,YAAY,MAAM,WAAW,GAAG;AACnD,UAAM,IAAI,yBAAyB,6BAA6B,IAAI;AAAA,EACtE;AACF;AAEA,SAAS,mBAAmB,OAAgB,MAAoB;AAC9D,MAAI,OAAO,UAAU,YAAY,CAAC,OAAO,SAAS,KAAK,GAAG;AACxD,UAAM,IAAI,yBAAyB,0BAA0B,IAAI;AAAA,EACnE;AACF;AAYA,SAAS,iBAAiB,OAAwB;AAChD,MAAI,MAAM,SAAS,GAAG,EAAG,QAAO;AAChC,MAAI,UAAU,KAAK,KAAK,EAAG,QAAO;AAClC,MAAI,sBAAsB,KAAK,KAAK,EAAG,QAAO;AAC9C,MAAI,SAAS,KAAK,KAAK,EAAG,QAAO;AACjC,SAAO;AACT;","names":[]}
package/dist/cli.js CHANGED
@@ -5,8 +5,8 @@ import {
5
5
  runRpcBatch,
6
6
  runRpcOnce,
7
7
  startServer
8
- } from "./chunk-LSR4IAYN.js";
9
- import "./chunk-JAOLXRIA.js";
8
+ } from "./chunk-HNJLMAJ2.js";
9
+ import "./chunk-75MCTH7P.js";
10
10
  import "./chunk-PZ5AY32C.js";
11
11
 
12
12
  // src/cli.ts