@tangle-network/agent-eval 0.23.1 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +80 -0
  2. package/README.md +141 -79
  3. package/dist/baseline-4R5deP0N.d.ts +108 -0
  4. package/dist/benchmarks/index.d.ts +3 -2
  5. package/dist/benchmarks/index.js +1 -1
  6. package/dist/builder-eval/index.d.ts +249 -0
  7. package/dist/builder-eval/index.js +391 -0
  8. package/dist/builder-eval/index.js.map +1 -0
  9. package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
  10. package/dist/chunk-2A5XJB43.js.map +1 -0
  11. package/dist/chunk-47X6LRCE.js +76 -0
  12. package/dist/chunk-47X6LRCE.js.map +1 -0
  13. package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
  14. package/dist/chunk-4F5DQN55.js.map +1 -0
  15. package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
  16. package/dist/chunk-4S4BM3QQ.js.map +1 -0
  17. package/dist/chunk-5BKGXME7.js +65 -0
  18. package/dist/chunk-5BKGXME7.js.map +1 -0
  19. package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
  20. package/dist/chunk-6QDKWHLS.js.map +1 -0
  21. package/dist/chunk-I4MBDTY5.js +272 -0
  22. package/dist/chunk-I4MBDTY5.js.map +1 -0
  23. package/dist/chunk-K2TPS5LB.js +569 -0
  24. package/dist/chunk-K2TPS5LB.js.map +1 -0
  25. package/dist/chunk-KKHDIONI.js +414 -0
  26. package/dist/chunk-KKHDIONI.js.map +1 -0
  27. package/dist/chunk-KMPRBJK4.js +74 -0
  28. package/dist/chunk-KMPRBJK4.js.map +1 -0
  29. package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
  30. package/dist/chunk-KTGTIOFD.js.map +1 -0
  31. package/dist/chunk-LSH4MMOZ.js +838 -0
  32. package/dist/chunk-LSH4MMOZ.js.map +1 -0
  33. package/dist/chunk-NG236HPC.js +57 -0
  34. package/dist/chunk-NG236HPC.js.map +1 -0
  35. package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
  36. package/dist/chunk-NLMNWKVM.js.map +1 -0
  37. package/dist/chunk-NU65VQ7M.js +99 -0
  38. package/dist/chunk-NU65VQ7M.js.map +1 -0
  39. package/dist/chunk-OHEPNJQN.js +554 -0
  40. package/dist/chunk-OHEPNJQN.js.map +1 -0
  41. package/dist/chunk-OWLAAMME.js +250 -0
  42. package/dist/chunk-OWLAAMME.js.map +1 -0
  43. package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
  44. package/dist/chunk-PC4UYEBM.js.map +1 -0
  45. package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
  46. package/dist/chunk-RAF443UI.js.map +1 -0
  47. package/dist/chunk-RZTMDUO7.js +49 -0
  48. package/dist/chunk-RZTMDUO7.js.map +1 -0
  49. package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
  50. package/dist/chunk-SESZDQPX.js.map +1 -0
  51. package/dist/{chunk-6KQG5HAH.js → chunk-SY6WAAAD.js} +84 -71
  52. package/dist/chunk-SY6WAAAD.js.map +1 -0
  53. package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
  54. package/dist/chunk-TVVP3ZZQ.js.map +1 -0
  55. package/dist/{chunk-VQQSPGSM.js → chunk-VRJVTXRV.js} +169 -111
  56. package/dist/chunk-VRJVTXRV.js.map +1 -0
  57. package/dist/chunk-WWYCWKUM.js +196 -0
  58. package/dist/chunk-WWYCWKUM.js.map +1 -0
  59. package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
  60. package/dist/chunk-YRZ4M5GS.js.map +1 -0
  61. package/dist/chunk-ZN274SWR.js +613 -0
  62. package/dist/chunk-ZN274SWR.js.map +1 -0
  63. package/dist/cli.js +10 -6
  64. package/dist/cli.js.map +1 -1
  65. package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
  66. package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
  67. package/dist/control.d.ts +8 -6
  68. package/dist/control.js +10 -7
  69. package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
  70. package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
  71. package/dist/errors-BZ9sTdz7.d.ts +70 -0
  72. package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
  73. package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
  74. package/dist/governance/index.d.ts +5 -0
  75. package/dist/governance/index.js +18 -0
  76. package/dist/governance/index.js.map +1 -0
  77. package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
  78. package/dist/index-Oj9fAPPN.d.ts +270 -0
  79. package/dist/index.d.ts +1866 -3151
  80. package/dist/index.js +5457 -7809
  81. package/dist/index.js.map +1 -1
  82. package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
  83. package/dist/knowledge/index.d.ts +102 -0
  84. package/dist/knowledge/index.js +18 -0
  85. package/dist/knowledge/index.js.map +1 -0
  86. package/dist/meta-eval/index.d.ts +99 -0
  87. package/dist/meta-eval/index.js +324 -0
  88. package/dist/meta-eval/index.js.map +1 -0
  89. package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
  90. package/dist/openapi.json +1 -1
  91. package/dist/optimization.d.ts +11 -8
  92. package/dist/optimization.js +11 -9
  93. package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
  94. package/dist/pipelines/index.d.ts +172 -0
  95. package/dist/pipelines/index.js +409 -0
  96. package/dist/pipelines/index.js.map +1 -0
  97. package/dist/prm/index.d.ts +99 -0
  98. package/dist/prm/index.js +222 -0
  99. package/dist/prm/index.js.map +1 -0
  100. package/dist/query-DODUYdPg.d.ts +30 -0
  101. package/dist/release-report-TDPn1cxq.d.ts +292 -0
  102. package/dist/replay-BL96gCEP.d.ts +226 -0
  103. package/dist/reporting.d.ts +10 -295
  104. package/dist/reporting.js +10 -6
  105. package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-CUOiGcGv.d.ts} +148 -146
  106. package/dist/rl.d.ts +1762 -8
  107. package/dist/rl.js +2035 -58
  108. package/dist/rl.js.map +1 -1
  109. package/dist/rubric-D5tjHNJQ.d.ts +72 -0
  110. package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
  111. package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
  112. package/dist/sequential-Dgz1n51-.d.ts +139 -0
  113. package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
  114. package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-BXGs_9V0.d.ts} +3 -76
  115. package/dist/telemetry/file.js +4 -1
  116. package/dist/telemetry/file.js.map +1 -1
  117. package/dist/telemetry/index.js +57 -57
  118. package/dist/telemetry/index.js.map +1 -1
  119. package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
  120. package/dist/traces.d.ts +142 -387
  121. package/dist/traces.js +1302 -40
  122. package/dist/traces.js.map +1 -1
  123. package/dist/trajectory-CnoBo-JY.d.ts +32 -0
  124. package/dist/wire/index.d.ts +22 -22
  125. package/dist/wire/index.js +4 -3
  126. package/package.json +44 -18
  127. package/dist/chunk-42I2QC2L.js.map +0 -1
  128. package/dist/chunk-5IIQKMD5.js.map +0 -1
  129. package/dist/chunk-6KQG5HAH.js.map +0 -1
  130. package/dist/chunk-6M774GY6.js.map +0 -1
  131. package/dist/chunk-7EAUOUQS.js.map +0 -1
  132. package/dist/chunk-AXHNWLIX.js.map +0 -1
  133. package/dist/chunk-EXGR4XEM.js.map +0 -1
  134. package/dist/chunk-IOXMGMHQ.js.map +0 -1
  135. package/dist/chunk-KAO3Q65R.js.map +0 -1
  136. package/dist/chunk-LZKIOBG2.js +0 -2026
  137. package/dist/chunk-LZKIOBG2.js.map +0 -1
  138. package/dist/chunk-QBW3YBTR.js.map +0 -1
  139. package/dist/chunk-QUKKGHTZ.js.map +0 -1
  140. package/dist/chunk-SQQLHODJ.js.map +0 -1
  141. package/dist/chunk-V5QSWN7L.js +0 -1310
  142. package/dist/chunk-V5QSWN7L.js.map +0 -1
  143. package/dist/chunk-VQQSPGSM.js.map +0 -1
  144. package/dist/chunk-XPHOZPOM.js +0 -1947
  145. package/dist/chunk-XPHOZPOM.js.map +0 -1
  146. package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
  147. package/dist/index-ekBXweiQ.d.ts +0 -1894
  148. package/dist/sequential-DgU2mFsE.d.ts +0 -304
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/power-analysis.ts","../src/paired-stats.ts","../src/summary-report.ts"],"sourcesContent":["/**\n * Power analysis + multiple-comparison correction.\n *\n * Two jobs:\n * 1. Before running: `requiredSampleSize({ effect, alpha, power })`\n * returns the N per arm needed to detect a given effect size.\n * 2. After running: `benjaminiHochberg(pValues, fdr)` and\n * `bonferroni(pValues, alpha)` correct for multiple pairwise tests\n * so pairwise variant comparisons stay statistically honest.\n *\n * Fixes the correctness bug in 0.2's pairwise optimizer which applied\n * alpha directly across n*(n-1)/2 pairwise tests without correction —\n * dramatically inflating false-positive rate when variants ≥ 3.\n */\n\n/**\n * Required N per arm for a two-sample comparison at target effect size,\n * alpha, and power. Uses the normal-approximation formula:\n *\n * n = 2 * ( (z_{1-α/2} + z_{1-β}) / d )^2\n *\n * where d is Cohen's d. Returns Infinity for effect ≤ 0.\n */\nexport function requiredSampleSize(opts: {\n effect: number\n alpha?: number\n power?: number\n twoSided?: boolean\n}): number {\n const effect = opts.effect\n if (!Number.isFinite(effect) || effect <= 0) return Infinity\n const alpha = opts.alpha ?? 0.05\n const power = opts.power ?? 0.8\n const twoSided = opts.twoSided ?? true\n const zAlpha = zQuantile(twoSided ? 1 - alpha / 2 : 1 - alpha)\n const zBeta = zQuantile(power)\n const n = 2 * ((zAlpha + zBeta) / effect) ** 2\n return Math.ceil(n)\n}\n\n/**\n * Minimum detectable paired effect (in standardised units) given a target\n * paired sample size. Closed-form inverse of the paired-t / sign-rank power\n * formula under the normal approximation:\n *\n * d_min = (z_{1-α/2} + z_β) / sqrt(n_paired)\n *\n * Multiply by `sd(deltas)` to convert to score units. Treat as a lower bound:\n * the Wilcoxon signed-rank test and bootstrap CIs have asymptotic relative\n * efficiency below 1 against the t-test on heavy-tailed distributions, so the\n * true achievable MDE in those regimes is somewhat larger.\n */\nexport function pairedMde(opts: {\n nPaired: number\n alpha?: number\n power?: number\n twoSided?: boolean\n}): number {\n if (!Number.isFinite(opts.nPaired) || opts.nPaired <= 0) return Infinity\n const alpha = opts.alpha ?? 0.05\n const power = opts.power ?? 0.8\n const twoSided = opts.twoSided ?? true\n const zAlpha = zQuantile(twoSided ? 1 - alpha / 2 : 1 - alpha)\n const zBeta = zQuantile(power)\n return (zAlpha + zBeta) / Math.sqrt(opts.nPaired)\n}\n\n/** Bonferroni adjustment: multiply every p-value by the number of tests, clamp at 1. */\nexport function bonferroni(\n pValues: number[],\n alpha = 0.05,\n): { adjusted: number[]; significant: boolean[] } {\n const k = pValues.length\n const adjusted = pValues.map((p) => Math.min(1, p * k))\n const significant = adjusted.map((p) => p < alpha)\n return { adjusted, significant }\n}\n\n/**\n * Benjamini–Hochberg false discovery rate. Returns adjusted q-values and\n * significance at the target FDR. Properly handles ties and preserves\n * monotonicity of q-values.\n */\nexport function benjaminiHochberg(\n pValues: number[],\n fdr = 0.05,\n): { qValues: number[]; significant: boolean[] } {\n const n = pValues.length\n if (n === 0) return { qValues: [], significant: [] }\n const indexed = pValues.map((p, i) => ({ p, i })).sort((a, b) => a.p - b.p)\n const q = new Array<number>(n)\n // Ranks are 1-based; q_i = p_i * n / rank_i\n let minRight = 1\n for (let k = n - 1; k >= 0; k--) {\n const rank = k + 1\n const entry = indexed[k]!\n const raw = (entry.p * n) / rank\n const bounded = Math.min(minRight, raw)\n minRight = bounded\n q[entry.i] = Math.min(1, bounded)\n }\n const significant = q.map((v) => v < fdr)\n return { qValues: q, significant }\n}\n\n/** Standard-normal inverse CDF (Acklam approximation). */\nfunction zQuantile(p: number): number {\n if (p <= 0 || p >= 1) {\n if (p === 0) return -Infinity\n if (p === 1) return Infinity\n return NaN\n }\n const a = [\n -3.969683028665376e1, 2.209460984245205e2, -2.759285104469687e2, 1.38357751867269e2,\n -3.066479806614716e1, 2.506628277459239,\n ]\n const b = [\n -5.447609879822406e1, 1.615858368580409e2, -1.556989798598866e2, 6.680131188771972e1,\n -1.328068155288572e1,\n ]\n const c = [\n -7.784894002430293e-3, -3.223964580411365e-1, -2.400758277161838, -2.549732539343734,\n 4.374664141464968, 2.938163982698783,\n ]\n const d = [7.784695709041462e-3, 3.224671290700398e-1, 2.445134137142996, 3.754408661907416]\n const pLow = 0.02425\n const pHigh = 1 - pLow\n let q: number\n let r: number\n if (p < pLow) {\n q = Math.sqrt(-2 * Math.log(p))\n return (\n (((((c[0]! * q + c[1]!) * q + c[2]!) * q + c[3]!) * q + c[4]!) * q + c[5]!) /\n ((((d[0]! * q + d[1]!) * q + d[2]!) * q + d[3]!) * q + 1)\n )\n }\n if (p <= pHigh) {\n q = p - 0.5\n r = q * q\n return (\n ((((((a[0]! * r + a[1]!) * r + a[2]!) * r + a[3]!) * r + a[4]!) * r + a[5]!) * q) /\n (((((b[0]! * r + b[1]!) * r + b[2]!) * r + b[3]!) * r + b[4]!) * r + 1)\n )\n }\n q = Math.sqrt(-2 * Math.log(1 - p))\n return (\n -(((((c[0]! * q + c[1]!) * q + c[2]!) * q + c[3]!) * q + c[4]!) * q + c[5]!) /\n ((((d[0]! * q + d[1]!) * q + d[2]!) * q + d[3]!) * q + 1)\n )\n}\n","/**\n * Paper-grade paired statistics for held-out promotion gates.\n *\n * The promotion gate (`HeldOutGate`) needs three things:\n *\n * 1. A bootstrap confidence interval on the per-item paired delta\n * (`pairedBootstrap`). Median delta is the headline number; the\n * CI lower bound is what the gate checks against `pairedDeltaThreshold`.\n * 2. A non-parametric significance test on the paired deltas\n * (`pairedWilcoxon` — re-export of `wilcoxonSignedRank` under the\n * paper-style name).\n * 3. False-discovery-rate correction across simultaneously-tested\n * candidate variants (`bhAdjust` — re-export of `benjaminiHochberg`).\n *\n * Why a separate file: every existing primitive lives in `statistics.ts`\n * (general) or `power-analysis.ts` (correction). Paired-bootstrap is\n * paired-only, paper-grade, and load-bearing for the promotion gate.\n * Putting it next to `statistics.ts` would require editing that file;\n * the brief forbids that. New file, new exports, no surface change.\n */\n\nimport { benjaminiHochberg } from './power-analysis'\nimport { wilcoxonSignedRank } from './statistics'\n\nexport interface PairedBootstrapResult {\n /** Number of paired observations (after dropping unequal lengths is rejected). */\n n: number\n /** Median of paired deltas (after − before). */\n median: number\n /** Mean of paired deltas. */\n mean: number\n /** Lower bound of the bootstrap CI on the median delta. */\n low: number\n /** Upper bound of the bootstrap CI on the median delta. */\n high: number\n /** Confidence level used (e.g. 0.95). */\n confidence: number\n /** Number of bootstrap resamples used. */\n resamples: number\n}\n\nexport interface PairedBootstrapOptions {\n /** Confidence level. Default 0.95. */\n confidence?: number\n /** Bootstrap resample count. Default 2000. */\n resamples?: number\n /** Statistic to bootstrap. Default 'median'. */\n statistic?: 'median' | 'mean'\n /** Deterministic seed. If omitted, uses Math.random(). */\n seed?: number\n}\n\n/**\n * Paired bootstrap on (after - before) deltas. Returns a CI on the\n * chosen statistic (median by default). Pairs are resampled with\n * replacement. The lower bound is what the promotion gate checks: if\n * `low > pairedDeltaThreshold`, the gain is real at the chosen\n * confidence level.\n *\n * Throws on unequal sample sizes — caller must align pairs upstream.\n */\nexport function pairedBootstrap(\n before: number[],\n after: number[],\n opts: PairedBootstrapOptions = {},\n): PairedBootstrapResult {\n if (before.length !== after.length) {\n throw new Error(`pairedBootstrap: unequal sample sizes (${before.length} vs ${after.length})`)\n }\n const confidence = opts.confidence ?? 0.95\n const resamples = opts.resamples ?? 2000\n const statistic = opts.statistic ?? 'median'\n if (confidence <= 0 || confidence >= 1) {\n throw new Error(`pairedBootstrap: confidence must be in (0,1), got ${confidence}`)\n }\n\n const n = before.length\n const deltas = before.map((b, i) => after[i]! - b)\n if (n === 0) {\n return { n: 0, median: 0, mean: 0, low: 0, high: 0, confidence, resamples }\n }\n if (n === 1) {\n const d = deltas[0]!\n return { n: 1, median: d, mean: d, low: d, high: d, confidence, resamples }\n }\n\n const rng = makeRng(opts.seed)\n const samples = new Array<number>(resamples)\n for (let b = 0; b < resamples; b++) {\n let acc: number[] | null = null\n if (statistic === 'mean') {\n let sum = 0\n for (let k = 0; k < n; k++) {\n sum += deltas[Math.floor(rng() * n)]!\n }\n samples[b] = sum / n\n } else {\n acc = new Array<number>(n)\n for (let k = 0; k < n; k++) {\n acc[k] = deltas[Math.floor(rng() * n)]!\n }\n samples[b] = medianInPlace(acc)\n }\n }\n samples.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowIdx = Math.floor((alpha / 2) * resamples)\n const highIdx = Math.min(resamples - 1, Math.ceil((1 - alpha / 2) * resamples) - 1)\n\n return {\n n,\n median: medianInPlace([...deltas]),\n mean: deltas.reduce((s, x) => s + x, 0) / n,\n low: samples[lowIdx]!,\n high: samples[Math.max(highIdx, lowIdx)]!,\n confidence,\n resamples,\n }\n}\n\n/**\n * Paper-style alias for `wilcoxonSignedRank`. The signed-rank test on\n * paired deltas is the standard non-parametric significance test for\n * \"candidate beats baseline on matched items.\" Use alongside the\n * bootstrap CI: bootstrap gives effect size, Wilcoxon gives p.\n */\nexport function pairedWilcoxon(before: number[], after: number[]): { w: number; p: number } {\n return wilcoxonSignedRank(before, after)\n}\n\n/**\n * Paper-style alias for `benjaminiHochberg`. Use to correct p-values\n * across multiple candidate-vs-baseline comparisons run in the same\n * promotion sweep. Returns BH-adjusted q-values and significance at\n * the requested FDR (default 0.05).\n */\nexport function bhAdjust(\n pValues: number[],\n fdr = 0.05,\n): { qValues: number[]; significant: boolean[] } {\n return benjaminiHochberg(pValues, fdr)\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nfunction medianInPlace(xs: number[]): number {\n if (xs.length === 0) return 0\n xs.sort((a, b) => a - b)\n const mid = Math.floor(xs.length / 2)\n return xs.length % 2 === 0 ? (xs[mid - 1]! + xs[mid]!) / 2 : xs[mid]!\n}\n\n/**\n * Tiny seedable PRNG (mulberry32). Deterministic given a seed; falls\n * back to Math.random when seed is omitted. Adequate for bootstrap\n * resampling — not cryptographic.\n */\nfunction makeRng(seed: number | undefined): () => number {\n if (seed === undefined) return Math.random\n let s = seed | 0 || 0x9e3779b9\n return () => {\n s = (s + 0x6d2b79f5) | 0\n let t = s\n t = Math.imul(t ^ (t >>> 15), t | 1)\n t ^= t + Math.imul(t ^ (t >>> 7), t | 61)\n return ((t ^ (t >>> 14)) >>> 0) / 4294967296\n }\n}\n","/**\n * Reporting helpers — production summaries and paper-quality figures — sit alongside `reporter.ts` rather\n * than replacing it.\n *\n * Three artefacts:\n *\n * - `summaryTable` Markdown table of per-candidate means,\n * 95% bootstrap CIs, BH-adjusted Wilcoxon\n * p-values, and Cohen's d versus a\n * comparator candidate.\n * - `paretoChart` Abstract spec for a cost vs quality\n * scatter, with gate decisions overlaid.\n * Returns numbers + labels — caller\n * chooses the plotting library.\n * - `gainHistogram`\n * Per-item paired holdout deltas as a\n * histogram spec (bins + counts + median +\n * CI). Same \"data, not images\" contract.\n *\n * The figure types are PlotSpecs — JSON-friendly, library-agnostic.\n * They aren't React components and they aren't PNGs; they are\n * what you'd hand to vega-lite, plotly, matplotlib, or your own\n * Canvas renderer to draw the actual figure.\n */\n\nimport type { GateDecision } from './held-out-gate'\nimport { pairedBootstrap } from './paired-stats'\nimport type { FailureClusterReport } from './pipelines/failure-cluster'\nimport { benjaminiHochberg, pairedMde } from './power-analysis'\nimport { canonicalize, hashJson } from './pre-registration'\nimport type { RunRecord } from './run-record'\nimport { cohensD, confidenceInterval, wilcoxonSignedRank } from './statistics'\n\n// ── summaryTable ───────────────────────────────────────────────────────\n\nexport interface SummaryTableOptions {\n /** Comparator candidate id. Wilcoxon + Cohen's d are computed\n * versus this candidate. Required for paired stats columns. */\n comparator?: string\n /** Which split to read scores from. Default 'holdout'. */\n split?: 'search' | 'holdout'\n /** Confidence level for the bootstrap CI on the mean. Default 0.95. */\n confidence?: number\n /** FDR for BH adjustment of the comparison p-values. Default 0.05. */\n fdr?: number\n}\n\nexport interface SummaryTableRow {\n candidateId: string\n n: number\n mean: number\n ciLow: number\n ciHigh: number\n /** BH-adjusted q-value vs comparator. NaN if no comparator. */\n qValue: number\n /** Cohen's d vs comparator. NaN if no comparator. */\n cohensD: number\n}\n\nexport interface SummaryTable {\n rows: SummaryTableRow[]\n comparator: string | null\n split: 'search' | 'holdout'\n /** Pre-rendered markdown — drop into a paper or PR. */\n markdown: string\n}\n\n/**\n * Table 1 helper. Buckets runs by `candidateId`, computes mean +\n * bootstrap CI on the chosen split, and (when a comparator is given)\n * BH-adjusted Wilcoxon p + Cohen's d versus that comparator.\n */\nexport function summaryTable(runs: RunRecord[], opts: SummaryTableOptions = {}): SummaryTable {\n const split = opts.split ?? 'holdout'\n const confidence = opts.confidence ?? 0.95\n const fdr = opts.fdr ?? 0.05\n const comparator = opts.comparator ?? null\n const scoreField = split === 'holdout' ? 'holdoutScore' : 'searchScore'\n\n const byCandidate = new Map<string, { runs: RunRecord[]; scores: number[] }>()\n for (const r of runs) {\n if (r.splitTag !== split) continue\n const v = r.outcome[scoreField]\n if (typeof v !== 'number' || !Number.isFinite(v)) continue\n const bucket = byCandidate.get(r.candidateId) ?? { runs: [], scores: [] }\n bucket.runs.push(r)\n bucket.scores.push(v)\n byCandidate.set(r.candidateId, bucket)\n }\n\n const candidateIds = [...byCandidate.keys()].sort()\n const compRuns = comparator ? byCandidate.get(comparator) : undefined\n\n // First pass: per-candidate means + CIs + raw p-values.\n const tentative: Array<SummaryTableRow & { rawP: number }> = []\n for (const id of candidateIds) {\n const bucket = byCandidate.get(id)!\n const ci = confidenceInterval(bucket.scores, confidence)\n let rawP = Number.NaN\n let d = Number.NaN\n if (comparator && compRuns && id !== comparator) {\n const paired = pairScoresByKey(bucket.runs, compRuns.runs, scoreField)\n if (paired.before.length >= 6) {\n rawP = wilcoxonSignedRank(paired.before, paired.after).p\n }\n d = cohensD(compRuns.scores, bucket.scores)\n }\n tentative.push({\n candidateId: id,\n n: bucket.scores.length,\n mean: ci.mean,\n ciLow: ci.lower,\n ciHigh: ci.upper,\n qValue: rawP,\n cohensD: d,\n rawP,\n })\n }\n\n // BH-adjust across the comparison set (skip NaN rows / the\n // comparator itself). Adjustment is a no-op when there are 0 or 1\n // comparators.\n if (comparator) {\n const idxs: number[] = []\n const ps: number[] = []\n for (let i = 0; i < tentative.length; i++) {\n const r = tentative[i]!\n if (r.candidateId === comparator) continue\n if (!Number.isFinite(r.rawP)) continue\n idxs.push(i)\n ps.push(r.rawP)\n }\n if (ps.length > 0) {\n const { qValues } = benjaminiHochberg(ps, fdr)\n for (let k = 0; k < idxs.length; k++) {\n tentative[idxs[k]!]!.qValue = qValues[k]!\n }\n }\n }\n\n const rows = tentative.map(({ rawP: _rawP, ...rest }) => rest)\n const markdown = renderSummaryTableMarkdown(rows, comparator, split)\n return { rows, comparator, split, markdown }\n}\n\nfunction pairScoresByKey(\n candidate: RunRecord[],\n baseline: RunRecord[],\n scoreField: 'searchScore' | 'holdoutScore',\n): { before: number[]; after: number[] } {\n const baseIdx = new Map<string, number>()\n for (const r of baseline) {\n const v = r.outcome[scoreField]\n if (typeof v === 'number' && Number.isFinite(v)) {\n baseIdx.set(`${r.experimentId}::${r.seed}`, v)\n }\n }\n const before: number[] = []\n const after: number[] = []\n for (const r of candidate) {\n const v = r.outcome[scoreField]\n if (typeof v !== 'number' || !Number.isFinite(v)) continue\n const key = `${r.experimentId}::${r.seed}`\n const b = baseIdx.get(key)\n if (b === undefined) continue\n before.push(b)\n after.push(v)\n }\n return { before, after }\n}\n\nfunction renderSummaryTableMarkdown(\n rows: SummaryTableRow[],\n comparator: string | null,\n split: 'search' | 'holdout',\n): string {\n const lines: string[] = []\n const cmpLabel = comparator ? ` (vs ${comparator})` : ''\n lines.push(`Summary Table — ${split} split${cmpLabel}`)\n lines.push('')\n lines.push(\"| Candidate | N | Mean | 95% CI | q (BH) | Cohen's d |\")\n lines.push('|---|---:|---:|---|---:|---:|')\n for (const r of rows) {\n const ci = `[${fmt(r.ciLow)}, ${fmt(r.ciHigh)}]`\n const q = Number.isFinite(r.qValue) ? r.qValue.toFixed(4) : '—'\n const d = Number.isFinite(r.cohensD) ? r.cohensD.toFixed(3) : '—'\n lines.push(`| ${r.candidateId} | ${r.n} | ${fmt(r.mean)} | ${ci} | ${q} | ${d} |`)\n }\n return lines.join('\\n')\n}\n\n// ── paretoChart ─────────────────────────────────────────────────────\n\nexport interface ParetoPoint {\n candidateId: string\n /** Mean USD cost per run on the chosen split. */\n cost: number\n /** Mean score on the chosen split. */\n quality: number\n /** Number of runs that informed this point. */\n n: number\n /** Whether this candidate is on the Pareto frontier — high\n * quality, low cost, no dominator. */\n onFrontier: boolean\n /** Optional gate verdict for this candidate, if a `GateDecision`\n * for it was passed in. */\n gate?: 'promote' | 'reject_few_runs' | 'reject_negative_delta' | 'reject_overfit_gap' | null\n}\n\nexport interface ParetoFigureSpec {\n kind: 'pareto-cost-quality'\n split: 'search' | 'holdout'\n points: ParetoPoint[]\n axes: { x: 'costUsd'; y: 'score' }\n}\n\n/**\n * Cost vs quality scatter spec. `gateDecisions` is keyed by\n * candidate id; if present, every point picks up the gate verdict\n * for overlay.\n */\nexport function paretoChart(\n runs: RunRecord[],\n opts: {\n split?: 'search' | 'holdout'\n gateDecisions?: Record<string, GateDecision>\n } = {},\n): ParetoFigureSpec {\n const split = opts.split ?? 'holdout'\n const scoreField = split === 'holdout' ? 'holdoutScore' : 'searchScore'\n\n const buckets = new Map<string, { cost: number[]; quality: number[] }>()\n for (const r of runs) {\n if (r.splitTag !== split) continue\n const v = r.outcome[scoreField]\n if (typeof v !== 'number' || !Number.isFinite(v)) continue\n const bucket = buckets.get(r.candidateId) ?? { cost: [], quality: [] }\n bucket.cost.push(r.costUsd)\n bucket.quality.push(v)\n buckets.set(r.candidateId, bucket)\n }\n\n const points: ParetoPoint[] = []\n for (const [candidateId, bucket] of buckets.entries()) {\n points.push({\n candidateId,\n cost: avg(bucket.cost),\n quality: avg(bucket.quality),\n n: bucket.cost.length,\n onFrontier: false,\n gate: opts.gateDecisions?.[candidateId]\n ? gateLabel(opts.gateDecisions[candidateId]!)\n : undefined,\n })\n }\n\n // Pareto: minimize cost, maximize quality. A point is dominated if\n // some other point has lower-or-equal cost AND higher-or-equal\n // quality with strict inequality somewhere.\n for (const p of points) {\n p.onFrontier = !points.some((q) => q !== p && dominates(q, p))\n }\n\n return {\n kind: 'pareto-cost-quality',\n split,\n axes: { x: 'costUsd', y: 'score' },\n points,\n }\n}\n\nfunction dominates(a: ParetoPoint, b: ParetoPoint): boolean {\n return a.cost <= b.cost && a.quality >= b.quality && (a.cost < b.cost || a.quality > b.quality)\n}\n\nfunction gateLabel(d: GateDecision): ParetoPoint['gate'] {\n if (d.promote) return 'promote'\n if (d.rejectionCode === 'few_runs') return 'reject_few_runs'\n if (d.rejectionCode === 'negative_delta') return 'reject_negative_delta'\n if (d.rejectionCode === 'overfit_gap') return 'reject_overfit_gap'\n return null\n}\n\n// ── gainHistogram ───────────────────────────────────────────\n\nexport interface GainDistributionBin {\n /** Inclusive lower edge. */\n lo: number\n /** Exclusive upper edge (or inclusive if it's the last bin). */\n hi: number\n /** Number of pairs whose delta lands in this bin. */\n count: number\n}\n\nexport interface GainDistributionFigureSpec {\n kind: 'gain-distribution'\n candidateId: string\n comparator: string\n split: 'search' | 'holdout'\n /** Number of pairs used. */\n n: number\n bins: GainDistributionBin[]\n median: number\n ci: { low: number; high: number }\n}\n\nexport interface GainDistributionOptions {\n /** Number of histogram bins. Default 11 (so the centre is exact at 0). */\n bins?: number\n /** Which split to use. Default 'holdout'. */\n split?: 'search' | 'holdout'\n /** Confidence level for the CI. Default 0.95. */\n confidence?: number\n /** Bootstrap resamples. Default 2000. */\n resamples?: number\n /** Deterministic seed. */\n seed?: number\n}\n\n/**\n * Held-out improvement distribution: per-pair delta (candidate −\n * comparator), histogrammed. Includes the bootstrap CI on the median\n * delta — same primitive the promotion gate uses.\n */\nexport function gainHistogram(\n runs: RunRecord[],\n candidateId: string,\n comparator: string,\n opts: GainDistributionOptions = {},\n): GainDistributionFigureSpec {\n const split = opts.split ?? 'holdout'\n const scoreField = split === 'holdout' ? 'holdoutScore' : 'searchScore'\n const binCount = opts.bins ?? 11\n if (binCount < 1) throw new Error('gainHistogram: bins must be ≥ 1')\n\n const candidate = runs.filter((r) => r.candidateId === candidateId && r.splitTag === split)\n const baseline = runs.filter((r) => r.candidateId === comparator && r.splitTag === split)\n // pairScoresByKey returns before=baseline-score, after=candidate-score\n // for each (experimentId, seed) pair where both sides recorded a\n // valid score on this split. delta = after - before = candidate - baseline.\n const { before, after } = pairScoresByKey(candidate, baseline, scoreField)\n const n = before.length\n\n if (n === 0) {\n return {\n kind: 'gain-distribution',\n candidateId,\n comparator,\n split,\n n: 0,\n bins: [],\n median: 0,\n ci: { low: 0, high: 0 },\n }\n }\n\n const deltas = before.map((b, i) => after[i]! - b)\n const sortedDeltas = [...deltas].sort((a, b) => a - b)\n const median = medianOfSorted(sortedDeltas)\n const min = sortedDeltas[0]!\n const max = sortedDeltas[sortedDeltas.length - 1]!\n\n // Symmetric bins around the wider of (|min|, |max|) so the chart\n // visually centres on zero without dropping outliers.\n const bound = Math.max(Math.abs(min), Math.abs(max), 1e-6)\n const lo = -bound\n const hi = bound\n const width = (hi - lo) / binCount\n const bins: GainDistributionBin[] = []\n for (let i = 0; i < binCount; i++) {\n bins.push({ lo: lo + i * width, hi: lo + (i + 1) * width, count: 0 })\n }\n for (const d of deltas) {\n let idx = Math.floor((d - lo) / width)\n if (idx < 0) idx = 0\n if (idx >= binCount) idx = binCount - 1\n bins[idx]!.count += 1\n }\n\n const ci = pairedBootstrap(before, after, {\n confidence: opts.confidence ?? 0.95,\n resamples: opts.resamples ?? 2000,\n statistic: 'median',\n seed: opts.seed,\n })\n\n return {\n kind: 'gain-distribution',\n candidateId,\n comparator,\n split,\n n,\n bins,\n median,\n ci: { low: ci.low, high: ci.high },\n }\n}\n\n// ── researchReport ───────────────────────────────────────────────────\n\nexport type ResearchReportDecision =\n | 'promote'\n | 'hold'\n | 'reject'\n | 'equivalent'\n | 'needs_more_data'\n\n/**\n * Hard floor below which a paired comparison is treated as uninformative\n * regardless of `minPairs`. Mirrors the lower limit on Wilcoxon signed-rank\n * exact tables; below this the test has no power to separate effect sizes.\n */\nexport const RESEARCH_REPORT_HARD_PAIR_FLOOR = 6\n\nexport interface ResearchReportOptions {\n /** Human-readable report title. */\n title?: string\n /** Comparator candidate id. Required for statistical decision guidance. */\n comparator?: string\n /** Which split to use for the primary decision. Default 'holdout'. */\n split?: 'search' | 'holdout'\n /** Confidence level used by lower-level report helpers. Default 0.95. */\n confidence?: number\n /** FDR threshold for q-values. Default 0.05. */\n fdr?: number\n /**\n * Soft floor on paired observations before issuing a directional\n * promote / reject. Below this we report `needs_more_data` and surface the\n * minimum detectable effect at the current N. Default 20 — chosen so the\n * Wilcoxon signed-rank approximation is reasonable and so the paired\n * bootstrap CI has non-degenerate coverage. Hard floor is enforced at\n * `RESEARCH_REPORT_HARD_PAIR_FLOOR` (6) regardless of this value.\n */\n minPairs?: number\n /**\n * Region of Practical Equivalence on the paired delta. When a candidate's\n * paired-delta CI is fully contained in `[low, high]`, the decision is\n * `equivalent` rather than `hold`. Sourced from the domain owner — there is\n * no statistically-defensible default.\n */\n rope?: { low: number; high: number }\n /**\n * Power for the minimum detectable effect (MDE) reported on each candidate.\n * Default 0.8.\n */\n mdePower?: number\n /**\n * Two-sided alpha for the MDE. Default matches `fdr` so the reported MDE\n * lines up with the test the report actually runs.\n */\n mdeAlpha?: number\n /** Optional held-out gate decisions keyed by candidate id. */\n gateDecisions?: Record<string, GateDecision>\n /** Optional failure clusters from failureClusterView. */\n failureClusters?: FailureClusterReport\n /** Build gain histograms for these candidates. Defaults to all non-comparator candidates. */\n candidateIds?: string[]\n /** Deterministic bootstrap seed passed to gainHistogram and the posterior helper. */\n seed?: number\n /** Report timestamp. Defaults to current time. */\n generatedAt?: string\n /**\n * Hash of a preregistered protocol (e.g. `signManifest({...}).contentHash`).\n * Embedded verbatim in the report so the analysis can be cited as the\n * preregistered one rather than a post-hoc fishing expedition.\n */\n preregistrationHash?: string\n}\n\nexport interface ResearchReportRecommendation {\n decision: ResearchReportDecision\n candidateId: string | null\n rationale: string[]\n risks: string[]\n nextActions: string[]\n}\n\nexport interface ResearchReportCandidate {\n candidateId: string\n n: number\n mean: number\n ciLow: number\n ciHigh: number\n qValue: number\n cohensD: number\n meanDeltaVsComparator: number | null\n pairedN: number\n medianGain: number | null\n meanGain: number | null\n gainCi: { low: number; high: number } | null\n /**\n * Bayesian-bootstrap-style posterior summaries on the paired delta. Computed\n * from the same resamples that produce the gain CI; interpretable as\n * \"fraction of resamples in which the candidate beats the comparator on\n * matched pairs.\"\n */\n prGreaterThanZero: number | null\n prInRope: number | null\n /**\n * Minimum detectable effect (in score units) at the candidate's paired N,\n * the configured power, and the configured alpha. Standardised by the\n * observed paired-delta SD and inverted via `requiredSampleSize`. Reported\n * for every candidate so a `needs_more_data` verdict is actionable.\n */\n mde: number | null\n onParetoFrontier: boolean\n gate?: ParetoPoint['gate']\n decision: ResearchReportDecision\n decisionReason: string\n}\n\nexport interface ResearchReportMethodology {\n /**\n * Plain-language assumptions the report depends on. Read these first when\n * deciding whether the verdict is load-bearing for a launch decision.\n */\n assumptions: string[]\n /** Tests and estimators the verdict was computed from. */\n methods: string[]\n /** Alternatives the author considered and why this report didn't take them. */\n alternatives: string[]\n /** Failure modes — when this report should NOT drive a decision. */\n whenNotToApply: string[]\n /** Citations for the methodological choices above. */\n citations: string[]\n}\n\nexport interface ResearchReport {\n kind: 'agent-eval-research-report'\n title: string\n generatedAt: string\n split: 'search' | 'holdout'\n comparator: string | null\n /**\n * SHA-256 over the canonicalised set of `(runId, candidateId, split)` triples\n * the report was computed from, plus the comparator and split. Stable across\n * key insertion order; recomputable by the reader to verify provenance.\n */\n runFingerprint: string\n preregistrationHash: string | null\n rope: { low: number; high: number } | null\n executiveSummary: string[]\n recommendation: ResearchReportRecommendation\n candidates: ResearchReportCandidate[]\n summary: SummaryTable\n charts: {\n pareto: ParetoFigureSpec\n gains: GainDistributionFigureSpec[]\n }\n methodology: ResearchReportMethodology\n failureClusters?: FailureClusterReport\n markdown: string\n html: string\n}\n\n/**\n * Internal: paired posterior summary on (candidate − comparator) deltas.\n *\n * Returns the bootstrap CI on the median (matching `gainHistogram`) plus\n * Bayesian-flavoured posterior summaries Pr(Δ>0) and Pr(Δ∈ROPE) computed\n * from a Bayesian-bootstrap-flavoured resample distribution on the mean\n * (Rubin 1981 — non-informative bootstrap-prior duality), and the\n * minimum detectable paired effect at the configured power and α.\n *\n * `null` is returned when no paired observations exist; callers must\n * gate on `n` before consuming the bootstrap statistics.\n */\nfunction pairedPosterior(\n runs: RunRecord[],\n candidateId: string,\n comparator: string,\n opts: {\n split: 'search' | 'holdout'\n confidence: number\n seed?: number\n rope: { low: number; high: number } | null\n mdePower: number\n mdeAlpha: number\n },\n): {\n n: number\n meanDelta: number\n medianDelta: number\n sdDelta: number\n ci: { low: number; high: number }\n prGreaterThanZero: number\n prInRope: number | null\n mde: number\n} | null {\n const scoreField = opts.split === 'holdout' ? 'holdoutScore' : 'searchScore'\n const candidate = runs.filter((r) => r.candidateId === candidateId && r.splitTag === opts.split)\n const baseline = runs.filter((r) => r.candidateId === comparator && r.splitTag === opts.split)\n const { before, after } = pairScoresByKey(candidate, baseline, scoreField)\n const n = before.length\n if (n === 0) return null\n\n const deltas = before.map((b, i) => after[i]! - b)\n const meanDelta = deltas.reduce((s, x) => s + x, 0) / n\n const sortedDeltas = [...deltas].sort((a, b) => a - b)\n const medianDelta = medianOfSorted(sortedDeltas)\n const sdDelta = stdev(deltas, meanDelta)\n\n const ci = pairedBootstrap(before, after, {\n confidence: opts.confidence,\n resamples: 2000,\n statistic: 'median',\n seed: opts.seed,\n })\n\n // Enumerate bootstrap-mean samples to derive posterior summaries on the\n // mean delta. Same RNG family as `pairedBootstrap` but kept local so we can\n // examine the full sample distribution rather than just quantiles.\n const meanSamples = bootstrapMeanSamples(deltas, 2000, opts.seed)\n const prGreaterThanZero =\n meanSamples.length === 0 ? 0 : meanSamples.filter((s) => s > 0).length / meanSamples.length\n const prInRope =\n opts.rope === null || meanSamples.length === 0\n ? null\n : meanSamples.filter((s) => s >= opts.rope!.low && s <= opts.rope!.high).length /\n meanSamples.length\n\n const dStandardised = pairedMde({ nPaired: n, alpha: opts.mdeAlpha, power: opts.mdePower })\n const mde = sdDelta === 0 ? 0 : dStandardised * sdDelta\n\n return {\n n,\n meanDelta,\n medianDelta,\n sdDelta,\n ci: { low: ci.low, high: ci.high },\n prGreaterThanZero,\n prInRope,\n mde,\n }\n}\n\nfunction bootstrapMeanSamples(deltas: number[], resamples: number, seed?: number): number[] {\n const n = deltas.length\n if (n === 0) return []\n if (n === 1) return new Array<number>(resamples).fill(deltas[0]!)\n const rng = seedRng(seed)\n const samples = new Array<number>(resamples)\n for (let b = 0; b < resamples; b++) {\n let sum = 0\n for (let k = 0; k < n; k++) sum += deltas[Math.floor(rng() * n)]!\n samples[b] = sum / n\n }\n return samples\n}\n\nfunction seedRng(seed?: number): () => number {\n if (seed === undefined) return Math.random\n let s = seed >>> 0\n return () => {\n s = (s + 0x6d2b79f5) >>> 0\n let t = s\n t = Math.imul(t ^ (t >>> 15), t | 1)\n t ^= t + Math.imul(t ^ (t >>> 7), t | 61)\n return ((t ^ (t >>> 14)) >>> 0) / 4294967296\n }\n}\n\nfunction stdev(xs: number[], mean: number): number {\n if (xs.length < 2) return 0\n let sse = 0\n for (const x of xs) sse += (x - mean) ** 2\n return Math.sqrt(sse / (xs.length - 1))\n}\n\n/**\n * Executive research report for CPO / AI-lead / launch-review consumption.\n *\n * Composes:\n * - `summaryTable` marginal stats with BH-FDR-adjusted q-values\n * - `paretoChart` cost-vs-quality frontier with gate overlay\n * - `gainHistogram` per-candidate paired-delta distribution\n * - paired posterior (this file): bootstrap CI on median, Pr(Δ>0),\n * Pr(Δ∈ROPE), MDE at the configured power\n *\n * Decisions are made on paired evidence — never on marginal means alone —\n * and respect any held-out gate decision the caller passes through. The\n * report embeds a SHA-256 fingerprint of the input run set and, optionally,\n * the hash of a preregistered protocol so a downstream reader can verify\n * provenance and that the analysis was the preregistered one.\n *\n * Async because the fingerprint uses Web Crypto via `hashJson`; deterministic\n * for any fixed `runs`, `seed`, and ROPE.\n */\nexport async function researchReport(\n runs: RunRecord[],\n opts: ResearchReportOptions = {},\n): Promise<ResearchReport> {\n const split = opts.split ?? 'holdout'\n const comparator = opts.comparator ?? null\n const confidence = opts.confidence ?? 0.95\n const fdr = opts.fdr ?? 0.05\n const minPairs = Math.max(opts.minPairs ?? 20, RESEARCH_REPORT_HARD_PAIR_FLOOR)\n const rope = opts.rope ?? null\n const mdePower = opts.mdePower ?? 0.8\n const mdeAlpha = opts.mdeAlpha ?? fdr\n const title = opts.title ?? 'Agent Evaluation Research Report'\n const generatedAt = opts.generatedAt ?? new Date().toISOString()\n const preregistrationHash = opts.preregistrationHash ?? null\n\n if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {\n throw new Error(\n `researchReport: rope must satisfy low ≤ high with finite bounds, got ${JSON.stringify(rope)}`,\n )\n }\n\n const summary = summaryTable(runs, {\n comparator: comparator ?? undefined,\n split,\n confidence,\n fdr,\n })\n const pareto = paretoChart(runs, { split, gateDecisions: opts.gateDecisions })\n const candidateIds =\n opts.candidateIds ?? summary.rows.map((r) => r.candidateId).filter((id) => id !== comparator)\n const gains = comparator\n ? candidateIds.map((id) =>\n gainHistogram(runs, id, comparator, {\n split,\n confidence,\n seed: opts.seed,\n }),\n )\n : []\n\n const gainByCandidate = new Map(gains.map((g) => [g.candidateId, g]))\n const paretoByCandidate = new Map(pareto.points.map((p) => [p.candidateId, p]))\n const posteriorByCandidate = new Map<string, ReturnType<typeof pairedPosterior>>()\n if (comparator) {\n for (const id of candidateIds) {\n posteriorByCandidate.set(\n id,\n pairedPosterior(runs, id, comparator, {\n split,\n confidence,\n seed: opts.seed,\n rope,\n mdePower,\n mdeAlpha,\n }),\n )\n }\n }\n\n const candidates = summary.rows\n .map((row) => {\n const gain = gainByCandidate.get(row.candidateId)\n const point = paretoByCandidate.get(row.candidateId)\n const posterior = posteriorByCandidate.get(row.candidateId) ?? null\n const classified = classifyCandidate(row, {\n comparator,\n posterior,\n point,\n fdr,\n minPairs,\n rope,\n })\n return {\n candidateId: row.candidateId,\n n: row.n,\n mean: row.mean,\n ciLow: row.ciLow,\n ciHigh: row.ciHigh,\n qValue: row.qValue,\n cohensD: row.cohensD,\n meanDeltaVsComparator: posterior ? posterior.meanDelta : null,\n pairedN: posterior?.n ?? gain?.n ?? 0,\n medianGain: posterior ? posterior.medianDelta : gain ? gain.median : null,\n meanGain: posterior ? posterior.meanDelta : null,\n gainCi: posterior ? posterior.ci : gain ? gain.ci : null,\n prGreaterThanZero: posterior ? posterior.prGreaterThanZero : null,\n prInRope: posterior ? posterior.prInRope : null,\n mde: posterior ? posterior.mde : null,\n onParetoFrontier: point?.onFrontier ?? false,\n gate: point?.gate,\n decision: classified.decision,\n decisionReason: classified.reason,\n } satisfies ResearchReportCandidate\n })\n .sort((a, b) => {\n const decisionRank = decisionWeight(b.decision) - decisionWeight(a.decision)\n if (decisionRank !== 0) return decisionRank\n return b.mean - a.mean\n })\n\n const recommendation = buildRecommendation(candidates, {\n comparator,\n failureClusters: opts.failureClusters,\n rope,\n minPairs,\n preregistrationHash,\n })\n const executiveSummary = buildExecutiveSummary(candidates, recommendation, {\n comparator,\n split,\n failureClusters: opts.failureClusters,\n preregistrationHash,\n })\n const methodology = buildMethodology({\n split,\n comparator,\n fdr,\n minPairs,\n rope,\n confidence,\n mdePower,\n mdeAlpha,\n })\n\n const runFingerprint = await hashJson(\n canonicalize({\n triples: runs\n .filter((r) => r.splitTag === split)\n .map((r) => ({ runId: r.runId, candidateId: r.candidateId, splitTag: r.splitTag }))\n .sort((a, b) => a.runId.localeCompare(b.runId)),\n comparator,\n split,\n }),\n )\n\n const markdown = renderResearchMarkdown({\n title,\n generatedAt,\n split,\n comparator,\n rope,\n runFingerprint,\n preregistrationHash,\n executiveSummary,\n recommendation,\n candidates,\n summary,\n pareto,\n gains,\n methodology,\n failureClusters: opts.failureClusters,\n })\n const html = renderResearchHtml(markdown, title)\n\n return {\n kind: 'agent-eval-research-report',\n title,\n generatedAt,\n split,\n comparator,\n runFingerprint,\n preregistrationHash,\n rope,\n executiveSummary,\n recommendation,\n candidates,\n summary,\n charts: { pareto, gains },\n methodology,\n failureClusters: opts.failureClusters,\n markdown,\n html,\n }\n}\n\nfunction buildMethodology(ctx: {\n split: 'search' | 'holdout'\n comparator: string | null\n fdr: number\n minPairs: number\n rope: { low: number; high: number } | null\n confidence: number\n mdePower: number\n mdeAlpha: number\n}): ResearchReportMethodology {\n const assumptions: string[] = [\n 'Pairs are matched by (experimentId, seed); the candidate and comparator see the same scenarios in the same order.',\n 'Paired deltas are exchangeable conditional on the matched scenario — no mid-run distribution shift.',\n `Decisions are pre-specified at fdr=${ctx.fdr}, minPairs=${ctx.minPairs}, confidence=${ctx.confidence}; deviating from these post-hoc invalidates the false-discovery control.`,\n ]\n if (ctx.rope) {\n assumptions.push(\n `The Region of Practical Equivalence ${formatRope(ctx.rope)} is supplied by the domain owner; equivalent verdicts are only meaningful if that range is treated as the standing definition of \"no material difference.\"`,\n )\n }\n if (ctx.comparator === null) {\n assumptions.push('No comparator was configured; this run is descriptive, not causal.')\n }\n const methods: string[] = [\n \"Marginal scores summarised with BH-FDR-adjusted Wilcoxon signed-rank q-values and Cohen's d via summaryTable.\",\n 'Paired evidence summarised with bootstrap CI on the median delta and Bayesian-bootstrap-style Pr(Δ>0) and Pr(Δ∈ROPE) on the mean delta.',\n `Minimum detectable effect reported per candidate at α=${ctx.mdeAlpha} (two-sided), power=${ctx.mdePower}, standardised by the observed paired-delta SD.`,\n 'Pareto frontier flagged as a separate axis (cost vs quality); a candidate can be on-frontier without winning the paired test.',\n 'Held-out gate decisions, when supplied, override the statistical verdict in the reject direction.',\n ]\n const alternatives: string[] = [\n 'Paired t-test rejected: not robust to the heavy-tailed score distributions common in agent benchmarks.',\n 'Unpaired Mann–Whitney rejected: matched scenarios make pairing free; unpaired throws away that variance reduction.',\n 'Sequential / always-valid inference (e-values, mSPRT) is the right tool for iterative sweeps and is out of scope for this single-look report — preregister and run once, or wrap this report in an alpha-spending schedule.',\n 'Hierarchical Bayesian shrinkage across many candidates is future work; the current ranking uses raw paired statistics.',\n ]\n const whenNotToApply: string[] = [\n `Paired N below ${RESEARCH_REPORT_HARD_PAIR_FLOOR} on any candidate — the bootstrap CI is degenerate.`,\n 'Comparator chosen post-hoc by inspecting the same data; q-values are no longer false-discovery-controlled.',\n 'Scenarios not drawn under a stable preregistered protocol; the report can describe the data but cannot anchor a launch decision.',\n 'Score distributions with mid-run shift (judge model swap, rubric change, infra outage) — pair exchangeability is violated.',\n ]\n const citations: string[] = [\n 'Benjamini, Y. & Hochberg, Y. (1995). Controlling the false discovery rate: a practical and powerful approach to multiple testing. JRSS B, 57(1), 289–300.',\n 'Wilcoxon, F. (1945). Individual comparisons by ranking methods. Biometrics Bulletin, 1(6), 80–83.',\n 'Efron, B. (1979). Bootstrap methods: another look at the jackknife. Annals of Statistics, 7(1), 1–26.',\n 'Rubin, D. B. (1981). The Bayesian bootstrap. Annals of Statistics, 9(1), 130–134.',\n 'Kruschke, J. K. (2018). Rejecting or accepting parameter values in Bayesian estimation. Advances in Methods and Practices in Psychological Science, 1(2), 270–280. (ROPE.)',\n ]\n return { assumptions, methods, alternatives, whenNotToApply, citations }\n}\n\nfunction formatRope(rope: { low: number; high: number }): string {\n return `[${fmt(rope.low)}, ${fmt(rope.high)}]`\n}\n\nfunction classifyCandidate(\n row: SummaryTableRow,\n ctx: {\n comparator: string | null\n posterior: ReturnType<typeof pairedPosterior> | null\n point?: ParetoPoint\n fdr: number\n minPairs: number\n rope: { low: number; high: number } | null\n },\n): { decision: ResearchReportDecision; reason: string } {\n if (ctx.comparator && row.candidateId === ctx.comparator) {\n return { decision: 'hold', reason: 'Comparator baseline.' }\n }\n if (!ctx.comparator) {\n return {\n decision: ctx.point?.onFrontier ? 'hold' : 'needs_more_data',\n reason:\n 'No comparator configured; report ranks candidates but cannot anchor a promotion call.',\n }\n }\n // Held-out gate is authoritative against — promote requires statistical\n // evidence even if the gate said `promote` (gate is necessary, not sufficient).\n if (ctx.point?.gate && ctx.point.gate !== 'promote') {\n return { decision: 'reject', reason: `Held-out gate returned ${ctx.point.gate}.` }\n }\n if (!ctx.posterior || ctx.posterior.n < RESEARCH_REPORT_HARD_PAIR_FLOOR) {\n return {\n decision: 'needs_more_data',\n reason: `Only ${ctx.posterior?.n ?? 0} paired observations; below hard floor of ${RESEARCH_REPORT_HARD_PAIR_FLOOR} for any paired inference.`,\n }\n }\n const ci = ctx.posterior.ci\n if (ctx.rope && ci.low >= ctx.rope.low && ci.high <= ctx.rope.high) {\n return {\n decision: 'equivalent',\n reason: `Paired-delta CI [${fmt(ci.low)}, ${fmt(ci.high)}] is fully inside ROPE ${formatRope(ctx.rope)}; candidate is practically equivalent to comparator.`,\n }\n }\n const significant = Number.isFinite(row.qValue) && row.qValue <= ctx.fdr\n const gainPositive = ci.low > 0\n const gainNegative = ci.high < 0\n if (gainNegative) {\n return {\n decision: 'reject',\n reason: `Paired-delta CI [${fmt(ci.low)}, ${fmt(ci.high)}] lies entirely below zero.`,\n }\n }\n if (ctx.posterior.n < ctx.minPairs) {\n return {\n decision: 'needs_more_data',\n reason: `Only ${ctx.posterior.n} paired observations; minimum detectable effect at this N is ${fmt(ctx.posterior.mde)} score units (need ≥ ${ctx.minPairs} pairs to issue a directional verdict).`,\n }\n }\n if (significant && gainPositive) {\n return {\n decision: 'promote',\n reason: `BH-adjusted q=${fmt(row.qValue)} ≤ ${ctx.fdr} and paired-delta CI [${fmt(ci.low)}, ${fmt(ci.high)}] excludes zero; Pr(Δ>0)=${fmt(ctx.posterior.prGreaterThanZero)}.`,\n }\n }\n return {\n decision: 'hold',\n reason: `Pr(Δ>0)=${fmt(ctx.posterior.prGreaterThanZero)} but CI [${fmt(ci.low)}, ${fmt(ci.high)}] crosses zero; effect not decisive at fdr=${ctx.fdr}.`,\n }\n}\n\nfunction buildRecommendation(\n candidates: ResearchReportCandidate[],\n ctx: {\n comparator: string | null\n failureClusters?: FailureClusterReport\n rope: { low: number; high: number } | null\n minPairs: number\n preregistrationHash: string | null\n },\n): ResearchReportRecommendation {\n const nonComparator = candidates.filter((c) => c.candidateId !== ctx.comparator)\n const bestPromote = nonComparator.find((c) => c.decision === 'promote')\n const bestEquivalent = nonComparator.find((c) => c.decision === 'equivalent')\n const chosen = bestPromote ?? bestEquivalent ?? nonComparator[0] ?? null\n const decision: ResearchReportDecision = bestPromote\n ? 'promote'\n : nonComparator.some((c) => c.decision === 'needs_more_data')\n ? 'needs_more_data'\n : bestEquivalent\n ? 'equivalent'\n : nonComparator.some((c) => c.decision === 'hold')\n ? 'hold'\n : 'reject'\n\n const rationale: string[] = []\n const risks: string[] = []\n const nextActions: string[] = []\n\n if (chosen) {\n rationale.push(`${chosen.candidateId}: ${chosen.decisionReason}`)\n if (chosen.gainCi) {\n const probSummary =\n chosen.prGreaterThanZero !== null ? `, Pr(Δ>0)=${fmt(chosen.prGreaterThanZero)}` : ''\n rationale.push(\n `Median paired gain CI: [${fmt(chosen.gainCi.low)}, ${fmt(chosen.gainCi.high)}]${probSummary}.`,\n )\n }\n if (chosen.mde !== null && Number.isFinite(chosen.mde)) {\n rationale.push(`MDE at current paired N=${chosen.pairedN}: ${fmt(chosen.mde)} score units.`)\n }\n }\n if (!ctx.comparator) {\n risks.push('No comparator was configured; verdict is descriptive, not causal.')\n nextActions.push('Re-run with a stable comparator candidate for paired inference.')\n }\n if (!ctx.preregistrationHash) {\n risks.push(\n 'No preregistration hash supplied; readers cannot verify the analysis was specified before data inspection.',\n )\n nextActions.push(\n 'Sign a HypothesisManifest before the next sweep and pass `preregistrationHash` so the report cites it.',\n )\n }\n if (ctx.rope === null && nonComparator.length > 0) {\n risks.push(\n 'No ROPE configured; the report cannot distinguish \"equivalent\" from \"inconclusive\".',\n )\n nextActions.push(\n 'Define a domain-specific Region of Practical Equivalence and pass it to lock in the equivalence threshold.',\n )\n }\n const inconclusive = nonComparator.filter((c) => c.decision === 'needs_more_data')\n if (inconclusive.length > 0) {\n const worst = inconclusive.reduce((a, b) => (b.pairedN < a.pairedN ? b : a))\n risks.push(\n `${inconclusive.length} candidate(s) below soft floor (${ctx.minPairs} pairs); thinnest is ${worst.candidateId} with ${worst.pairedN}.`,\n )\n nextActions.push(\n `Collect at least ${ctx.minPairs - worst.pairedN} more matched holdout runs for ${worst.candidateId}.`,\n )\n }\n const rejected = nonComparator.filter((c) => c.decision === 'reject')\n if (rejected.length > 0) {\n risks.push(\n `${rejected.length} candidate(s) failed the paired test or held-out gate; do not ship those variants.`,\n )\n }\n if (ctx.failureClusters && ctx.failureClusters.clusters.length > 0) {\n const top = ctx.failureClusters.clusters[0]!\n risks.push(`Top failure cluster: ${top.failureClass} across ${top.runCount} run(s).`)\n nextActions.push('Prioritize the largest failure cluster before broad rollout.')\n }\n if (decision === 'promote') {\n nextActions.push('Ship behind the existing promotion gate and monitor canaries.')\n } else if (decision === 'hold') {\n nextActions.push('Keep current production candidate while expanding holdout evidence.')\n } else if (decision === 'equivalent') {\n nextActions.push(\n 'Either keep the comparator (no quality regression) or promote on cost/latency grounds — equivalence does not justify either; the choice is a product decision, not a stats one.',\n )\n } else if (decision === 'reject') {\n nextActions.push(\n 'Do not promote this sweep; inspect failures and generate a revised candidate.',\n )\n }\n\n return {\n decision,\n candidateId: chosen?.candidateId ?? null,\n rationale,\n risks,\n nextActions,\n }\n}\n\nfunction buildExecutiveSummary(\n candidates: ResearchReportCandidate[],\n recommendation: ResearchReportRecommendation,\n ctx: {\n comparator: string | null\n split: 'search' | 'holdout'\n failureClusters?: FailureClusterReport\n preregistrationHash: string | null\n },\n): string[] {\n const lines: string[] = []\n const nonComparator = candidates.filter((c) => c.candidateId !== ctx.comparator)\n lines.push(\n `Evaluated ${nonComparator.length} candidate(s) on the ${ctx.split} split${ctx.comparator ? ` against ${ctx.comparator}` : ''}.`,\n )\n lines.push(\n `Recommendation: ${recommendation.decision}${recommendation.candidateId ? ` ${recommendation.candidateId}` : ''}.`,\n )\n const promoted = nonComparator.filter((c) => c.decision === 'promote').length\n const held = nonComparator.filter((c) => c.decision === 'hold').length\n const equivalent = nonComparator.filter((c) => c.decision === 'equivalent').length\n const rejected = nonComparator.filter((c) => c.decision === 'reject').length\n const more = nonComparator.filter((c) => c.decision === 'needs_more_data').length\n lines.push(\n `Decision mix: ${promoted} promote, ${equivalent} equivalent, ${held} hold, ${rejected} reject, ${more} need more data.`,\n )\n const frontier = nonComparator.filter((c) => c.onParetoFrontier).map((c) => c.candidateId)\n if (frontier.length > 0) lines.push(`Pareto-frontier candidates: ${frontier.join(', ')}.`)\n if (ctx.failureClusters) {\n lines.push(\n `Failure clustering found ${ctx.failureClusters.totalFailures}/${ctx.failureClusters.totalRuns} failed runs across ${ctx.failureClusters.clusters.length} reportable cluster(s).`,\n )\n }\n lines.push(\n ctx.preregistrationHash\n ? `Preregistered analysis: ${ctx.preregistrationHash.slice(0, 12)}…`\n : 'Analysis is post-hoc — no preregistration hash supplied.',\n )\n return lines\n}\n\nfunction renderResearchMarkdown(report: {\n title: string\n generatedAt: string\n split: 'search' | 'holdout'\n comparator: string | null\n executiveSummary: string[]\n recommendation: ResearchReportRecommendation\n candidates: ResearchReportCandidate[]\n summary: SummaryTable\n pareto: ParetoFigureSpec\n gains: GainDistributionFigureSpec[]\n rope: { low: number; high: number } | null\n runFingerprint: string\n preregistrationHash: string | null\n methodology: ResearchReportMethodology\n failureClusters?: FailureClusterReport\n}): string {\n const lines: string[] = []\n lines.push(`# ${report.title}`)\n lines.push('')\n lines.push(`**Generated:** ${report.generatedAt}`)\n lines.push(`**Primary split:** ${report.split}`)\n lines.push(`**Comparator:** ${report.comparator ?? 'not configured'}`)\n lines.push(`**ROPE:** ${report.rope ? formatRope(report.rope) : 'not configured'}`)\n lines.push(`**Run fingerprint:** \\`${report.runFingerprint}\\``)\n lines.push(\n `**Preregistration:** ${report.preregistrationHash ? `\\`${report.preregistrationHash}\\`` : 'none'}`,\n )\n lines.push('')\n lines.push('## Executive Summary')\n lines.push('')\n for (const item of report.executiveSummary) lines.push(`- ${item}`)\n lines.push('')\n lines.push('## Recommendation')\n lines.push('')\n lines.push(`**Decision:** ${report.recommendation.decision}`)\n lines.push(`**Candidate:** ${report.recommendation.candidateId ?? 'N/A'}`)\n lines.push('')\n lines.push('### Rationale')\n lines.push('')\n for (const item of report.recommendation.rationale) lines.push(`- ${item}`)\n lines.push('')\n lines.push('### Risks')\n lines.push('')\n for (const item of report.recommendation.risks.length\n ? report.recommendation.risks\n : ['No material report-level risks detected.']) {\n lines.push(`- ${item}`)\n }\n lines.push('')\n lines.push('### Next Actions')\n lines.push('')\n for (const item of report.recommendation.nextActions) lines.push(`- ${item}`)\n lines.push('')\n lines.push('## Candidate Decision Table')\n lines.push('')\n lines.push(\n '| Candidate | Decision | Mean | Δ̄ | Pr(Δ>0) | q | d | Paired N | Median Gain CI | MDE | Pareto | Gate |',\n )\n lines.push('|---|---|---:|---:|---:|---:|---:|---:|---|---:|---|---|')\n for (const c of report.candidates) {\n const delta = c.meanDeltaVsComparator === null ? '-' : signed(c.meanDeltaVsComparator)\n const prGt = c.prGreaterThanZero === null ? '-' : c.prGreaterThanZero.toFixed(3)\n const q = Number.isFinite(c.qValue) ? c.qValue.toFixed(4) : '-'\n const d = Number.isFinite(c.cohensD) ? c.cohensD.toFixed(3) : '-'\n const gain = c.gainCi ? `[${fmt(c.gainCi.low)}, ${fmt(c.gainCi.high)}]` : '-'\n const mde = c.mde === null || !Number.isFinite(c.mde) ? '-' : fmt(c.mde)\n lines.push(\n `| ${c.candidateId} | ${c.decision} | ${fmt(c.mean)} | ${delta} | ${prGt} | ${q} | ${d} | ${c.pairedN} | ${gain} | ${mde} | ${c.onParetoFrontier ? 'yes' : 'no'} | ${c.gate ?? '-'} |`,\n )\n }\n lines.push('')\n lines.push('## Statistical Summary')\n lines.push('')\n lines.push(report.summary.markdown)\n lines.push('')\n lines.push('## Methodology')\n lines.push('')\n lines.push('### Assumptions')\n lines.push('')\n for (const item of report.methodology.assumptions) lines.push(`- ${item}`)\n lines.push('')\n lines.push('### Methods')\n lines.push('')\n for (const item of report.methodology.methods) lines.push(`- ${item}`)\n lines.push('')\n lines.push('### Alternatives Considered')\n lines.push('')\n for (const item of report.methodology.alternatives) lines.push(`- ${item}`)\n lines.push('')\n lines.push('### When NOT To Apply')\n lines.push('')\n for (const item of report.methodology.whenNotToApply) lines.push(`- ${item}`)\n lines.push('')\n lines.push('### Citations')\n lines.push('')\n for (const item of report.methodology.citations) lines.push(`- ${item}`)\n lines.push('')\n lines.push('## Chart Specs')\n lines.push('')\n lines.push(\n 'The report carries JSON chart specs for Pareto cost/quality and paired gain histograms.',\n )\n lines.push('')\n lines.push('```json')\n lines.push(JSON.stringify({ pareto: report.pareto, gains: report.gains }, null, 2))\n lines.push('```')\n if (report.failureClusters) {\n lines.push('')\n lines.push('## Failure Clusters')\n lines.push('')\n lines.push('| Failure Class | Runs | Scenarios | Tool | Example |')\n lines.push('|---|---:|---:|---|---|')\n for (const c of report.failureClusters.clusters.slice(0, 10)) {\n lines.push(\n `| ${c.failureClass} | ${c.runCount} | ${c.scenarioIds.length} | ${c.toolName ?? '-'} | ${escapePipes(c.exampleError ?? c.exampleRunId)} |`,\n )\n }\n }\n return lines.join('\\n')\n}\n\nfunction renderResearchHtml(markdown: string, title: string): string {\n const body = markdownToHtml(markdown)\n return [\n '<!doctype html>',\n '<html lang=\"en\">',\n '<head>',\n '<meta charset=\"utf-8\">',\n '<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">',\n `<title>${escapeHtml(title)}</title>`,\n '<style>',\n 'body{font-family:Inter,ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,\"Segoe UI\",sans-serif;margin:0;color:#172026;background:#f7f8f8;}',\n 'main{max-width:1080px;margin:0 auto;padding:40px 24px 64px;background:#fff;min-height:100vh;}',\n 'h1{font-size:34px;line-height:1.15;margin:0 0 20px;}h2{margin-top:34px;border-top:1px solid #d9dfdf;padding-top:22px;}h3{margin-top:22px;}',\n 'p,li{line-height:1.55;}table{border-collapse:collapse;width:100%;margin:16px 0;font-size:14px;}th,td{border:1px solid #d9dfdf;padding:8px;text-align:left;}th{background:#eef2f2;}',\n 'code,pre{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;}pre{overflow:auto;background:#111827;color:#f9fafb;padding:16px;border-radius:6px;}',\n '</style>',\n '</head>',\n '<body><main>',\n body,\n '</main></body></html>',\n ].join('\\n')\n}\n\nfunction markdownToHtml(markdown: string): string {\n const lines = markdown.split('\\n')\n const html: string[] = []\n let inList = false\n let inCode = false\n let code: string[] = []\n let table: string[] = []\n\n const flushList = () => {\n if (inList) {\n html.push('</ul>')\n inList = false\n }\n }\n const flushTable = () => {\n if (table.length === 0) return\n html.push(renderMarkdownTable(table))\n table = []\n }\n\n for (const line of lines) {\n if (line.startsWith('```')) {\n if (inCode) {\n html.push(`<pre><code>${escapeHtml(code.join('\\n'))}</code></pre>`)\n code = []\n inCode = false\n } else {\n flushList()\n flushTable()\n inCode = true\n }\n continue\n }\n if (inCode) {\n code.push(line)\n continue\n }\n if (line.startsWith('|')) {\n flushList()\n table.push(line)\n continue\n }\n flushTable()\n if (line.startsWith('- ')) {\n if (!inList) {\n html.push('<ul>')\n inList = true\n }\n html.push(`<li>${inlineMarkdown(line.slice(2))}</li>`)\n continue\n }\n flushList()\n if (line.startsWith('# ')) html.push(`<h1>${inlineMarkdown(line.slice(2))}</h1>`)\n else if (line.startsWith('## ')) html.push(`<h2>${inlineMarkdown(line.slice(3))}</h2>`)\n else if (line.startsWith('### ')) html.push(`<h3>${inlineMarkdown(line.slice(4))}</h3>`)\n else if (line.trim() === '') html.push('')\n else html.push(`<p>${inlineMarkdown(line)}</p>`)\n }\n flushList()\n flushTable()\n return html.join('\\n')\n}\n\nfunction renderMarkdownTable(lines: string[]): string {\n const rows = lines\n .filter((line) => !/^\\|[-:\\s|]+\\|$/.test(line))\n .map((line) =>\n line\n .slice(1, -1)\n .split('|')\n .map((cell) => inlineMarkdown(cell.trim())),\n )\n if (rows.length === 0) return ''\n const [head, ...body] = rows\n const th = head!.map((cell) => `<th>${cell}</th>`).join('')\n const trs = body\n .map((row) => `<tr>${row.map((cell) => `<td>${cell}</td>`).join('')}</tr>`)\n .join('\\n')\n return `<table><thead><tr>${th}</tr></thead><tbody>${trs}</tbody></table>`\n}\n\nfunction inlineMarkdown(s: string): string {\n return escapeHtml(s).replace(/\\*\\*([^*]+)\\*\\*/g, '<strong>$1</strong>')\n}\n\nfunction escapeHtml(s: string): string {\n return s\n .replace(/&/g, '&amp;')\n .replace(/</g, '&lt;')\n .replace(/>/g, '&gt;')\n .replace(/\"/g, '&quot;')\n}\n\nfunction escapePipes(s: string): string {\n return s.replace(/\\|/g, '\\\\|')\n}\n\nfunction decisionWeight(decision: ResearchReportDecision): number {\n if (decision === 'promote') return 5\n if (decision === 'equivalent') return 4\n if (decision === 'hold') return 3\n if (decision === 'needs_more_data') return 2\n return 1\n}\n\nfunction signed(x: number): string {\n return `${x >= 0 ? '+' : ''}${fmt(x)}`\n}\n\n// ── tiny helpers ─────────────────────────────────────────────────────\n\nfunction avg(xs: number[]): number {\n if (xs.length === 0) return Number.NaN\n return xs.reduce((s, x) => s + x, 0) / xs.length\n}\n\nfunction medianOfSorted(sorted: number[]): number {\n if (sorted.length === 0) return 0\n const mid = Math.floor(sorted.length / 2)\n return sorted.length % 2 === 0 ? (sorted[mid - 1]! + sorted[mid]!) / 2 : sorted[mid]!\n}\n\nfunction fmt(x: number): string {\n if (!Number.isFinite(x)) return String(x)\n return x.toFixed(4)\n}\n"],"mappings":";;;;;;;;;;;AAuBO,SAAS,mBAAmB,MAKxB;AACT,QAAM,SAAS,KAAK;AACpB,MAAI,CAAC,OAAO,SAAS,MAAM,KAAK,UAAU,EAAG,QAAO;AACpD,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,SAAS,UAAU,WAAW,IAAI,QAAQ,IAAI,IAAI,KAAK;AAC7D,QAAM,QAAQ,UAAU,KAAK;AAC7B,QAAM,IAAI,MAAM,SAAS,SAAS,WAAW;AAC7C,SAAO,KAAK,KAAK,CAAC;AACpB;AAcO,SAAS,UAAU,MAKf;AACT,MAAI,CAAC,OAAO,SAAS,KAAK,OAAO,KAAK,KAAK,WAAW,EAAG,QAAO;AAChE,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,SAAS,UAAU,WAAW,IAAI,QAAQ,IAAI,IAAI,KAAK;AAC7D,QAAM,QAAQ,UAAU,KAAK;AAC7B,UAAQ,SAAS,SAAS,KAAK,KAAK,KAAK,OAAO;AAClD;AAGO,SAAS,WACd,SACA,QAAQ,MACwC;AAChD,QAAM,IAAI,QAAQ;AAClB,QAAM,WAAW,QAAQ,IAAI,CAAC,MAAM,KAAK,IAAI,GAAG,IAAI,CAAC,CAAC;AACtD,QAAM,cAAc,SAAS,IAAI,CAAC,MAAM,IAAI,KAAK;AACjD,SAAO,EAAE,UAAU,YAAY;AACjC;AAOO,SAAS,kBACd,SACA,MAAM,MACyC;AAC/C,QAAM,IAAI,QAAQ;AAClB,MAAI,MAAM,EAAG,QAAO,EAAE,SAAS,CAAC,GAAG,aAAa,CAAC,EAAE;AACnD,QAAM,UAAU,QAAQ,IAAI,CAAC,GAAG,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAC1E,QAAM,IAAI,IAAI,MAAc,CAAC;AAE7B,MAAI,WAAW;AACf,WAAS,IAAI,IAAI,GAAG,KAAK,GAAG,KAAK;AAC/B,UAAM,OAAO,IAAI;AACjB,UAAM,QAAQ,QAAQ,CAAC;AACvB,UAAM,MAAO,MAAM,IAAI,IAAK;AAC5B,UAAM,UAAU,KAAK,IAAI,UAAU,GAAG;AACtC,eAAW;AACX,MAAE,MAAM,CAAC,IAAI,KAAK,IAAI,GAAG,OAAO;AAAA,EAClC;AACA,QAAM,cAAc,EAAE,IAAI,CAAC,MAAM,IAAI,GAAG;AACxC,SAAO,EAAE,SAAS,GAAG,YAAY;AACnC;AAGA,SAAS,UAAU,GAAmB;AACpC,MAAI,KAAK,KAAK,KAAK,GAAG;AACpB,QAAI,MAAM,EAAG,QAAO;AACpB,QAAI,MAAM,EAAG,QAAO;AACpB,WAAO;AAAA,EACT;AACA,QAAM,IAAI;AAAA,IACR;AAAA,IAAsB;AAAA,IAAqB;AAAA,IAAsB;AAAA,IACjE;AAAA,IAAsB;AAAA,EACxB;AACA,QAAM,IAAI;AAAA,IACR;AAAA,IAAsB;AAAA,IAAqB;AAAA,IAAsB;AAAA,IACjE;AAAA,EACF;AACA,QAAM,IAAI;AAAA,IACR;AAAA,IAAuB;AAAA,IAAuB;AAAA,IAAoB;AAAA,IAClE;AAAA,IAAmB;AAAA,EACrB;AACA,QAAM,IAAI,CAAC,sBAAsB,oBAAsB,mBAAmB,iBAAiB;AAC3F,QAAM,OAAO;AACb,QAAM,QAAQ,IAAI;AAClB,MAAI;AACJ,MAAI;AACJ,MAAI,IAAI,MAAM;AACZ,QAAI,KAAK,KAAK,KAAK,KAAK,IAAI,CAAC,CAAC;AAC9B,gBACO,EAAE,CAAC,IAAK,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,SACpE,EAAE,CAAC,IAAK,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI;AAAA,EAE3D;AACA,MAAI,KAAK,OAAO;AACd,QAAI,IAAI;AACR,QAAI,IAAI;AACR,gBACQ,EAAE,CAAC,IAAK,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,SAC1E,EAAE,CAAC,IAAK,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI;AAAA,EAEzE;AACA,MAAI,KAAK,KAAK,KAAK,KAAK,IAAI,IAAI,CAAC,CAAC;AAClC,SACE,MAAM,EAAE,CAAC,IAAK,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,SACrE,EAAE,CAAC,IAAK,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI,EAAE,CAAC,KAAM,IAAI;AAE3D;;;ACxFO,SAAS,gBACd,QACA,OACA,OAA+B,CAAC,GACT;AACvB,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI,MAAM,0CAA0C,OAAO,MAAM,OAAO,MAAM,MAAM,GAAG;AAAA,EAC/F;AACA,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,YAAY,KAAK,aAAa;AACpC,MAAI,cAAc,KAAK,cAAc,GAAG;AACtC,UAAM,IAAI,MAAM,qDAAqD,UAAU,EAAE;AAAA,EACnF;AAEA,QAAM,IAAI,OAAO;AACjB,QAAM,SAAS,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AACjD,MAAI,MAAM,GAAG;AACX,WAAO,EAAE,GAAG,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,YAAY,UAAU;AAAA,EAC5E;AACA,MAAI,MAAM,GAAG;AACX,UAAM,IAAI,OAAO,CAAC;AAClB,WAAO,EAAE,GAAG,GAAG,QAAQ,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,YAAY,UAAU;AAAA,EAC5E;AAEA,QAAM,MAAM,QAAQ,KAAK,IAAI;AAC7B,QAAM,UAAU,IAAI,MAAc,SAAS;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,QAAI,MAAuB;AAC3B,QAAI,cAAc,QAAQ;AACxB,UAAI,MAAM;AACV,eAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,eAAO,OAAO,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC;AAAA,MACrC;AACA,cAAQ,CAAC,IAAI,MAAM;AAAA,IACrB,OAAO;AACL,YAAM,IAAI,MAAc,CAAC;AACzB,eAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAI,CAAC,IAAI,OAAO,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC;AAAA,MACvC;AACA,cAAQ,CAAC,IAAI,cAAc,GAAG;AAAA,IAChC;AAAA,EACF;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAE5B,QAAM,QAAQ,IAAI;AAClB,QAAM,SAAS,KAAK,MAAO,QAAQ,IAAK,SAAS;AACjD,QAAM,UAAU,KAAK,IAAI,YAAY,GAAG,KAAK,MAAM,IAAI,QAAQ,KAAK,SAAS,IAAI,CAAC;AAElF,SAAO;AAAA,IACL;AAAA,IACA,QAAQ,cAAc,CAAC,GAAG,MAAM,CAAC;AAAA,IACjC,MAAM,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAAA,IAC1C,KAAK,QAAQ,MAAM;AAAA,IACnB,MAAM,QAAQ,KAAK,IAAI,SAAS,MAAM,CAAC;AAAA,IACvC;AAAA,IACA;AAAA,EACF;AACF;AAQO,SAAS,eAAe,QAAkB,OAA2C;AAC1F,SAAO,mBAAmB,QAAQ,KAAK;AACzC;AAQO,SAAS,SACd,SACA,MAAM,MACyC;AAC/C,SAAO,kBAAkB,SAAS,GAAG;AACvC;AAIA,SAAS,cAAc,IAAsB;AAC3C,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,KAAG,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AACvB,QAAM,MAAM,KAAK,MAAM,GAAG,SAAS,CAAC;AACpC,SAAO,GAAG,SAAS,MAAM,KAAK,GAAG,MAAM,CAAC,IAAK,GAAG,GAAG,KAAM,IAAI,GAAG,GAAG;AACrE;AAOA,SAAS,QAAQ,MAAwC;AACvD,MAAI,SAAS,OAAW,QAAO,KAAK;AACpC,MAAI,IAAI,OAAO,KAAK;AACpB,SAAO,MAAM;AACX,QAAK,IAAI,aAAc;AACvB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;;;AChGO,SAAS,aAAa,MAAmB,OAA4B,CAAC,GAAiB;AAC5F,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,aAAa,UAAU,YAAY,iBAAiB;AAE1D,QAAM,cAAc,oBAAI,IAAqD;AAC7E,aAAW,KAAK,MAAM;AACpB,QAAI,EAAE,aAAa,MAAO;AAC1B,UAAM,IAAI,EAAE,QAAQ,UAAU;AAC9B,QAAI,OAAO,MAAM,YAAY,CAAC,OAAO,SAAS,CAAC,EAAG;AAClD,UAAM,SAAS,YAAY,IAAI,EAAE,WAAW,KAAK,EAAE,MAAM,CAAC,GAAG,QAAQ,CAAC,EAAE;AACxE,WAAO,KAAK,KAAK,CAAC;AAClB,WAAO,OAAO,KAAK,CAAC;AACpB,gBAAY,IAAI,EAAE,aAAa,MAAM;AAAA,EACvC;AAEA,QAAM,eAAe,CAAC,GAAG,YAAY,KAAK,CAAC,EAAE,KAAK;AAClD,QAAM,WAAW,aAAa,YAAY,IAAI,UAAU,IAAI;AAG5D,QAAM,YAAuD,CAAC;AAC9D,aAAW,MAAM,cAAc;AAC7B,UAAM,SAAS,YAAY,IAAI,EAAE;AACjC,UAAM,KAAK,mBAAmB,OAAO,QAAQ,UAAU;AACvD,QAAI,OAAO,OAAO;AAClB,QAAI,IAAI,OAAO;AACf,QAAI,cAAc,YAAY,OAAO,YAAY;AAC/C,YAAM,SAAS,gBAAgB,OAAO,MAAM,SAAS,MAAM,UAAU;AACrE,UAAI,OAAO,OAAO,UAAU,GAAG;AAC7B,eAAO,mBAAmB,OAAO,QAAQ,OAAO,KAAK,EAAE;AAAA,MACzD;AACA,UAAI,QAAQ,SAAS,QAAQ,OAAO,MAAM;AAAA,IAC5C;AACA,cAAU,KAAK;AAAA,MACb,aAAa;AAAA,MACb,GAAG,OAAO,OAAO;AAAA,MACjB,MAAM,GAAG;AAAA,MACT,OAAO,GAAG;AAAA,MACV,QAAQ,GAAG;AAAA,MACX,QAAQ;AAAA,MACR,SAAS;AAAA,MACT;AAAA,IACF,CAAC;AAAA,EACH;AAKA,MAAI,YAAY;AACd,UAAM,OAAiB,CAAC;AACxB,UAAM,KAAe,CAAC;AACtB,aAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,YAAM,IAAI,UAAU,CAAC;AACrB,UAAI,EAAE,gBAAgB,WAAY;AAClC,UAAI,CAAC,OAAO,SAAS,EAAE,IAAI,EAAG;AAC9B,WAAK,KAAK,CAAC;AACX,SAAG,KAAK,EAAE,IAAI;AAAA,IAChB;AACA,QAAI,GAAG,SAAS,GAAG;AACjB,YAAM,EAAE,QAAQ,IAAI,kBAAkB,IAAI,GAAG;AAC7C,eAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,kBAAU,KAAK,CAAC,CAAE,EAAG,SAAS,QAAQ,CAAC;AAAA,MACzC;AAAA,IACF;AAAA,EACF;AAEA,QAAM,OAAO,UAAU,IAAI,CAAC,EAAE,MAAM,OAAO,GAAG,KAAK,MAAM,IAAI;AAC7D,QAAM,WAAW,2BAA2B,MAAM,YAAY,KAAK;AACnE,SAAO,EAAE,MAAM,YAAY,OAAO,SAAS;AAC7C;AAEA,SAAS,gBACP,WACA,UACA,YACuC;AACvC,QAAM,UAAU,oBAAI,IAAoB;AACxC,aAAW,KAAK,UAAU;AACxB,UAAM,IAAI,EAAE,QAAQ,UAAU;AAC9B,QAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,GAAG;AAC/C,cAAQ,IAAI,GAAG,EAAE,YAAY,KAAK,EAAE,IAAI,IAAI,CAAC;AAAA,IAC/C;AAAA,EACF;AACA,QAAM,SAAmB,CAAC;AAC1B,QAAM,QAAkB,CAAC;AACzB,aAAW,KAAK,WAAW;AACzB,UAAM,IAAI,EAAE,QAAQ,UAAU;AAC9B,QAAI,OAAO,MAAM,YAAY,CAAC,OAAO,SAAS,CAAC,EAAG;AAClD,UAAM,MAAM,GAAG,EAAE,YAAY,KAAK,EAAE,IAAI;AACxC,UAAM,IAAI,QAAQ,IAAI,GAAG;AACzB,QAAI,MAAM,OAAW;AACrB,WAAO,KAAK,CAAC;AACb,UAAM,KAAK,CAAC;AAAA,EACd;AACA,SAAO,EAAE,QAAQ,MAAM;AACzB;AAEA,SAAS,2BACP,MACA,YACA,OACQ;AACR,QAAM,QAAkB,CAAC;AACzB,QAAM,WAAW,aAAa,QAAQ,UAAU,MAAM;AACtD,QAAM,KAAK,wBAAmB,KAAK,SAAS,QAAQ,EAAE;AACtD,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,wDAAwD;AACnE,QAAM,KAAK,+BAA+B;AAC1C,aAAW,KAAK,MAAM;AACpB,UAAM,KAAK,IAAI,IAAI,EAAE,KAAK,CAAC,KAAK,IAAI,EAAE,MAAM,CAAC;AAC7C,UAAM,IAAI,OAAO,SAAS,EAAE,MAAM,IAAI,EAAE,OAAO,QAAQ,CAAC,IAAI;AAC5D,UAAM,IAAI,OAAO,SAAS,EAAE,OAAO,IAAI,EAAE,QAAQ,QAAQ,CAAC,IAAI;AAC9D,UAAM,KAAK,KAAK,EAAE,WAAW,MAAM,EAAE,CAAC,MAAM,IAAI,EAAE,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI;AAAA,EACnF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAgCO,SAAS,YACd,MACA,OAGI,CAAC,GACa;AAClB,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,aAAa,UAAU,YAAY,iBAAiB;AAE1D,QAAM,UAAU,oBAAI,IAAmD;AACvE,aAAW,KAAK,MAAM;AACpB,QAAI,EAAE,aAAa,MAAO;AAC1B,UAAM,IAAI,EAAE,QAAQ,UAAU;AAC9B,QAAI,OAAO,MAAM,YAAY,CAAC,OAAO,SAAS,CAAC,EAAG;AAClD,UAAM,SAAS,QAAQ,IAAI,EAAE,WAAW,KAAK,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC,EAAE;AACrE,WAAO,KAAK,KAAK,EAAE,OAAO;AAC1B,WAAO,QAAQ,KAAK,CAAC;AACrB,YAAQ,IAAI,EAAE,aAAa,MAAM;AAAA,EACnC;AAEA,QAAM,SAAwB,CAAC;AAC/B,aAAW,CAAC,aAAa,MAAM,KAAK,QAAQ,QAAQ,GAAG;AACrD,WAAO,KAAK;AAAA,MACV;AAAA,MACA,MAAM,IAAI,OAAO,IAAI;AAAA,MACrB,SAAS,IAAI,OAAO,OAAO;AAAA,MAC3B,GAAG,OAAO,KAAK;AAAA,MACf,YAAY;AAAA,MACZ,MAAM,KAAK,gBAAgB,WAAW,IAClC,UAAU,KAAK,cAAc,WAAW,CAAE,IAC1C;AAAA,IACN,CAAC;AAAA,EACH;AAKA,aAAW,KAAK,QAAQ;AACtB,MAAE,aAAa,CAAC,OAAO,KAAK,CAAC,MAAM,MAAM,KAAK,UAAU,GAAG,CAAC,CAAC;AAAA,EAC/D;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA,MAAM,EAAE,GAAG,WAAW,GAAG,QAAQ;AAAA,IACjC;AAAA,EACF;AACF;AAEA,SAAS,UAAU,GAAgB,GAAyB;AAC1D,SAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE;AACzF;AAEA,SAAS,UAAU,GAAsC;AACvD,MAAI,EAAE,QAAS,QAAO;AACtB,MAAI,EAAE,kBAAkB,WAAY,QAAO;AAC3C,MAAI,EAAE,kBAAkB,iBAAkB,QAAO;AACjD,MAAI,EAAE,kBAAkB,cAAe,QAAO;AAC9C,SAAO;AACT;AA2CO,SAAS,cACd,MACA,aACA,YACA,OAAgC,CAAC,GACL;AAC5B,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,aAAa,UAAU,YAAY,iBAAiB;AAC1D,QAAM,WAAW,KAAK,QAAQ;AAC9B,MAAI,WAAW,EAAG,OAAM,IAAI,MAAM,sCAAiC;AAEnE,QAAM,YAAY,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,eAAe,EAAE,aAAa,KAAK;AAC1F,QAAM,WAAW,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,cAAc,EAAE,aAAa,KAAK;AAIxF,QAAM,EAAE,QAAQ,MAAM,IAAI,gBAAgB,WAAW,UAAU,UAAU;AACzE,QAAM,IAAI,OAAO;AAEjB,MAAI,MAAM,GAAG;AACX,WAAO;AAAA,MACL,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG;AAAA,MACH,MAAM,CAAC;AAAA,MACP,QAAQ;AAAA,MACR,IAAI,EAAE,KAAK,GAAG,MAAM,EAAE;AAAA,IACxB;AAAA,EACF;AAEA,QAAM,SAAS,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AACjD,QAAM,eAAe,CAAC,GAAG,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AACrD,QAAM,SAAS,eAAe,YAAY;AAC1C,QAAM,MAAM,aAAa,CAAC;AAC1B,QAAM,MAAM,aAAa,aAAa,SAAS,CAAC;AAIhD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,GAAG,GAAG,KAAK,IAAI,GAAG,GAAG,IAAI;AACzD,QAAM,KAAK,CAAC;AACZ,QAAM,KAAK;AACX,QAAM,SAAS,KAAK,MAAM;AAC1B,QAAM,OAA8B,CAAC;AACrC,WAAS,IAAI,GAAG,IAAI,UAAU,KAAK;AACjC,SAAK,KAAK,EAAE,IAAI,KAAK,IAAI,OAAO,IAAI,MAAM,IAAI,KAAK,OAAO,OAAO,EAAE,CAAC;AAAA,EACtE;AACA,aAAW,KAAK,QAAQ;AACtB,QAAI,MAAM,KAAK,OAAO,IAAI,MAAM,KAAK;AACrC,QAAI,MAAM,EAAG,OAAM;AACnB,QAAI,OAAO,SAAU,OAAM,WAAW;AACtC,SAAK,GAAG,EAAG,SAAS;AAAA,EACtB;AAEA,QAAM,KAAK,gBAAgB,QAAQ,OAAO;AAAA,IACxC,YAAY,KAAK,cAAc;AAAA,IAC/B,WAAW,KAAK,aAAa;AAAA,IAC7B,WAAW;AAAA,IACX,MAAM,KAAK;AAAA,EACb,CAAC;AAED,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,IAAI,EAAE,KAAK,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACnC;AACF;AAgBO,IAAM,kCAAkC;AA2J/C,SAAS,gBACP,MACA,aACA,YACA,MAiBO;AACP,QAAM,aAAa,KAAK,UAAU,YAAY,iBAAiB;AAC/D,QAAM,YAAY,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,eAAe,EAAE,aAAa,KAAK,KAAK;AAC/F,QAAM,WAAW,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,cAAc,EAAE,aAAa,KAAK,KAAK;AAC7F,QAAM,EAAE,QAAQ,MAAM,IAAI,gBAAgB,WAAW,UAAU,UAAU;AACzE,QAAM,IAAI,OAAO;AACjB,MAAI,MAAM,EAAG,QAAO;AAEpB,QAAM,SAAS,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AACjD,QAAM,YAAY,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AACtD,QAAM,eAAe,CAAC,GAAG,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AACrD,QAAM,cAAc,eAAe,YAAY;AAC/C,QAAM,UAAU,MAAM,QAAQ,SAAS;AAEvC,QAAM,KAAK,gBAAgB,QAAQ,OAAO;AAAA,IACxC,YAAY,KAAK;AAAA,IACjB,WAAW;AAAA,IACX,WAAW;AAAA,IACX,MAAM,KAAK;AAAA,EACb,CAAC;AAKD,QAAM,cAAc,qBAAqB,QAAQ,KAAM,KAAK,IAAI;AAChE,QAAM,oBACJ,YAAY,WAAW,IAAI,IAAI,YAAY,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,SAAS,YAAY;AACvF,QAAM,WACJ,KAAK,SAAS,QAAQ,YAAY,WAAW,IACzC,OACA,YAAY,OAAO,CAAC,MAAM,KAAK,KAAK,KAAM,OAAO,KAAK,KAAK,KAAM,IAAI,EAAE,SACvE,YAAY;AAElB,QAAM,gBAAgB,UAAU,EAAE,SAAS,GAAG,OAAO,KAAK,UAAU,OAAO,KAAK,SAAS,CAAC;AAC1F,QAAM,MAAM,YAAY,IAAI,IAAI,gBAAgB;AAEhD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,IAAI,EAAE,KAAK,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,IACjC;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,qBAAqB,QAAkB,WAAmB,MAAyB;AAC1F,QAAM,IAAI,OAAO;AACjB,MAAI,MAAM,EAAG,QAAO,CAAC;AACrB,MAAI,MAAM,EAAG,QAAO,IAAI,MAAc,SAAS,EAAE,KAAK,OAAO,CAAC,CAAE;AAChE,QAAM,MAAM,QAAQ,IAAI;AACxB,QAAM,UAAU,IAAI,MAAc,SAAS;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,QAAO,OAAO,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC;AAC/D,YAAQ,CAAC,IAAI,MAAM;AAAA,EACrB;AACA,SAAO;AACT;AAEA,SAAS,QAAQ,MAA6B;AAC5C,MAAI,SAAS,OAAW,QAAO,KAAK;AACpC,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,QAAK,IAAI,eAAgB;AACzB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;AAEA,SAAS,MAAM,IAAc,MAAsB;AACjD,MAAI,GAAG,SAAS,EAAG,QAAO;AAC1B,MAAI,MAAM;AACV,aAAW,KAAK,GAAI,SAAQ,IAAI,SAAS;AACzC,SAAO,KAAK,KAAK,OAAO,GAAG,SAAS,EAAE;AACxC;AAqBA,eAAsB,eACpB,MACA,OAA8B,CAAC,GACN;AACzB,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,MAAM,KAAK,OAAO;AACxB,QAAM,WAAW,KAAK,IAAI,KAAK,YAAY,IAAI,+BAA+B;AAC9E,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,QAAQ,KAAK,SAAS;AAC5B,QAAM,cAAc,KAAK,gBAAe,oBAAI,KAAK,GAAE,YAAY;AAC/D,QAAM,sBAAsB,KAAK,uBAAuB;AAExD,MAAI,QAAQ,EAAE,OAAO,SAAS,KAAK,GAAG,KAAK,OAAO,SAAS,KAAK,IAAI,KAAK,KAAK,OAAO,KAAK,OAAO;AAC/F,UAAM,IAAI;AAAA,MACR,6EAAwE,KAAK,UAAU,IAAI,CAAC;AAAA,IAC9F;AAAA,EACF;AAEA,QAAM,UAAU,aAAa,MAAM;AAAA,IACjC,YAAY,cAAc;AAAA,IAC1B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,SAAS,YAAY,MAAM,EAAE,OAAO,eAAe,KAAK,cAAc,CAAC;AAC7E,QAAM,eACJ,KAAK,gBAAgB,QAAQ,KAAK,IAAI,CAAC,MAAM,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,OAAO,UAAU;AAC9F,QAAM,QAAQ,aACV,aAAa;AAAA,IAAI,CAAC,OAChB,cAAc,MAAM,IAAI,YAAY;AAAA,MAClC;AAAA,MACA;AAAA,MACA,MAAM,KAAK;AAAA,IACb,CAAC;AAAA,EACH,IACA,CAAC;AAEL,QAAM,kBAAkB,IAAI,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;AACpE,QAAM,oBAAoB,IAAI,IAAI,OAAO,OAAO,IAAI,CAAC,MAAM,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC;AAC9E,QAAM,uBAAuB,oBAAI,IAAgD;AACjF,MAAI,YAAY;AACd,eAAW,MAAM,cAAc;AAC7B,2BAAqB;AAAA,QACnB;AAAA,QACA,gBAAgB,MAAM,IAAI,YAAY;AAAA,UACpC;AAAA,UACA;AAAA,UACA,MAAM,KAAK;AAAA,UACX;AAAA,UACA;AAAA,UACA;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAEA,QAAM,aAAa,QAAQ,KACxB,IAAI,CAAC,QAAQ;AACZ,UAAM,OAAO,gBAAgB,IAAI,IAAI,WAAW;AAChD,UAAM,QAAQ,kBAAkB,IAAI,IAAI,WAAW;AACnD,UAAM,YAAY,qBAAqB,IAAI,IAAI,WAAW,KAAK;AAC/D,UAAM,aAAa,kBAAkB,KAAK;AAAA,MACxC;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,CAAC;AACD,WAAO;AAAA,MACL,aAAa,IAAI;AAAA,MACjB,GAAG,IAAI;AAAA,MACP,MAAM,IAAI;AAAA,MACV,OAAO,IAAI;AAAA,MACX,QAAQ,IAAI;AAAA,MACZ,QAAQ,IAAI;AAAA,MACZ,SAAS,IAAI;AAAA,MACb,uBAAuB,YAAY,UAAU,YAAY;AAAA,MACzD,SAAS,WAAW,KAAK,MAAM,KAAK;AAAA,MACpC,YAAY,YAAY,UAAU,cAAc,OAAO,KAAK,SAAS;AAAA,MACrE,UAAU,YAAY,UAAU,YAAY;AAAA,MAC5C,QAAQ,YAAY,UAAU,KAAK,OAAO,KAAK,KAAK;AAAA,MACpD,mBAAmB,YAAY,UAAU,oBAAoB;AAAA,MAC7D,UAAU,YAAY,UAAU,WAAW;AAAA,MAC3C,KAAK,YAAY,UAAU,MAAM;AAAA,MACjC,kBAAkB,OAAO,cAAc;AAAA,MACvC,MAAM,OAAO;AAAA,MACb,UAAU,WAAW;AAAA,MACrB,gBAAgB,WAAW;AAAA,IAC7B;AAAA,EACF,CAAC,EACA,KAAK,CAAC,GAAG,MAAM;AACd,UAAM,eAAe,eAAe,EAAE,QAAQ,IAAI,eAAe,EAAE,QAAQ;AAC3E,QAAI,iBAAiB,EAAG,QAAO;AAC/B,WAAO,EAAE,OAAO,EAAE;AAAA,EACpB,CAAC;AAEH,QAAM,iBAAiB,oBAAoB,YAAY;AAAA,IACrD;AAAA,IACA,iBAAiB,KAAK;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACD,QAAM,mBAAmB,sBAAsB,YAAY,gBAAgB;AAAA,IACzE;AAAA,IACA;AAAA,IACA,iBAAiB,KAAK;AAAA,IACtB;AAAA,EACF,CAAC;AACD,QAAM,cAAc,iBAAiB;AAAA,IACnC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAED,QAAM,iBAAiB,MAAM;AAAA,IAC3B,aAAa;AAAA,MACX,SAAS,KACN,OAAO,CAAC,MAAM,EAAE,aAAa,KAAK,EAClC,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,aAAa,EAAE,aAAa,UAAU,EAAE,SAAS,EAAE,EACjF,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,cAAc,EAAE,KAAK,CAAC;AAAA,MAChD;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,uBAAuB;AAAA,IACtC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,iBAAiB,KAAK;AAAA,EACxB,CAAC;AACD,QAAM,OAAO,mBAAmB,UAAU,KAAK;AAE/C,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,QAAQ,EAAE,QAAQ,MAAM;AAAA,IACxB;AAAA,IACA,iBAAiB,KAAK;AAAA,IACtB;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,iBAAiB,KASI;AAC5B,QAAM,cAAwB;AAAA,IAC5B;AAAA,IACA;AAAA,IACA,sCAAsC,IAAI,GAAG,cAAc,IAAI,QAAQ,gBAAgB,IAAI,UAAU;AAAA,EACvG;AACA,MAAI,IAAI,MAAM;AACZ,gBAAY;AAAA,MACV,uCAAuC,WAAW,IAAI,IAAI,CAAC;AAAA,IAC7D;AAAA,EACF;AACA,MAAI,IAAI,eAAe,MAAM;AAC3B,gBAAY,KAAK,oEAAoE;AAAA,EACvF;AACA,QAAM,UAAoB;AAAA,IACxB;AAAA,IACA;AAAA,IACA,8DAAyD,IAAI,QAAQ,uBAAuB,IAAI,QAAQ;AAAA,IACxG;AAAA,IACA;AAAA,EACF;AACA,QAAM,eAAyB;AAAA,IAC7B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,iBAA2B;AAAA,IAC/B,kBAAkB,+BAA+B;AAAA,IACjD;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,YAAsB;AAAA,IAC1B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,SAAO,EAAE,aAAa,SAAS,cAAc,gBAAgB,UAAU;AACzE;AAEA,SAAS,WAAW,MAA6C;AAC/D,SAAO,IAAI,IAAI,KAAK,GAAG,CAAC,KAAK,IAAI,KAAK,IAAI,CAAC;AAC7C;AAEA,SAAS,kBACP,KACA,KAQsD;AACtD,MAAI,IAAI,cAAc,IAAI,gBAAgB,IAAI,YAAY;AACxD,WAAO,EAAE,UAAU,QAAQ,QAAQ,uBAAuB;AAAA,EAC5D;AACA,MAAI,CAAC,IAAI,YAAY;AACnB,WAAO;AAAA,MACL,UAAU,IAAI,OAAO,aAAa,SAAS;AAAA,MAC3C,QACE;AAAA,IACJ;AAAA,EACF;AAGA,MAAI,IAAI,OAAO,QAAQ,IAAI,MAAM,SAAS,WAAW;AACnD,WAAO,EAAE,UAAU,UAAU,QAAQ,0BAA0B,IAAI,MAAM,IAAI,IAAI;AAAA,EACnF;AACA,MAAI,CAAC,IAAI,aAAa,IAAI,UAAU,IAAI,iCAAiC;AACvE,WAAO;AAAA,MACL,UAAU;AAAA,MACV,QAAQ,QAAQ,IAAI,WAAW,KAAK,CAAC,6CAA6C,+BAA+B;AAAA,IACnH;AAAA,EACF;AACA,QAAM,KAAK,IAAI,UAAU;AACzB,MAAI,IAAI,QAAQ,GAAG,OAAO,IAAI,KAAK,OAAO,GAAG,QAAQ,IAAI,KAAK,MAAM;AAClE,WAAO;AAAA,MACL,UAAU;AAAA,MACV,QAAQ,oBAAoB,IAAI,GAAG,GAAG,CAAC,KAAK,IAAI,GAAG,IAAI,CAAC,0BAA0B,WAAW,IAAI,IAAI,CAAC;AAAA,IACxG;AAAA,EACF;AACA,QAAM,cAAc,OAAO,SAAS,IAAI,MAAM,KAAK,IAAI,UAAU,IAAI;AACrE,QAAM,eAAe,GAAG,MAAM;AAC9B,QAAM,eAAe,GAAG,OAAO;AAC/B,MAAI,cAAc;AAChB,WAAO;AAAA,MACL,UAAU;AAAA,MACV,QAAQ,oBAAoB,IAAI,GAAG,GAAG,CAAC,KAAK,IAAI,GAAG,IAAI,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,MAAI,IAAI,UAAU,IAAI,IAAI,UAAU;AAClC,WAAO;AAAA,MACL,UAAU;AAAA,MACV,QAAQ,QAAQ,IAAI,UAAU,CAAC,gEAAgE,IAAI,IAAI,UAAU,GAAG,CAAC,6BAAwB,IAAI,QAAQ;AAAA,IAC3J;AAAA,EACF;AACA,MAAI,eAAe,cAAc;AAC/B,WAAO;AAAA,MACL,UAAU;AAAA,MACV,QAAQ,iBAAiB,IAAI,IAAI,MAAM,CAAC,WAAM,IAAI,GAAG,yBAAyB,IAAI,GAAG,GAAG,CAAC,KAAK,IAAI,GAAG,IAAI,CAAC,iCAA4B,IAAI,IAAI,UAAU,iBAAiB,CAAC;AAAA,IAC5K;AAAA,EACF;AACA,SAAO;AAAA,IACL,UAAU;AAAA,IACV,QAAQ,gBAAW,IAAI,IAAI,UAAU,iBAAiB,CAAC,YAAY,IAAI,GAAG,GAAG,CAAC,KAAK,IAAI,GAAG,IAAI,CAAC,8CAA8C,IAAI,GAAG;AAAA,EACtJ;AACF;AAEA,SAAS,oBACP,YACA,KAO8B;AAC9B,QAAM,gBAAgB,WAAW,OAAO,CAAC,MAAM,EAAE,gBAAgB,IAAI,UAAU;AAC/E,QAAM,cAAc,cAAc,KAAK,CAAC,MAAM,EAAE,aAAa,SAAS;AACtE,QAAM,iBAAiB,cAAc,KAAK,CAAC,MAAM,EAAE,aAAa,YAAY;AAC5E,QAAM,SAAS,eAAe,kBAAkB,cAAc,CAAC,KAAK;AACpE,QAAM,WAAmC,cACrC,YACA,cAAc,KAAK,CAAC,MAAM,EAAE,aAAa,iBAAiB,IACxD,oBACA,iBACE,eACA,cAAc,KAAK,CAAC,MAAM,EAAE,aAAa,MAAM,IAC7C,SACA;AAEV,QAAM,YAAsB,CAAC;AAC7B,QAAM,QAAkB,CAAC;AACzB,QAAM,cAAwB,CAAC;AAE/B,MAAI,QAAQ;AACV,cAAU,KAAK,GAAG,OAAO,WAAW,KAAK,OAAO,cAAc,EAAE;AAChE,QAAI,OAAO,QAAQ;AACjB,YAAM,cACJ,OAAO,sBAAsB,OAAO,kBAAa,IAAI,OAAO,iBAAiB,CAAC,KAAK;AACrF,gBAAU;AAAA,QACR,2BAA2B,IAAI,OAAO,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,OAAO,IAAI,CAAC,IAAI,WAAW;AAAA,MAC9F;AAAA,IACF;AACA,QAAI,OAAO,QAAQ,QAAQ,OAAO,SAAS,OAAO,GAAG,GAAG;AACtD,gBAAU,KAAK,2BAA2B,OAAO,OAAO,KAAK,IAAI,OAAO,GAAG,CAAC,eAAe;AAAA,IAC7F;AAAA,EACF;AACA,MAAI,CAAC,IAAI,YAAY;AACnB,UAAM,KAAK,mEAAmE;AAC9E,gBAAY,KAAK,iEAAiE;AAAA,EACpF;AACA,MAAI,CAAC,IAAI,qBAAqB;AAC5B,UAAM;AAAA,MACJ;AAAA,IACF;AACA,gBAAY;AAAA,MACV;AAAA,IACF;AAAA,EACF;AACA,MAAI,IAAI,SAAS,QAAQ,cAAc,SAAS,GAAG;AACjD,UAAM;AAAA,MACJ;AAAA,IACF;AACA,gBAAY;AAAA,MACV;AAAA,IACF;AAAA,EACF;AACA,QAAM,eAAe,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,iBAAiB;AACjF,MAAI,aAAa,SAAS,GAAG;AAC3B,UAAM,QAAQ,aAAa,OAAO,CAAC,GAAG,MAAO,EAAE,UAAU,EAAE,UAAU,IAAI,CAAE;AAC3E,UAAM;AAAA,MACJ,GAAG,aAAa,MAAM,mCAAmC,IAAI,QAAQ,wBAAwB,MAAM,WAAW,SAAS,MAAM,OAAO;AAAA,IACtI;AACA,gBAAY;AAAA,MACV,oBAAoB,IAAI,WAAW,MAAM,OAAO,kCAAkC,MAAM,WAAW;AAAA,IACrG;AAAA,EACF;AACA,QAAM,WAAW,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ;AACpE,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM;AAAA,MACJ,GAAG,SAAS,MAAM;AAAA,IACpB;AAAA,EACF;AACA,MAAI,IAAI,mBAAmB,IAAI,gBAAgB,SAAS,SAAS,GAAG;AAClE,UAAM,MAAM,IAAI,gBAAgB,SAAS,CAAC;AAC1C,UAAM,KAAK,wBAAwB,IAAI,YAAY,WAAW,IAAI,QAAQ,UAAU;AACpF,gBAAY,KAAK,8DAA8D;AAAA,EACjF;AACA,MAAI,aAAa,WAAW;AAC1B,gBAAY,KAAK,+DAA+D;AAAA,EAClF,WAAW,aAAa,QAAQ;AAC9B,gBAAY,KAAK,qEAAqE;AAAA,EACxF,WAAW,aAAa,cAAc;AACpC,gBAAY;AAAA,MACV;AAAA,IACF;AAAA,EACF,WAAW,aAAa,UAAU;AAChC,gBAAY;AAAA,MACV;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,aAAa,QAAQ,eAAe;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAAS,sBACP,YACA,gBACA,KAMU;AACV,QAAM,QAAkB,CAAC;AACzB,QAAM,gBAAgB,WAAW,OAAO,CAAC,MAAM,EAAE,gBAAgB,IAAI,UAAU;AAC/E,QAAM;AAAA,IACJ,aAAa,cAAc,MAAM,wBAAwB,IAAI,KAAK,SAAS,IAAI,aAAa,YAAY,IAAI,UAAU,KAAK,EAAE;AAAA,EAC/H;AACA,QAAM;AAAA,IACJ,mBAAmB,eAAe,QAAQ,GAAG,eAAe,cAAc,IAAI,eAAe,WAAW,KAAK,EAAE;AAAA,EACjH;AACA,QAAM,WAAW,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,SAAS,EAAE;AACvE,QAAM,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,MAAM,EAAE;AAChE,QAAM,aAAa,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,YAAY,EAAE;AAC5E,QAAM,WAAW,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ,EAAE;AACtE,QAAM,OAAO,cAAc,OAAO,CAAC,MAAM,EAAE,aAAa,iBAAiB,EAAE;AAC3E,QAAM;AAAA,IACJ,iBAAiB,QAAQ,aAAa,UAAU,gBAAgB,IAAI,UAAU,QAAQ,YAAY,IAAI;AAAA,EACxG;AACA,QAAM,WAAW,cAAc,OAAO,CAAC,MAAM,EAAE,gBAAgB,EAAE,IAAI,CAAC,MAAM,EAAE,WAAW;AACzF,MAAI,SAAS,SAAS,EAAG,OAAM,KAAK,+BAA+B,SAAS,KAAK,IAAI,CAAC,GAAG;AACzF,MAAI,IAAI,iBAAiB;AACvB,UAAM;AAAA,MACJ,4BAA4B,IAAI,gBAAgB,aAAa,IAAI,IAAI,gBAAgB,SAAS,uBAAuB,IAAI,gBAAgB,SAAS,MAAM;AAAA,IAC1J;AAAA,EACF;AACA,QAAM;AAAA,IACJ,IAAI,sBACA,2BAA2B,IAAI,oBAAoB,MAAM,GAAG,EAAE,CAAC,WAC/D;AAAA,EACN;AACA,SAAO;AACT;AAEA,SAAS,uBAAuB,QAgBrB;AACT,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,OAAO,KAAK,EAAE;AAC9B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,kBAAkB,OAAO,WAAW,EAAE;AACjD,QAAM,KAAK,sBAAsB,OAAO,KAAK,EAAE;AAC/C,QAAM,KAAK,mBAAmB,OAAO,cAAc,gBAAgB,EAAE;AACrE,QAAM,KAAK,aAAa,OAAO,OAAO,WAAW,OAAO,IAAI,IAAI,gBAAgB,EAAE;AAClF,QAAM,KAAK,0BAA0B,OAAO,cAAc,IAAI;AAC9D,QAAM;AAAA,IACJ,wBAAwB,OAAO,sBAAsB,KAAK,OAAO,mBAAmB,OAAO,MAAM;AAAA,EACnG;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,sBAAsB;AACjC,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,iBAAkB,OAAM,KAAK,KAAK,IAAI,EAAE;AAClE,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,mBAAmB;AAC9B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iBAAiB,OAAO,eAAe,QAAQ,EAAE;AAC5D,QAAM,KAAK,kBAAkB,OAAO,eAAe,eAAe,KAAK,EAAE;AACzE,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,eAAe;AAC1B,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,eAAe,UAAW,OAAM,KAAK,KAAK,IAAI,EAAE;AAC1E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,WAAW;AACtB,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,eAAe,MAAM,SAC3C,OAAO,eAAe,QACtB,CAAC,0CAA0C,GAAG;AAChD,UAAM,KAAK,KAAK,IAAI,EAAE;AAAA,EACxB;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,kBAAkB;AAC7B,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,eAAe,YAAa,OAAM,KAAK,KAAK,IAAI,EAAE;AAC5E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,6BAA6B;AACxC,QAAM,KAAK,EAAE;AACb,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM,KAAK,0DAA0D;AACrE,aAAW,KAAK,OAAO,YAAY;AACjC,UAAM,QAAQ,EAAE,0BAA0B,OAAO,MAAM,OAAO,EAAE,qBAAqB;AACrF,UAAM,OAAO,EAAE,sBAAsB,OAAO,MAAM,EAAE,kBAAkB,QAAQ,CAAC;AAC/E,UAAM,IAAI,OAAO,SAAS,EAAE,MAAM,IAAI,EAAE,OAAO,QAAQ,CAAC,IAAI;AAC5D,UAAM,IAAI,OAAO,SAAS,EAAE,OAAO,IAAI,EAAE,QAAQ,QAAQ,CAAC,IAAI;AAC9D,UAAM,OAAO,EAAE,SAAS,IAAI,IAAI,EAAE,OAAO,GAAG,CAAC,KAAK,IAAI,EAAE,OAAO,IAAI,CAAC,MAAM;AAC1E,UAAM,MAAM,EAAE,QAAQ,QAAQ,CAAC,OAAO,SAAS,EAAE,GAAG,IAAI,MAAM,IAAI,EAAE,GAAG;AACvE,UAAM;AAAA,MACJ,KAAK,EAAE,WAAW,MAAM,EAAE,QAAQ,MAAM,IAAI,EAAE,IAAI,CAAC,MAAM,KAAK,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,MAAM,IAAI,MAAM,GAAG,MAAM,EAAE,mBAAmB,QAAQ,IAAI,MAAM,EAAE,QAAQ,GAAG;AAAA,IACpL;AAAA,EACF;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,wBAAwB;AACnC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,OAAO,QAAQ,QAAQ;AAClC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,gBAAgB;AAC3B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iBAAiB;AAC5B,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,YAAY,YAAa,OAAM,KAAK,KAAK,IAAI,EAAE;AACzE,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,aAAa;AACxB,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,YAAY,QAAS,OAAM,KAAK,KAAK,IAAI,EAAE;AACrE,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,6BAA6B;AACxC,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,YAAY,aAAc,OAAM,KAAK,KAAK,IAAI,EAAE;AAC1E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,uBAAuB;AAClC,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,YAAY,eAAgB,OAAM,KAAK,KAAK,IAAI,EAAE;AAC5E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,eAAe;AAC1B,QAAM,KAAK,EAAE;AACb,aAAW,QAAQ,OAAO,YAAY,UAAW,OAAM,KAAK,KAAK,IAAI,EAAE;AACvE,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,gBAAgB;AAC3B,QAAM,KAAK,EAAE;AACb,QAAM;AAAA,IACJ;AAAA,EACF;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,SAAS;AACpB,QAAM,KAAK,KAAK,UAAU,EAAE,QAAQ,OAAO,QAAQ,OAAO,OAAO,MAAM,GAAG,MAAM,CAAC,CAAC;AAClF,QAAM,KAAK,KAAK;AAChB,MAAI,OAAO,iBAAiB;AAC1B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,qBAAqB;AAChC,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,uDAAuD;AAClE,UAAM,KAAK,yBAAyB;AACpC,eAAW,KAAK,OAAO,gBAAgB,SAAS,MAAM,GAAG,EAAE,GAAG;AAC5D,YAAM;AAAA,QACJ,KAAK,EAAE,YAAY,MAAM,EAAE,QAAQ,MAAM,EAAE,YAAY,MAAM,MAAM,EAAE,YAAY,GAAG,MAAM,YAAY,EAAE,gBAAgB,EAAE,YAAY,CAAC;AAAA,MACzI;AAAA,IACF;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,SAAS,mBAAmB,UAAkB,OAAuB;AACnE,QAAM,OAAO,eAAe,QAAQ;AACpC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,WAAW,KAAK,CAAC;AAAA,IAC3B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,eAAe,UAA0B;AAChD,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,QAAM,OAAiB,CAAC;AACxB,MAAI,SAAS;AACb,MAAI,SAAS;AACb,MAAI,OAAiB,CAAC;AACtB,MAAI,QAAkB,CAAC;AAEvB,QAAM,YAAY,MAAM;AACtB,QAAI,QAAQ;AACV,WAAK,KAAK,OAAO;AACjB,eAAS;AAAA,IACX;AAAA,EACF;AACA,QAAM,aAAa,MAAM;AACvB,QAAI,MAAM,WAAW,EAAG;AACxB,SAAK,KAAK,oBAAoB,KAAK,CAAC;AACpC,YAAQ,CAAC;AAAA,EACX;AAEA,aAAW,QAAQ,OAAO;AACxB,QAAI,KAAK,WAAW,KAAK,GAAG;AAC1B,UAAI,QAAQ;AACV,aAAK,KAAK,cAAc,WAAW,KAAK,KAAK,IAAI,CAAC,CAAC,eAAe;AAClE,eAAO,CAAC;AACR,iBAAS;AAAA,MACX,OAAO;AACL,kBAAU;AACV,mBAAW;AACX,iBAAS;AAAA,MACX;AACA;AAAA,IACF;AACA,QAAI,QAAQ;AACV,WAAK,KAAK,IAAI;AACd;AAAA,IACF;AACA,QAAI,KAAK,WAAW,GAAG,GAAG;AACxB,gBAAU;AACV,YAAM,KAAK,IAAI;AACf;AAAA,IACF;AACA,eAAW;AACX,QAAI,KAAK,WAAW,IAAI,GAAG;AACzB,UAAI,CAAC,QAAQ;AACX,aAAK,KAAK,MAAM;AAChB,iBAAS;AAAA,MACX;AACA,WAAK,KAAK,OAAO,eAAe,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO;AACrD;AAAA,IACF;AACA,cAAU;AACV,QAAI,KAAK,WAAW,IAAI,EAAG,MAAK,KAAK,OAAO,eAAe,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO;AAAA,aACvE,KAAK,WAAW,KAAK,EAAG,MAAK,KAAK,OAAO,eAAe,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO;AAAA,aAC7E,KAAK,WAAW,MAAM,EAAG,MAAK,KAAK,OAAO,eAAe,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO;AAAA,aAC9E,KAAK,KAAK,MAAM,GAAI,MAAK,KAAK,EAAE;AAAA,QACpC,MAAK,KAAK,MAAM,eAAe,IAAI,CAAC,MAAM;AAAA,EACjD;AACA,YAAU;AACV,aAAW;AACX,SAAO,KAAK,KAAK,IAAI;AACvB;AAEA,SAAS,oBAAoB,OAAyB;AACpD,QAAM,OAAO,MACV,OAAO,CAAC,SAAS,CAAC,iBAAiB,KAAK,IAAI,CAAC,EAC7C;AAAA,IAAI,CAAC,SACJ,KACG,MAAM,GAAG,EAAE,EACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,eAAe,KAAK,KAAK,CAAC,CAAC;AAAA,EAC9C;AACF,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,QAAM,CAAC,MAAM,GAAG,IAAI,IAAI;AACxB,QAAM,KAAK,KAAM,IAAI,CAAC,SAAS,OAAO,IAAI,OAAO,EAAE,KAAK,EAAE;AAC1D,QAAM,MAAM,KACT,IAAI,CAAC,QAAQ,OAAO,IAAI,IAAI,CAAC,SAAS,OAAO,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC,OAAO,EACzE,KAAK,IAAI;AACZ,SAAO,qBAAqB,EAAE,uBAAuB,GAAG;AAC1D;AAEA,SAAS,eAAe,GAAmB;AACzC,SAAO,WAAW,CAAC,EAAE,QAAQ,oBAAoB,qBAAqB;AACxE;AAEA,SAAS,WAAW,GAAmB;AACrC,SAAO,EACJ,QAAQ,MAAM,OAAO,EACrB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,QAAQ;AAC3B;AAEA,SAAS,YAAY,GAAmB;AACtC,SAAO,EAAE,QAAQ,OAAO,KAAK;AAC/B;AAEA,SAAS,eAAe,UAA0C;AAChE,MAAI,aAAa,UAAW,QAAO;AACnC,MAAI,aAAa,aAAc,QAAO;AACtC,MAAI,aAAa,OAAQ,QAAO;AAChC,MAAI,aAAa,kBAAmB,QAAO;AAC3C,SAAO;AACT;AAEA,SAAS,OAAO,GAAmB;AACjC,SAAO,GAAG,KAAK,IAAI,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC;AACtC;AAIA,SAAS,IAAI,IAAsB;AACjC,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,SAAO,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG;AAC5C;AAEA,SAAS,eAAe,QAA0B;AAChD,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,QAAM,MAAM,KAAK,MAAM,OAAO,SAAS,CAAC;AACxC,SAAO,OAAO,SAAS,MAAM,KAAK,OAAO,MAAM,CAAC,IAAK,OAAO,GAAG,KAAM,IAAI,OAAO,GAAG;AACrF;AAEA,SAAS,IAAI,GAAmB;AAC9B,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO,CAAC;AACxC,SAAO,EAAE,QAAQ,CAAC;AACpB;","names":[]}
@@ -0,0 +1,76 @@
1
+ import {
2
+ isJudgeSpan,
3
+ isLlmSpan,
4
+ isToolSpan
5
+ } from "./chunk-5BKGXME7.js";
6
+
7
+ // src/trace/query.ts
8
+ async function runsForScenario(store, scenarioId) {
9
+ return store.listRuns({ scenarioId });
10
+ }
11
+ async function llmSpans(store, runId) {
12
+ const spans = await store.spans({ runId, kind: "llm" });
13
+ return spans.filter(isLlmSpan);
14
+ }
15
+ async function toolSpans(store, runId, toolName) {
16
+ const spans = await store.spans({ runId, kind: "tool", toolName });
17
+ return spans.filter(isToolSpan);
18
+ }
19
+ async function judgeSpans(store, runId) {
20
+ const spans = await store.spans({ runId, kind: "judge" });
21
+ return spans.filter(isJudgeSpan);
22
+ }
23
+ function groupBy(items, key) {
24
+ const map = /* @__PURE__ */ new Map();
25
+ for (const item of items) {
26
+ const k = key(item);
27
+ let bucket = map.get(k);
28
+ if (!bucket) {
29
+ bucket = [];
30
+ map.set(k, bucket);
31
+ }
32
+ bucket.push(item);
33
+ }
34
+ return map;
35
+ }
36
+ function argHash(args) {
37
+ return stableStringify(args);
38
+ }
39
+ function stableStringify(value) {
40
+ if (value === null || typeof value !== "object") return JSON.stringify(value);
41
+ if (Array.isArray(value)) return `[${value.map(stableStringify).join(",")}]`;
42
+ const keys = Object.keys(value).sort();
43
+ const parts = keys.map(
44
+ (k) => `${JSON.stringify(k)}:${stableStringify(value[k])}`
45
+ );
46
+ return `{${parts.join(",")}}`;
47
+ }
48
+ function aggregateLlm(spans) {
49
+ return spans.reduce(
50
+ (acc, s) => ({
51
+ inputTokens: acc.inputTokens + (s.inputTokens ?? 0),
52
+ outputTokens: acc.outputTokens + (s.outputTokens ?? 0),
53
+ cachedTokens: acc.cachedTokens + (s.cachedTokens ?? 0),
54
+ costUsd: acc.costUsd + (s.costUsd ?? 0)
55
+ }),
56
+ { inputTokens: 0, outputTokens: 0, cachedTokens: 0, costUsd: 0 }
57
+ );
58
+ }
59
+ function runFailureClass(run) {
60
+ if (run.outcome?.failureClass) return run.outcome.failureClass;
61
+ if (run.status === "completed" && run.outcome?.pass !== false) return "success";
62
+ if (run.status === "aborted") return "budget_exceeded";
63
+ return "unknown";
64
+ }
65
+
66
+ export {
67
+ runsForScenario,
68
+ llmSpans,
69
+ toolSpans,
70
+ judgeSpans,
71
+ groupBy,
72
+ argHash,
73
+ aggregateLlm,
74
+ runFailureClass
75
+ };
76
+ //# sourceMappingURL=chunk-47X6LRCE.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/query.ts"],"sourcesContent":["/**\n * Typed query helpers over TraceStore.\n *\n * Not a full SQL engine — a minimal, composable set of operators that\n * cover the canned-pipeline use cases. For ad-hoc analytics, persist to\n * NDJSON and point DuckDB at it; the schema is stable so external SQL\n * tooling works out of the box.\n */\n\nimport type { FailureClass, JudgeSpan, LlmSpan, Run, ToolSpan } from './schema'\nimport { isJudgeSpan, isLlmSpan, isToolSpan } from './schema'\nimport type { TraceStore } from './store'\n\nexport async function runsForScenario(store: TraceStore, scenarioId: string): Promise<Run[]> {\n return store.listRuns({ scenarioId })\n}\n\nexport async function llmSpans(store: TraceStore, runId?: string): Promise<LlmSpan[]> {\n const spans = await store.spans({ runId, kind: 'llm' })\n return spans.filter(isLlmSpan)\n}\n\nexport async function toolSpans(\n store: TraceStore,\n runId?: string,\n toolName?: string,\n): Promise<ToolSpan[]> {\n const spans = await store.spans({ runId, kind: 'tool', toolName })\n return spans.filter(isToolSpan)\n}\n\nexport async function judgeSpans(store: TraceStore, runId?: string): Promise<JudgeSpan[]> {\n const spans = await store.spans({ runId, kind: 'judge' })\n return spans.filter(isJudgeSpan)\n}\n\n/** Group spans by any key selector. */\nexport function groupBy<T, K extends string | number>(items: T[], key: (t: T) => K): Map<K, T[]> {\n const map = new Map<K, T[]>()\n for (const item of items) {\n const k = key(item)\n let bucket = map.get(k)\n if (!bucket) {\n bucket = []\n map.set(k, bucket)\n }\n bucket.push(item)\n }\n return map\n}\n\n/** Hash tool arguments to an orderless-key-stable string for de-duplication. */\nexport function argHash(args: unknown): string {\n return stableStringify(args)\n}\n\nfunction stableStringify(value: unknown): string {\n if (value === null || typeof value !== 'object') return JSON.stringify(value)\n if (Array.isArray(value)) return `[${value.map(stableStringify).join(',')}]`\n const keys = Object.keys(value as Record<string, unknown>).sort()\n const parts = keys.map(\n (k) => `${JSON.stringify(k)}:${stableStringify((value as Record<string, unknown>)[k])}`,\n )\n return `{${parts.join(',')}}`\n}\n\n/** Sum an LLM-span array into aggregate token + cost. */\nexport function aggregateLlm(spans: LlmSpan[]): {\n inputTokens: number\n outputTokens: number\n cachedTokens: number\n costUsd: number\n} {\n return spans.reduce(\n (acc, s) => ({\n inputTokens: acc.inputTokens + (s.inputTokens ?? 0),\n outputTokens: acc.outputTokens + (s.outputTokens ?? 0),\n cachedTokens: acc.cachedTokens + (s.cachedTokens ?? 0),\n costUsd: acc.costUsd + (s.costUsd ?? 0),\n }),\n { inputTokens: 0, outputTokens: 0, cachedTokens: 0, costUsd: 0 },\n )\n}\n\n/** Pick the outcome's failure class when present, else derive 'success' from run status. */\nexport function runFailureClass(run: Run): FailureClass {\n if (run.outcome?.failureClass) return run.outcome.failureClass\n if (run.status === 'completed' && run.outcome?.pass !== false) return 'success'\n if (run.status === 'aborted') return 'budget_exceeded'\n return 'unknown'\n}\n"],"mappings":";;;;;;;AAaA,eAAsB,gBAAgB,OAAmB,YAAoC;AAC3F,SAAO,MAAM,SAAS,EAAE,WAAW,CAAC;AACtC;AAEA,eAAsB,SAAS,OAAmB,OAAoC;AACpF,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,MAAM,MAAM,CAAC;AACtD,SAAO,MAAM,OAAO,SAAS;AAC/B;AAEA,eAAsB,UACpB,OACA,OACA,UACqB;AACrB,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,MAAM,QAAQ,SAAS,CAAC;AACjE,SAAO,MAAM,OAAO,UAAU;AAChC;AAEA,eAAsB,WAAW,OAAmB,OAAsC;AACxF,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,MAAM,QAAQ,CAAC;AACxD,SAAO,MAAM,OAAO,WAAW;AACjC;AAGO,SAAS,QAAsC,OAAY,KAA+B;AAC/F,QAAM,MAAM,oBAAI,IAAY;AAC5B,aAAW,QAAQ,OAAO;AACxB,UAAM,IAAI,IAAI,IAAI;AAClB,QAAI,SAAS,IAAI,IAAI,CAAC;AACtB,QAAI,CAAC,QAAQ;AACX,eAAS,CAAC;AACV,UAAI,IAAI,GAAG,MAAM;AAAA,IACnB;AACA,WAAO,KAAK,IAAI;AAAA,EAClB;AACA,SAAO;AACT;AAGO,SAAS,QAAQ,MAAuB;AAC7C,SAAO,gBAAgB,IAAI;AAC7B;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,UAAU,QAAQ,OAAO,UAAU,SAAU,QAAO,KAAK,UAAU,KAAK;AAC5E,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,IAAI,MAAM,IAAI,eAAe,EAAE,KAAK,GAAG,CAAC;AACzE,QAAM,OAAO,OAAO,KAAK,KAAgC,EAAE,KAAK;AAChE,QAAM,QAAQ,KAAK;AAAA,IACjB,CAAC,MAAM,GAAG,KAAK,UAAU,CAAC,CAAC,IAAI,gBAAiB,MAAkC,CAAC,CAAC,CAAC;AAAA,EACvF;AACA,SAAO,IAAI,MAAM,KAAK,GAAG,CAAC;AAC5B;AAGO,SAAS,aAAa,OAK3B;AACA,SAAO,MAAM;AAAA,IACX,CAAC,KAAK,OAAO;AAAA,MACX,aAAa,IAAI,eAAe,EAAE,eAAe;AAAA,MACjD,cAAc,IAAI,gBAAgB,EAAE,gBAAgB;AAAA,MACpD,cAAc,IAAI,gBAAgB,EAAE,gBAAgB;AAAA,MACpD,SAAS,IAAI,WAAW,EAAE,WAAW;AAAA,IACvC;AAAA,IACA,EAAE,aAAa,GAAG,cAAc,GAAG,cAAc,GAAG,SAAS,EAAE;AAAA,EACjE;AACF;AAGO,SAAS,gBAAgB,KAAwB;AACtD,MAAI,IAAI,SAAS,aAAc,QAAO,IAAI,QAAQ;AAClD,MAAI,IAAI,WAAW,eAAe,IAAI,SAAS,SAAS,MAAO,QAAO;AACtE,MAAI,IAAI,WAAW,UAAW,QAAO;AACrC,SAAO;AACT;","names":[]}
@@ -50,4 +50,4 @@ export {
50
50
  verifyManifest,
51
51
  evaluateHypothesis
52
52
  };
53
- //# sourceMappingURL=chunk-6M774GY6.js.map
53
+ //# sourceMappingURL=chunk-4F5DQN55.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/pre-registration.ts"],"sourcesContent":["/**\n * Pre-registered hypotheses — declare what you're testing BEFORE the\n * run, check it AFTER. Prevents p-hacking, optional stopping, and the\n * \"we ran until it looked good\" failure mode.\n *\n * Manifest is a plain JSON-friendly object. Sign it with a content hash\n * + timestamp; the registered record becomes immutable. Post-run,\n * evaluate the manifest against observed results — the library refuses\n * to let you re-interpret a different metric as the declared one.\n */\n\nexport interface HypothesisManifest {\n id: string\n /** Human prose — goes into the audit trail. */\n hypothesis: string\n /** Metric the hypothesis claims to move. */\n metric: string\n /** 'increase' = candidate should score higher than baseline; 'decrease' = lower. */\n direction: 'increase' | 'decrease'\n /** Minimum effect size to count (same units as the metric). */\n minEffect: number\n /** Alpha threshold. */\n alpha: number\n /** Target statistical power at which sample size was pre-computed. */\n power: number\n /** Declared N per arm before running. */\n preRegisteredN: number\n /** ISO8601 timestamp the manifest was registered. */\n registeredAt: string\n /** Optional identifiers to tie into the trace corpus. */\n baselineLabel?: string\n candidateLabel?: string\n}\n\n/**\n * Identifier for the hashing scheme used to produce `contentHash`.\n *\n * `'sha256-content'` — sha256 hex over the canonicalized manifest with\n * the `contentHash` and `algo` fields stripped. This is what\n * `signManifest` produces today.\n *\n * Held as a string union so future schemes can be added without\n * breaking parsers; legacy SignedManifest values written before this\n * field existed will deserialize cleanly because the field is optional.\n */\nexport type SignedManifestAlgo = 'sha256-content'\n\nexport interface SignedManifest extends HypothesisManifest {\n /** sha256 hex of canonicalized manifest (everything except contentHash and algo). */\n contentHash: string\n /**\n * Algorithm string describing how `contentHash` was produced.\n *\n * Optional on the type so legacy serialized manifests (pre-`algo`)\n * still parse, but ALWAYS populated by {@link signManifest}.\n * Consumers that want to enforce a known algorithm should reject\n * manifests where this field is missing or unrecognized.\n */\n algo?: SignedManifestAlgo\n}\n\nexport interface HypothesisResult {\n manifest: SignedManifest\n observedN: number\n observedEffect: number\n observedPValue: number\n /** True iff the observed effect hits the pre-declared direction with\n * magnitude ≥ minEffect AND p < alpha. */\n confirmed: boolean\n /** Enumerated reasons the hypothesis was rejected (each a machine-tag). */\n rejectionReasons: Array<\n 'wrong_direction' | 'effect_too_small' | 'not_significant' | 'undersampled'\n >\n notes?: string\n}\n\n/**\n * Deterministic JSON canonicalization — sort object keys recursively.\n *\n * Two semantically-equal objects produce byte-identical canonicalized output;\n * this is what makes a content-hash stable across encoders, key insertion\n * orders, and runtime versions. Exported for any consumer that needs the same\n * canonicalization guarantee outside the manifest-signing path (e.g., signing\n * an artifact bundle, hashing a dataset version, etc.).\n */\nexport function canonicalize(v: unknown): unknown {\n if (v === null || typeof v !== 'object') return v\n if (Array.isArray(v)) return v.map(canonicalize)\n const keys = Object.keys(v as Record<string, unknown>).sort()\n const out: Record<string, unknown> = {}\n for (const k of keys) out[k] = canonicalize((v as Record<string, unknown>)[k])\n return out\n}\n\n/**\n * SHA-256 hex (full 64 chars) over the canonicalized JSON encoding of `obj`.\n *\n * The same primitive `signManifest` and `verifyManifest` are built on, exposed\n * directly so consumers signing arbitrary structured content (artifact bundles,\n * production packets, dataset manifests, etc.) don't have to re-derive\n * canonicalize+sha256 from scratch.\n *\n * Stable across:\n * - object key insertion order (canonicalization sorts keys recursively)\n * - encoder choice (UTF-8 via TextEncoder, fixed)\n * - runtime (uses the Web Crypto subtle digest, present in Node ≥18 and browsers)\n *\n * Naming note: `hashJson` rather than `hashContent` because `hashContent` is\n * already taken in `prompt-registry.ts` for the truncated 12-char prompt-id\n * helper, which has different semantics (string input, short return). Both\n * coexist; `hashJson` is the right name when you mean \"canonicalize then hash.\"\n *\n * @example\n * const hash = await hashJson({ id: '1', kind: 'spec' })\n * // 'a3f1...' (64 hex chars)\n */\nexport async function hashJson<T>(obj: T): Promise<string> {\n const canonical = canonicalize(obj)\n const bytes = new TextEncoder().encode(JSON.stringify(canonical))\n const digest = await globalThis.crypto.subtle.digest('SHA-256', bytes)\n return Array.from(new Uint8Array(digest))\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n\n/**\n * Sign a manifest with a SHA-256 content hash.\n *\n * The hash covers the canonicalized manifest with the `contentHash`\n * and `algo` fields stripped; this lets verifiers re-sign the rest and\n * compare. Returned manifest always carries `algo: 'sha256-content'`\n * so downstream consumers can identify the scheme; legacy serialized\n * manifests without `algo` still verify because it is stripped before\n * hashing on both sides.\n */\nexport async function signManifest(m: HypothesisManifest): Promise<SignedManifest> {\n const hash = await hashJson(m)\n return { ...m, contentHash: hash, algo: 'sha256-content' }\n}\n\n/**\n * Verify that a signed manifest has not been tampered with.\n *\n * Strips `contentHash` and `algo` before re-signing so legacy manifests\n * (written before `algo` was emitted) verify identically to current\n * ones.\n */\nexport async function verifyManifest(m: SignedManifest): Promise<boolean> {\n const { contentHash, algo: _algo, ...rest } = m\n void _algo\n const resigned = await signManifest(rest)\n return resigned.contentHash === contentHash\n}\n\n/**\n * Evaluate a pre-registered hypothesis against observed results.\n * Mechanical — no re-interpretation permitted.\n */\nexport async function evaluateHypothesis(\n manifest: SignedManifest,\n observed: { n: number; effect: number; pValue: number },\n): Promise<HypothesisResult> {\n if (!(await verifyManifest(manifest))) {\n throw new Error('evaluateHypothesis: manifest content hash mismatch (tampered)')\n }\n const reasons: HypothesisResult['rejectionReasons'] = []\n const directionOk = manifest.direction === 'increase' ? observed.effect > 0 : observed.effect < 0\n if (!directionOk) reasons.push('wrong_direction')\n if (Math.abs(observed.effect) < manifest.minEffect) reasons.push('effect_too_small')\n if (observed.pValue >= manifest.alpha) reasons.push('not_significant')\n if (observed.n < manifest.preRegisteredN) reasons.push('undersampled')\n return {\n manifest,\n observedN: observed.n,\n observedEffect: observed.effect,\n observedPValue: observed.pValue,\n confirmed: reasons.length === 0,\n rejectionReasons: reasons,\n }\n}\n"],"mappings":";AAqFO,SAAS,aAAa,GAAqB;AAChD,MAAI,MAAM,QAAQ,OAAO,MAAM,SAAU,QAAO;AAChD,MAAI,MAAM,QAAQ,CAAC,EAAG,QAAO,EAAE,IAAI,YAAY;AAC/C,QAAM,OAAO,OAAO,KAAK,CAA4B,EAAE,KAAK;AAC5D,QAAM,MAA+B,CAAC;AACtC,aAAW,KAAK,KAAM,KAAI,CAAC,IAAI,aAAc,EAA8B,CAAC,CAAC;AAC7E,SAAO;AACT;AAwBA,eAAsB,SAAY,KAAyB;AACzD,QAAM,YAAY,aAAa,GAAG;AAClC,QAAM,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK,UAAU,SAAS,CAAC;AAChE,QAAM,SAAS,MAAM,WAAW,OAAO,OAAO,OAAO,WAAW,KAAK;AACrE,SAAO,MAAM,KAAK,IAAI,WAAW,MAAM,CAAC,EACrC,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;AAYA,eAAsB,aAAa,GAAgD;AACjF,QAAM,OAAO,MAAM,SAAS,CAAC;AAC7B,SAAO,EAAE,GAAG,GAAG,aAAa,MAAM,MAAM,iBAAiB;AAC3D;AASA,eAAsB,eAAe,GAAqC;AACxE,QAAM,EAAE,aAAa,MAAM,OAAO,GAAG,KAAK,IAAI;AAC9C,OAAK;AACL,QAAM,WAAW,MAAM,aAAa,IAAI;AACxC,SAAO,SAAS,gBAAgB;AAClC;AAMA,eAAsB,mBACpB,UACA,UAC2B;AAC3B,MAAI,CAAE,MAAM,eAAe,QAAQ,GAAI;AACrC,UAAM,IAAI,MAAM,+DAA+D;AAAA,EACjF;AACA,QAAM,UAAgD,CAAC;AACvD,QAAM,cAAc,SAAS,cAAc,aAAa,SAAS,SAAS,IAAI,SAAS,SAAS;AAChG,MAAI,CAAC,YAAa,SAAQ,KAAK,iBAAiB;AAChD,MAAI,KAAK,IAAI,SAAS,MAAM,IAAI,SAAS,UAAW,SAAQ,KAAK,kBAAkB;AACnF,MAAI,SAAS,UAAU,SAAS,MAAO,SAAQ,KAAK,iBAAiB;AACrE,MAAI,SAAS,IAAI,SAAS,eAAgB,SAAQ,KAAK,cAAc;AACrE,SAAO;AAAA,IACL;AAAA,IACA,WAAW,SAAS;AAAA,IACpB,gBAAgB,SAAS;AAAA,IACzB,gBAAgB,SAAS;AAAA,IACzB,WAAW,QAAQ,WAAW;AAAA,IAC9B,kBAAkB;AAAA,EACpB;AACF;","names":[]}
@@ -1,16 +1,19 @@
1
1
  import {
2
2
  defaultProviderRedactor,
3
3
  providerFromBaseUrl
4
- } from "./chunk-SQQLHODJ.js";
4
+ } from "./chunk-PC4UYEBM.js";
5
+ import {
6
+ AgentEvalError,
7
+ CaptureIntegrityError
8
+ } from "./chunk-NG236HPC.js";
5
9
 
6
10
  // src/llm-client.ts
7
- var LlmCallError = class extends Error {
11
+ var LlmCallError = class extends AgentEvalError {
8
12
  constructor(message, status, body, model) {
9
- super(message);
13
+ super("judge", message);
10
14
  this.status = status;
11
15
  this.body = body;
12
16
  this.model = model;
13
- this.name = "LlmCallError";
14
17
  }
15
18
  status;
16
19
  body;
@@ -37,7 +40,7 @@ function parseRetryAfter(headers) {
37
40
  return null;
38
41
  }
39
42
  function backoffMs(attempt) {
40
- return Math.min(500 * Math.pow(2, attempt), 16e3);
43
+ return Math.min(500 * 2 ** attempt, 16e3);
41
44
  }
42
45
  function buildHeaders(opts) {
43
46
  const headers = {
@@ -77,7 +80,7 @@ function buildBody(req, forceJsonObject) {
77
80
  return body;
78
81
  }
79
82
  function usesMaxCompletionTokens(model) {
80
- return /^gpt-5(?:[.\-]|$)/i.test(model);
83
+ return /^gpt-5(?:[.-]|$)/i.test(model);
81
84
  }
82
85
  async function sleep(ms) {
83
86
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -94,7 +97,7 @@ function extractJsonPayload(raw) {
94
97
  return stripped;
95
98
  } catch {
96
99
  }
97
- const starts = [...stripped.matchAll(/[\[{]/g)].map((match) => match.index).filter((index) => index != null);
100
+ const starts = [...stripped.matchAll(/[[{]/g)].map((match) => match.index).filter((index) => index != null);
98
101
  for (const start of starts) {
99
102
  const candidate = extractBalancedJson(stripped, start);
100
103
  if (!candidate) continue;
@@ -357,14 +360,13 @@ ${content.slice(0, 800)}`
357
360
  );
358
361
  }
359
362
  }
360
- var LlmRouteAssertionError = class extends Error {
361
- constructor(message, code, baseUrl) {
363
+ var LlmRouteAssertionError = class extends CaptureIntegrityError {
364
+ constructor(message, reason, baseUrl) {
362
365
  super(message);
363
- this.code = code;
366
+ this.reason = reason;
364
367
  this.baseUrl = baseUrl;
365
- this.name = "LlmRouteAssertionError";
366
368
  }
367
- code;
369
+ reason;
368
370
  baseUrl;
369
371
  };
370
372
  function assertLlmRoute(opts, req = {}) {
@@ -463,4 +465,4 @@ export {
463
465
  probeLlm,
464
466
  LlmClient
465
467
  };
466
- //# sourceMappingURL=chunk-KAO3Q65R.js.map
468
+ //# sourceMappingURL=chunk-4S4BM3QQ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/llm-client.ts"],"sourcesContent":["/**\n * LLM client with graceful degrade.\n *\n * OpenAI-compatible `/v1/chat/completions` client with:\n * - Exponential-backoff retry on 429 + 5xx gateway errors (502/503/504).\n * - Retry on transient network errors (fetch failed, AbortError, ECONNRESET).\n * - Graceful json_schema → json_object degrade on 400 with schema-reject body.\n * - Fenced-JSON stripping (```json ... ```) for models that wrap structured output.\n * - Configurable base URL + api key / bearer, works with LiteLLM proxies, OpenAI\n * directly, cli-bridge subscriptions, and any router that speaks the spec.\n *\n * Usage:\n * const { value, result } = await callLlmJson<MyType>(\n * { model: 'gpt-4o', messages: [...], jsonSchema: { name: 'x', schema: {...} } },\n * { baseUrl: 'https://router.tangle.tools/v1', apiKey: process.env.KEY },\n * )\n *\n * This is THE llm-calling seam for agent-eval primitives that need structured\n * output (semantic concept judge, reviewer directives, critic scores). Primitives\n * that need free-form text use `callLlm` and parse output themselves.\n */\n\nimport { AgentEvalError, CaptureIntegrityError } from './errors'\nimport {\n defaultProviderRedactor,\n type ProviderRedactor,\n providerFromBaseUrl,\n type RawProviderEvent,\n type RawProviderSink,\n} from './trace/raw-provider-sink'\n\n// ─── Types ──────────────────────────────────────────────────────────────\n\nexport interface LlmMessage {\n role: 'system' | 'user' | 'assistant'\n /**\n * Either a plain text content string OR a multimodal content array\n * (text + image_url parts) for vision-capable models.\n */\n content:\n | string\n | Array<\n | { type: 'text'; text: string }\n | { type: 'image_url'; image_url: { url: string; detail?: 'auto' | 'low' | 'high' } }\n >\n}\n\nexport interface LlmCallRequest {\n model: string\n messages: LlmMessage[]\n /** Optional JSON-mode response format (response_format: json_object). */\n jsonMode?: boolean\n /** Optional structured output via JSON Schema. Falls back to json_object on 400. */\n jsonSchema?: { name: string; schema: Record<string, unknown> }\n temperature?: number\n maxTokens?: number\n /** Per-call timeout, default 60s. */\n timeoutMs?: number\n}\n\nexport interface LlmUsage {\n promptTokens: number\n completionTokens: number\n totalTokens: number\n /** Proxies populate this when prompt caching is on. */\n cachedPromptTokens?: number\n}\n\nexport interface LlmCallResult {\n /** The text content of the first choice. Empty string if none. */\n content: string\n usage: LlmUsage\n /**\n * Cost in USD. Pulled from proxy's `_response_cost` field when present;\n * `null` when neither the proxy nor the caller can derive it.\n */\n costUsd: number | null\n /** Model name actually used (echoed from response). */\n model: string\n /** Wall-clock duration of the HTTP call (last attempt, if retried). */\n durationMs: number\n /** Raw response body. */\n raw: Record<string, unknown>\n}\n\nexport class LlmCallError extends AgentEvalError {\n constructor(\n message: string,\n public readonly status: number,\n public readonly body: string,\n public readonly model: string,\n ) {\n super('judge', message)\n }\n}\n\nexport interface LlmClientOptions {\n /** Base URL (without trailing slash). Must end at the `/v1` prefix. */\n baseUrl?: string\n /** Bearer token — either `apiKey` or `bearer` populates `Authorization: Bearer ...`. */\n apiKey?: string\n bearer?: string\n /** Override for the `Authorization` header (e.g. `X-Auth: ...`). Takes precedence over apiKey/bearer. */\n authHeader?: { name: string; value: string }\n /** Default timeout in ms. Per-call can override. */\n defaultTimeoutMs?: number\n /** Max retry attempts on retriable errors. Default 3 (1 initial + 2 retries). */\n maxRetries?: number\n /** Fetch implementation — defaults to global `fetch`. Override for custom transport (e.g. tests). */\n fetch?: typeof fetch\n /**\n * Optional raw HTTP capture sink. When provided, every request, response,\n * and error (across all retry attempts) is recorded to the sink, with auth\n * headers and credential-shaped body fields redacted by default. This is\n * the layer-1 forensics primitive: structured `LlmSpan`s record intent,\n * raw events record what actually crossed the wire.\n */\n rawSink?: RawProviderSink\n /**\n * Logical provider id attached to raw events. When omitted, derived from\n * `baseUrl` via `providerFromBaseUrl`.\n */\n provider?: string\n /** Trace context attached to raw events; populated by emitter-aware callers. */\n traceContext?: { runId?: string; spanId?: string }\n /** Override the redaction strategy for this call. Defaults to `defaultProviderRedactor`. */\n redactor?: ProviderRedactor\n}\n\n// ─── Internals ──────────────────────────────────────────────────────────\n\nconst DEFAULT_BASE_URL = 'https://router.tangle.tools/v1'\nconst DEFAULT_TIMEOUT_MS = 60_000\nconst DEFAULT_MAX_RETRIES = 3\n\nconst RETRYABLE_STATUS = new Set([429, 502, 503, 504])\n\nfunction isRetryableError(err: unknown): boolean {\n if (err instanceof LlmCallError) return RETRYABLE_STATUS.has(err.status)\n if (err instanceof Error) {\n return (\n err.name === 'AbortError' ||\n err.name === 'TimeoutError' ||\n /fetch failed|ECONNRESET|ETIMEDOUT|EAI_AGAIN/i.test(err.message)\n )\n }\n return false\n}\n\nfunction parseRetryAfter(headers: Headers): number | null {\n const h = headers.get('retry-after')\n if (!h) return null\n const asNumber = Number(h)\n if (Number.isFinite(asNumber) && asNumber > 0) return asNumber * 1000\n const asDate = Date.parse(h)\n if (Number.isFinite(asDate)) return Math.max(0, asDate - Date.now())\n return null\n}\n\nfunction backoffMs(attempt: number): number {\n // 500ms, 1s, 2s, 4s, ...\n return Math.min(500 * 2 ** attempt, 16_000)\n}\n\nfunction buildHeaders(opts: LlmClientOptions): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n Accept: 'application/json',\n }\n if (opts.authHeader) {\n headers[opts.authHeader.name] = opts.authHeader.value\n } else if (opts.bearer || opts.apiKey) {\n headers.Authorization = `Bearer ${opts.bearer ?? opts.apiKey}`\n }\n return headers\n}\n\nfunction isSchemaRejection(status: number, body: string): boolean {\n if (status !== 400) return false\n const lower = body.toLowerCase()\n return (\n lower.includes('response_format') ||\n lower.includes('json_schema') ||\n lower.includes('is unavailable') ||\n lower.includes('not supported')\n )\n}\n\nfunction buildBody(req: LlmCallRequest, forceJsonObject: boolean): Record<string, unknown> {\n const body: Record<string, unknown> = {\n model: req.model,\n messages: req.messages,\n temperature: req.temperature ?? 0,\n }\n if (req.maxTokens != null) {\n if (usesMaxCompletionTokens(req.model)) body.max_completion_tokens = req.maxTokens\n else body.max_tokens = req.maxTokens\n }\n\n if (req.jsonSchema && !forceJsonObject) {\n body.response_format = {\n type: 'json_schema',\n json_schema: { name: req.jsonSchema.name, schema: req.jsonSchema.schema, strict: true },\n }\n } else if (req.jsonMode || req.jsonSchema) {\n body.response_format = { type: 'json_object' }\n }\n\n return body\n}\n\nfunction usesMaxCompletionTokens(model: string): boolean {\n return /^gpt-5(?:[.-]|$)/i.test(model)\n}\n\nasync function sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms))\n}\n\n// ─── Public API ─────────────────────────────────────────────────────────\n\n/**\n * Strip a ```json / ``` code fence if the model emitted one.\n * Idempotent for naked JSON. Some models (claude-code via router, certain\n * deepseek models) wrap output even under json_object.\n */\nexport function stripFencedJson(raw: string): string {\n const trimmed = raw.trim()\n const m = trimmed.match(/^```(?:json)?\\s*\\n?([\\s\\S]*?)\\n?```\\s*$/)\n return m ? m[1]!.trim() : trimmed\n}\n\nexport function extractJsonPayload(raw: string): string {\n const stripped = stripFencedJson(raw)\n try {\n JSON.parse(stripped)\n return stripped\n } catch {\n // Continue with balanced extraction below.\n }\n\n const starts = [...stripped.matchAll(/[[{]/g)]\n .map((match) => match.index)\n .filter((index) => index != null)\n for (const start of starts) {\n const candidate = extractBalancedJson(stripped, start)\n if (!candidate) continue\n try {\n JSON.parse(candidate)\n return candidate\n } catch {\n // Keep scanning; earlier braces may belong to prose.\n }\n }\n\n return stripped\n}\n\nfunction extractBalancedJson(input: string, start: number): string | null {\n const opener = input[start]\n const closer = opener === '{' ? '}' : opener === '[' ? ']' : null\n if (!closer) return null\n\n const stack: string[] = [closer]\n let isInString = false\n let isEscaped = false\n\n for (let i = start + 1; i < input.length; i++) {\n const char = input[i]!\n if (isEscaped) {\n isEscaped = false\n continue\n }\n if (char === '\\\\') {\n isEscaped = isInString\n continue\n }\n if (char === '\"') {\n isInString = !isInString\n continue\n }\n if (isInString) continue\n\n if (char === '{') stack.push('}')\n else if (char === '[') stack.push(']')\n else if (char === stack[stack.length - 1]) {\n stack.pop()\n if (stack.length === 0) return input.slice(start, i + 1)\n }\n }\n\n return null\n}\n\n/**\n * Low-level call. Returns raw content + usage + cost. Retries on transient\n * failures; does NOT degrade schema here — callers that want graceful\n * degrade use `callLlmJson`.\n */\nexport async function callLlm(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<LlmCallResult> {\n const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\\/+$/, '')\n const url = `${baseUrl}/chat/completions`\n const endpoint = '/chat/completions'\n const timeoutMs = req.timeoutMs ?? opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS\n const maxRetries = opts.maxRetries ?? DEFAULT_MAX_RETRIES\n const fetchFn = opts.fetch ?? globalThis.fetch\n const headers = buildHeaders(opts)\n const provider = opts.provider ?? providerFromBaseUrl(baseUrl)\n const sink = opts.rawSink\n const redactor = opts.redactor ?? defaultProviderRedactor\n const traceContext = opts.traceContext\n\n let lastErr: unknown\n for (let attempt = 0; attempt < maxRetries; attempt++) {\n const controller = new AbortController()\n const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs)\n const started = Date.now()\n const requestBody = buildBody(req, false)\n let attemptErrorRecorded = false\n if (sink) {\n await recordRaw(sink, redactor, {\n eventId: cryptoEventId(),\n runId: traceContext?.runId,\n spanId: traceContext?.spanId,\n provider,\n model: req.model,\n endpoint,\n baseUrl,\n attemptIndex: attempt,\n direction: 'request',\n timestamp: started,\n requestHeaders: headers,\n requestBody,\n redactedFields: [],\n })\n }\n\n try {\n const res = await fetchFn(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(requestBody),\n signal: controller.signal,\n })\n clearTimeout(timeoutHandle)\n const responseHeaders = sink ? headersToObject(res.headers) : undefined\n\n if (!res.ok) {\n const body = await res.text()\n if (sink) {\n await recordRaw(sink, redactor, {\n eventId: cryptoEventId(),\n runId: traceContext?.runId,\n spanId: traceContext?.spanId,\n provider,\n model: req.model,\n endpoint,\n baseUrl,\n attemptIndex: attempt,\n direction: 'error',\n timestamp: Date.now(),\n durationMs: Date.now() - started,\n statusCode: res.status,\n responseHeaders,\n responseBody: body,\n errorMessage: `HTTP ${res.status}`,\n redactedFields: [],\n })\n attemptErrorRecorded = true\n }\n const err = new LlmCallError(\n `LLM call ${res.status}: ${body.slice(0, 300)}`,\n res.status,\n body,\n req.model,\n )\n if (RETRYABLE_STATUS.has(res.status) && attempt < maxRetries - 1) {\n lastErr = err\n const retryAfter = parseRetryAfter(res.headers)\n await sleep(retryAfter ?? backoffMs(attempt))\n continue\n }\n throw err\n }\n\n const text = await res.text()\n let json: Record<string, unknown>\n try {\n json = JSON.parse(text) as Record<string, unknown>\n } catch (parseErr) {\n if (sink) {\n await recordRaw(sink, redactor, {\n eventId: cryptoEventId(),\n runId: traceContext?.runId,\n spanId: traceContext?.spanId,\n provider,\n model: req.model,\n endpoint,\n baseUrl,\n attemptIndex: attempt,\n direction: 'error',\n timestamp: Date.now(),\n durationMs: Date.now() - started,\n statusCode: res.status,\n responseHeaders,\n responseBody: text,\n errorMessage: `non-JSON response: ${parseErr instanceof Error ? parseErr.message : String(parseErr)}`,\n redactedFields: [],\n })\n attemptErrorRecorded = true\n }\n throw parseErr\n }\n if (sink) {\n await recordRaw(sink, redactor, {\n eventId: cryptoEventId(),\n runId: traceContext?.runId,\n spanId: traceContext?.spanId,\n provider,\n model: req.model,\n endpoint,\n baseUrl,\n attemptIndex: attempt,\n direction: 'response',\n timestamp: Date.now(),\n durationMs: Date.now() - started,\n statusCode: res.status,\n responseHeaders,\n responseBody: json,\n redactedFields: [],\n })\n }\n const choice = (json.choices as Array<{ message?: { content?: string } }> | undefined)?.[0]\n const usageRaw = (json.usage as Record<string, unknown> | undefined) ?? {}\n const costFromProxy = (json._response_cost ?? json.cost_usd) as number | undefined\n\n return {\n content: choice?.message?.content ?? '',\n usage: {\n promptTokens: Number(usageRaw.prompt_tokens ?? 0),\n completionTokens: Number(usageRaw.completion_tokens ?? 0),\n totalTokens: Number(usageRaw.total_tokens ?? 0),\n cachedPromptTokens:\n usageRaw.prompt_tokens_details && typeof usageRaw.prompt_tokens_details === 'object'\n ? Number(\n (usageRaw.prompt_tokens_details as Record<string, unknown>).cached_tokens ?? 0,\n )\n : undefined,\n },\n costUsd: typeof costFromProxy === 'number' ? costFromProxy : null,\n model: (json.model as string) ?? req.model,\n durationMs: Date.now() - started,\n raw: json,\n }\n } catch (err) {\n clearTimeout(timeoutHandle)\n lastErr = err\n if (sink && !attemptErrorRecorded) {\n // Record only if neither the !res.ok branch nor the JSON.parse catch\n // already produced an error event for this attempt. Covers network\n // failures, timeouts, and aborts.\n await recordRaw(sink, redactor, {\n eventId: cryptoEventId(),\n runId: traceContext?.runId,\n spanId: traceContext?.spanId,\n provider,\n model: req.model,\n endpoint,\n baseUrl,\n attemptIndex: attempt,\n direction: 'error',\n timestamp: Date.now(),\n durationMs: Date.now() - started,\n errorMessage: err instanceof Error ? err.message : String(err),\n redactedFields: [],\n })\n }\n if (attempt < maxRetries - 1 && isRetryableError(err)) {\n await sleep(backoffMs(attempt))\n continue\n }\n throw err\n }\n }\n throw lastErr instanceof Error ? lastErr : new Error(String(lastErr))\n}\n\nasync function recordRaw(\n sink: RawProviderSink,\n redactor: ProviderRedactor,\n event: RawProviderEvent,\n): Promise<void> {\n // Errors from sinks must not crash the LLM call. Forensic capture is\n // best-effort; the structured trace is the system of record.\n try {\n await sink.record(redactor(event))\n } catch {\n // Intentionally swallowed.\n }\n}\n\nfunction headersToObject(h: Headers): Record<string, string> {\n const out: Record<string, string> = {}\n h.forEach((value, key) => {\n out[key] = value\n })\n return out\n}\n\nfunction cryptoEventId(): string {\n if (typeof globalThis.crypto?.randomUUID === 'function') return globalThis.crypto.randomUUID()\n return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`\n}\n\n/**\n * Structured-output call. Returns parsed JSON plus the raw result envelope.\n * Degrades `jsonSchema` → `jsonMode` on a 400 that names the schema param —\n * critical for deepseek-v3/v4, kimi-k2.6, and other models that don't accept\n * the `response_format.json_schema` shape but DO accept `json_object`.\n */\nexport async function callLlmJson<T = unknown>(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<{ value: T; result: LlmCallResult }> {\n try {\n const result = await callLlm({ ...req, jsonMode: req.jsonMode ?? !req.jsonSchema }, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n } catch (err) {\n if (err instanceof LlmCallError && isSchemaRejection(err.status, err.body) && req.jsonSchema) {\n // Degrade to json_object + retry.\n const degradedReq: LlmCallRequest = { ...req, jsonMode: true, jsonSchema: undefined }\n const result = await callLlm(degradedReq, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n }\n throw err\n }\n}\n\nfunction parseJsonSafely<T>(content: string, model: string): T {\n const stripped = extractJsonPayload(content)\n try {\n return JSON.parse(stripped) as T\n } catch (err) {\n throw new Error(\n `LLM returned non-JSON content (model=${model}): ${\n err instanceof Error ? err.message : String(err)\n }\\n--- raw content ---\\n${content.slice(0, 800)}`,\n )\n }\n}\n\n// ─── Route assertion ────────────────────────────────────────────────────\n\nexport type LlmRouteAssertionReason =\n | 'no_explicit_base_url'\n | 'base_url_blocked'\n | 'base_url_not_allowed'\n | 'no_auth'\n | 'wrong_provider'\n\nexport class LlmRouteAssertionError extends CaptureIntegrityError {\n constructor(\n message: string,\n public readonly reason: LlmRouteAssertionReason,\n public readonly baseUrl: string,\n ) {\n super(message)\n }\n}\n\nexport interface LlmRouteRequirements {\n /**\n * Throw if `opts.baseUrl` is undefined, i.e. the call would fall back to\n * `DEFAULT_BASE_URL`. Set this for evaluation runs where silently using\n * the public/free-tier router is a defect — the launch reviewer needs to\n * know exactly which provider answered.\n */\n requireExplicitBaseUrl?: boolean\n /**\n * Allowlist of acceptable base URLs. Strings match by prefix\n * (case-insensitive); RegExps test against the full base URL.\n */\n allowedBaseUrls?: Array<string | RegExp>\n /** Blocklist that takes precedence over `allowedBaseUrls`. */\n blockedBaseUrls?: Array<string | RegExp>\n /** Throw if no auth header / api key is configured. */\n requireAuth?: boolean\n /**\n * Logical provider id the configured `baseUrl` is expected to match (via\n * `providerFromBaseUrl`). Mainly useful when paired with `requireExplicitBaseUrl`.\n */\n expectedProvider?: string\n}\n\n/**\n * Fail-loud assertion that the configured LLM client points at the route\n * the caller intends. Designed for the matrix-runner preflight: invoke\n * once before any LLM call to catch misconfiguration before a sweep burns\n * dollars on the wrong provider.\n *\n * Throws `LlmRouteAssertionError`. Pure — no I/O — so it's safe to call\n * from constructors and CI gates.\n */\nexport function assertLlmRoute(opts: LlmClientOptions, req: LlmRouteRequirements = {}): void {\n const baseUrlExplicit = opts.baseUrl !== undefined\n const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\\/+$/, '')\n\n if (req.requireExplicitBaseUrl && !baseUrlExplicit) {\n throw new LlmRouteAssertionError(\n `assertLlmRoute: requireExplicitBaseUrl set but opts.baseUrl is undefined; would fall back to ${DEFAULT_BASE_URL}.`,\n 'no_explicit_base_url',\n baseUrl,\n )\n }\n\n if (req.blockedBaseUrls?.some((p) => matchUrl(baseUrl, p))) {\n throw new LlmRouteAssertionError(\n `assertLlmRoute: baseUrl ${baseUrl} matches a blocked pattern.`,\n 'base_url_blocked',\n baseUrl,\n )\n }\n\n if (req.allowedBaseUrls && req.allowedBaseUrls.length > 0) {\n const ok = req.allowedBaseUrls.some((p) => matchUrl(baseUrl, p))\n if (!ok) {\n throw new LlmRouteAssertionError(\n `assertLlmRoute: baseUrl ${baseUrl} is not in the allowed list (${req.allowedBaseUrls.map(describePattern).join(', ')}).`,\n 'base_url_not_allowed',\n baseUrl,\n )\n }\n }\n\n if (req.requireAuth && !opts.apiKey && !opts.bearer && !opts.authHeader) {\n throw new LlmRouteAssertionError(\n `assertLlmRoute: requireAuth set but no apiKey, bearer, or authHeader was supplied.`,\n 'no_auth',\n baseUrl,\n )\n }\n\n if (req.expectedProvider) {\n const actual = opts.provider ?? providerFromBaseUrl(baseUrl)\n if (actual !== req.expectedProvider) {\n throw new LlmRouteAssertionError(\n `assertLlmRoute: expected provider ${req.expectedProvider} but baseUrl ${baseUrl} resolves to ${actual}.`,\n 'wrong_provider',\n baseUrl,\n )\n }\n }\n}\n\nfunction matchUrl(url: string, pattern: string | RegExp): boolean {\n if (pattern instanceof RegExp) return pattern.test(url)\n return url.toLowerCase().startsWith(pattern.toLowerCase())\n}\n\nfunction describePattern(p: string | RegExp): string {\n return p instanceof RegExp ? p.source : p\n}\n\n/**\n * Probe whether a model is reachable. Returns latency + null error on\n * success; `ok=false` + error message on any failure (HTTP, timeout,\n * network, parse). Designed for sweep preflights — fail loud at the\n * boundary before burning a 30-leaf run on a misconfigured router.\n *\n * Sends a tiny `ping` message with `maxTokens=64`. Reasoning models\n * (glm-5.1, deepseek-v4) can burn the entire budget on internal reasoning\n * for short prompts, so don't tighten this further. We don't validate\n * content; HTTP 200 means reachable.\n */\nexport async function probeLlm(\n model: string,\n opts: LlmClientOptions & { timeoutMs?: number } = {},\n): Promise<{ ok: boolean; latencyMs: number; error: string | null }> {\n const start = Date.now()\n try {\n await callLlm(\n {\n model,\n messages: [{ role: 'user', content: 'ping' }],\n maxTokens: 64,\n timeoutMs: opts.timeoutMs ?? 30_000,\n },\n opts,\n )\n return { ok: true, latencyMs: Date.now() - start, error: null }\n } catch (err) {\n return {\n ok: false,\n latencyMs: Date.now() - start,\n error: err instanceof Error ? err.message : String(err),\n }\n }\n}\n\n/**\n * Stateful client — construct once with defaults, call many times.\n * Thin wrapper around the free functions; exists for callers that want\n * to inject a single configured instance into multiple primitives.\n */\nexport class LlmClient {\n constructor(private readonly opts: LlmClientOptions = {}) {}\n\n call(req: LlmCallRequest, per?: LlmClientOptions): Promise<LlmCallResult> {\n return callLlm(req, { ...this.opts, ...per })\n }\n\n callJson<T = unknown>(\n req: LlmCallRequest,\n per?: LlmClientOptions,\n ): Promise<{ value: T; result: LlmCallResult }> {\n return callLlmJson<T>(req, { ...this.opts, ...per })\n }\n}\n"],"mappings":";;;;;;;;;;AAqFO,IAAM,eAAN,cAA2B,eAAe;AAAA,EAC/C,YACE,SACgB,QACA,MACA,OAChB;AACA,UAAM,SAAS,OAAO;AAJN;AACA;AACA;AAAA,EAGlB;AAAA,EALkB;AAAA,EACA;AAAA,EACA;AAIpB;AAqCA,IAAM,mBAAmB;AACzB,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB;AAE5B,IAAM,mBAAmB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,GAAG,CAAC;AAErD,SAAS,iBAAiB,KAAuB;AAC/C,MAAI,eAAe,aAAc,QAAO,iBAAiB,IAAI,IAAI,MAAM;AACvE,MAAI,eAAe,OAAO;AACxB,WACE,IAAI,SAAS,gBACb,IAAI,SAAS,kBACb,+CAA+C,KAAK,IAAI,OAAO;AAAA,EAEnE;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,SAAiC;AACxD,QAAM,IAAI,QAAQ,IAAI,aAAa;AACnC,MAAI,CAAC,EAAG,QAAO;AACf,QAAM,WAAW,OAAO,CAAC;AACzB,MAAI,OAAO,SAAS,QAAQ,KAAK,WAAW,EAAG,QAAO,WAAW;AACjE,QAAM,SAAS,KAAK,MAAM,CAAC;AAC3B,MAAI,OAAO,SAAS,MAAM,EAAG,QAAO,KAAK,IAAI,GAAG,SAAS,KAAK,IAAI,CAAC;AACnE,SAAO;AACT;AAEA,SAAS,UAAU,SAAyB;AAE1C,SAAO,KAAK,IAAI,MAAM,KAAK,SAAS,IAAM;AAC5C;AAEA,SAAS,aAAa,MAAgD;AACpE,QAAM,UAAkC;AAAA,IACtC,gBAAgB;AAAA,IAChB,QAAQ;AAAA,EACV;AACA,MAAI,KAAK,YAAY;AACnB,YAAQ,KAAK,WAAW,IAAI,IAAI,KAAK,WAAW;AAAA,EAClD,WAAW,KAAK,UAAU,KAAK,QAAQ;AACrC,YAAQ,gBAAgB,UAAU,KAAK,UAAU,KAAK,MAAM;AAAA,EAC9D;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,QAAgB,MAAuB;AAChE,MAAI,WAAW,IAAK,QAAO;AAC3B,QAAM,QAAQ,KAAK,YAAY;AAC/B,SACE,MAAM,SAAS,iBAAiB,KAChC,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,gBAAgB,KAC/B,MAAM,SAAS,eAAe;AAElC;AAEA,SAAS,UAAU,KAAqB,iBAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,aAAa,IAAI,eAAe;AAAA,EAClC;AACA,MAAI,IAAI,aAAa,MAAM;AACzB,QAAI,wBAAwB,IAAI,KAAK,EAAG,MAAK,wBAAwB,IAAI;AAAA,QACpE,MAAK,aAAa,IAAI;AAAA,EAC7B;AAEA,MAAI,IAAI,cAAc,CAAC,iBAAiB;AACtC,SAAK,kBAAkB;AAAA,MACrB,MAAM;AAAA,MACN,aAAa,EAAE,MAAM,IAAI,WAAW,MAAM,QAAQ,IAAI,WAAW,QAAQ,QAAQ,KAAK;AAAA,IACxF;AAAA,EACF,WAAW,IAAI,YAAY,IAAI,YAAY;AACzC,SAAK,kBAAkB,EAAE,MAAM,cAAc;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,SAAS,wBAAwB,OAAwB;AACvD,SAAO,oBAAoB,KAAK,KAAK;AACvC;AAEA,eAAe,MAAM,IAA2B;AAC9C,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;AASO,SAAS,gBAAgB,KAAqB;AACnD,QAAM,UAAU,IAAI,KAAK;AACzB,QAAM,IAAI,QAAQ,MAAM,yCAAyC;AACjE,SAAO,IAAI,EAAE,CAAC,EAAG,KAAK,IAAI;AAC5B;AAEO,SAAS,mBAAmB,KAAqB;AACtD,QAAM,WAAW,gBAAgB,GAAG;AACpC,MAAI;AACF,SAAK,MAAM,QAAQ;AACnB,WAAO;AAAA,EACT,QAAQ;AAAA,EAER;AAEA,QAAM,SAAS,CAAC,GAAG,SAAS,SAAS,OAAO,CAAC,EAC1C,IAAI,CAAC,UAAU,MAAM,KAAK,EAC1B,OAAO,CAAC,UAAU,SAAS,IAAI;AAClC,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAAY,oBAAoB,UAAU,KAAK;AACrD,QAAI,CAAC,UAAW;AAChB,QAAI;AACF,WAAK,MAAM,SAAS;AACpB,aAAO;AAAA,IACT,QAAQ;AAAA,IAER;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,oBAAoB,OAAe,OAA8B;AACxE,QAAM,SAAS,MAAM,KAAK;AAC1B,QAAM,SAAS,WAAW,MAAM,MAAM,WAAW,MAAM,MAAM;AAC7D,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAkB,CAAC,MAAM;AAC/B,MAAI,aAAa;AACjB,MAAI,YAAY;AAEhB,WAAS,IAAI,QAAQ,GAAG,IAAI,MAAM,QAAQ,KAAK;AAC7C,UAAM,OAAO,MAAM,CAAC;AACpB,QAAI,WAAW;AACb,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,SAAS,MAAM;AACjB,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,SAAS,KAAK;AAChB,mBAAa,CAAC;AACd;AAAA,IACF;AACA,QAAI,WAAY;AAEhB,QAAI,SAAS,IAAK,OAAM,KAAK,GAAG;AAAA,aACvB,SAAS,IAAK,OAAM,KAAK,GAAG;AAAA,aAC5B,SAAS,MAAM,MAAM,SAAS,CAAC,GAAG;AACzC,YAAM,IAAI;AACV,UAAI,MAAM,WAAW,EAAG,QAAO,MAAM,MAAM,OAAO,IAAI,CAAC;AAAA,IACzD;AAAA,EACF;AAEA,SAAO;AACT;AAOA,eAAsB,QACpB,KACA,OAAyB,CAAC,GACF;AACxB,QAAM,WAAW,KAAK,WAAW,kBAAkB,QAAQ,QAAQ,EAAE;AACrE,QAAM,MAAM,GAAG,OAAO;AACtB,QAAM,WAAW;AACjB,QAAM,YAAY,IAAI,aAAa,KAAK,oBAAoB;AAC5D,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,UAAU,KAAK,SAAS,WAAW;AACzC,QAAM,UAAU,aAAa,IAAI;AACjC,QAAM,WAAW,KAAK,YAAY,oBAAoB,OAAO;AAC7D,QAAM,OAAO,KAAK;AAClB,QAAM,WAAW,KAAK,YAAY;AAClC,QAAM,eAAe,KAAK;AAE1B,MAAI;AACJ,WAAS,UAAU,GAAG,UAAU,YAAY,WAAW;AACrD,UAAM,aAAa,IAAI,gBAAgB;AACvC,UAAM,gBAAgB,WAAW,MAAM,WAAW,MAAM,GAAG,SAAS;AACpE,UAAM,UAAU,KAAK,IAAI;AACzB,UAAM,cAAc,UAAU,KAAK,KAAK;AACxC,QAAI,uBAAuB;AAC3B,QAAI,MAAM;AACR,YAAM,UAAU,MAAM,UAAU;AAAA,QAC9B,SAAS,cAAc;AAAA,QACvB,OAAO,cAAc;AAAA,QACrB,QAAQ,cAAc;AAAA,QACtB;AAAA,QACA,OAAO,IAAI;AAAA,QACX;AAAA,QACA;AAAA,QACA,cAAc;AAAA,QACd,WAAW;AAAA,QACX,WAAW;AAAA,QACX,gBAAgB;AAAA,QAChB;AAAA,QACA,gBAAgB,CAAC;AAAA,MACnB,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,MAAM,MAAM,QAAQ,KAAK;AAAA,QAC7B,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,WAAW;AAAA,QAChC,QAAQ,WAAW;AAAA,MACrB,CAAC;AACD,mBAAa,aAAa;AAC1B,YAAM,kBAAkB,OAAO,gBAAgB,IAAI,OAAO,IAAI;AAE9D,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,YAAI,MAAM;AACR,gBAAM,UAAU,MAAM,UAAU;AAAA,YAC9B,SAAS,cAAc;AAAA,YACvB,OAAO,cAAc;AAAA,YACrB,QAAQ,cAAc;AAAA,YACtB;AAAA,YACA,OAAO,IAAI;AAAA,YACX;AAAA,YACA;AAAA,YACA,cAAc;AAAA,YACd,WAAW;AAAA,YACX,WAAW,KAAK,IAAI;AAAA,YACpB,YAAY,KAAK,IAAI,IAAI;AAAA,YACzB,YAAY,IAAI;AAAA,YAChB;AAAA,YACA,cAAc;AAAA,YACd,cAAc,QAAQ,IAAI,MAAM;AAAA,YAChC,gBAAgB,CAAC;AAAA,UACnB,CAAC;AACD,iCAAuB;AAAA,QACzB;AACA,cAAM,MAAM,IAAI;AAAA,UACd,YAAY,IAAI,MAAM,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,UAC7C,IAAI;AAAA,UACJ;AAAA,UACA,IAAI;AAAA,QACN;AACA,YAAI,iBAAiB,IAAI,IAAI,MAAM,KAAK,UAAU,aAAa,GAAG;AAChE,oBAAU;AACV,gBAAM,aAAa,gBAAgB,IAAI,OAAO;AAC9C,gBAAM,MAAM,cAAc,UAAU,OAAO,CAAC;AAC5C;AAAA,QACF;AACA,cAAM;AAAA,MACR;AAEA,YAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,UAAI;AACJ,UAAI;AACF,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB,SAAS,UAAU;AACjB,YAAI,MAAM;AACR,gBAAM,UAAU,MAAM,UAAU;AAAA,YAC9B,SAAS,cAAc;AAAA,YACvB,OAAO,cAAc;AAAA,YACrB,QAAQ,cAAc;AAAA,YACtB;AAAA,YACA,OAAO,IAAI;AAAA,YACX;AAAA,YACA;AAAA,YACA,cAAc;AAAA,YACd,WAAW;AAAA,YACX,WAAW,KAAK,IAAI;AAAA,YACpB,YAAY,KAAK,IAAI,IAAI;AAAA,YACzB,YAAY,IAAI;AAAA,YAChB;AAAA,YACA,cAAc;AAAA,YACd,cAAc,sBAAsB,oBAAoB,QAAQ,SAAS,UAAU,OAAO,QAAQ,CAAC;AAAA,YACnG,gBAAgB,CAAC;AAAA,UACnB,CAAC;AACD,iCAAuB;AAAA,QACzB;AACA,cAAM;AAAA,MACR;AACA,UAAI,MAAM;AACR,cAAM,UAAU,MAAM,UAAU;AAAA,UAC9B,SAAS,cAAc;AAAA,UACvB,OAAO,cAAc;AAAA,UACrB,QAAQ,cAAc;AAAA,UACtB;AAAA,UACA,OAAO,IAAI;AAAA,UACX;AAAA,UACA;AAAA,UACA,cAAc;AAAA,UACd,WAAW;AAAA,UACX,WAAW,KAAK,IAAI;AAAA,UACpB,YAAY,KAAK,IAAI,IAAI;AAAA,UACzB,YAAY,IAAI;AAAA,UAChB;AAAA,UACA,cAAc;AAAA,UACd,gBAAgB,CAAC;AAAA,QACnB,CAAC;AAAA,MACH;AACA,YAAM,SAAU,KAAK,UAAoE,CAAC;AAC1F,YAAM,WAAY,KAAK,SAAiD,CAAC;AACzE,YAAM,gBAAiB,KAAK,kBAAkB,KAAK;AAEnD,aAAO;AAAA,QACL,SAAS,QAAQ,SAAS,WAAW;AAAA,QACrC,OAAO;AAAA,UACL,cAAc,OAAO,SAAS,iBAAiB,CAAC;AAAA,UAChD,kBAAkB,OAAO,SAAS,qBAAqB,CAAC;AAAA,UACxD,aAAa,OAAO,SAAS,gBAAgB,CAAC;AAAA,UAC9C,oBACE,SAAS,yBAAyB,OAAO,SAAS,0BAA0B,WACxE;AAAA,YACG,SAAS,sBAAkD,iBAAiB;AAAA,UAC/E,IACA;AAAA,QACR;AAAA,QACA,SAAS,OAAO,kBAAkB,WAAW,gBAAgB;AAAA,QAC7D,OAAQ,KAAK,SAAoB,IAAI;AAAA,QACrC,YAAY,KAAK,IAAI,IAAI;AAAA,QACzB,KAAK;AAAA,MACP;AAAA,IACF,SAAS,KAAK;AACZ,mBAAa,aAAa;AAC1B,gBAAU;AACV,UAAI,QAAQ,CAAC,sBAAsB;AAIjC,cAAM,UAAU,MAAM,UAAU;AAAA,UAC9B,SAAS,cAAc;AAAA,UACvB,OAAO,cAAc;AAAA,UACrB,QAAQ,cAAc;AAAA,UACtB;AAAA,UACA,OAAO,IAAI;AAAA,UACX;AAAA,UACA;AAAA,UACA,cAAc;AAAA,UACd,WAAW;AAAA,UACX,WAAW,KAAK,IAAI;AAAA,UACpB,YAAY,KAAK,IAAI,IAAI;AAAA,UACzB,cAAc,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,UAC7D,gBAAgB,CAAC;AAAA,QACnB,CAAC;AAAA,MACH;AACA,UAAI,UAAU,aAAa,KAAK,iBAAiB,GAAG,GAAG;AACrD,cAAM,MAAM,UAAU,OAAO,CAAC;AAC9B;AAAA,MACF;AACA,YAAM;AAAA,IACR;AAAA,EACF;AACA,QAAM,mBAAmB,QAAQ,UAAU,IAAI,MAAM,OAAO,OAAO,CAAC;AACtE;AAEA,eAAe,UACb,MACA,UACA,OACe;AAGf,MAAI;AACF,UAAM,KAAK,OAAO,SAAS,KAAK,CAAC;AAAA,EACnC,QAAQ;AAAA,EAER;AACF;AAEA,SAAS,gBAAgB,GAAoC;AAC3D,QAAM,MAA8B,CAAC;AACrC,IAAE,QAAQ,CAAC,OAAO,QAAQ;AACxB,QAAI,GAAG,IAAI;AAAA,EACb,CAAC;AACD,SAAO;AACT;AAEA,SAAS,gBAAwB;AAC/B,MAAI,OAAO,WAAW,QAAQ,eAAe,WAAY,QAAO,WAAW,OAAO,WAAW;AAC7F,SAAO,GAAG,KAAK,IAAI,EAAE,SAAS,EAAE,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE,CAAC;AAC9E;AAQA,eAAsB,YACpB,KACA,OAAyB,CAAC,GACoB;AAC9C,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,EAAE,GAAG,KAAK,UAAU,IAAI,YAAY,CAAC,IAAI,WAAW,GAAG,IAAI;AACxF,UAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,WAAO,EAAE,OAAO,OAAO;AAAA,EACzB,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB,kBAAkB,IAAI,QAAQ,IAAI,IAAI,KAAK,IAAI,YAAY;AAE5F,YAAM,cAA8B,EAAE,GAAG,KAAK,UAAU,MAAM,YAAY,OAAU;AACpF,YAAM,SAAS,MAAM,QAAQ,aAAa,IAAI;AAC9C,YAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,aAAO,EAAE,OAAO,OAAO;AAAA,IACzB;AACA,UAAM;AAAA,EACR;AACF;AAEA,SAAS,gBAAmB,SAAiB,OAAkB;AAC7D,QAAM,WAAW,mBAAmB,OAAO;AAC3C,MAAI;AACF,WAAO,KAAK,MAAM,QAAQ;AAAA,EAC5B,SAAS,KAAK;AACZ,UAAM,IAAI;AAAA,MACR,wCAAwC,KAAK,MAC3C,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CACjD;AAAA;AAAA,EAA0B,QAAQ,MAAM,GAAG,GAAG,CAAC;AAAA,IACjD;AAAA,EACF;AACF;AAWO,IAAM,yBAAN,cAAqC,sBAAsB;AAAA,EAChE,YACE,SACgB,QACA,SAChB;AACA,UAAM,OAAO;AAHG;AACA;AAAA,EAGlB;AAAA,EAJkB;AAAA,EACA;AAIpB;AAmCO,SAAS,eAAe,MAAwB,MAA4B,CAAC,GAAS;AAC3F,QAAM,kBAAkB,KAAK,YAAY;AACzC,QAAM,WAAW,KAAK,WAAW,kBAAkB,QAAQ,QAAQ,EAAE;AAErE,MAAI,IAAI,0BAA0B,CAAC,iBAAiB;AAClD,UAAM,IAAI;AAAA,MACR,gGAAgG,gBAAgB;AAAA,MAChH;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAEA,MAAI,IAAI,iBAAiB,KAAK,CAAC,MAAM,SAAS,SAAS,CAAC,CAAC,GAAG;AAC1D,UAAM,IAAI;AAAA,MACR,2BAA2B,OAAO;AAAA,MAClC;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAEA,MAAI,IAAI,mBAAmB,IAAI,gBAAgB,SAAS,GAAG;AACzD,UAAM,KAAK,IAAI,gBAAgB,KAAK,CAAC,MAAM,SAAS,SAAS,CAAC,CAAC;AAC/D,QAAI,CAAC,IAAI;AACP,YAAM,IAAI;AAAA,QACR,2BAA2B,OAAO,gCAAgC,IAAI,gBAAgB,IAAI,eAAe,EAAE,KAAK,IAAI,CAAC;AAAA,QACrH;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,MAAI,IAAI,eAAe,CAAC,KAAK,UAAU,CAAC,KAAK,UAAU,CAAC,KAAK,YAAY;AACvE,UAAM,IAAI;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAEA,MAAI,IAAI,kBAAkB;AACxB,UAAM,SAAS,KAAK,YAAY,oBAAoB,OAAO;AAC3D,QAAI,WAAW,IAAI,kBAAkB;AACnC,YAAM,IAAI;AAAA,QACR,qCAAqC,IAAI,gBAAgB,gBAAgB,OAAO,gBAAgB,MAAM;AAAA,QACtG;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,SAAS,KAAa,SAAmC;AAChE,MAAI,mBAAmB,OAAQ,QAAO,QAAQ,KAAK,GAAG;AACtD,SAAO,IAAI,YAAY,EAAE,WAAW,QAAQ,YAAY,CAAC;AAC3D;AAEA,SAAS,gBAAgB,GAA4B;AACnD,SAAO,aAAa,SAAS,EAAE,SAAS;AAC1C;AAaA,eAAsB,SACpB,OACA,OAAkD,CAAC,GACgB;AACnE,QAAM,QAAQ,KAAK,IAAI;AACvB,MAAI;AACF,UAAM;AAAA,MACJ;AAAA,QACE;AAAA,QACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,QAC5C,WAAW;AAAA,QACX,WAAW,KAAK,aAAa;AAAA,MAC/B;AAAA,MACA;AAAA,IACF;AACA,WAAO,EAAE,IAAI,MAAM,WAAW,KAAK,IAAI,IAAI,OAAO,OAAO,KAAK;AAAA,EAChE,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,WAAW,KAAK,IAAI,IAAI;AAAA,MACxB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAOO,IAAM,YAAN,MAAgB;AAAA,EACrB,YAA6B,OAAyB,CAAC,GAAG;AAA7B;AAAA,EAA8B;AAAA,EAA9B;AAAA,EAE7B,KAAK,KAAqB,KAAgD;AACxE,WAAO,QAAQ,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EAC9C;AAAA,EAEA,SACE,KACA,KAC8C;AAC9C,WAAO,YAAe,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EACrD;AACF;","names":[]}
@@ -0,0 +1,65 @@
1
+ // src/trace/schema.ts
2
+ var TRACE_SCHEMA_VERSION = "1.0.0";
3
+ var FAILURE_CLASSES = [
4
+ "success",
5
+ "reasoning_error",
6
+ "tool_selection_error",
7
+ "tool_argument_error",
8
+ "tool_recovery_failure",
9
+ "hallucination",
10
+ "instruction_following",
11
+ "safety_refusal_miss",
12
+ "policy_violation",
13
+ "budget_exceeded",
14
+ "format_drift",
15
+ "permission_escalation",
16
+ "pii_leak",
17
+ "cost_overrun",
18
+ "timeout",
19
+ "sandbox_failure",
20
+ "missing_user_data",
21
+ "missing_domain_data",
22
+ "missing_codebase_context",
23
+ "missing_runtime_context",
24
+ "missing_credentials",
25
+ "missing_integration_connection",
26
+ "missing_integration_scope",
27
+ "integration_approval_required",
28
+ "integration_auth_expired",
29
+ "integration_provider_failure",
30
+ "bad_integration_manifest",
31
+ "unsafe_integration_write_denied",
32
+ "stale_external_data",
33
+ "bad_retrieval",
34
+ "insufficient_evidence",
35
+ "contradictory_evidence",
36
+ "ambiguous_user_intent",
37
+ "knowledge_readiness_blocked",
38
+ "unknown"
39
+ ];
40
+ function isLlmSpan(s) {
41
+ return s.kind === "llm";
42
+ }
43
+ function isToolSpan(s) {
44
+ return s.kind === "tool";
45
+ }
46
+ function isRetrievalSpan(s) {
47
+ return s.kind === "retrieval";
48
+ }
49
+ function isJudgeSpan(s) {
50
+ return s.kind === "judge";
51
+ }
52
+ function isSandboxSpan(s) {
53
+ return s.kind === "sandbox";
54
+ }
55
+
56
+ export {
57
+ TRACE_SCHEMA_VERSION,
58
+ FAILURE_CLASSES,
59
+ isLlmSpan,
60
+ isToolSpan,
61
+ isRetrievalSpan,
62
+ isJudgeSpan,
63
+ isSandboxSpan
64
+ };
65
+ //# sourceMappingURL=chunk-5BKGXME7.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/trace/schema.ts"],"sourcesContent":["/**\n * TraceSchema v1 — the canonical data model for agent-eval.\n *\n * Every score, every failure class, every pipeline in the framework is\n * a view over this data. Shape it once, live with it.\n *\n * Wire-compatible with OpenTelemetry span semantics (see trace/otel.ts)\n * but extended with agent-specific span kinds (llm, tool, retrieval,\n * judge, sandbox) and first-class BudgetLedger / Artifact / JudgeVerdict\n * entities that OTEL leaves as free-form attributes.\n */\n\nexport const TRACE_SCHEMA_VERSION = '1.0.0'\n\n// ── Run ──────────────────────────────────────────────────────────────\n\nexport type RunStatus = 'running' | 'completed' | 'failed' | 'aborted'\n\nexport interface BudgetSpec {\n tokens?: number\n wallMs?: number\n calls?: number\n usd?: number\n}\n\nexport interface RunOutcome {\n score?: number\n pass?: boolean\n failureClass?: FailureClass\n notes?: string\n}\n\n/**\n * Layer — optional classification in a nested build workflow.\n * `builder`: the meta-agent editing a project (e.g. agent-builder Forge chat).\n * `app-build`: sandbox harness that compiled + tested the generated scaffold.\n * `app-runtime`: a run of the generated agent against a domain scenario.\n * `meta`: any meta-eval (judge replay, correlation analysis).\n */\nexport type RunLayer = 'builder' | 'app-build' | 'app-runtime' | 'meta' | 'custom'\n\nexport interface Run {\n runId: string\n /**\n * Stable identifier of the scenario being executed.\n *\n * Always populated on the persisted Run — but `TraceEmitter.startRun` accepts\n * input WITHOUT this field, substituting a sensible default\n * (`run.layer ?? run.tags?.['kind'] ?? 'runtime'`) when the caller has no\n * curated scenario to anchor to (runtime / operator / meta-eval runs). This\n * keeps the persisted shape unambiguous for downstream filters + aggregations\n * while removing the boilerplate of inventing placeholder ids at the call site.\n */\n scenarioId: string\n variantId?: string\n datasetVersion?: string\n /** Git SHA of agent code at run time. */\n codeSha?: string\n /** Hash of the prompt template + any system prompt. */\n promptSha?: string\n /** Model id + date + system-prompt hash, concatenated. */\n modelFingerprint?: string\n seed?: number\n /** Arbitrary environment markers (shell, docker version, tz). */\n envFingerprint?: Record<string, string>\n /** Version of the redaction rules applied to this run. */\n redactionVersion?: string\n /** Parent run in a nested build workflow. A builder run's children are\n * app-build runs; those children are app-runtime runs. */\n parentRunId?: string\n /** Stable project identifier — groups runs across chats + sessions. */\n projectId?: string\n /** Chat/conversation identifier within a project. */\n chatId?: string\n /** Layer classification — hint for aggregation; not enforced. */\n layer?: RunLayer\n startedAt: number\n endedAt?: number\n status: RunStatus\n outcome?: RunOutcome\n budget?: BudgetSpec\n /** Free-form labels for downstream grouping. */\n tags?: Record<string, string>\n}\n\n// ── Spans (hierarchical work units) ──────────────────────────────────\n\nexport type SpanKind = 'agent' | 'llm' | 'tool' | 'retrieval' | 'judge' | 'sandbox' | 'custom'\n\nexport type SpanStatus = 'ok' | 'error'\n\nexport interface SpanBase {\n spanId: string\n parentSpanId?: string\n runId: string\n kind: SpanKind\n name: string\n startedAt: number\n endedAt?: number\n status?: SpanStatus\n error?: string\n /** Anything not covered by typed fields. Kept deliberately free-form. */\n attributes?: Record<string, unknown>\n}\n\nexport interface Message {\n role: 'system' | 'user' | 'assistant' | 'tool'\n content: string\n tokens?: number\n /** Multi-modal content descriptors; blobs themselves live in Artifacts. */\n images?: Array<{ artifactId?: string; url?: string; mime?: string }>\n}\n\nexport interface LlmSpan extends SpanBase {\n kind: 'llm'\n model: string\n messages: Message[]\n output?: string\n inputTokens?: number\n outputTokens?: number\n cachedTokens?: number\n reasoningTokens?: number\n costUsd?: number\n finishReason?: string\n}\n\nexport interface ToolSpan extends SpanBase {\n kind: 'tool'\n toolName: string\n args: unknown\n result?: unknown\n latencyMs?: number\n}\n\nexport interface RetrievalSpan extends SpanBase {\n kind: 'retrieval'\n query: string\n hits: Array<{ docId: string; score: number; content?: string }>\n}\n\nexport interface JudgeSpan extends SpanBase {\n kind: 'judge'\n judgeId: string\n /** Span this judgment applies to. */\n targetSpanId: string\n dimension: string\n /** Numeric score (free-range; interpretation up to the judge). */\n score: number\n rationale?: string\n evidence?: string\n}\n\nexport interface SandboxSpan extends SpanBase {\n kind: 'sandbox'\n image?: string\n command?: string\n exitCode?: number\n testsTotal?: number\n testsPassed?: number\n stdoutHash?: string\n stderrHash?: string\n /** Duration in ms; the harness fills this explicitly (endedAt - startedAt may miss setup). */\n wallMs?: number\n}\n\nexport interface GenericSpan extends SpanBase {\n kind: 'agent' | 'custom'\n}\n\nexport type Span = LlmSpan | ToolSpan | RetrievalSpan | JudgeSpan | SandboxSpan | GenericSpan\n\n// ── Events (point-in-time occurrences within a span) ─────────────────\n\nexport type EventKind =\n | 'log'\n | 'error'\n | 'budget_decrement'\n | 'budget_breach'\n | 'state_mutation'\n | 'policy_violation'\n | 'redaction_applied'\n | 'custom'\n\nexport interface TraceEvent {\n eventId: string\n runId: string\n spanId?: string\n kind: EventKind\n timestamp: number\n payload: Record<string, unknown>\n}\n\n// ── Budget ledger (running token/wall/call/$ accounting) ─────────────\n\nexport interface BudgetLedgerEntry {\n runId: string\n dimension: keyof BudgetSpec\n limit: number\n consumed: number\n remaining: number\n timestamp: number\n breached: boolean\n /** Span that triggered this entry, if any. */\n spanId?: string\n}\n\n// ── Artifacts (blobs addressed by hash) ──────────────────────────────\n\nexport interface Artifact {\n artifactId: string\n runId: string\n spanId?: string\n contentType: string\n sizeBytes: number\n /** sha256 in hex. */\n hash: string\n /** External storage URL (R2, S3, filesystem path). */\n storageUrl?: string\n /** Inline content for small blobs — keep under ~64KB. */\n inlineContent?: string\n}\n\n// ── Failure taxonomy ─────────────────────────────────────────────────\n\nexport type FailureClass =\n | 'success'\n | 'reasoning_error'\n | 'tool_selection_error'\n | 'tool_argument_error'\n | 'tool_recovery_failure'\n | 'hallucination'\n | 'instruction_following'\n | 'safety_refusal_miss'\n | 'policy_violation'\n | 'budget_exceeded'\n | 'format_drift'\n | 'permission_escalation'\n | 'pii_leak'\n | 'cost_overrun'\n | 'timeout'\n | 'sandbox_failure'\n | 'missing_user_data'\n | 'missing_domain_data'\n | 'missing_codebase_context'\n | 'missing_runtime_context'\n | 'missing_credentials'\n | 'missing_integration_connection'\n | 'missing_integration_scope'\n | 'integration_approval_required'\n | 'integration_auth_expired'\n | 'integration_provider_failure'\n | 'bad_integration_manifest'\n | 'unsafe_integration_write_denied'\n | 'stale_external_data'\n | 'bad_retrieval'\n | 'insufficient_evidence'\n | 'contradictory_evidence'\n | 'ambiguous_user_intent'\n | 'knowledge_readiness_blocked'\n | 'unknown'\n\nexport const FAILURE_CLASSES: readonly FailureClass[] = [\n 'success',\n 'reasoning_error',\n 'tool_selection_error',\n 'tool_argument_error',\n 'tool_recovery_failure',\n 'hallucination',\n 'instruction_following',\n 'safety_refusal_miss',\n 'policy_violation',\n 'budget_exceeded',\n 'format_drift',\n 'permission_escalation',\n 'pii_leak',\n 'cost_overrun',\n 'timeout',\n 'sandbox_failure',\n 'missing_user_data',\n 'missing_domain_data',\n 'missing_codebase_context',\n 'missing_runtime_context',\n 'missing_credentials',\n 'missing_integration_connection',\n 'missing_integration_scope',\n 'integration_approval_required',\n 'integration_auth_expired',\n 'integration_provider_failure',\n 'bad_integration_manifest',\n 'unsafe_integration_write_denied',\n 'stale_external_data',\n 'bad_retrieval',\n 'insufficient_evidence',\n 'contradictory_evidence',\n 'ambiguous_user_intent',\n 'knowledge_readiness_blocked',\n 'unknown',\n] as const\n\n// ── Helpers ──────────────────────────────────────────────────────────\n\nexport function isLlmSpan(s: Span): s is LlmSpan {\n return s.kind === 'llm'\n}\nexport function isToolSpan(s: Span): s is ToolSpan {\n return s.kind === 'tool'\n}\nexport function isRetrievalSpan(s: Span): s is RetrievalSpan {\n return s.kind === 'retrieval'\n}\nexport function isJudgeSpan(s: Span): s is JudgeSpan {\n return s.kind === 'judge'\n}\nexport function isSandboxSpan(s: Span): s is SandboxSpan {\n return s.kind === 'sandbox'\n}\n"],"mappings":";AAYO,IAAM,uBAAuB;AAyP7B,IAAM,kBAA2C;AAAA,EACtD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIO,SAAS,UAAU,GAAuB;AAC/C,SAAO,EAAE,SAAS;AACpB;AACO,SAAS,WAAW,GAAwB;AACjD,SAAO,EAAE,SAAS;AACpB;AACO,SAAS,gBAAgB,GAA6B;AAC3D,SAAO,EAAE,SAAS;AACpB;AACO,SAAS,YAAY,GAAyB;AACnD,SAAO,EAAE,SAAS;AACpB;AACO,SAAS,cAAc,GAA2B;AACvD,SAAO,EAAE,SAAS;AACpB;","names":[]}