@tangle-network/agent-eval 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/CHANGELOG.md +72 -0
  2. package/README.md +4 -5
  3. package/dist/{baseline-4R5deP0N.d.ts → baseline-BwdCXUS8.d.ts} +1 -1
  4. package/dist/builder-eval/index.d.ts +3 -3
  5. package/dist/builder-eval/index.js +1 -1
  6. package/dist/{chunk-WWYCWKUM.js → chunk-3CKU6VGU.js} +2 -2
  7. package/dist/{chunk-2A5XJB43.js → chunk-5AKPEK5L.js} +3 -3
  8. package/dist/chunk-5AKPEK5L.js.map +1 -0
  9. package/dist/{chunk-RAF443UI.js → chunk-DBIGN5MJ.js} +2 -2
  10. package/dist/{chunk-JLZQWFV3.js → chunk-K33INZHH.js} +2 -2
  11. package/dist/chunk-K33INZHH.js.map +1 -0
  12. package/dist/{chunk-NU65VQ7M.js → chunk-MAZ26DC7.js} +1 -1
  13. package/dist/chunk-MAZ26DC7.js.map +1 -0
  14. package/dist/{chunk-LSH4MMOZ.js → chunk-NCRFYPS3.js} +1 -1
  15. package/dist/chunk-NCRFYPS3.js.map +1 -0
  16. package/dist/{chunk-ZN274SWR.js → chunk-PALJO75S.js} +2 -2
  17. package/dist/{chunk-OWLAAMME.js → chunk-QHF6EQKK.js} +3 -2
  18. package/dist/chunk-QHF6EQKK.js.map +1 -0
  19. package/dist/chunk-R5UQJNKC.js +722 -0
  20. package/dist/chunk-R5UQJNKC.js.map +1 -0
  21. package/dist/{chunk-SESZDQPX.js → chunk-RUI6SIHY.js} +3 -3
  22. package/dist/chunk-RUI6SIHY.js.map +1 -0
  23. package/dist/{chunk-WHZMVFUV.js → chunk-SZSBQUIJ.js} +2 -2
  24. package/dist/chunk-SZSBQUIJ.js.map +1 -0
  25. package/dist/chunk-UW4NOOZI.js +1561 -0
  26. package/dist/chunk-UW4NOOZI.js.map +1 -0
  27. package/dist/{chunk-4F5DQN55.js → chunk-VSMTAMNK.js} +1 -1
  28. package/dist/chunk-VSMTAMNK.js.map +1 -0
  29. package/dist/{chunk-5LBB5B3Z.js → chunk-XFZCM5Z3.js} +1 -1
  30. package/dist/chunk-XFZCM5Z3.js.map +1 -0
  31. package/dist/cli.js +1 -1
  32. package/dist/{control-CBShYYA6.d.ts → control-rJhEDdpy.d.ts} +4 -4
  33. package/dist/{control-runtime-BuJHoLg0.d.ts → control-runtime-BRdQ0wrx.d.ts} +3 -2
  34. package/dist/control.d.ts +5 -5
  35. package/dist/control.js +2 -2
  36. package/dist/{emitter-DP_cSSiw.d.ts → emitter-BqjeOvJh.d.ts} +1 -1
  37. package/dist/{failure-cluster-C2EGSDiT.d.ts → failure-cluster-D1NZKqYu.d.ts} +2 -3
  38. package/dist/{feedback-trajectory-DfFdrraJ.d.ts → feedback-trajectory-j0nJFgC6.d.ts} +1 -1
  39. package/dist/governance/index.d.ts +2 -2
  40. package/dist/{index-D3iBCjdF.d.ts → index-Cgt3DKXr.d.ts} +2 -2
  41. package/dist/index.d.ts +1279 -468
  42. package/dist/index.js +1992 -1259
  43. package/dist/index.js.map +1 -1
  44. package/dist/{integrity-DK2EBVZC.d.ts → integrity-BAxLGJ9I.d.ts} +2 -2
  45. package/dist/knowledge/index.d.ts +3 -3
  46. package/dist/knowledge/index.js +2 -2
  47. package/dist/meta-eval/index.d.ts +1 -1
  48. package/dist/{multi-layer-verifier-LkP3LVKj.d.ts → multi-layer-verifier-BNi4-8lR.d.ts} +2 -2
  49. package/dist/openapi.json +1 -1
  50. package/dist/optimization.d.ts +8 -8
  51. package/dist/optimization.js +5 -5
  52. package/dist/pipelines/index.d.ts +6 -6
  53. package/dist/pipelines/index.js +2 -2
  54. package/dist/prm/index.d.ts +4 -4
  55. package/dist/{query-DODUYdPg.d.ts → query-BFDT0kX_.d.ts} +1 -1
  56. package/dist/{release-report-wfUySN5F.d.ts → release-report-PWhGlpfO.d.ts} +1 -1
  57. package/dist/replay-BX5Fm8en.d.ts +529 -0
  58. package/dist/reporting.d.ts +5 -5
  59. package/dist/reporting.js +5 -5
  60. package/dist/{researcher-bGkI7vCl.d.ts → researcher-ClDX3KZx.d.ts} +13 -14
  61. package/dist/rl.d.ts +29 -47
  62. package/dist/rl.js +5 -5
  63. package/dist/rl.js.map +1 -1
  64. package/dist/{rubric-D5tjHNJQ.d.ts → rubric-DgSqjqqj.d.ts} +2 -2
  65. package/dist/{sequential-Dgz1n51-.d.ts → sequential-5iSVfzl2.d.ts} +2 -2
  66. package/dist/{store-Db2Bv8Cf.d.ts → store-BP5be6s7.d.ts} +1 -1
  67. package/dist/{summary-report-DZVXOCK_.d.ts → summary-report-jrSGb2xZ.d.ts} +5 -5
  68. package/dist/{test-graded-scenario-B2kWEdh9.d.ts → test-graded-scenario-BJ54PDan.d.ts} +2 -2
  69. package/dist/traces.d.ts +9 -311
  70. package/dist/traces.js +16 -987
  71. package/dist/traces.js.map +1 -1
  72. package/dist/{trajectory-CnoBo-JY.d.ts → trajectory-BFmveYZt.d.ts} +1 -1
  73. package/dist/wire/index.d.ts +4 -4
  74. package/dist/wire/index.js +1 -1
  75. package/docs/research-report-methodology.md +4 -4
  76. package/docs/three-package-architecture.md +12 -24
  77. package/package.json +1 -1
  78. package/dist/chunk-2A5XJB43.js.map +0 -1
  79. package/dist/chunk-4F5DQN55.js.map +0 -1
  80. package/dist/chunk-5LBB5B3Z.js.map +0 -1
  81. package/dist/chunk-I4MBDTY5.js +0 -272
  82. package/dist/chunk-I4MBDTY5.js.map +0 -1
  83. package/dist/chunk-JLZQWFV3.js.map +0 -1
  84. package/dist/chunk-K2TPS5LB.js +0 -569
  85. package/dist/chunk-K2TPS5LB.js.map +0 -1
  86. package/dist/chunk-LSH4MMOZ.js.map +0 -1
  87. package/dist/chunk-NU65VQ7M.js.map +0 -1
  88. package/dist/chunk-OWLAAMME.js.map +0 -1
  89. package/dist/chunk-SESZDQPX.js.map +0 -1
  90. package/dist/chunk-WHZMVFUV.js.map +0 -1
  91. package/dist/replay-BL96gCEP.d.ts +0 -226
  92. /package/dist/{chunk-WWYCWKUM.js.map → chunk-3CKU6VGU.js.map} +0 -0
  93. /package/dist/{chunk-RAF443UI.js.map → chunk-DBIGN5MJ.js.map} +0 -0
  94. /package/dist/{chunk-ZN274SWR.js.map → chunk-PALJO75S.js.map} +0 -0
@@ -1,272 +0,0 @@
1
- import {
2
- ValidationError
3
- } from "./chunk-NG236HPC.js";
4
-
5
- // src/statistics.ts
6
- var INVERTED_DIMENSIONS = /* @__PURE__ */ new Set(["hallucination", "false_confidence", "worst_failure"]);
7
- function normalizeScores(scores) {
8
- return scores.map((s) => {
9
- if (INVERTED_DIMENSIONS.has(s.dimension)) {
10
- return s;
11
- }
12
- return s;
13
- });
14
- }
15
- function weightedMean(scores) {
16
- if (scores.length === 0) return 0;
17
- let totalWeight = 0;
18
- let weightedSum = 0;
19
- for (const { score, weight } of scores) {
20
- const w = weight ?? 1;
21
- weightedSum += score * w;
22
- totalWeight += w;
23
- }
24
- return totalWeight > 0 ? weightedSum / totalWeight : 0;
25
- }
26
- function confidenceInterval(scores, confidence = 0.95) {
27
- if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 };
28
- if (scores.length === 1) return { mean: scores[0], lower: scores[0], upper: scores[0] };
29
- const n = scores.length;
30
- const mean = scores.reduce((a, b) => a + b, 0) / n;
31
- const B = 1e3;
32
- const bootstrapMeans = [];
33
- for (let i = 0; i < B; i++) {
34
- let sum = 0;
35
- for (let j = 0; j < n; j++) {
36
- sum += scores[Math.floor(Math.random() * n)];
37
- }
38
- bootstrapMeans.push(sum / n);
39
- }
40
- bootstrapMeans.sort((a, b) => a - b);
41
- const alpha = 1 - confidence;
42
- const lowerIdx = Math.floor(alpha / 2 * B);
43
- const upperIdx = Math.floor((1 - alpha / 2) * B) - 1;
44
- return {
45
- mean,
46
- lower: bootstrapMeans[lowerIdx],
47
- upper: bootstrapMeans[Math.min(upperIdx, B - 1)]
48
- };
49
- }
50
- function interRaterReliability(judgeScores) {
51
- if (judgeScores.length < 2) return 1;
52
- const dimensionMap = /* @__PURE__ */ new Map();
53
- for (const judgeSet of judgeScores) {
54
- for (const s of judgeSet) {
55
- if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, []);
56
- const arr = dimensionMap.get(s.dimension);
57
- if (arr.length === 0 || arr[arr.length - 1].length >= judgeScores.length) {
58
- arr.push([s.score]);
59
- } else {
60
- arr[arr.length - 1].push(s.score);
61
- }
62
- }
63
- }
64
- const allValues = [];
65
- const pairDiffs = [];
66
- for (const items of dimensionMap.values()) {
67
- for (const ratings of items) {
68
- if (ratings.length < 2) continue;
69
- for (const v of ratings) allValues.push(v);
70
- for (let i = 0; i < ratings.length; i++) {
71
- for (let j = i + 1; j < ratings.length; j++) {
72
- pairDiffs.push((ratings[i] - ratings[j]) ** 2);
73
- }
74
- }
75
- }
76
- }
77
- if (pairDiffs.length === 0 || allValues.length < 2) return 1;
78
- const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length;
79
- let expectedDisagreement = 0;
80
- let expectedCount = 0;
81
- for (let i = 0; i < allValues.length; i++) {
82
- for (let j = i + 1; j < allValues.length; j++) {
83
- expectedDisagreement += (allValues[i] - allValues[j]) ** 2;
84
- expectedCount++;
85
- }
86
- }
87
- expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0;
88
- if (expectedDisagreement === 0) return 1;
89
- return 1 - observedDisagreement / expectedDisagreement;
90
- }
91
- function mannWhitneyU(a, b) {
92
- if (a.length === 0 || b.length === 0) return { u: 0, p: 1 };
93
- const n1 = a.length;
94
- const n2 = b.length;
95
- const combined = [
96
- ...a.map((v) => ({ v, group: "a" })),
97
- ...b.map((v) => ({ v, group: "b" }))
98
- ].sort((x, y) => x.v - y.v);
99
- const ranks = new Array(combined.length);
100
- let i = 0;
101
- while (i < combined.length) {
102
- let j = i;
103
- while (j < combined.length && combined[j].v === combined[i].v) j++;
104
- const avgRank = (i + 1 + j) / 2;
105
- for (let k = i; k < j; k++) ranks[k] = avgRank;
106
- i = j;
107
- }
108
- let r1 = 0;
109
- for (let k = 0; k < combined.length; k++) {
110
- if (combined[k].group === "a") r1 += ranks[k];
111
- }
112
- const u1 = r1 - n1 * (n1 + 1) / 2;
113
- const u2 = n1 * n2 - u1;
114
- const u = Math.min(u1, u2);
115
- const mu = n1 * n2 / 2;
116
- const sigma = Math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12);
117
- if (sigma === 0) return { u, p: 1 };
118
- const z = Math.abs(u - mu) / sigma;
119
- const p = 2 * (1 - normalCdf(z));
120
- return { u, p };
121
- }
122
- function partialCredit(current, target) {
123
- if (target <= 0) return 1;
124
- return Math.min(1, Math.max(0, current / target));
125
- }
126
- function pairedTTest(before, after) {
127
- if (before.length !== after.length) {
128
- throw new ValidationError(
129
- `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`
130
- );
131
- }
132
- const n = before.length;
133
- if (n < 2) return { t: 0, df: 0, p: 1 };
134
- const diffs = before.map((b, i) => after[i] - b);
135
- const mean = diffs.reduce((a, b) => a + b, 0) / n;
136
- const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1);
137
- const se = Math.sqrt(variance / n);
138
- if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 };
139
- const t = mean / se;
140
- const df = n - 1;
141
- const p = 2 * (1 - studentTCdf(Math.abs(t), df));
142
- return { t, df, p };
143
- }
144
- function wilcoxonSignedRank(before, after) {
145
- if (before.length !== after.length) {
146
- throw new ValidationError(
147
- `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`
148
- );
149
- }
150
- const diffs = before.map((b, i2) => after[i2] - b).filter((d) => d !== 0);
151
- const n = diffs.length;
152
- if (n < 6) return { w: 0, p: 1 };
153
- const absRanks = diffs.map((d, i2) => ({ abs: Math.abs(d), sign: Math.sign(d), i: i2 })).sort((a, b) => a.abs - b.abs);
154
- const ranks = new Array(n);
155
- let i = 0;
156
- while (i < n) {
157
- let j = i;
158
- while (j < n && absRanks[j].abs === absRanks[i].abs) j++;
159
- const avg = (i + 1 + j) / 2;
160
- for (let k = i; k < j; k++) ranks[absRanks[k].i] = avg;
161
- i = j;
162
- }
163
- let wPlus = 0;
164
- for (let k = 0; k < n; k++) if (diffs[k] > 0) wPlus += ranks[k];
165
- const mean = n * (n + 1) / 4;
166
- const variance = n * (n + 1) * (2 * n + 1) / 24;
167
- const z = (wPlus - mean) / Math.sqrt(variance);
168
- const p = 2 * (1 - normalCdf(Math.abs(z)));
169
- return { w: wPlus, p };
170
- }
171
- function cohensD(a, b) {
172
- if (a.length < 2 || b.length < 2) return 0;
173
- const meanA = a.reduce((x, y) => x + y, 0) / a.length;
174
- const meanB = b.reduce((x, y) => x + y, 0) / b.length;
175
- const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1);
176
- const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1);
177
- const pooled = Math.sqrt(
178
- ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2)
179
- );
180
- if (pooled === 0) return 0;
181
- return (meanB - meanA) / pooled;
182
- }
183
- function studentTCdf(t, df) {
184
- if (df <= 0) return 0.5;
185
- if (df > 100) return normalCdf(t);
186
- const x = df / (df + t * t);
187
- const a = df / 2;
188
- const b = 0.5;
189
- const ib = incompleteBeta(x, a, b);
190
- return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib;
191
- }
192
- function incompleteBeta(x, a, b) {
193
- if (x <= 0) return 0;
194
- if (x >= 1) return 1;
195
- const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b);
196
- const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a;
197
- const maxIter = 200;
198
- const eps = 3e-7;
199
- let c = 1;
200
- let d = 1 - (a + b) * x / (a + 1);
201
- if (Math.abs(d) < 1e-30) d = 1e-30;
202
- d = 1 / d;
203
- let f = d;
204
- for (let m = 1; m <= maxIter; m++) {
205
- const m2 = 2 * m;
206
- let num = m * (b - m) * x / ((a + m2 - 1) * (a + m2));
207
- d = 1 + num * d;
208
- if (Math.abs(d) < 1e-30) d = 1e-30;
209
- c = 1 + num / c;
210
- if (Math.abs(c) < 1e-30) c = 1e-30;
211
- d = 1 / d;
212
- f *= d * c;
213
- num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
214
- d = 1 + num * d;
215
- if (Math.abs(d) < 1e-30) d = 1e-30;
216
- c = 1 + num / c;
217
- if (Math.abs(c) < 1e-30) c = 1e-30;
218
- d = 1 / d;
219
- const delta = d * c;
220
- f *= delta;
221
- if (Math.abs(delta - 1) < eps) break;
222
- }
223
- return front * f;
224
- }
225
- function lnGamma(z) {
226
- const g = 7;
227
- const coefs = [
228
- 0.9999999999998099,
229
- 676.5203681218851,
230
- -1259.1392167224028,
231
- 771.3234287776531,
232
- -176.6150291621406,
233
- 12.507343278686905,
234
- -0.13857109526572012,
235
- 9984369578019572e-21,
236
- 15056327351493116e-23
237
- ];
238
- if (z < 0.5) {
239
- return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z);
240
- }
241
- z -= 1;
242
- let x = coefs[0];
243
- for (let i = 1; i < g + 2; i++) x += coefs[i] / (z + i);
244
- const t = z + g + 0.5;
245
- return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x);
246
- }
247
- function normalCdf(x) {
248
- const a1 = 0.254829592;
249
- const a2 = -0.284496736;
250
- const a3 = 1.421413741;
251
- const a4 = -1.453152027;
252
- const a5 = 1.061405429;
253
- const p = 0.3275911;
254
- const sign = x < 0 ? -1 : 1;
255
- const absX = Math.abs(x);
256
- const t = 1 / (1 + p * absX);
257
- const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX / 2);
258
- return 0.5 * (1 + sign * y);
259
- }
260
-
261
- export {
262
- normalizeScores,
263
- weightedMean,
264
- confidenceInterval,
265
- interRaterReliability,
266
- mannWhitneyU,
267
- partialCredit,
268
- pairedTTest,
269
- wilcoxonSignedRank,
270
- cohensD
271
- };
272
- //# sourceMappingURL=chunk-I4MBDTY5.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/statistics.ts"],"sourcesContent":["import { ValidationError } from './errors'\nimport type { JudgeScore } from './types'\n\n/** Dimensions where lower raw score = better outcome (inverted semantics) */\nconst INVERTED_DIMENSIONS = new Set(['hallucination', 'false_confidence', 'worst_failure'])\n\n/**\n * Normalize scores so all dimensions follow \"higher = better\".\n * Inverted dimensions (hallucination, false_confidence, worst_failure)\n * already use inverted scoring in the prompt (10 = no hallucination),\n * but this function ensures consistency if raw scores leak through.\n */\nexport function normalizeScores(scores: JudgeScore[]): JudgeScore[] {\n return scores.map((s) => {\n if (INVERTED_DIMENSIONS.has(s.dimension)) {\n return s\n }\n return s\n })\n}\n\n/** Weighted mean — falls back to uniform weights when omitted */\nexport function weightedMean(scores: { score: number; weight?: number }[]): number {\n if (scores.length === 0) return 0\n let totalWeight = 0\n let weightedSum = 0\n for (const { score, weight } of scores) {\n const w = weight ?? 1\n weightedSum += score * w\n totalWeight += w\n }\n return totalWeight > 0 ? weightedSum / totalWeight : 0\n}\n\n/** Bootstrap confidence interval */\nexport function confidenceInterval(\n scores: number[],\n confidence = 0.95,\n): { mean: number; lower: number; upper: number } {\n if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 }\n if (scores.length === 1) return { mean: scores[0]!, lower: scores[0]!, upper: scores[0]! }\n\n const n = scores.length\n const mean = scores.reduce((a, b) => a + b, 0) / n\n\n const B = 1000\n const bootstrapMeans: number[] = []\n\n for (let i = 0; i < B; i++) {\n let sum = 0\n for (let j = 0; j < n; j++) {\n sum += scores[Math.floor(Math.random() * n)]!\n }\n bootstrapMeans.push(sum / n)\n }\n\n bootstrapMeans.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowerIdx = Math.floor((alpha / 2) * B)\n const upperIdx = Math.floor((1 - alpha / 2) * B) - 1\n\n return {\n mean,\n lower: bootstrapMeans[lowerIdx]!,\n upper: bootstrapMeans[Math.min(upperIdx, B - 1)]!,\n }\n}\n\n/**\n * Inter-rater reliability — simplified Krippendorff's alpha.\n *\n * Each inner array is one judge's scores for all items.\n * All arrays must have the same length (same items scored).\n */\nexport function interRaterReliability(judgeScores: JudgeScore[][]): number {\n if (judgeScores.length < 2) return 1\n\n // Group scores by dimension across judges\n const dimensionMap = new Map<string, number[][]>()\n for (const judgeSet of judgeScores) {\n for (const s of judgeSet) {\n if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, [])\n const arr = dimensionMap.get(s.dimension)!\n if (arr.length === 0 || arr[arr.length - 1]!.length >= judgeScores.length) {\n arr.push([s.score])\n } else {\n arr[arr.length - 1]!.push(s.score)\n }\n }\n }\n\n // Collect all paired ratings\n const allValues: number[] = []\n const pairDiffs: number[] = []\n\n for (const items of dimensionMap.values()) {\n for (const ratings of items) {\n if (ratings.length < 2) continue\n for (const v of ratings) allValues.push(v)\n for (let i = 0; i < ratings.length; i++) {\n for (let j = i + 1; j < ratings.length; j++) {\n pairDiffs.push((ratings[i]! - ratings[j]!) ** 2)\n }\n }\n }\n }\n\n if (pairDiffs.length === 0 || allValues.length < 2) return 1\n\n const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length\n\n // Expected disagreement from all possible pairings of values\n let expectedDisagreement = 0\n let expectedCount = 0\n for (let i = 0; i < allValues.length; i++) {\n for (let j = i + 1; j < allValues.length; j++) {\n expectedDisagreement += (allValues[i]! - allValues[j]!) ** 2\n expectedCount++\n }\n }\n expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0\n\n if (expectedDisagreement === 0) return 1\n return 1 - observedDisagreement / expectedDisagreement\n}\n\n/**\n * Mann-Whitney U test for comparing two independent groups.\n * Returns U statistic and approximate p-value (normal approximation).\n */\nexport function mannWhitneyU(a: number[], b: number[]): { u: number; p: number } {\n if (a.length === 0 || b.length === 0) return { u: 0, p: 1 }\n\n const n1 = a.length\n const n2 = b.length\n\n // Rank all values together\n const combined = [\n ...a.map((v) => ({ v, group: 'a' as const })),\n ...b.map((v) => ({ v, group: 'b' as const })),\n ].sort((x, y) => x.v - y.v)\n\n // Assign ranks with tie handling\n const ranks: number[] = new Array(combined.length)\n let i = 0\n while (i < combined.length) {\n let j = i\n while (j < combined.length && combined[j]!.v === combined[i]!.v) j++\n const avgRank = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[k] = avgRank\n i = j\n }\n\n // Sum ranks for group a\n let r1 = 0\n for (let k = 0; k < combined.length; k++) {\n if (combined[k]!.group === 'a') r1 += ranks[k]!\n }\n\n const u1 = r1 - (n1 * (n1 + 1)) / 2\n const u2 = n1 * n2 - u1\n const u = Math.min(u1, u2)\n\n // Normal approximation for p-value\n const mu = (n1 * n2) / 2\n const sigma = Math.sqrt((n1 * n2 * (n1 + n2 + 1)) / 12)\n\n if (sigma === 0) return { u, p: 1 }\n\n const z = Math.abs(u - mu) / sigma\n // Two-tailed p-value from z-score (approximation)\n const p = 2 * (1 - normalCdf(z))\n\n return { u, p }\n}\n\n/** Partial credit: returns 0-1 ratio of current toward target */\nexport function partialCredit(current: number, target: number): number {\n if (target <= 0) return 1\n return Math.min(1, Math.max(0, current / target))\n}\n\n/**\n * Paired t-test — before/after measurements on the SAME items.\n * Pairing removes inter-item variance, giving tighter significance than\n * an unpaired test when comparing prompt v1 vs prompt v2 on identical\n * scenarios.\n */\nexport function pairedTTest(\n before: number[],\n after: number[],\n): { t: number; df: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const n = before.length\n if (n < 2) return { t: 0, df: 0, p: 1 }\n\n const diffs = before.map((b, i) => after[i]! - b)\n const mean = diffs.reduce((a, b) => a + b, 0) / n\n const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1)\n const se = Math.sqrt(variance / n)\n if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 }\n\n const t = mean / se\n const df = n - 1\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\n/**\n * Wilcoxon signed-rank test — paired non-parametric alternative.\n * Use when the differences aren't normally distributed.\n */\nexport function wilcoxonSignedRank(before: number[], after: number[]): { w: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const diffs = before.map((b, i) => after[i]! - b).filter((d) => d !== 0)\n const n = diffs.length\n if (n < 6) return { w: 0, p: 1 }\n\n const absRanks = diffs\n .map((d, i) => ({ abs: Math.abs(d), sign: Math.sign(d), i }))\n .sort((a, b) => a.abs - b.abs)\n const ranks: number[] = new Array(n)\n let i = 0\n while (i < n) {\n let j = i\n while (j < n && absRanks[j]!.abs === absRanks[i]!.abs) j++\n const avg = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[absRanks[k]!.i] = avg\n i = j\n }\n let wPlus = 0\n for (let k = 0; k < n; k++) if (diffs[k]! > 0) wPlus += ranks[k]!\n\n const mean = (n * (n + 1)) / 4\n const variance = (n * (n + 1) * (2 * n + 1)) / 24\n const z = (wPlus - mean) / Math.sqrt(variance)\n const p = 2 * (1 - normalCdf(Math.abs(z)))\n return { w: wPlus, p }\n}\n\n/**\n * Cohen's d — standardized effect size for two independent groups.\n * Positive d means group b has higher mean than group a.\n * Rule of thumb: |d| < 0.2 negligible, 0.2–0.5 small, 0.5–0.8 medium, > 0.8 large.\n */\nexport function cohensD(a: number[], b: number[]): number {\n if (a.length < 2 || b.length < 2) return 0\n const meanA = a.reduce((x, y) => x + y, 0) / a.length\n const meanB = b.reduce((x, y) => x + y, 0) / b.length\n const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1)\n const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1)\n const pooled = Math.sqrt(\n ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2),\n )\n if (pooled === 0) return 0\n return (meanB - meanA) / pooled\n}\n\n/** Student-t CDF approximation via Abramowitz-Stegun series. */\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const a = df / 2\n const b = 0.5\n const ib = incompleteBeta(x, a, b)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\n/** Regularized incomplete beta function via continued fraction (Lentz). */\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n const maxIter = 200\n const eps = 3e-7\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= maxIter; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < eps) break\n }\n return front * f\n}\n\n/** Lanczos approximation to ln Γ(z). */\nfunction lnGamma(z: number): number {\n const g = 7\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) {\n return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n }\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < g + 2; i++) x += coefs[i]! / (z + i)\n const t = z + g + 0.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\n// Standard normal CDF approximation (Abramowitz and Stegun)\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;AAIA,IAAM,sBAAsB,oBAAI,IAAI,CAAC,iBAAiB,oBAAoB,eAAe,CAAC;AAQnF,SAAS,gBAAgB,QAAoC;AAClE,SAAO,OAAO,IAAI,CAAC,MAAM;AACvB,QAAI,oBAAoB,IAAI,EAAE,SAAS,GAAG;AACxC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAGO,SAAS,aAAa,QAAsD;AACjF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,aAAW,EAAE,OAAO,OAAO,KAAK,QAAQ;AACtC,UAAM,IAAI,UAAU;AACpB,mBAAe,QAAQ;AACvB,mBAAe;AAAA,EACjB;AACA,SAAO,cAAc,IAAI,cAAc,cAAc;AACvD;AAGO,SAAS,mBACd,QACA,aAAa,MACmC;AAChD,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,OAAO,EAAE;AAC9D,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,EAAG;AAEzF,QAAM,IAAI,OAAO;AACjB,QAAM,OAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAEjD,QAAM,IAAI;AACV,QAAM,iBAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,aAAO,OAAO,KAAK,MAAM,KAAK,OAAO,IAAI,CAAC,CAAC;AAAA,IAC7C;AACA,mBAAe,KAAK,MAAM,CAAC;AAAA,EAC7B;AAEA,iBAAe,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAEnC,QAAM,QAAQ,IAAI;AAClB,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,CAAC;AAC3C,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,CAAC,IAAI;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,OAAO,eAAe,QAAQ;AAAA,IAC9B,OAAO,eAAe,KAAK,IAAI,UAAU,IAAI,CAAC,CAAC;AAAA,EACjD;AACF;AAQO,SAAS,sBAAsB,aAAqC;AACzE,MAAI,YAAY,SAAS,EAAG,QAAO;AAGnC,QAAM,eAAe,oBAAI,IAAwB;AACjD,aAAW,YAAY,aAAa;AAClC,eAAW,KAAK,UAAU;AACxB,UAAI,CAAC,aAAa,IAAI,EAAE,SAAS,EAAG,cAAa,IAAI,EAAE,WAAW,CAAC,CAAC;AACpE,YAAM,MAAM,aAAa,IAAI,EAAE,SAAS;AACxC,UAAI,IAAI,WAAW,KAAK,IAAI,IAAI,SAAS,CAAC,EAAG,UAAU,YAAY,QAAQ;AACzE,YAAI,KAAK,CAAC,EAAE,KAAK,CAAC;AAAA,MACpB,OAAO;AACL,YAAI,IAAI,SAAS,CAAC,EAAG,KAAK,EAAE,KAAK;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,YAAsB,CAAC;AAC7B,QAAM,YAAsB,CAAC;AAE7B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,WAAW,OAAO;AAC3B,UAAI,QAAQ,SAAS,EAAG;AACxB,iBAAW,KAAK,QAAS,WAAU,KAAK,CAAC;AACzC,eAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,iBAAS,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AAC3C,oBAAU,MAAM,QAAQ,CAAC,IAAK,QAAQ,CAAC,MAAO,CAAC;AAAA,QACjD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,MAAI,UAAU,WAAW,KAAK,UAAU,SAAS,EAAG,QAAO;AAE3D,QAAM,uBAAuB,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU;AAG9E,MAAI,uBAAuB;AAC3B,MAAI,gBAAgB;AACpB,WAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,aAAS,IAAI,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AAC7C,+BAAyB,UAAU,CAAC,IAAK,UAAU,CAAC,MAAO;AAC3D;AAAA,IACF;AAAA,EACF;AACA,yBAAuB,gBAAgB,IAAI,uBAAuB,gBAAgB;AAElF,MAAI,yBAAyB,EAAG,QAAO;AACvC,SAAO,IAAI,uBAAuB;AACpC;AAMO,SAAS,aAAa,GAAa,GAAuC;AAC/E,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE1D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AAGb,QAAM,WAAW;AAAA,IACf,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,IAC5C,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,EAC9C,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAG1B,QAAM,QAAkB,IAAI,MAAM,SAAS,MAAM;AACjD,MAAI,IAAI;AACR,SAAO,IAAI,SAAS,QAAQ;AAC1B,QAAI,IAAI;AACR,WAAO,IAAI,SAAS,UAAU,SAAS,CAAC,EAAG,MAAM,SAAS,CAAC,EAAG,EAAG;AACjE,UAAM,WAAW,IAAI,IAAI,KAAK;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI;AACvC,QAAI;AAAA,EACN;AAGA,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,SAAS,CAAC,EAAG,UAAU,IAAK,OAAM,MAAM,CAAC;AAAA,EAC/C;AAEA,QAAM,KAAK,KAAM,MAAM,KAAK,KAAM;AAClC,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE;AAGzB,QAAM,KAAM,KAAK,KAAM;AACvB,QAAM,QAAQ,KAAK,KAAM,KAAK,MAAM,KAAK,KAAK,KAAM,EAAE;AAEtD,MAAI,UAAU,EAAG,QAAO,EAAE,GAAG,GAAG,EAAE;AAElC,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE,IAAI;AAE7B,QAAM,IAAI,KAAK,IAAI,UAAU,CAAC;AAE9B,SAAO,EAAE,GAAG,EAAE;AAChB;AAGO,SAAS,cAAc,SAAiB,QAAwB;AACrE,MAAI,UAAU,EAAG,QAAO;AACxB,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,UAAU,MAAM,CAAC;AAClD;AAQO,SAAS,YACd,QACA,OACsC;AACtC,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IACxE;AAAA,EACF;AACA,QAAM,IAAI,OAAO;AACjB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAEtC,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AAChD,QAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAChD,QAAM,WAAW,MAAM,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,SAAS,GAAG,CAAC,KAAK,IAAI;AAC3E,QAAM,KAAK,KAAK,KAAK,WAAW,CAAC;AACjC,MAAI,OAAO,EAAG,QAAO,EAAE,GAAG,SAAS,IAAI,IAAI,UAAU,IAAI,IAAI,GAAG,GAAG,SAAS,IAAI,IAAI,EAAE;AAEtF,QAAM,IAAI,OAAO;AACjB,QAAM,KAAK,IAAI;AACf,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAMO,SAAS,mBAAmB,QAAkB,OAA2C;AAC9F,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,6CAA6C,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IAC/E;AAAA,EACF;AACA,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAGA,OAAM,MAAMA,EAAC,IAAK,CAAC,EAAE,OAAO,CAAC,MAAM,MAAM,CAAC;AACvE,QAAM,IAAI,MAAM;AAChB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE/B,QAAM,WAAW,MACd,IAAI,CAAC,GAAGA,QAAO,EAAE,KAAK,KAAK,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,GAAG,GAAAA,GAAE,EAAE,EAC3D,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAC/B,QAAM,QAAkB,IAAI,MAAM,CAAC;AACnC,MAAI,IAAI;AACR,SAAO,IAAI,GAAG;AACZ,QAAI,IAAI;AACR,WAAO,IAAI,KAAK,SAAS,CAAC,EAAG,QAAQ,SAAS,CAAC,EAAG,IAAK;AACvD,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,SAAS,CAAC,EAAG,CAAC,IAAI;AACpD,QAAI;AAAA,EACN;AACA,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,KAAI,MAAM,CAAC,IAAK,EAAG,UAAS,MAAM,CAAC;AAE/D,QAAM,OAAQ,KAAK,IAAI,KAAM;AAC7B,QAAM,WAAY,KAAK,IAAI,MAAM,IAAI,IAAI,KAAM;AAC/C,QAAM,KAAK,QAAQ,QAAQ,KAAK,KAAK,QAAQ;AAC7C,QAAM,IAAI,KAAK,IAAI,UAAU,KAAK,IAAI,CAAC,CAAC;AACxC,SAAO,EAAE,GAAG,OAAO,EAAE;AACvB;AAOO,SAAS,QAAQ,GAAa,GAAqB;AACxD,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO;AACzC,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,SAAS,KAAK;AAAA,MAChB,EAAE,SAAS,KAAK,QAAQ,EAAE,SAAS,KAAK,SAAS,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3E;AACA,MAAI,WAAW,EAAG,QAAO;AACzB,UAAQ,QAAQ,SAAS;AAC3B;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,IAAI,KAAK;AACf,QAAM,IAAI;AACV,QAAM,KAAK,eAAe,GAAG,GAAG,CAAC;AACjC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAGA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,QAAM,UAAU;AAChB,QAAM,MAAM;AACZ,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,SAAS,KAAK;AACjC,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,IAAK;AAAA,EACjC;AACA,SAAO,QAAQ;AACjB;AAGA,SAAS,QAAQ,GAAmB;AAClC,QAAM,IAAI;AACV,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,KAAK;AACX,WAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAAA,EAClE;AACA,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AACtD,QAAM,IAAI,IAAI,IAAI;AAClB,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAGA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AAEV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAE9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":["i"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/failure-taxonomy.ts","../src/pipelines/failure-cluster.ts","../src/tool-use-metrics.ts","../src/baseline.ts"],"sourcesContent":["/**\n * Failure taxonomy — canonical classes + a default classifier.\n *\n * Every failed run should end up in a named class. The classifier here\n * is rule-based (fast, deterministic); an LLM fallback can be added by\n * the consumer for novel cases and trained into the rule base over time.\n *\n * Consumers call `classifyFailure(run, spans, events)` and persist the\n * returned class as `Run.outcome.failureClass`.\n */\n\nimport type { FailureClass, Run, Span, TraceEvent } from './trace/schema'\nimport { FAILURE_CLASSES } from './trace/schema'\n\nexport { FAILURE_CLASSES, type FailureClass }\n\nexport interface FailureContext {\n run: Run\n spans: Span[]\n events: TraceEvent[]\n}\n\nexport interface FailureClassification {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n}\n\n/** Ordered rules — first match wins. */\nexport interface FailureRule {\n id: string\n match: (ctx: FailureContext) => {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n } | null\n}\n\nexport const DEFAULT_RULES: FailureRule[] = [\n // Outcome already named? Respect it.\n {\n id: 'explicit-outcome',\n match: ({ run }) => {\n const fc = run.outcome?.failureClass\n if (fc && fc !== 'unknown')\n return { failureClass: fc, reason: 'outcome.failureClass set explicitly' }\n return null\n },\n },\n {\n id: 'knowledge-readiness-blocked',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'readiness_scored' &&\n e.payload.passed === false,\n )\n return event\n ? {\n failureClass: 'knowledge_readiness_blocked',\n reason: 'knowledge readiness report blocked execution',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-integration-manifest',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_validated' && e.payload.valid === false) ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'manifest_invalid')),\n )\n return event\n ? {\n failureClass: 'bad_integration_manifest',\n reason: 'integration manifest validation failed before launch',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-connection',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_manifest_resolved' &&\n hasResolutionStatus(e.payload, 'missing_connection'),\n )\n return event\n ? {\n failureClass: 'missing_integration_connection',\n reason: 'required integration connection was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-scope',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_resolved' && hasMissingScopes(e.payload)) ||\n (e.payload.kind === 'integration_invoke_failed' && e.payload.code === 'scope_denied')),\n )\n return event\n ? {\n failureClass: 'missing_integration_scope',\n reason: 'integration grant or connection lacks required scopes',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-approval-required',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_invoke' && e.payload.status === 'approval_required') ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'approval_required') ||\n e.payload.kind === 'integration_approval_required'),\n )\n return event\n ? {\n failureClass: 'integration_approval_required',\n reason: 'integration write paused for user approval',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-auth-expired',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'auth_expired' ||\n e.payload.code === 'connection_not_active' ||\n e.payload.code === 'capability_expired' ||\n e.payload.status === 'expired'),\n )\n return event\n ? {\n failureClass: 'integration_auth_expired',\n reason: 'integration connection or capability expired',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'unsafe-integration-write-denied',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'unsafe_write_denied' ||\n e.payload.code === 'policy_denied' ||\n e.payload.code === 'action_denied'),\n )\n return event\n ? {\n failureClass: 'unsafe_integration_write_denied',\n reason: 'integration write was denied by policy or capability scope',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-provider-failure',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n ![\n 'scope_denied',\n 'approval_required',\n 'auth_expired',\n 'connection_not_active',\n 'capability_expired',\n 'unsafe_write_denied',\n 'policy_denied',\n 'action_denied',\n 'manifest_invalid',\n ].includes(String(e.payload.code)),\n )\n return event\n ? {\n failureClass: 'integration_provider_failure',\n reason: 'integration provider invocation failed',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-credentials',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.category === 'credential_or_secret',\n )\n return event\n ? {\n failureClass: 'missing_credentials',\n reason: 'required credential or secret was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-retrieval',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const retrieval = spans.find(\n (s) =>\n s.kind === 'retrieval' && (s.hits.length === 0 || s.hits.every((hit) => hit.score <= 0)),\n )\n return retrieval\n ? {\n failureClass: 'bad_retrieval',\n reason: 'retrieval returned no useful hits for a failed run',\n triggerSpanId: retrieval.spanId,\n }\n : null\n },\n },\n {\n id: 'insufficient-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'insufficient_evidence',\n )\n return event\n ? {\n failureClass: 'insufficient_evidence',\n reason: 'task proceeded with insufficient supporting evidence',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'contradictory-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'contradictory_evidence',\n )\n return event\n ? {\n failureClass: 'contradictory_evidence',\n reason: 'supporting evidence contradicted itself',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n // Budget breach events\n {\n id: 'budget-breach',\n match: ({ events }) => {\n const breach = events.find((e) => e.kind === 'budget_breach')\n return breach\n ? {\n failureClass: 'budget_exceeded',\n reason: `budget breached on ${breach.payload.dimension ?? 'unknown dimension'}`,\n triggerEventId: breach.eventId,\n }\n : null\n },\n },\n // Policy violations\n {\n id: 'policy-violation',\n match: ({ events }) => {\n const e = events.find((x) => x.kind === 'policy_violation')\n return e\n ? {\n failureClass: 'policy_violation',\n reason: 'policy_violation event emitted',\n triggerEventId: e.eventId,\n }\n : null\n },\n },\n // Sandbox non-zero exit code\n {\n id: 'sandbox-failure',\n match: ({ spans }) => {\n const s = spans.find(\n (x) => x.kind === 'sandbox' && typeof x.exitCode === 'number' && x.exitCode !== 0,\n )\n if (!s) return null\n return {\n failureClass: 'sandbox_failure',\n reason: `sandbox exited ${(s as Extract<Span, { kind: 'sandbox' }>).exitCode}`,\n triggerSpanId: s.spanId,\n }\n },\n },\n // Timeout: run aborted by external signal\n {\n id: 'timeout',\n match: ({ run, events }) => {\n if (run.status !== 'aborted') return null\n const hasTimeout = events.some(\n (e) =>\n e.kind === 'error' &&\n String(e.payload.reason ?? '')\n .toLowerCase()\n .includes('timeout'),\n )\n const note = (run.outcome?.notes ?? '').toLowerCase()\n if (hasTimeout || note.includes('timeout') || note.includes('deadline')) {\n return { failureClass: 'timeout', reason: 'timeout signal observed' }\n }\n return null\n },\n },\n // Tool recovery failure: many consecutive tool errors on the same tool\n {\n id: 'tool-recovery-failure',\n match: ({ spans }) => {\n const tools = spans.filter((s) => s.kind === 'tool')\n const byTool = new Map<string, Span[]>()\n for (const t of tools) {\n const name = (t as Extract<Span, { kind: 'tool' }>).toolName\n const arr = byTool.get(name) ?? []\n arr.push(t)\n byTool.set(name, arr)\n }\n for (const [name, arr] of byTool) {\n const errs = arr.filter((s) => s.status === 'error')\n if (errs.length >= 3 && errs.length === arr.length) {\n return {\n failureClass: 'tool_recovery_failure',\n reason: `${errs.length} consecutive errors on tool \"${name}\"`,\n triggerSpanId: errs[errs.length - 1]!.spanId,\n }\n }\n }\n return null\n },\n },\n // Tool selection error: the run failed and agent called zero tools despite having them\n {\n id: 'tool-selection-error',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const hasToolsAvailable = spans.some(\n (s) =>\n s.kind === 'agent' &&\n (s.attributes?.toolsAvailable as number | undefined) !== undefined &&\n (s.attributes?.toolsAvailable as number) > 0,\n )\n const tools = spans.filter((s) => s.kind === 'tool')\n if (hasToolsAvailable && tools.length === 0) {\n return {\n failureClass: 'tool_selection_error',\n reason: 'tools were available but none were called',\n }\n }\n return null\n },\n },\n // Format drift: scored by a judge with dimension='format' below threshold\n {\n id: 'format-drift',\n match: ({ spans }) => {\n const judge = spans.find(\n (s) =>\n s.kind === 'judge' &&\n (s as Extract<Span, { kind: 'judge' }>).dimension === 'format' &&\n (s as Extract<Span, { kind: 'judge' }>).score < 0.5,\n )\n return judge\n ? {\n failureClass: 'format_drift',\n reason: 'format judge scored below 0.5',\n triggerSpanId: judge.spanId,\n }\n : null\n },\n },\n]\n\nfunction hasResolutionStatus(payload: Record<string, unknown>, status: string): boolean {\n if (status === 'missing_connection' && stringArray(payload.missingConnections).length > 0)\n return true\n return resolutionItems(payload).some((item) => item.status === status)\n}\n\nfunction hasMissingScopes(payload: Record<string, unknown>): boolean {\n if (stringArray(payload.missingScopes).length > 0) return true\n return resolutionItems(payload).some(\n (item) => Array.isArray(item.missingScopes) && item.missingScopes.length > 0,\n )\n}\n\nfunction resolutionItems(payload: Record<string, unknown>): Array<Record<string, unknown>> {\n return [\n ...records(payload.missing),\n ...records(payload.optionalMissing),\n ...records(payload.ready),\n ]\n}\n\nfunction records(value: unknown): Array<Record<string, unknown>> {\n if (!Array.isArray(value)) return []\n return value.filter(\n (item): item is Record<string, unknown> =>\n Boolean(item) && typeof item === 'object' && !Array.isArray(item),\n )\n}\n\nfunction stringArray(value: unknown): string[] {\n return Array.isArray(value)\n ? value.filter((item): item is string => typeof item === 'string')\n : []\n}\n\n/** Classify the failure mode of a run using an ordered rule list. */\nexport function classifyFailure(\n ctx: FailureContext,\n rules: FailureRule[] = DEFAULT_RULES,\n): FailureClassification {\n if (ctx.run.outcome?.pass !== false && ctx.run.status === 'completed') {\n return { failureClass: 'success', reason: 'run completed with pass=true (or no explicit fail)' }\n }\n for (const rule of rules) {\n const hit = rule.match(ctx)\n if (hit) return hit\n }\n return { failureClass: 'unknown', reason: 'no rule matched; run failed for unclassified reason' }\n}\n","/**\n * FailureClusterView — groups failed runs by (failureClass, triggerTool,\n * argHash-prefix) so weekly reviews can prioritize the top-N clusters.\n *\n * Each cluster includes: N runs, scenarios affected, representative\n * error message, a proposed mitigation hint (rule → action table).\n */\n\nimport { classifyFailure, DEFAULT_RULES, type FailureRule } from '../failure-taxonomy'\nimport { argHash, toolSpans } from '../trace/query'\nimport type { FailureClass, Span } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\n\nexport interface FailureCluster {\n failureClass: FailureClass\n /** Tool name when the trigger was a tool span, else undefined. */\n toolName?: string\n /** First 16 chars of argHash — clusters similar args. */\n argPrefix?: string\n /**\n * Source dimension when the trigger was a judge span (e.g. `'format'`,\n * `'safety'`, `'correctness'`). Lets cross-template aggregators\n * group failures by the dimension that fired without overloading\n * `argPrefix`. Optional — legacy clusters without this field\n * deserialize cleanly.\n */\n dimension?: string\n runCount: number\n scenarioIds: string[]\n exampleError?: string\n exampleRunId: string\n}\n\nexport interface FailureClusterReport {\n clusters: FailureCluster[]\n totalFailures: number\n totalRuns: number\n}\n\nexport async function failureClusterView(\n store: TraceStore,\n options: { rules?: FailureRule[]; minClusterSize?: number } = {},\n): Promise<FailureClusterReport> {\n const rules = options.rules ?? DEFAULT_RULES\n const minSize = options.minClusterSize ?? 1\n const runs = await store.listRuns()\n\n type Key = string\n const clusters = new Map<Key, FailureCluster>()\n let totalFailures = 0\n\n for (const run of runs) {\n if (run.status === 'completed' && run.outcome?.pass !== false) continue\n totalFailures++\n const spans = await store.spans({ runId: run.runId })\n const events = await store.events({ runId: run.runId })\n const cls = classifyFailure({ run, spans, events }, rules)\n\n let toolName: string | undefined\n let argPrefix: string | undefined\n let dimension: string | undefined\n if (cls.triggerSpanId) {\n const trig = spans.find((s) => s.spanId === cls.triggerSpanId)\n if (trig?.kind === 'tool') {\n toolName = trig.toolName\n argPrefix = argHash(trig.args).slice(0, 16)\n } else if (trig?.kind === 'judge') {\n dimension = trig.dimension\n }\n }\n // Fallback: look at the last errored tool span\n if (!toolName) {\n const ts = await toolSpans(store, run.runId)\n const errored = ts.filter((t) => t.status === 'error').pop()\n if (errored) {\n toolName = errored.toolName\n argPrefix = argHash(errored.args).slice(0, 16)\n }\n }\n // Secondary signal: any judge span on the failed run carries a\n // dimension. Useful when the rule classified by judge score but\n // didn't surface the trigger span (or surfaced a non-judge span).\n if (!dimension) {\n const judge = spans.find((s) => s.kind === 'judge' && typeof s.dimension === 'string')\n if (judge?.kind === 'judge') dimension = judge.dimension\n }\n\n const key = `${cls.failureClass}|${toolName ?? ''}|${argPrefix ?? ''}|${dimension ?? ''}`\n let cluster = clusters.get(key)\n if (!cluster) {\n cluster = {\n failureClass: cls.failureClass,\n toolName,\n argPrefix,\n dimension,\n runCount: 0,\n scenarioIds: [],\n exampleRunId: run.runId,\n exampleError: firstErrorMessage(spans) ?? cls.reason,\n }\n clusters.set(key, cluster)\n }\n cluster.runCount++\n if (!cluster.scenarioIds.includes(run.scenarioId)) cluster.scenarioIds.push(run.scenarioId)\n }\n\n const arr = [...clusters.values()]\n .filter((c) => c.runCount >= minSize)\n .sort((a, b) => b.runCount - a.runCount)\n\n return { clusters: arr, totalFailures, totalRuns: runs.length }\n}\n\nfunction firstErrorMessage(spans: Span[]): string | undefined {\n const errored = spans.find((s) => s.status === 'error')\n return errored?.error\n}\n","/**\n * Tool-use metrics — derived purely from trace data.\n *\n * No scoring assumptions: consumers supply optional ground-truth tool\n * selections per turn + optional \"information used downstream\" signals.\n * Without those, we still compute descriptive metrics (error rate,\n * retry rate, duplicate-call rate) that are useful on their own.\n */\n\nimport { argHash, groupBy, toolSpans } from './trace/query'\nimport type { Span } from './trace/schema'\nimport type { TraceStore } from './trace/store'\n\nexport interface ToolUseMetrics {\n runId: string\n totalCalls: number\n byTool: Record<string, ToolStats>\n errorRate: number\n /** Ratio of calls with identical (toolName, argHash) already seen earlier in the same run. */\n duplicateRate: number\n /** Ratio of error calls followed by ≥1 retry on same tool. */\n retryRate: number\n /** Optional: of the calls agent made, fraction the evaluator marked as \"correct selection\". */\n selectionAccuracy?: number\n}\n\nexport interface ToolStats {\n calls: number\n errors: number\n avgLatencyMs: number\n duplicates: number\n}\n\nexport interface ToolUseOptions {\n /** Map of spanId → whether the evaluator judged the tool selection correct. Optional. */\n selectionLabels?: Record<string, boolean>\n}\n\nexport async function computeToolUseMetrics(\n store: TraceStore,\n runId: string,\n options: ToolUseOptions = {},\n): Promise<ToolUseMetrics> {\n const tools = await toolSpans(store, runId)\n if (tools.length === 0) {\n return { runId, totalCalls: 0, byTool: {}, errorRate: 0, duplicateRate: 0, retryRate: 0 }\n }\n\n const byTool: Record<string, ToolStats> = {}\n let totalErrors = 0\n let totalDuplicates = 0\n const sortedTools = [...tools].sort((a, b) => a.startedAt - b.startedAt)\n const seenSignatures = new Set<string>()\n\n // duplicate detection + per-tool aggregation\n for (const t of sortedTools) {\n const stat = (byTool[t.toolName] ??= { calls: 0, errors: 0, avgLatencyMs: 0, duplicates: 0 })\n stat.calls += 1\n if (t.status === 'error') {\n stat.errors += 1\n totalErrors += 1\n }\n if (typeof t.latencyMs === 'number') stat.avgLatencyMs += t.latencyMs\n const sig = `${t.toolName}|${argHash(t.args)}`\n if (seenSignatures.has(sig)) {\n stat.duplicates += 1\n totalDuplicates += 1\n }\n seenSignatures.add(sig)\n }\n\n for (const stat of Object.values(byTool)) {\n stat.avgLatencyMs = stat.calls > 0 ? stat.avgLatencyMs / stat.calls : 0\n }\n\n // retry detection: per-tool chronological adjacency where error → next same-tool call\n let retryOpportunities = 0\n let retriesFollowed = 0\n for (const [, arr] of groupBy(sortedTools, (t) => t.toolName)) {\n for (let i = 0; i < arr.length; i++) {\n if (arr[i]!.status !== 'error') continue\n retryOpportunities += 1\n if (arr[i + 1]) retriesFollowed += 1\n }\n }\n const retryRate = retryOpportunities > 0 ? retriesFollowed / retryOpportunities : 0\n\n let selectionAccuracy: number | undefined\n if (options.selectionLabels) {\n const labeled = sortedTools.filter((t) => t.spanId in options.selectionLabels!)\n if (labeled.length > 0) {\n selectionAccuracy =\n labeled.filter((t) => options.selectionLabels![t.spanId]).length / labeled.length\n }\n }\n\n return {\n runId,\n totalCalls: sortedTools.length,\n byTool,\n errorRate: totalErrors / sortedTools.length,\n duplicateRate: totalDuplicates / sortedTools.length,\n retryRate,\n selectionAccuracy,\n }\n}\n\nexport type { Span }\n","/**\n * Baseline regression detection.\n *\n * Lifted from ADC baseline.ts. Every promotion-blocking signal boils down\n * to: \"is this run measurably worse than baseline?\" — with enough\n * statistical rigor to distinguish noise from drift.\n *\n * Uses:\n * - Welch's t-test (unequal variance) for per-metric mean comparison\n * - Cohen's d for effect size magnitude\n * - IQR for stability flag (unstable samples can't be trusted for comparisons)\n *\n * Returns a structured verdict: improved | regressed | stable | unstable.\n */\n\nimport { cohensD } from './statistics'\n\nexport interface MetricSamples {\n /** Stable metric key (e.g. \"overallScore\", \"firstTokenMs\"). */\n metric: string\n /** Whether higher values are better. */\n higherIsBetter: boolean\n baseline: number[]\n candidate: number[]\n}\n\nexport interface MetricVerdict {\n metric: string\n baselineMean: number\n candidateMean: number\n delta: number\n cohensD: number\n welchT: number\n welchDf: number\n welchP: number\n stable: boolean\n /** IQR of the combined samples — used as a rough stability indicator. */\n iqr: number\n verdict: 'improved' | 'regressed' | 'stable' | 'unstable'\n}\n\nexport interface BaselineReport {\n metrics: MetricVerdict[]\n /** True if any critical metric regressed. */\n hasRegression: boolean\n /** True if any metric is unstable (too noisy to judge). */\n hasUnstable: boolean\n}\n\nexport interface BaselineOptions {\n /** Effect size threshold for meaningful delta (default 0.5 — medium effect). */\n effectThreshold?: number\n /** p-value threshold for statistical significance (default 0.05). */\n alpha?: number\n /** IQR/mean ratio above which samples are flagged unstable (default 0.30). */\n unstableCvThreshold?: number\n}\n\n/**\n * Compare candidate samples against baseline per metric. Verdict logic:\n * - unstable: IQR/|mean| > threshold on either set — not enough signal\n * - improved: meaningful effect in the \"better\" direction AND p < alpha\n * - regressed: meaningful effect in the \"worse\" direction AND p < alpha\n * - stable: otherwise (no significant change)\n */\nexport function compareToBaseline(\n samples: MetricSamples[],\n options: BaselineOptions = {},\n): BaselineReport {\n const effectThreshold = options.effectThreshold ?? 0.5\n const alpha = options.alpha ?? 0.05\n const cvThreshold = options.unstableCvThreshold ?? 0.3\n\n const metrics: MetricVerdict[] = samples.map((s) => {\n if (s.baseline.length < 2 || s.candidate.length < 2) {\n throw new Error(`compareToBaseline: need ≥2 samples per side for \"${s.metric}\"`)\n }\n const bMean = mean(s.baseline)\n const cMean = mean(s.candidate)\n const delta = cMean - bMean\n const d = cohensD(s.baseline, s.candidate) // positive = candidate higher\n const { t, df, p } = welchsTTest(s.baseline, s.candidate)\n // Stability is per-side: a comparison is trustworthy only when BOTH\n // samples are internally consistent. Combining the sides would flag\n // large-but-real deltas as \"unstable\" which is exactly what we want\n // to detect.\n const baselineIqr = iqr(s.baseline)\n const candidateIqr = iqr(s.candidate)\n const baselineStable = baselineIqr / Math.max(Math.abs(bMean), 1e-9) <= cvThreshold\n const candidateStable = candidateIqr / Math.max(Math.abs(cMean), 1e-9) <= cvThreshold\n const stable = baselineStable && candidateStable\n const reportedIqr = Math.max(baselineIqr, candidateIqr)\n\n let verdict: MetricVerdict['verdict']\n if (!stable) {\n verdict = 'unstable'\n } else if (p < alpha && Math.abs(d) >= effectThreshold) {\n const candidateIsBetter = s.higherIsBetter ? delta > 0 : delta < 0\n verdict = candidateIsBetter ? 'improved' : 'regressed'\n } else {\n verdict = 'stable'\n }\n\n return {\n metric: s.metric,\n baselineMean: bMean,\n candidateMean: cMean,\n delta,\n cohensD: d,\n welchT: t,\n welchDf: df,\n welchP: p,\n stable,\n iqr: reportedIqr,\n verdict,\n }\n })\n\n return {\n metrics,\n hasRegression: metrics.some((m) => m.verdict === 'regressed'),\n hasUnstable: metrics.some((m) => m.verdict === 'unstable'),\n }\n}\n\nfunction mean(xs: number[]): number {\n return xs.reduce((a, b) => a + b, 0) / xs.length\n}\n\n/** Inter-quartile range; 0 when the sample has no spread. */\nexport function iqr(xs: number[]): number {\n if (xs.length === 0) return 0\n const sorted = [...xs].sort((a, b) => a - b)\n const q = (p: number) => {\n const idx = p * (sorted.length - 1)\n const lo = Math.floor(idx)\n const hi = Math.ceil(idx)\n return sorted[lo]! + (sorted[hi]! - sorted[lo]!) * (idx - lo)\n }\n return q(0.75) - q(0.25)\n}\n\n/**\n * Welch's t-test — unequal-variance two-sample t. Uses the same Student-t\n * CDF as `pairedTTest` (via incomplete beta); falls back to normal tail\n * when df is large.\n */\nexport function welchsTTest(a: number[], b: number[]): { t: number; df: number; p: number } {\n if (a.length < 2 || b.length < 2) return { t: 0, df: 0, p: 1 }\n const mA = mean(a)\n const mB = mean(b)\n const vA = variance(a, mA)\n const vB = variance(b, mB)\n const seSquared = vA / a.length + vB / b.length\n if (seSquared === 0) return { t: mA === mB ? 0 : Infinity, df: 0, p: mA === mB ? 1 : 0 }\n const t = (mB - mA) / Math.sqrt(seSquared)\n const df =\n (seSquared * seSquared) /\n ((vA / a.length) ** 2 / (a.length - 1) + (vB / b.length) ** 2 / (b.length - 1))\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\nfunction variance(xs: number[], m: number): number {\n return xs.reduce((acc, x) => acc + (x - m) ** 2, 0) / (xs.length - 1)\n}\n\n// Re-used from statistics.ts via small local copy to avoid exporting internals.\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const ib = incompleteBeta(x, df / 2, 0.5)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= 200; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < 3e-7) break\n }\n return front * f\n}\n\nfunction lnGamma(z: number): number {\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < 9; i++) x += coefs[i]! / (z + i)\n const t = z + 7.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;;;;;;AAwCO,IAAM,gBAA+B;AAAA;AAAA,EAE1C;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,IAAI,MAAM;AAClB,YAAM,KAAK,IAAI,SAAS;AACxB,UAAI,MAAM,OAAO;AACf,eAAO,EAAE,cAAc,IAAI,QAAQ,sCAAsC;AAC3E,aAAO;AAAA,IACT;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,sBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,oCAAoC,EAAE,QAAQ,UAAU,SAC1E,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mCACnB,oBAAoB,EAAE,SAAS,oBAAoB;AAAA,MACvD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,mCAAmC,iBAAiB,EAAE,OAAO,KAC/E,EAAE,QAAQ,SAAS,+BAA+B,EAAE,QAAQ,SAAS;AAAA,MAC5E;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,wBAAwB,EAAE,QAAQ,WAAW,uBAC/D,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS,uBACrB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,kBAClB,EAAE,QAAQ,SAAS,2BACnB,EAAE,QAAQ,SAAS,wBACnB,EAAE,QAAQ,WAAW;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,yBAClB,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,+BACnB,CAAC;AAAA,UACC;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF,EAAE,SAAS,OAAO,EAAE,QAAQ,IAAI,CAAC;AAAA,MACrC;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,aAAa;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,YAAY,MAAM;AAAA,QACtB,CAAC,MACC,EAAE,SAAS,gBAAgB,EAAE,KAAK,WAAW,KAAK,EAAE,KAAK,MAAM,CAAC,QAAQ,IAAI,SAAS,CAAC;AAAA,MAC1F;AACA,aAAO,YACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,UAAU;AAAA,MAC3B,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,eAAe;AAC5D,aAAO,SACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ,sBAAsB,OAAO,QAAQ,aAAa,mBAAmB;AAAA,QAC7E,gBAAgB,OAAO;AAAA,MACzB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,IAAI,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,kBAAkB;AAC1D,aAAO,IACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,EAAE;AAAA,MACpB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,IAAI,MAAM;AAAA,QACd,CAAC,MAAM,EAAE,SAAS,aAAa,OAAO,EAAE,aAAa,YAAY,EAAE,aAAa;AAAA,MAClF;AACA,UAAI,CAAC,EAAG,QAAO;AACf,aAAO;AAAA,QACL,cAAc;AAAA,QACd,QAAQ,kBAAmB,EAAyC,QAAQ;AAAA,QAC5E,eAAe,EAAE;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,OAAO,MAAM;AAC1B,UAAI,IAAI,WAAW,UAAW,QAAO;AACrC,YAAM,aAAa,OAAO;AAAA,QACxB,CAAC,MACC,EAAE,SAAS,WACX,OAAO,EAAE,QAAQ,UAAU,EAAE,EAC1B,YAAY,EACZ,SAAS,SAAS;AAAA,MACzB;AACA,YAAM,QAAQ,IAAI,SAAS,SAAS,IAAI,YAAY;AACpD,UAAI,cAAc,KAAK,SAAS,SAAS,KAAK,KAAK,SAAS,UAAU,GAAG;AACvE,eAAO,EAAE,cAAc,WAAW,QAAQ,0BAA0B;AAAA,MACtE;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,YAAM,SAAS,oBAAI,IAAoB;AACvC,iBAAW,KAAK,OAAO;AACrB,cAAM,OAAQ,EAAsC;AACpD,cAAM,MAAM,OAAO,IAAI,IAAI,KAAK,CAAC;AACjC,YAAI,KAAK,CAAC;AACV,eAAO,IAAI,MAAM,GAAG;AAAA,MACtB;AACA,iBAAW,CAAC,MAAM,GAAG,KAAK,QAAQ;AAChC,cAAM,OAAO,IAAI,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnD,YAAI,KAAK,UAAU,KAAK,KAAK,WAAW,IAAI,QAAQ;AAClD,iBAAO;AAAA,YACL,cAAc;AAAA,YACd,QAAQ,GAAG,KAAK,MAAM,gCAAgC,IAAI;AAAA,YAC1D,eAAe,KAAK,KAAK,SAAS,CAAC,EAAG;AAAA,UACxC;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,oBAAoB,MAAM;AAAA,QAC9B,CAAC,MACC,EAAE,SAAS,WACV,EAAE,YAAY,mBAA0C,UACxD,EAAE,YAAY,iBAA4B;AAAA,MAC/C;AACA,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,UAAI,qBAAqB,MAAM,WAAW,GAAG;AAC3C,eAAO;AAAA,UACL,cAAc;AAAA,UACd,QAAQ;AAAA,QACV;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM;AAAA,QAClB,CAAC,MACC,EAAE,SAAS,WACV,EAAuC,cAAc,YACrD,EAAuC,QAAQ;AAAA,MACpD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,MAAM;AAAA,MACvB,IACA;AAAA,IACN;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,SAAkC,QAAyB;AACtF,MAAI,WAAW,wBAAwB,YAAY,QAAQ,kBAAkB,EAAE,SAAS;AACtF,WAAO;AACT,SAAO,gBAAgB,OAAO,EAAE,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM;AACvE;AAEA,SAAS,iBAAiB,SAA2C;AACnE,MAAI,YAAY,QAAQ,aAAa,EAAE,SAAS,EAAG,QAAO;AAC1D,SAAO,gBAAgB,OAAO,EAAE;AAAA,IAC9B,CAAC,SAAS,MAAM,QAAQ,KAAK,aAAa,KAAK,KAAK,cAAc,SAAS;AAAA,EAC7E;AACF;AAEA,SAAS,gBAAgB,SAAkE;AACzF,SAAO;AAAA,IACL,GAAG,QAAQ,QAAQ,OAAO;AAAA,IAC1B,GAAG,QAAQ,QAAQ,eAAe;AAAA,IAClC,GAAG,QAAQ,QAAQ,KAAK;AAAA,EAC1B;AACF;AAEA,SAAS,QAAQ,OAAgD;AAC/D,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,SAAO,MAAM;AAAA,IACX,CAAC,SACC,QAAQ,IAAI,KAAK,OAAO,SAAS,YAAY,CAAC,MAAM,QAAQ,IAAI;AAAA,EACpE;AACF;AAEA,SAAS,YAAY,OAA0B;AAC7C,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,SAAyB,OAAO,SAAS,QAAQ,IAC/D,CAAC;AACP;AAGO,SAAS,gBACd,KACA,QAAuB,eACA;AACvB,MAAI,IAAI,IAAI,SAAS,SAAS,SAAS,IAAI,IAAI,WAAW,aAAa;AACrE,WAAO,EAAE,cAAc,WAAW,QAAQ,qDAAqD;AAAA,EACjG;AACA,aAAW,QAAQ,OAAO;AACxB,UAAM,MAAM,KAAK,MAAM,GAAG;AAC1B,QAAI,IAAK,QAAO;AAAA,EAClB;AACA,SAAO,EAAE,cAAc,WAAW,QAAQ,sDAAsD;AAClG;;;ACtaA,eAAsB,mBACpB,OACA,UAA8D,CAAC,GAChC;AAC/B,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,UAAU,QAAQ,kBAAkB;AAC1C,QAAM,OAAO,MAAM,MAAM,SAAS;AAGlC,QAAM,WAAW,oBAAI,IAAyB;AAC9C,MAAI,gBAAgB;AAEpB,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,WAAW,eAAe,IAAI,SAAS,SAAS,MAAO;AAC/D;AACA,UAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,IAAI,MAAM,CAAC;AACpD,UAAM,SAAS,MAAM,MAAM,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC;AACtD,UAAM,MAAM,gBAAgB,EAAE,KAAK,OAAO,OAAO,GAAG,KAAK;AAEzD,QAAI;AACJ,QAAI;AACJ,QAAI;AACJ,QAAI,IAAI,eAAe;AACrB,YAAM,OAAO,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,IAAI,aAAa;AAC7D,UAAI,MAAM,SAAS,QAAQ;AACzB,mBAAW,KAAK;AAChB,oBAAY,QAAQ,KAAK,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC5C,WAAW,MAAM,SAAS,SAAS;AACjC,oBAAY,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,YAAM,KAAK,MAAM,UAAU,OAAO,IAAI,KAAK;AAC3C,YAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO,EAAE,IAAI;AAC3D,UAAI,SAAS;AACX,mBAAW,QAAQ;AACnB,oBAAY,QAAQ,QAAQ,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC/C;AAAA,IACF;AAIA,QAAI,CAAC,WAAW;AACd,YAAM,QAAQ,MAAM,KAAK,CAAC,MAAM,EAAE,SAAS,WAAW,OAAO,EAAE,cAAc,QAAQ;AACrF,UAAI,OAAO,SAAS,QAAS,aAAY,MAAM;AAAA,IACjD;AAEA,UAAM,MAAM,GAAG,IAAI,YAAY,IAAI,YAAY,EAAE,IAAI,aAAa,EAAE,IAAI,aAAa,EAAE;AACvF,QAAI,UAAU,SAAS,IAAI,GAAG;AAC9B,QAAI,CAAC,SAAS;AACZ,gBAAU;AAAA,QACR,cAAc,IAAI;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,QACA,UAAU;AAAA,QACV,aAAa,CAAC;AAAA,QACd,cAAc,IAAI;AAAA,QAClB,cAAc,kBAAkB,KAAK,KAAK,IAAI;AAAA,MAChD;AACA,eAAS,IAAI,KAAK,OAAO;AAAA,IAC3B;AACA,YAAQ;AACR,QAAI,CAAC,QAAQ,YAAY,SAAS,IAAI,UAAU,EAAG,SAAQ,YAAY,KAAK,IAAI,UAAU;AAAA,EAC5F;AAEA,QAAM,MAAM,CAAC,GAAG,SAAS,OAAO,CAAC,EAC9B,OAAO,CAAC,MAAM,EAAE,YAAY,OAAO,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ;AAEzC,SAAO,EAAE,UAAU,KAAK,eAAe,WAAW,KAAK,OAAO;AAChE;AAEA,SAAS,kBAAkB,OAAmC;AAC5D,QAAM,UAAU,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,OAAO;AACtD,SAAO,SAAS;AAClB;;;AC9EA,eAAsB,sBACpB,OACA,OACA,UAA0B,CAAC,GACF;AACzB,QAAM,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC1C,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO,EAAE,OAAO,YAAY,GAAG,QAAQ,CAAC,GAAG,WAAW,GAAG,eAAe,GAAG,WAAW,EAAE;AAAA,EAC1F;AAEA,QAAM,SAAoC,CAAC;AAC3C,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,QAAM,cAAc,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AACvE,QAAM,iBAAiB,oBAAI,IAAY;AAGvC,aAAW,KAAK,aAAa;AAC3B,UAAM,OAAQ,OAAO,EAAE,QAAQ,MAAM,EAAE,OAAO,GAAG,QAAQ,GAAG,cAAc,GAAG,YAAY,EAAE;AAC3F,SAAK,SAAS;AACd,QAAI,EAAE,WAAW,SAAS;AACxB,WAAK,UAAU;AACf,qBAAe;AAAA,IACjB;AACA,QAAI,OAAO,EAAE,cAAc,SAAU,MAAK,gBAAgB,EAAE;AAC5D,UAAM,MAAM,GAAG,EAAE,QAAQ,IAAI,QAAQ,EAAE,IAAI,CAAC;AAC5C,QAAI,eAAe,IAAI,GAAG,GAAG;AAC3B,WAAK,cAAc;AACnB,yBAAmB;AAAA,IACrB;AACA,mBAAe,IAAI,GAAG;AAAA,EACxB;AAEA,aAAW,QAAQ,OAAO,OAAO,MAAM,GAAG;AACxC,SAAK,eAAe,KAAK,QAAQ,IAAI,KAAK,eAAe,KAAK,QAAQ;AAAA,EACxE;AAGA,MAAI,qBAAqB;AACzB,MAAI,kBAAkB;AACtB,aAAW,CAAC,EAAE,GAAG,KAAK,QAAQ,aAAa,CAAC,MAAM,EAAE,QAAQ,GAAG;AAC7D,aAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACnC,UAAI,IAAI,CAAC,EAAG,WAAW,QAAS;AAChC,4BAAsB;AACtB,UAAI,IAAI,IAAI,CAAC,EAAG,oBAAmB;AAAA,IACrC;AAAA,EACF;AACA,QAAM,YAAY,qBAAqB,IAAI,kBAAkB,qBAAqB;AAElF,MAAI;AACJ,MAAI,QAAQ,iBAAiB;AAC3B,UAAM,UAAU,YAAY,OAAO,CAAC,MAAM,EAAE,UAAU,QAAQ,eAAgB;AAC9E,QAAI,QAAQ,SAAS,GAAG;AACtB,0BACE,QAAQ,OAAO,CAAC,MAAM,QAAQ,gBAAiB,EAAE,MAAM,CAAC,EAAE,SAAS,QAAQ;AAAA,IAC/E;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,YAAY;AAAA,IACxB;AAAA,IACA,WAAW,cAAc,YAAY;AAAA,IACrC,eAAe,kBAAkB,YAAY;AAAA,IAC7C;AAAA,IACA;AAAA,EACF;AACF;;;ACxCO,SAAS,kBACd,SACA,UAA2B,CAAC,GACZ;AAChB,QAAM,kBAAkB,QAAQ,mBAAmB;AACnD,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,cAAc,QAAQ,uBAAuB;AAEnD,QAAM,UAA2B,QAAQ,IAAI,CAAC,MAAM;AAClD,QAAI,EAAE,SAAS,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACnD,YAAM,IAAI,MAAM,yDAAoD,EAAE,MAAM,GAAG;AAAA,IACjF;AACA,UAAM,QAAQ,KAAK,EAAE,QAAQ;AAC7B,UAAM,QAAQ,KAAK,EAAE,SAAS;AAC9B,UAAM,QAAQ,QAAQ;AACtB,UAAM,IAAI,QAAQ,EAAE,UAAU,EAAE,SAAS;AACzC,UAAM,EAAE,GAAG,IAAI,EAAE,IAAI,YAAY,EAAE,UAAU,EAAE,SAAS;AAKxD,UAAM,cAAc,IAAI,EAAE,QAAQ;AAClC,UAAM,eAAe,IAAI,EAAE,SAAS;AACpC,UAAM,iBAAiB,cAAc,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AACxE,UAAM,kBAAkB,eAAe,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AAC1E,UAAM,SAAS,kBAAkB;AACjC,UAAM,cAAc,KAAK,IAAI,aAAa,YAAY;AAEtD,QAAI;AACJ,QAAI,CAAC,QAAQ;AACX,gBAAU;AAAA,IACZ,WAAW,IAAI,SAAS,KAAK,IAAI,CAAC,KAAK,iBAAiB;AACtD,YAAM,oBAAoB,EAAE,iBAAiB,QAAQ,IAAI,QAAQ;AACjE,gBAAU,oBAAoB,aAAa;AAAA,IAC7C,OAAO;AACL,gBAAU;AAAA,IACZ;AAEA,WAAO;AAAA,MACL,QAAQ,EAAE;AAAA,MACV,cAAc;AAAA,MACd,eAAe;AAAA,MACf;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,QAAQ;AAAA,MACR;AAAA,MACA,KAAK;AAAA,MACL;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,eAAe,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,WAAW;AAAA,IAC5D,aAAa,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,UAAU;AAAA,EAC3D;AACF;AAEA,SAAS,KAAK,IAAsB;AAClC,SAAO,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG;AAC5C;AAGO,SAAS,IAAI,IAAsB;AACxC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,QAAM,IAAI,CAAC,MAAc;AACvB,UAAM,MAAM,KAAK,OAAO,SAAS;AACjC,UAAM,KAAK,KAAK,MAAM,GAAG;AACzB,UAAM,KAAK,KAAK,KAAK,GAAG;AACxB,WAAO,OAAO,EAAE,KAAM,OAAO,EAAE,IAAK,OAAO,EAAE,MAAO,MAAM;AAAA,EAC5D;AACA,SAAO,EAAE,IAAI,IAAI,EAAE,IAAI;AACzB;AAOO,SAAS,YAAY,GAAa,GAAmD;AAC1F,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAC7D,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,YAAY,KAAK,EAAE,SAAS,KAAK,EAAE;AACzC,MAAI,cAAc,EAAG,QAAO,EAAE,GAAG,OAAO,KAAK,IAAI,UAAU,IAAI,GAAG,GAAG,OAAO,KAAK,IAAI,EAAE;AACvF,QAAM,KAAK,KAAK,MAAM,KAAK,KAAK,SAAS;AACzC,QAAM,KACH,YAAY,cACX,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS,MAAM,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS;AAC9E,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAEA,SAAS,SAAS,IAAc,GAAmB;AACjD,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,MAAM,GAAG,CAAC,KAAK,GAAG,SAAS;AACrE;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,KAAK,eAAe,GAAG,KAAK,GAAG,GAAG;AACxC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAEA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,KAAK,KAAK;AAC7B,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,KAAM;AAAA,EAClC;AACA,SAAO,QAAQ;AACjB;AAEA,SAAS,QAAQ,GAAmB;AAClC,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,IAAK,QAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAC7E,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AAClD,QAAM,IAAI,IAAI;AACd,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAEA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AACV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAC9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":[]}