@tangle-network/agent-eval 0.25.0 → 0.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +5 -5
- package/dist/builder-eval/index.js +1 -1
- package/dist/{chunk-WWYCWKUM.js → chunk-3CKU6VGU.js} +2 -2
- package/dist/{chunk-K2TPS5LB.js → chunk-4U4BKCXK.js} +2 -2
- package/dist/chunk-4U4BKCXK.js.map +1 -0
- package/dist/{chunk-2A5XJB43.js → chunk-5AKPEK5L.js} +3 -3
- package/dist/chunk-5AKPEK5L.js.map +1 -0
- package/dist/{chunk-RAF443UI.js → chunk-DBIGN5MJ.js} +2 -2
- package/dist/{chunk-JLZQWFV3.js → chunk-K33INZHH.js} +2 -2
- package/dist/chunk-K33INZHH.js.map +1 -0
- package/dist/{chunk-NU65VQ7M.js → chunk-MAZ26DC7.js} +1 -1
- package/dist/chunk-MAZ26DC7.js.map +1 -0
- package/dist/{chunk-LSH4MMOZ.js → chunk-NCRFYPS3.js} +1 -1
- package/dist/chunk-NCRFYPS3.js.map +1 -0
- package/dist/{chunk-ZN274SWR.js → chunk-PALJO75S.js} +2 -2
- package/dist/{chunk-OWLAAMME.js → chunk-QHF6EQKK.js} +3 -2
- package/dist/chunk-QHF6EQKK.js.map +1 -0
- package/dist/chunk-R5UQJNKC.js +722 -0
- package/dist/chunk-R5UQJNKC.js.map +1 -0
- package/dist/{chunk-SESZDQPX.js → chunk-RUI6SIHY.js} +3 -3
- package/dist/chunk-RUI6SIHY.js.map +1 -0
- package/dist/{chunk-EDUKQ5AM.js → chunk-SZSBQUIJ.js} +2 -2
- package/dist/chunk-SZSBQUIJ.js.map +1 -0
- package/dist/{chunk-4F5DQN55.js → chunk-VSMTAMNK.js} +1 -1
- package/dist/chunk-VSMTAMNK.js.map +1 -0
- package/dist/{chunk-5LBB5B3Z.js → chunk-XFZCM5Z3.js} +1 -1
- package/dist/chunk-XFZCM5Z3.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/{control-CBShYYA6.d.ts → control-BT4qnXiS.d.ts} +2 -2
- package/dist/{control-runtime-BuJHoLg0.d.ts → control-runtime-BZ_lVLYW.d.ts} +1 -0
- package/dist/control.d.ts +3 -3
- package/dist/control.js +2 -2
- package/dist/{failure-cluster-C2EGSDiT.d.ts → failure-cluster-Cw65_5FY.d.ts} +1 -2
- package/dist/{feedback-trajectory-DfFdrraJ.d.ts → feedback-trajectory-D1aGKusy.d.ts} +1 -1
- package/dist/governance/index.d.ts +1 -1
- package/dist/{index-Oj9fAPPN.d.ts → index-BhLlu-qO.d.ts} +63 -2
- package/dist/index.d.ts +279 -72
- package/dist/index.js +222 -136
- package/dist/index.js.map +1 -1
- package/dist/knowledge/index.d.ts +1 -1
- package/dist/knowledge/index.js +2 -2
- package/dist/{multi-layer-verifier-LkP3LVKj.d.ts → multi-layer-verifier-U-c8ge1k.d.ts} +1 -1
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +5 -5
- package/dist/optimization.js +5 -5
- package/dist/pipelines/index.d.ts +1 -1
- package/dist/pipelines/index.js +2 -2
- package/dist/{release-report-BNgMdqPF.d.ts → release-report-CCQqnK46.d.ts} +1 -1
- package/dist/{replay-BL96gCEP.d.ts → replay-D7z0J43-.d.ts} +4 -5
- package/dist/reporting.d.ts +4 -4
- package/dist/reporting.js +5 -5
- package/dist/{researcher-BPT8x_NT.d.ts → researcher-G81CWc0q.d.ts} +9 -10
- package/dist/rl.d.ts +26 -44
- package/dist/rl.js +5 -5
- package/dist/rl.js.map +1 -1
- package/dist/{sequential-Dgz1n51-.d.ts → sequential-5iSVfzl2.d.ts} +2 -2
- package/dist/{summary-report-C7VPYEj2.d.ts → summary-report-Dl4akLKX.d.ts} +13 -1
- package/dist/traces.d.ts +1 -1
- package/dist/traces.js +2 -2
- package/dist/wire/index.d.ts +2 -2
- package/dist/wire/index.js +1 -1
- package/docs/concepts.md +11 -0
- package/docs/research-report-methodology.md +4 -4
- package/docs/three-package-architecture.md +12 -24
- package/package.json +1 -1
- package/dist/chunk-2A5XJB43.js.map +0 -1
- package/dist/chunk-4F5DQN55.js.map +0 -1
- package/dist/chunk-5LBB5B3Z.js.map +0 -1
- package/dist/chunk-EDUKQ5AM.js.map +0 -1
- package/dist/chunk-I4MBDTY5.js +0 -272
- package/dist/chunk-I4MBDTY5.js.map +0 -1
- package/dist/chunk-JLZQWFV3.js.map +0 -1
- package/dist/chunk-K2TPS5LB.js.map +0 -1
- package/dist/chunk-LSH4MMOZ.js.map +0 -1
- package/dist/chunk-NU65VQ7M.js.map +0 -1
- package/dist/chunk-OWLAAMME.js.map +0 -1
- package/dist/chunk-SESZDQPX.js.map +0 -1
- /package/dist/{chunk-WWYCWKUM.js.map → chunk-3CKU6VGU.js.map} +0 -0
- /package/dist/{chunk-RAF443UI.js.map → chunk-DBIGN5MJ.js.map} +0 -0
- /package/dist/{chunk-ZN274SWR.js.map → chunk-PALJO75S.js.map} +0 -0
package/dist/chunk-I4MBDTY5.js
DELETED
|
@@ -1,272 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
ValidationError
|
|
3
|
-
} from "./chunk-NG236HPC.js";
|
|
4
|
-
|
|
5
|
-
// src/statistics.ts
|
|
6
|
-
var INVERTED_DIMENSIONS = /* @__PURE__ */ new Set(["hallucination", "false_confidence", "worst_failure"]);
|
|
7
|
-
function normalizeScores(scores) {
|
|
8
|
-
return scores.map((s) => {
|
|
9
|
-
if (INVERTED_DIMENSIONS.has(s.dimension)) {
|
|
10
|
-
return s;
|
|
11
|
-
}
|
|
12
|
-
return s;
|
|
13
|
-
});
|
|
14
|
-
}
|
|
15
|
-
function weightedMean(scores) {
|
|
16
|
-
if (scores.length === 0) return 0;
|
|
17
|
-
let totalWeight = 0;
|
|
18
|
-
let weightedSum = 0;
|
|
19
|
-
for (const { score, weight } of scores) {
|
|
20
|
-
const w = weight ?? 1;
|
|
21
|
-
weightedSum += score * w;
|
|
22
|
-
totalWeight += w;
|
|
23
|
-
}
|
|
24
|
-
return totalWeight > 0 ? weightedSum / totalWeight : 0;
|
|
25
|
-
}
|
|
26
|
-
function confidenceInterval(scores, confidence = 0.95) {
|
|
27
|
-
if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 };
|
|
28
|
-
if (scores.length === 1) return { mean: scores[0], lower: scores[0], upper: scores[0] };
|
|
29
|
-
const n = scores.length;
|
|
30
|
-
const mean = scores.reduce((a, b) => a + b, 0) / n;
|
|
31
|
-
const B = 1e3;
|
|
32
|
-
const bootstrapMeans = [];
|
|
33
|
-
for (let i = 0; i < B; i++) {
|
|
34
|
-
let sum = 0;
|
|
35
|
-
for (let j = 0; j < n; j++) {
|
|
36
|
-
sum += scores[Math.floor(Math.random() * n)];
|
|
37
|
-
}
|
|
38
|
-
bootstrapMeans.push(sum / n);
|
|
39
|
-
}
|
|
40
|
-
bootstrapMeans.sort((a, b) => a - b);
|
|
41
|
-
const alpha = 1 - confidence;
|
|
42
|
-
const lowerIdx = Math.floor(alpha / 2 * B);
|
|
43
|
-
const upperIdx = Math.floor((1 - alpha / 2) * B) - 1;
|
|
44
|
-
return {
|
|
45
|
-
mean,
|
|
46
|
-
lower: bootstrapMeans[lowerIdx],
|
|
47
|
-
upper: bootstrapMeans[Math.min(upperIdx, B - 1)]
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
|
-
function interRaterReliability(judgeScores) {
|
|
51
|
-
if (judgeScores.length < 2) return 1;
|
|
52
|
-
const dimensionMap = /* @__PURE__ */ new Map();
|
|
53
|
-
for (const judgeSet of judgeScores) {
|
|
54
|
-
for (const s of judgeSet) {
|
|
55
|
-
if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, []);
|
|
56
|
-
const arr = dimensionMap.get(s.dimension);
|
|
57
|
-
if (arr.length === 0 || arr[arr.length - 1].length >= judgeScores.length) {
|
|
58
|
-
arr.push([s.score]);
|
|
59
|
-
} else {
|
|
60
|
-
arr[arr.length - 1].push(s.score);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
const allValues = [];
|
|
65
|
-
const pairDiffs = [];
|
|
66
|
-
for (const items of dimensionMap.values()) {
|
|
67
|
-
for (const ratings of items) {
|
|
68
|
-
if (ratings.length < 2) continue;
|
|
69
|
-
for (const v of ratings) allValues.push(v);
|
|
70
|
-
for (let i = 0; i < ratings.length; i++) {
|
|
71
|
-
for (let j = i + 1; j < ratings.length; j++) {
|
|
72
|
-
pairDiffs.push((ratings[i] - ratings[j]) ** 2);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
if (pairDiffs.length === 0 || allValues.length < 2) return 1;
|
|
78
|
-
const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length;
|
|
79
|
-
let expectedDisagreement = 0;
|
|
80
|
-
let expectedCount = 0;
|
|
81
|
-
for (let i = 0; i < allValues.length; i++) {
|
|
82
|
-
for (let j = i + 1; j < allValues.length; j++) {
|
|
83
|
-
expectedDisagreement += (allValues[i] - allValues[j]) ** 2;
|
|
84
|
-
expectedCount++;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0;
|
|
88
|
-
if (expectedDisagreement === 0) return 1;
|
|
89
|
-
return 1 - observedDisagreement / expectedDisagreement;
|
|
90
|
-
}
|
|
91
|
-
function mannWhitneyU(a, b) {
|
|
92
|
-
if (a.length === 0 || b.length === 0) return { u: 0, p: 1 };
|
|
93
|
-
const n1 = a.length;
|
|
94
|
-
const n2 = b.length;
|
|
95
|
-
const combined = [
|
|
96
|
-
...a.map((v) => ({ v, group: "a" })),
|
|
97
|
-
...b.map((v) => ({ v, group: "b" }))
|
|
98
|
-
].sort((x, y) => x.v - y.v);
|
|
99
|
-
const ranks = new Array(combined.length);
|
|
100
|
-
let i = 0;
|
|
101
|
-
while (i < combined.length) {
|
|
102
|
-
let j = i;
|
|
103
|
-
while (j < combined.length && combined[j].v === combined[i].v) j++;
|
|
104
|
-
const avgRank = (i + 1 + j) / 2;
|
|
105
|
-
for (let k = i; k < j; k++) ranks[k] = avgRank;
|
|
106
|
-
i = j;
|
|
107
|
-
}
|
|
108
|
-
let r1 = 0;
|
|
109
|
-
for (let k = 0; k < combined.length; k++) {
|
|
110
|
-
if (combined[k].group === "a") r1 += ranks[k];
|
|
111
|
-
}
|
|
112
|
-
const u1 = r1 - n1 * (n1 + 1) / 2;
|
|
113
|
-
const u2 = n1 * n2 - u1;
|
|
114
|
-
const u = Math.min(u1, u2);
|
|
115
|
-
const mu = n1 * n2 / 2;
|
|
116
|
-
const sigma = Math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12);
|
|
117
|
-
if (sigma === 0) return { u, p: 1 };
|
|
118
|
-
const z = Math.abs(u - mu) / sigma;
|
|
119
|
-
const p = 2 * (1 - normalCdf(z));
|
|
120
|
-
return { u, p };
|
|
121
|
-
}
|
|
122
|
-
function partialCredit(current, target) {
|
|
123
|
-
if (target <= 0) return 1;
|
|
124
|
-
return Math.min(1, Math.max(0, current / target));
|
|
125
|
-
}
|
|
126
|
-
function pairedTTest(before, after) {
|
|
127
|
-
if (before.length !== after.length) {
|
|
128
|
-
throw new ValidationError(
|
|
129
|
-
`pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`
|
|
130
|
-
);
|
|
131
|
-
}
|
|
132
|
-
const n = before.length;
|
|
133
|
-
if (n < 2) return { t: 0, df: 0, p: 1 };
|
|
134
|
-
const diffs = before.map((b, i) => after[i] - b);
|
|
135
|
-
const mean = diffs.reduce((a, b) => a + b, 0) / n;
|
|
136
|
-
const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1);
|
|
137
|
-
const se = Math.sqrt(variance / n);
|
|
138
|
-
if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 };
|
|
139
|
-
const t = mean / se;
|
|
140
|
-
const df = n - 1;
|
|
141
|
-
const p = 2 * (1 - studentTCdf(Math.abs(t), df));
|
|
142
|
-
return { t, df, p };
|
|
143
|
-
}
|
|
144
|
-
function wilcoxonSignedRank(before, after) {
|
|
145
|
-
if (before.length !== after.length) {
|
|
146
|
-
throw new ValidationError(
|
|
147
|
-
`wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`
|
|
148
|
-
);
|
|
149
|
-
}
|
|
150
|
-
const diffs = before.map((b, i2) => after[i2] - b).filter((d) => d !== 0);
|
|
151
|
-
const n = diffs.length;
|
|
152
|
-
if (n < 6) return { w: 0, p: 1 };
|
|
153
|
-
const absRanks = diffs.map((d, i2) => ({ abs: Math.abs(d), sign: Math.sign(d), i: i2 })).sort((a, b) => a.abs - b.abs);
|
|
154
|
-
const ranks = new Array(n);
|
|
155
|
-
let i = 0;
|
|
156
|
-
while (i < n) {
|
|
157
|
-
let j = i;
|
|
158
|
-
while (j < n && absRanks[j].abs === absRanks[i].abs) j++;
|
|
159
|
-
const avg = (i + 1 + j) / 2;
|
|
160
|
-
for (let k = i; k < j; k++) ranks[absRanks[k].i] = avg;
|
|
161
|
-
i = j;
|
|
162
|
-
}
|
|
163
|
-
let wPlus = 0;
|
|
164
|
-
for (let k = 0; k < n; k++) if (diffs[k] > 0) wPlus += ranks[k];
|
|
165
|
-
const mean = n * (n + 1) / 4;
|
|
166
|
-
const variance = n * (n + 1) * (2 * n + 1) / 24;
|
|
167
|
-
const z = (wPlus - mean) / Math.sqrt(variance);
|
|
168
|
-
const p = 2 * (1 - normalCdf(Math.abs(z)));
|
|
169
|
-
return { w: wPlus, p };
|
|
170
|
-
}
|
|
171
|
-
function cohensD(a, b) {
|
|
172
|
-
if (a.length < 2 || b.length < 2) return 0;
|
|
173
|
-
const meanA = a.reduce((x, y) => x + y, 0) / a.length;
|
|
174
|
-
const meanB = b.reduce((x, y) => x + y, 0) / b.length;
|
|
175
|
-
const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1);
|
|
176
|
-
const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1);
|
|
177
|
-
const pooled = Math.sqrt(
|
|
178
|
-
((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2)
|
|
179
|
-
);
|
|
180
|
-
if (pooled === 0) return 0;
|
|
181
|
-
return (meanB - meanA) / pooled;
|
|
182
|
-
}
|
|
183
|
-
function studentTCdf(t, df) {
|
|
184
|
-
if (df <= 0) return 0.5;
|
|
185
|
-
if (df > 100) return normalCdf(t);
|
|
186
|
-
const x = df / (df + t * t);
|
|
187
|
-
const a = df / 2;
|
|
188
|
-
const b = 0.5;
|
|
189
|
-
const ib = incompleteBeta(x, a, b);
|
|
190
|
-
return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib;
|
|
191
|
-
}
|
|
192
|
-
function incompleteBeta(x, a, b) {
|
|
193
|
-
if (x <= 0) return 0;
|
|
194
|
-
if (x >= 1) return 1;
|
|
195
|
-
const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b);
|
|
196
|
-
const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a;
|
|
197
|
-
const maxIter = 200;
|
|
198
|
-
const eps = 3e-7;
|
|
199
|
-
let c = 1;
|
|
200
|
-
let d = 1 - (a + b) * x / (a + 1);
|
|
201
|
-
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
202
|
-
d = 1 / d;
|
|
203
|
-
let f = d;
|
|
204
|
-
for (let m = 1; m <= maxIter; m++) {
|
|
205
|
-
const m2 = 2 * m;
|
|
206
|
-
let num = m * (b - m) * x / ((a + m2 - 1) * (a + m2));
|
|
207
|
-
d = 1 + num * d;
|
|
208
|
-
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
209
|
-
c = 1 + num / c;
|
|
210
|
-
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
211
|
-
d = 1 / d;
|
|
212
|
-
f *= d * c;
|
|
213
|
-
num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
|
|
214
|
-
d = 1 + num * d;
|
|
215
|
-
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
216
|
-
c = 1 + num / c;
|
|
217
|
-
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
218
|
-
d = 1 / d;
|
|
219
|
-
const delta = d * c;
|
|
220
|
-
f *= delta;
|
|
221
|
-
if (Math.abs(delta - 1) < eps) break;
|
|
222
|
-
}
|
|
223
|
-
return front * f;
|
|
224
|
-
}
|
|
225
|
-
function lnGamma(z) {
|
|
226
|
-
const g = 7;
|
|
227
|
-
const coefs = [
|
|
228
|
-
0.9999999999998099,
|
|
229
|
-
676.5203681218851,
|
|
230
|
-
-1259.1392167224028,
|
|
231
|
-
771.3234287776531,
|
|
232
|
-
-176.6150291621406,
|
|
233
|
-
12.507343278686905,
|
|
234
|
-
-0.13857109526572012,
|
|
235
|
-
9984369578019572e-21,
|
|
236
|
-
15056327351493116e-23
|
|
237
|
-
];
|
|
238
|
-
if (z < 0.5) {
|
|
239
|
-
return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z);
|
|
240
|
-
}
|
|
241
|
-
z -= 1;
|
|
242
|
-
let x = coefs[0];
|
|
243
|
-
for (let i = 1; i < g + 2; i++) x += coefs[i] / (z + i);
|
|
244
|
-
const t = z + g + 0.5;
|
|
245
|
-
return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x);
|
|
246
|
-
}
|
|
247
|
-
function normalCdf(x) {
|
|
248
|
-
const a1 = 0.254829592;
|
|
249
|
-
const a2 = -0.284496736;
|
|
250
|
-
const a3 = 1.421413741;
|
|
251
|
-
const a4 = -1.453152027;
|
|
252
|
-
const a5 = 1.061405429;
|
|
253
|
-
const p = 0.3275911;
|
|
254
|
-
const sign = x < 0 ? -1 : 1;
|
|
255
|
-
const absX = Math.abs(x);
|
|
256
|
-
const t = 1 / (1 + p * absX);
|
|
257
|
-
const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX / 2);
|
|
258
|
-
return 0.5 * (1 + sign * y);
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
export {
|
|
262
|
-
normalizeScores,
|
|
263
|
-
weightedMean,
|
|
264
|
-
confidenceInterval,
|
|
265
|
-
interRaterReliability,
|
|
266
|
-
mannWhitneyU,
|
|
267
|
-
partialCredit,
|
|
268
|
-
pairedTTest,
|
|
269
|
-
wilcoxonSignedRank,
|
|
270
|
-
cohensD
|
|
271
|
-
};
|
|
272
|
-
//# sourceMappingURL=chunk-I4MBDTY5.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/statistics.ts"],"sourcesContent":["import { ValidationError } from './errors'\nimport type { JudgeScore } from './types'\n\n/** Dimensions where lower raw score = better outcome (inverted semantics) */\nconst INVERTED_DIMENSIONS = new Set(['hallucination', 'false_confidence', 'worst_failure'])\n\n/**\n * Normalize scores so all dimensions follow \"higher = better\".\n * Inverted dimensions (hallucination, false_confidence, worst_failure)\n * already use inverted scoring in the prompt (10 = no hallucination),\n * but this function ensures consistency if raw scores leak through.\n */\nexport function normalizeScores(scores: JudgeScore[]): JudgeScore[] {\n return scores.map((s) => {\n if (INVERTED_DIMENSIONS.has(s.dimension)) {\n return s\n }\n return s\n })\n}\n\n/** Weighted mean — falls back to uniform weights when omitted */\nexport function weightedMean(scores: { score: number; weight?: number }[]): number {\n if (scores.length === 0) return 0\n let totalWeight = 0\n let weightedSum = 0\n for (const { score, weight } of scores) {\n const w = weight ?? 1\n weightedSum += score * w\n totalWeight += w\n }\n return totalWeight > 0 ? weightedSum / totalWeight : 0\n}\n\n/** Bootstrap confidence interval */\nexport function confidenceInterval(\n scores: number[],\n confidence = 0.95,\n): { mean: number; lower: number; upper: number } {\n if (scores.length === 0) return { mean: 0, lower: 0, upper: 0 }\n if (scores.length === 1) return { mean: scores[0]!, lower: scores[0]!, upper: scores[0]! }\n\n const n = scores.length\n const mean = scores.reduce((a, b) => a + b, 0) / n\n\n const B = 1000\n const bootstrapMeans: number[] = []\n\n for (let i = 0; i < B; i++) {\n let sum = 0\n for (let j = 0; j < n; j++) {\n sum += scores[Math.floor(Math.random() * n)]!\n }\n bootstrapMeans.push(sum / n)\n }\n\n bootstrapMeans.sort((a, b) => a - b)\n\n const alpha = 1 - confidence\n const lowerIdx = Math.floor((alpha / 2) * B)\n const upperIdx = Math.floor((1 - alpha / 2) * B) - 1\n\n return {\n mean,\n lower: bootstrapMeans[lowerIdx]!,\n upper: bootstrapMeans[Math.min(upperIdx, B - 1)]!,\n }\n}\n\n/**\n * Inter-rater reliability — simplified Krippendorff's alpha.\n *\n * Each inner array is one judge's scores for all items.\n * All arrays must have the same length (same items scored).\n */\nexport function interRaterReliability(judgeScores: JudgeScore[][]): number {\n if (judgeScores.length < 2) return 1\n\n // Group scores by dimension across judges\n const dimensionMap = new Map<string, number[][]>()\n for (const judgeSet of judgeScores) {\n for (const s of judgeSet) {\n if (!dimensionMap.has(s.dimension)) dimensionMap.set(s.dimension, [])\n const arr = dimensionMap.get(s.dimension)!\n if (arr.length === 0 || arr[arr.length - 1]!.length >= judgeScores.length) {\n arr.push([s.score])\n } else {\n arr[arr.length - 1]!.push(s.score)\n }\n }\n }\n\n // Collect all paired ratings\n const allValues: number[] = []\n const pairDiffs: number[] = []\n\n for (const items of dimensionMap.values()) {\n for (const ratings of items) {\n if (ratings.length < 2) continue\n for (const v of ratings) allValues.push(v)\n for (let i = 0; i < ratings.length; i++) {\n for (let j = i + 1; j < ratings.length; j++) {\n pairDiffs.push((ratings[i]! - ratings[j]!) ** 2)\n }\n }\n }\n }\n\n if (pairDiffs.length === 0 || allValues.length < 2) return 1\n\n const observedDisagreement = pairDiffs.reduce((a, b) => a + b, 0) / pairDiffs.length\n\n // Expected disagreement from all possible pairings of values\n let expectedDisagreement = 0\n let expectedCount = 0\n for (let i = 0; i < allValues.length; i++) {\n for (let j = i + 1; j < allValues.length; j++) {\n expectedDisagreement += (allValues[i]! - allValues[j]!) ** 2\n expectedCount++\n }\n }\n expectedDisagreement = expectedCount > 0 ? expectedDisagreement / expectedCount : 0\n\n if (expectedDisagreement === 0) return 1\n return 1 - observedDisagreement / expectedDisagreement\n}\n\n/**\n * Mann-Whitney U test for comparing two independent groups.\n * Returns U statistic and approximate p-value (normal approximation).\n */\nexport function mannWhitneyU(a: number[], b: number[]): { u: number; p: number } {\n if (a.length === 0 || b.length === 0) return { u: 0, p: 1 }\n\n const n1 = a.length\n const n2 = b.length\n\n // Rank all values together\n const combined = [\n ...a.map((v) => ({ v, group: 'a' as const })),\n ...b.map((v) => ({ v, group: 'b' as const })),\n ].sort((x, y) => x.v - y.v)\n\n // Assign ranks with tie handling\n const ranks: number[] = new Array(combined.length)\n let i = 0\n while (i < combined.length) {\n let j = i\n while (j < combined.length && combined[j]!.v === combined[i]!.v) j++\n const avgRank = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[k] = avgRank\n i = j\n }\n\n // Sum ranks for group a\n let r1 = 0\n for (let k = 0; k < combined.length; k++) {\n if (combined[k]!.group === 'a') r1 += ranks[k]!\n }\n\n const u1 = r1 - (n1 * (n1 + 1)) / 2\n const u2 = n1 * n2 - u1\n const u = Math.min(u1, u2)\n\n // Normal approximation for p-value\n const mu = (n1 * n2) / 2\n const sigma = Math.sqrt((n1 * n2 * (n1 + n2 + 1)) / 12)\n\n if (sigma === 0) return { u, p: 1 }\n\n const z = Math.abs(u - mu) / sigma\n // Two-tailed p-value from z-score (approximation)\n const p = 2 * (1 - normalCdf(z))\n\n return { u, p }\n}\n\n/** Partial credit: returns 0-1 ratio of current toward target */\nexport function partialCredit(current: number, target: number): number {\n if (target <= 0) return 1\n return Math.min(1, Math.max(0, current / target))\n}\n\n/**\n * Paired t-test — before/after measurements on the SAME items.\n * Pairing removes inter-item variance, giving tighter significance than\n * an unpaired test when comparing prompt v1 vs prompt v2 on identical\n * scenarios.\n */\nexport function pairedTTest(\n before: number[],\n after: number[],\n): { t: number; df: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `pairedTTest: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const n = before.length\n if (n < 2) return { t: 0, df: 0, p: 1 }\n\n const diffs = before.map((b, i) => after[i]! - b)\n const mean = diffs.reduce((a, b) => a + b, 0) / n\n const variance = diffs.reduce((acc, d) => acc + (d - mean) ** 2, 0) / (n - 1)\n const se = Math.sqrt(variance / n)\n if (se === 0) return { t: mean === 0 ? 0 : Infinity, df: n - 1, p: mean === 0 ? 1 : 0 }\n\n const t = mean / se\n const df = n - 1\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\n/**\n * Wilcoxon signed-rank test — paired non-parametric alternative.\n * Use when the differences aren't normally distributed.\n */\nexport function wilcoxonSignedRank(before: number[], after: number[]): { w: number; p: number } {\n if (before.length !== after.length) {\n throw new ValidationError(\n `wilcoxonSignedRank: unequal sample sizes (${before.length} vs ${after.length})`,\n )\n }\n const diffs = before.map((b, i) => after[i]! - b).filter((d) => d !== 0)\n const n = diffs.length\n if (n < 6) return { w: 0, p: 1 }\n\n const absRanks = diffs\n .map((d, i) => ({ abs: Math.abs(d), sign: Math.sign(d), i }))\n .sort((a, b) => a.abs - b.abs)\n const ranks: number[] = new Array(n)\n let i = 0\n while (i < n) {\n let j = i\n while (j < n && absRanks[j]!.abs === absRanks[i]!.abs) j++\n const avg = (i + 1 + j) / 2\n for (let k = i; k < j; k++) ranks[absRanks[k]!.i] = avg\n i = j\n }\n let wPlus = 0\n for (let k = 0; k < n; k++) if (diffs[k]! > 0) wPlus += ranks[k]!\n\n const mean = (n * (n + 1)) / 4\n const variance = (n * (n + 1) * (2 * n + 1)) / 24\n const z = (wPlus - mean) / Math.sqrt(variance)\n const p = 2 * (1 - normalCdf(Math.abs(z)))\n return { w: wPlus, p }\n}\n\n/**\n * Cohen's d — standardized effect size for two independent groups.\n * Positive d means group b has higher mean than group a.\n * Rule of thumb: |d| < 0.2 negligible, 0.2–0.5 small, 0.5–0.8 medium, > 0.8 large.\n */\nexport function cohensD(a: number[], b: number[]): number {\n if (a.length < 2 || b.length < 2) return 0\n const meanA = a.reduce((x, y) => x + y, 0) / a.length\n const meanB = b.reduce((x, y) => x + y, 0) / b.length\n const varA = a.reduce((acc, x) => acc + (x - meanA) ** 2, 0) / (a.length - 1)\n const varB = b.reduce((acc, x) => acc + (x - meanB) ** 2, 0) / (b.length - 1)\n const pooled = Math.sqrt(\n ((a.length - 1) * varA + (b.length - 1) * varB) / (a.length + b.length - 2),\n )\n if (pooled === 0) return 0\n return (meanB - meanA) / pooled\n}\n\n/** Student-t CDF approximation via Abramowitz-Stegun series. */\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const a = df / 2\n const b = 0.5\n const ib = incompleteBeta(x, a, b)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\n/** Regularized incomplete beta function via continued fraction (Lentz). */\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n const maxIter = 200\n const eps = 3e-7\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= maxIter; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < eps) break\n }\n return front * f\n}\n\n/** Lanczos approximation to ln Γ(z). */\nfunction lnGamma(z: number): number {\n const g = 7\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) {\n return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n }\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < g + 2; i++) x += coefs[i]! / (z + i)\n const t = z + g + 0.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\n// Standard normal CDF approximation (Abramowitz and Stegun)\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;AAIA,IAAM,sBAAsB,oBAAI,IAAI,CAAC,iBAAiB,oBAAoB,eAAe,CAAC;AAQnF,SAAS,gBAAgB,QAAoC;AAClE,SAAO,OAAO,IAAI,CAAC,MAAM;AACvB,QAAI,oBAAoB,IAAI,EAAE,SAAS,GAAG;AACxC,aAAO;AAAA,IACT;AACA,WAAO;AAAA,EACT,CAAC;AACH;AAGO,SAAS,aAAa,QAAsD;AACjF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,cAAc;AAClB,MAAI,cAAc;AAClB,aAAW,EAAE,OAAO,OAAO,KAAK,QAAQ;AACtC,UAAM,IAAI,UAAU;AACpB,mBAAe,QAAQ;AACvB,mBAAe;AAAA,EACjB;AACA,SAAO,cAAc,IAAI,cAAc,cAAc;AACvD;AAGO,SAAS,mBACd,QACA,aAAa,MACmC;AAChD,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,GAAG,OAAO,GAAG,OAAO,EAAE;AAC9D,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE,MAAM,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,GAAI,OAAO,OAAO,CAAC,EAAG;AAEzF,QAAM,IAAI,OAAO;AACjB,QAAM,OAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAEjD,QAAM,IAAI;AACV,QAAM,iBAA2B,CAAC;AAElC,WAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,aAAO,OAAO,KAAK,MAAM,KAAK,OAAO,IAAI,CAAC,CAAC;AAAA,IAC7C;AACA,mBAAe,KAAK,MAAM,CAAC;AAAA,EAC7B;AAEA,iBAAe,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAEnC,QAAM,QAAQ,IAAI;AAClB,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,CAAC;AAC3C,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,CAAC,IAAI;AAEnD,SAAO;AAAA,IACL;AAAA,IACA,OAAO,eAAe,QAAQ;AAAA,IAC9B,OAAO,eAAe,KAAK,IAAI,UAAU,IAAI,CAAC,CAAC;AAAA,EACjD;AACF;AAQO,SAAS,sBAAsB,aAAqC;AACzE,MAAI,YAAY,SAAS,EAAG,QAAO;AAGnC,QAAM,eAAe,oBAAI,IAAwB;AACjD,aAAW,YAAY,aAAa;AAClC,eAAW,KAAK,UAAU;AACxB,UAAI,CAAC,aAAa,IAAI,EAAE,SAAS,EAAG,cAAa,IAAI,EAAE,WAAW,CAAC,CAAC;AACpE,YAAM,MAAM,aAAa,IAAI,EAAE,SAAS;AACxC,UAAI,IAAI,WAAW,KAAK,IAAI,IAAI,SAAS,CAAC,EAAG,UAAU,YAAY,QAAQ;AACzE,YAAI,KAAK,CAAC,EAAE,KAAK,CAAC;AAAA,MACpB,OAAO;AACL,YAAI,IAAI,SAAS,CAAC,EAAG,KAAK,EAAE,KAAK;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAGA,QAAM,YAAsB,CAAC;AAC7B,QAAM,YAAsB,CAAC;AAE7B,aAAW,SAAS,aAAa,OAAO,GAAG;AACzC,eAAW,WAAW,OAAO;AAC3B,UAAI,QAAQ,SAAS,EAAG;AACxB,iBAAW,KAAK,QAAS,WAAU,KAAK,CAAC;AACzC,eAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,iBAAS,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AAC3C,oBAAU,MAAM,QAAQ,CAAC,IAAK,QAAQ,CAAC,MAAO,CAAC;AAAA,QACjD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,MAAI,UAAU,WAAW,KAAK,UAAU,SAAS,EAAG,QAAO;AAE3D,QAAM,uBAAuB,UAAU,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,UAAU;AAG9E,MAAI,uBAAuB;AAC3B,MAAI,gBAAgB;AACpB,WAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,aAAS,IAAI,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AAC7C,+BAAyB,UAAU,CAAC,IAAK,UAAU,CAAC,MAAO;AAC3D;AAAA,IACF;AAAA,EACF;AACA,yBAAuB,gBAAgB,IAAI,uBAAuB,gBAAgB;AAElF,MAAI,yBAAyB,EAAG,QAAO;AACvC,SAAO,IAAI,uBAAuB;AACpC;AAMO,SAAS,aAAa,GAAa,GAAuC;AAC/E,MAAI,EAAE,WAAW,KAAK,EAAE,WAAW,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE1D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AAGb,QAAM,WAAW;AAAA,IACf,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,IAC5C,GAAG,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,IAAa,EAAE;AAAA,EAC9C,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAG1B,QAAM,QAAkB,IAAI,MAAM,SAAS,MAAM;AACjD,MAAI,IAAI;AACR,SAAO,IAAI,SAAS,QAAQ;AAC1B,QAAI,IAAI;AACR,WAAO,IAAI,SAAS,UAAU,SAAS,CAAC,EAAG,MAAM,SAAS,CAAC,EAAG,EAAG;AACjE,UAAM,WAAW,IAAI,IAAI,KAAK;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,CAAC,IAAI;AACvC,QAAI;AAAA,EACN;AAGA,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,QAAI,SAAS,CAAC,EAAG,UAAU,IAAK,OAAM,MAAM,CAAC;AAAA,EAC/C;AAEA,QAAM,KAAK,KAAM,MAAM,KAAK,KAAM;AAClC,QAAM,KAAK,KAAK,KAAK;AACrB,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE;AAGzB,QAAM,KAAM,KAAK,KAAM;AACvB,QAAM,QAAQ,KAAK,KAAM,KAAK,MAAM,KAAK,KAAK,KAAM,EAAE;AAEtD,MAAI,UAAU,EAAG,QAAO,EAAE,GAAG,GAAG,EAAE;AAElC,QAAM,IAAI,KAAK,IAAI,IAAI,EAAE,IAAI;AAE7B,QAAM,IAAI,KAAK,IAAI,UAAU,CAAC;AAE9B,SAAO,EAAE,GAAG,EAAE;AAChB;AAGO,SAAS,cAAc,SAAiB,QAAwB;AACrE,MAAI,UAAU,EAAG,QAAO;AACxB,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,UAAU,MAAM,CAAC;AAClD;AAQO,SAAS,YACd,QACA,OACsC;AACtC,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,sCAAsC,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IACxE;AAAA,EACF;AACA,QAAM,IAAI,OAAO;AACjB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAEtC,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAG,MAAM,MAAM,CAAC,IAAK,CAAC;AAChD,QAAM,OAAO,MAAM,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI;AAChD,QAAM,WAAW,MAAM,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,SAAS,GAAG,CAAC,KAAK,IAAI;AAC3E,QAAM,KAAK,KAAK,KAAK,WAAW,CAAC;AACjC,MAAI,OAAO,EAAG,QAAO,EAAE,GAAG,SAAS,IAAI,IAAI,UAAU,IAAI,IAAI,GAAG,GAAG,SAAS,IAAI,IAAI,EAAE;AAEtF,QAAM,IAAI,OAAO;AACjB,QAAM,KAAK,IAAI;AACf,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAMO,SAAS,mBAAmB,QAAkB,OAA2C;AAC9F,MAAI,OAAO,WAAW,MAAM,QAAQ;AAClC,UAAM,IAAI;AAAA,MACR,6CAA6C,OAAO,MAAM,OAAO,MAAM,MAAM;AAAA,IAC/E;AAAA,EACF;AACA,QAAM,QAAQ,OAAO,IAAI,CAAC,GAAGA,OAAM,MAAMA,EAAC,IAAK,CAAC,EAAE,OAAO,CAAC,MAAM,MAAM,CAAC;AACvE,QAAM,IAAI,MAAM;AAChB,MAAI,IAAI,EAAG,QAAO,EAAE,GAAG,GAAG,GAAG,EAAE;AAE/B,QAAM,WAAW,MACd,IAAI,CAAC,GAAGA,QAAO,EAAE,KAAK,KAAK,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,CAAC,GAAG,GAAAA,GAAE,EAAE,EAC3D,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAC/B,QAAM,QAAkB,IAAI,MAAM,CAAC;AACnC,MAAI,IAAI;AACR,SAAO,IAAI,GAAG;AACZ,QAAI,IAAI;AACR,WAAO,IAAI,KAAK,SAAS,CAAC,EAAG,QAAQ,SAAS,CAAC,EAAG,IAAK;AACvD,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,IAAI,GAAG,IAAK,OAAM,SAAS,CAAC,EAAG,CAAC,IAAI;AACpD,QAAI;AAAA,EACN;AACA,MAAI,QAAQ;AACZ,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,KAAI,MAAM,CAAC,IAAK,EAAG,UAAS,MAAM,CAAC;AAE/D,QAAM,OAAQ,KAAK,IAAI,KAAM;AAC7B,QAAM,WAAY,KAAK,IAAI,MAAM,IAAI,IAAI,KAAM;AAC/C,QAAM,KAAK,QAAQ,QAAQ,KAAK,KAAK,QAAQ;AAC7C,QAAM,IAAI,KAAK,IAAI,UAAU,KAAK,IAAI,CAAC,CAAC;AACxC,SAAO,EAAE,GAAG,OAAO,EAAE;AACvB;AAOO,SAAS,QAAQ,GAAa,GAAqB;AACxD,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO;AACzC,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,QAAQ,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC/C,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,OAAO,EAAE,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,UAAU,GAAG,CAAC,KAAK,EAAE,SAAS;AAC3E,QAAM,SAAS,KAAK;AAAA,MAChB,EAAE,SAAS,KAAK,QAAQ,EAAE,SAAS,KAAK,SAAS,EAAE,SAAS,EAAE,SAAS;AAAA,EAC3E;AACA,MAAI,WAAW,EAAG,QAAO;AACzB,UAAQ,QAAQ,SAAS;AAC3B;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,IAAI,KAAK;AACf,QAAM,IAAI;AACV,QAAM,KAAK,eAAe,GAAG,GAAG,CAAC;AACjC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAGA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,QAAM,UAAU;AAChB,QAAM,MAAM;AACZ,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,SAAS,KAAK;AACjC,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,IAAK;AAAA,EACjC;AACA,SAAO,QAAQ;AACjB;AAGA,SAAS,QAAQ,GAAmB;AAClC,QAAM,IAAI;AACV,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,KAAK;AACX,WAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAAA,EAClE;AACA,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AACtD,QAAM,IAAI,IAAI,IAAI;AAClB,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAGA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AAEV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAE9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":["i"]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/failure-taxonomy.ts","../src/pipelines/failure-cluster.ts","../src/tool-use-metrics.ts","../src/baseline.ts"],"sourcesContent":["/**\n * Failure taxonomy — canonical classes + a default classifier.\n *\n * Every failed run should end up in a named class. The classifier here\n * is rule-based (fast, deterministic); an LLM fallback can be added by\n * the consumer for novel cases and trained into the rule base over time.\n *\n * Consumers call `classifyFailure(run, spans, events)` and persist the\n * returned class as `Run.outcome.failureClass`.\n */\n\nimport type { FailureClass, Run, Span, TraceEvent } from './trace/schema'\nimport { FAILURE_CLASSES } from './trace/schema'\n\nexport { FAILURE_CLASSES, type FailureClass }\n\nexport interface FailureContext {\n run: Run\n spans: Span[]\n events: TraceEvent[]\n}\n\nexport interface FailureClassification {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n}\n\n/** Ordered rules — first match wins. */\nexport interface FailureRule {\n id: string\n match: (ctx: FailureContext) => {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n } | null\n}\n\nexport const DEFAULT_RULES: FailureRule[] = [\n // Outcome already named? Respect it.\n {\n id: 'explicit-outcome',\n match: ({ run }) => {\n const fc = run.outcome?.failureClass\n if (fc && fc !== 'unknown')\n return { failureClass: fc, reason: 'outcome.failureClass set explicitly' }\n return null\n },\n },\n {\n id: 'knowledge-readiness-blocked',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'readiness_scored' &&\n e.payload.passed === false,\n )\n return event\n ? {\n failureClass: 'knowledge_readiness_blocked',\n reason: 'knowledge readiness report blocked execution',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-integration-manifest',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_validated' && e.payload.valid === false) ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'manifest_invalid')),\n )\n return event\n ? {\n failureClass: 'bad_integration_manifest',\n reason: 'integration manifest validation failed before launch',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-connection',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_manifest_resolved' &&\n hasResolutionStatus(e.payload, 'missing_connection'),\n )\n return event\n ? {\n failureClass: 'missing_integration_connection',\n reason: 'required integration connection was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-scope',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_resolved' && hasMissingScopes(e.payload)) ||\n (e.payload.kind === 'integration_invoke_failed' && e.payload.code === 'scope_denied')),\n )\n return event\n ? {\n failureClass: 'missing_integration_scope',\n reason: 'integration grant or connection lacks required scopes',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-approval-required',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_invoke' && e.payload.status === 'approval_required') ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'approval_required') ||\n e.payload.kind === 'integration_approval_required'),\n )\n return event\n ? {\n failureClass: 'integration_approval_required',\n reason: 'integration write paused for user approval',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-auth-expired',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'auth_expired' ||\n e.payload.code === 'connection_not_active' ||\n e.payload.code === 'capability_expired' ||\n e.payload.status === 'expired'),\n )\n return event\n ? {\n failureClass: 'integration_auth_expired',\n reason: 'integration connection or capability expired',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'unsafe-integration-write-denied',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'unsafe_write_denied' ||\n e.payload.code === 'policy_denied' ||\n e.payload.code === 'action_denied'),\n )\n return event\n ? {\n failureClass: 'unsafe_integration_write_denied',\n reason: 'integration write was denied by policy or capability scope',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-provider-failure',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n ![\n 'scope_denied',\n 'approval_required',\n 'auth_expired',\n 'connection_not_active',\n 'capability_expired',\n 'unsafe_write_denied',\n 'policy_denied',\n 'action_denied',\n 'manifest_invalid',\n ].includes(String(e.payload.code)),\n )\n return event\n ? {\n failureClass: 'integration_provider_failure',\n reason: 'integration provider invocation failed',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-credentials',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.category === 'credential_or_secret',\n )\n return event\n ? {\n failureClass: 'missing_credentials',\n reason: 'required credential or secret was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-retrieval',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const retrieval = spans.find(\n (s) =>\n s.kind === 'retrieval' && (s.hits.length === 0 || s.hits.every((hit) => hit.score <= 0)),\n )\n return retrieval\n ? {\n failureClass: 'bad_retrieval',\n reason: 'retrieval returned no useful hits for a failed run',\n triggerSpanId: retrieval.spanId,\n }\n : null\n },\n },\n {\n id: 'insufficient-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'insufficient_evidence',\n )\n return event\n ? {\n failureClass: 'insufficient_evidence',\n reason: 'task proceeded with insufficient supporting evidence',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'contradictory-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'contradictory_evidence',\n )\n return event\n ? {\n failureClass: 'contradictory_evidence',\n reason: 'supporting evidence contradicted itself',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n // Budget breach events\n {\n id: 'budget-breach',\n match: ({ events }) => {\n const breach = events.find((e) => e.kind === 'budget_breach')\n return breach\n ? {\n failureClass: 'budget_exceeded',\n reason: `budget breached on ${breach.payload.dimension ?? 'unknown dimension'}`,\n triggerEventId: breach.eventId,\n }\n : null\n },\n },\n // Policy violations\n {\n id: 'policy-violation',\n match: ({ events }) => {\n const e = events.find((x) => x.kind === 'policy_violation')\n return e\n ? {\n failureClass: 'policy_violation',\n reason: 'policy_violation event emitted',\n triggerEventId: e.eventId,\n }\n : null\n },\n },\n // Sandbox non-zero exit code\n {\n id: 'sandbox-failure',\n match: ({ spans }) => {\n const s = spans.find(\n (x) => x.kind === 'sandbox' && typeof x.exitCode === 'number' && x.exitCode !== 0,\n )\n if (!s) return null\n return {\n failureClass: 'sandbox_failure',\n reason: `sandbox exited ${(s as Extract<Span, { kind: 'sandbox' }>).exitCode}`,\n triggerSpanId: s.spanId,\n }\n },\n },\n // Timeout: run aborted by external signal\n {\n id: 'timeout',\n match: ({ run, events }) => {\n if (run.status !== 'aborted') return null\n const hasTimeout = events.some(\n (e) =>\n e.kind === 'error' &&\n String(e.payload.reason ?? '')\n .toLowerCase()\n .includes('timeout'),\n )\n const note = (run.outcome?.notes ?? '').toLowerCase()\n if (hasTimeout || note.includes('timeout') || note.includes('deadline')) {\n return { failureClass: 'timeout', reason: 'timeout signal observed' }\n }\n return null\n },\n },\n // Tool recovery failure: many consecutive tool errors on the same tool\n {\n id: 'tool-recovery-failure',\n match: ({ spans }) => {\n const tools = spans.filter((s) => s.kind === 'tool')\n const byTool = new Map<string, Span[]>()\n for (const t of tools) {\n const name = (t as Extract<Span, { kind: 'tool' }>).toolName\n const arr = byTool.get(name) ?? []\n arr.push(t)\n byTool.set(name, arr)\n }\n for (const [name, arr] of byTool) {\n const errs = arr.filter((s) => s.status === 'error')\n if (errs.length >= 3 && errs.length === arr.length) {\n return {\n failureClass: 'tool_recovery_failure',\n reason: `${errs.length} consecutive errors on tool \"${name}\"`,\n triggerSpanId: errs[errs.length - 1]!.spanId,\n }\n }\n }\n return null\n },\n },\n // Tool selection error: the run failed and agent called zero tools despite having them\n {\n id: 'tool-selection-error',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const hasToolsAvailable = spans.some(\n (s) =>\n s.kind === 'agent' &&\n (s.attributes?.toolsAvailable as number | undefined) !== undefined &&\n (s.attributes?.toolsAvailable as number) > 0,\n )\n const tools = spans.filter((s) => s.kind === 'tool')\n if (hasToolsAvailable && tools.length === 0) {\n return {\n failureClass: 'tool_selection_error',\n reason: 'tools were available but none were called',\n }\n }\n return null\n },\n },\n // Format drift: scored by a judge with dimension='format' below threshold\n {\n id: 'format-drift',\n match: ({ spans }) => {\n const judge = spans.find(\n (s) =>\n s.kind === 'judge' &&\n (s as Extract<Span, { kind: 'judge' }>).dimension === 'format' &&\n (s as Extract<Span, { kind: 'judge' }>).score < 0.5,\n )\n return judge\n ? {\n failureClass: 'format_drift',\n reason: 'format judge scored below 0.5',\n triggerSpanId: judge.spanId,\n }\n : null\n },\n },\n]\n\nfunction hasResolutionStatus(payload: Record<string, unknown>, status: string): boolean {\n if (status === 'missing_connection' && stringArray(payload.missingConnections).length > 0)\n return true\n return resolutionItems(payload).some((item) => item.status === status)\n}\n\nfunction hasMissingScopes(payload: Record<string, unknown>): boolean {\n if (stringArray(payload.missingScopes).length > 0) return true\n return resolutionItems(payload).some(\n (item) => Array.isArray(item.missingScopes) && item.missingScopes.length > 0,\n )\n}\n\nfunction resolutionItems(payload: Record<string, unknown>): Array<Record<string, unknown>> {\n return [\n ...records(payload.missing),\n ...records(payload.optionalMissing),\n ...records(payload.ready),\n ]\n}\n\nfunction records(value: unknown): Array<Record<string, unknown>> {\n if (!Array.isArray(value)) return []\n return value.filter(\n (item): item is Record<string, unknown> =>\n Boolean(item) && typeof item === 'object' && !Array.isArray(item),\n )\n}\n\nfunction stringArray(value: unknown): string[] {\n return Array.isArray(value)\n ? value.filter((item): item is string => typeof item === 'string')\n : []\n}\n\n/** Classify the failure mode of a run using an ordered rule list. */\nexport function classifyFailure(\n ctx: FailureContext,\n rules: FailureRule[] = DEFAULT_RULES,\n): FailureClassification {\n if (ctx.run.outcome?.pass !== false && ctx.run.status === 'completed') {\n return { failureClass: 'success', reason: 'run completed with pass=true (or no explicit fail)' }\n }\n for (const rule of rules) {\n const hit = rule.match(ctx)\n if (hit) return hit\n }\n return { failureClass: 'unknown', reason: 'no rule matched; run failed for unclassified reason' }\n}\n","/**\n * FailureClusterView — groups failed runs by (failureClass, triggerTool,\n * argHash-prefix) so weekly reviews can prioritize the top-N clusters.\n *\n * Each cluster includes: N runs, scenarios affected, representative\n * error message, a proposed mitigation hint (rule → action table).\n */\n\nimport { classifyFailure, DEFAULT_RULES, type FailureRule } from '../failure-taxonomy'\nimport { argHash, toolSpans } from '../trace/query'\nimport type { FailureClass, Span } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\n\nexport interface FailureCluster {\n failureClass: FailureClass\n /** Tool name when the trigger was a tool span, else undefined. */\n toolName?: string\n /** First 16 chars of argHash — clusters similar args. */\n argPrefix?: string\n /**\n * Source dimension when the trigger was a judge span (e.g. `'format'`,\n * `'safety'`, `'correctness'`). Lets cross-template aggregators\n * group failures by the dimension that fired without overloading\n * `argPrefix`. Optional — legacy clusters without this field\n * deserialize cleanly.\n */\n dimension?: string\n runCount: number\n scenarioIds: string[]\n exampleError?: string\n exampleRunId: string\n}\n\nexport interface FailureClusterReport {\n clusters: FailureCluster[]\n totalFailures: number\n totalRuns: number\n}\n\nexport async function failureClusterView(\n store: TraceStore,\n options: { rules?: FailureRule[]; minClusterSize?: number } = {},\n): Promise<FailureClusterReport> {\n const rules = options.rules ?? DEFAULT_RULES\n const minSize = options.minClusterSize ?? 1\n const runs = await store.listRuns()\n\n type Key = string\n const clusters = new Map<Key, FailureCluster>()\n let totalFailures = 0\n\n for (const run of runs) {\n if (run.status === 'completed' && run.outcome?.pass !== false) continue\n totalFailures++\n const spans = await store.spans({ runId: run.runId })\n const events = await store.events({ runId: run.runId })\n const cls = classifyFailure({ run, spans, events }, rules)\n\n let toolName: string | undefined\n let argPrefix: string | undefined\n let dimension: string | undefined\n if (cls.triggerSpanId) {\n const trig = spans.find((s) => s.spanId === cls.triggerSpanId)\n if (trig?.kind === 'tool') {\n toolName = trig.toolName\n argPrefix = argHash(trig.args).slice(0, 16)\n } else if (trig?.kind === 'judge') {\n dimension = trig.dimension\n }\n }\n // Fallback: look at the last errored tool span\n if (!toolName) {\n const ts = await toolSpans(store, run.runId)\n const errored = ts.filter((t) => t.status === 'error').pop()\n if (errored) {\n toolName = errored.toolName\n argPrefix = argHash(errored.args).slice(0, 16)\n }\n }\n // Secondary signal: any judge span on the failed run carries a\n // dimension. Useful when the rule classified by judge score but\n // didn't surface the trigger span (or surfaced a non-judge span).\n if (!dimension) {\n const judge = spans.find((s) => s.kind === 'judge' && typeof s.dimension === 'string')\n if (judge?.kind === 'judge') dimension = judge.dimension\n }\n\n const key = `${cls.failureClass}|${toolName ?? ''}|${argPrefix ?? ''}|${dimension ?? ''}`\n let cluster = clusters.get(key)\n if (!cluster) {\n cluster = {\n failureClass: cls.failureClass,\n toolName,\n argPrefix,\n dimension,\n runCount: 0,\n scenarioIds: [],\n exampleRunId: run.runId,\n exampleError: firstErrorMessage(spans) ?? cls.reason,\n }\n clusters.set(key, cluster)\n }\n cluster.runCount++\n if (!cluster.scenarioIds.includes(run.scenarioId)) cluster.scenarioIds.push(run.scenarioId)\n }\n\n const arr = [...clusters.values()]\n .filter((c) => c.runCount >= minSize)\n .sort((a, b) => b.runCount - a.runCount)\n\n return { clusters: arr, totalFailures, totalRuns: runs.length }\n}\n\nfunction firstErrorMessage(spans: Span[]): string | undefined {\n const errored = spans.find((s) => s.status === 'error')\n return errored?.error\n}\n","/**\n * Tool-use metrics — derived purely from trace data.\n *\n * No scoring assumptions: consumers supply optional ground-truth tool\n * selections per turn + optional \"information used downstream\" signals.\n * Without those, we still compute descriptive metrics (error rate,\n * retry rate, duplicate-call rate) that are useful on their own.\n */\n\nimport { argHash, groupBy, toolSpans } from './trace/query'\nimport type { Span } from './trace/schema'\nimport type { TraceStore } from './trace/store'\n\nexport interface ToolUseMetrics {\n runId: string\n totalCalls: number\n byTool: Record<string, ToolStats>\n errorRate: number\n /** Ratio of calls with identical (toolName, argHash) already seen earlier in the same run. */\n duplicateRate: number\n /** Ratio of error calls followed by ≥1 retry on same tool. */\n retryRate: number\n /** Optional: of the calls agent made, fraction the evaluator marked as \"correct selection\". */\n selectionAccuracy?: number\n}\n\nexport interface ToolStats {\n calls: number\n errors: number\n avgLatencyMs: number\n duplicates: number\n}\n\nexport interface ToolUseOptions {\n /** Map of spanId → whether the evaluator judged the tool selection correct. Optional. */\n selectionLabels?: Record<string, boolean>\n}\n\nexport async function computeToolUseMetrics(\n store: TraceStore,\n runId: string,\n options: ToolUseOptions = {},\n): Promise<ToolUseMetrics> {\n const tools = await toolSpans(store, runId)\n if (tools.length === 0) {\n return { runId, totalCalls: 0, byTool: {}, errorRate: 0, duplicateRate: 0, retryRate: 0 }\n }\n\n const byTool: Record<string, ToolStats> = {}\n let totalErrors = 0\n let totalDuplicates = 0\n const sortedTools = [...tools].sort((a, b) => a.startedAt - b.startedAt)\n const seenSignatures = new Set<string>()\n\n // duplicate detection + per-tool aggregation\n for (const t of sortedTools) {\n const stat = (byTool[t.toolName] ??= { calls: 0, errors: 0, avgLatencyMs: 0, duplicates: 0 })\n stat.calls += 1\n if (t.status === 'error') {\n stat.errors += 1\n totalErrors += 1\n }\n if (typeof t.latencyMs === 'number') stat.avgLatencyMs += t.latencyMs\n const sig = `${t.toolName}|${argHash(t.args)}`\n if (seenSignatures.has(sig)) {\n stat.duplicates += 1\n totalDuplicates += 1\n }\n seenSignatures.add(sig)\n }\n\n for (const stat of Object.values(byTool)) {\n stat.avgLatencyMs = stat.calls > 0 ? stat.avgLatencyMs / stat.calls : 0\n }\n\n // retry detection: per-tool chronological adjacency where error → next same-tool call\n let retryOpportunities = 0\n let retriesFollowed = 0\n for (const [, arr] of groupBy(sortedTools, (t) => t.toolName)) {\n for (let i = 0; i < arr.length; i++) {\n if (arr[i]!.status !== 'error') continue\n retryOpportunities += 1\n if (arr[i + 1]) retriesFollowed += 1\n }\n }\n const retryRate = retryOpportunities > 0 ? retriesFollowed / retryOpportunities : 0\n\n let selectionAccuracy: number | undefined\n if (options.selectionLabels) {\n const labeled = sortedTools.filter((t) => t.spanId in options.selectionLabels!)\n if (labeled.length > 0) {\n selectionAccuracy =\n labeled.filter((t) => options.selectionLabels![t.spanId]).length / labeled.length\n }\n }\n\n return {\n runId,\n totalCalls: sortedTools.length,\n byTool,\n errorRate: totalErrors / sortedTools.length,\n duplicateRate: totalDuplicates / sortedTools.length,\n retryRate,\n selectionAccuracy,\n }\n}\n\nexport type { Span }\n","/**\n * Baseline regression detection.\n *\n * Lifted from ADC baseline.ts. Every promotion-blocking signal boils down\n * to: \"is this run measurably worse than baseline?\" — with enough\n * statistical rigor to distinguish noise from drift.\n *\n * Uses:\n * - Welch's t-test (unequal variance) for per-metric mean comparison\n * - Cohen's d for effect size magnitude\n * - IQR for stability flag (unstable samples can't be trusted for comparisons)\n *\n * Returns a structured verdict: improved | regressed | stable | unstable.\n */\n\nimport { cohensD } from './statistics'\n\nexport interface MetricSamples {\n /** Stable metric key (e.g. \"overallScore\", \"firstTokenMs\"). */\n metric: string\n /** Whether higher values are better. */\n higherIsBetter: boolean\n baseline: number[]\n candidate: number[]\n}\n\nexport interface MetricVerdict {\n metric: string\n baselineMean: number\n candidateMean: number\n delta: number\n cohensD: number\n welchT: number\n welchDf: number\n welchP: number\n stable: boolean\n /** IQR of the combined samples — used as a rough stability indicator. */\n iqr: number\n verdict: 'improved' | 'regressed' | 'stable' | 'unstable'\n}\n\nexport interface BaselineReport {\n metrics: MetricVerdict[]\n /** True if any critical metric regressed. */\n hasRegression: boolean\n /** True if any metric is unstable (too noisy to judge). */\n hasUnstable: boolean\n}\n\nexport interface BaselineOptions {\n /** Effect size threshold for meaningful delta (default 0.5 — medium effect). */\n effectThreshold?: number\n /** p-value threshold for statistical significance (default 0.05). */\n alpha?: number\n /** IQR/mean ratio above which samples are flagged unstable (default 0.30). */\n unstableCvThreshold?: number\n}\n\n/**\n * Compare candidate samples against baseline per metric. Verdict logic:\n * - unstable: IQR/|mean| > threshold on either set — not enough signal\n * - improved: meaningful effect in the \"better\" direction AND p < alpha\n * - regressed: meaningful effect in the \"worse\" direction AND p < alpha\n * - stable: otherwise (no significant change)\n */\nexport function compareToBaseline(\n samples: MetricSamples[],\n options: BaselineOptions = {},\n): BaselineReport {\n const effectThreshold = options.effectThreshold ?? 0.5\n const alpha = options.alpha ?? 0.05\n const cvThreshold = options.unstableCvThreshold ?? 0.3\n\n const metrics: MetricVerdict[] = samples.map((s) => {\n if (s.baseline.length < 2 || s.candidate.length < 2) {\n throw new Error(`compareToBaseline: need ≥2 samples per side for \"${s.metric}\"`)\n }\n const bMean = mean(s.baseline)\n const cMean = mean(s.candidate)\n const delta = cMean - bMean\n const d = cohensD(s.baseline, s.candidate) // positive = candidate higher\n const { t, df, p } = welchsTTest(s.baseline, s.candidate)\n // Stability is per-side: a comparison is trustworthy only when BOTH\n // samples are internally consistent. Combining the sides would flag\n // large-but-real deltas as \"unstable\" which is exactly what we want\n // to detect.\n const baselineIqr = iqr(s.baseline)\n const candidateIqr = iqr(s.candidate)\n const baselineStable = baselineIqr / Math.max(Math.abs(bMean), 1e-9) <= cvThreshold\n const candidateStable = candidateIqr / Math.max(Math.abs(cMean), 1e-9) <= cvThreshold\n const stable = baselineStable && candidateStable\n const reportedIqr = Math.max(baselineIqr, candidateIqr)\n\n let verdict: MetricVerdict['verdict']\n if (!stable) {\n verdict = 'unstable'\n } else if (p < alpha && Math.abs(d) >= effectThreshold) {\n const candidateIsBetter = s.higherIsBetter ? delta > 0 : delta < 0\n verdict = candidateIsBetter ? 'improved' : 'regressed'\n } else {\n verdict = 'stable'\n }\n\n return {\n metric: s.metric,\n baselineMean: bMean,\n candidateMean: cMean,\n delta,\n cohensD: d,\n welchT: t,\n welchDf: df,\n welchP: p,\n stable,\n iqr: reportedIqr,\n verdict,\n }\n })\n\n return {\n metrics,\n hasRegression: metrics.some((m) => m.verdict === 'regressed'),\n hasUnstable: metrics.some((m) => m.verdict === 'unstable'),\n }\n}\n\nfunction mean(xs: number[]): number {\n return xs.reduce((a, b) => a + b, 0) / xs.length\n}\n\n/** Inter-quartile range; 0 when the sample has no spread. */\nexport function iqr(xs: number[]): number {\n if (xs.length === 0) return 0\n const sorted = [...xs].sort((a, b) => a - b)\n const q = (p: number) => {\n const idx = p * (sorted.length - 1)\n const lo = Math.floor(idx)\n const hi = Math.ceil(idx)\n return sorted[lo]! + (sorted[hi]! - sorted[lo]!) * (idx - lo)\n }\n return q(0.75) - q(0.25)\n}\n\n/**\n * Welch's t-test — unequal-variance two-sample t. Uses the same Student-t\n * CDF as `pairedTTest` (via incomplete beta); falls back to normal tail\n * when df is large.\n */\nexport function welchsTTest(a: number[], b: number[]): { t: number; df: number; p: number } {\n if (a.length < 2 || b.length < 2) return { t: 0, df: 0, p: 1 }\n const mA = mean(a)\n const mB = mean(b)\n const vA = variance(a, mA)\n const vB = variance(b, mB)\n const seSquared = vA / a.length + vB / b.length\n if (seSquared === 0) return { t: mA === mB ? 0 : Infinity, df: 0, p: mA === mB ? 1 : 0 }\n const t = (mB - mA) / Math.sqrt(seSquared)\n const df =\n (seSquared * seSquared) /\n ((vA / a.length) ** 2 / (a.length - 1) + (vB / b.length) ** 2 / (b.length - 1))\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\nfunction variance(xs: number[], m: number): number {\n return xs.reduce((acc, x) => acc + (x - m) ** 2, 0) / (xs.length - 1)\n}\n\n// Re-used from statistics.ts via small local copy to avoid exporting internals.\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const ib = incompleteBeta(x, df / 2, 0.5)\n return t >= 0 ? 1 - 0.5 * ib : 0.5 * ib\n}\n\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= 200; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < 3e-7) break\n }\n return front * f\n}\n\nfunction lnGamma(z: number): number {\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < 9; i++) x += coefs[i]! / (z + i)\n const t = z + 7.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n const sign = x < 0 ? -1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;;;;;;AAwCO,IAAM,gBAA+B;AAAA;AAAA,EAE1C;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,IAAI,MAAM;AAClB,YAAM,KAAK,IAAI,SAAS;AACxB,UAAI,MAAM,OAAO;AACf,eAAO,EAAE,cAAc,IAAI,QAAQ,sCAAsC;AAC3E,aAAO;AAAA,IACT;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,sBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,oCAAoC,EAAE,QAAQ,UAAU,SAC1E,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mCACnB,oBAAoB,EAAE,SAAS,oBAAoB;AAAA,MACvD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,mCAAmC,iBAAiB,EAAE,OAAO,KAC/E,EAAE,QAAQ,SAAS,+BAA+B,EAAE,QAAQ,SAAS;AAAA,MAC5E;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,wBAAwB,EAAE,QAAQ,WAAW,uBAC/D,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS,uBACrB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,kBAClB,EAAE,QAAQ,SAAS,2BACnB,EAAE,QAAQ,SAAS,wBACnB,EAAE,QAAQ,WAAW;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,yBAClB,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,+BACnB,CAAC;AAAA,UACC;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF,EAAE,SAAS,OAAO,EAAE,QAAQ,IAAI,CAAC;AAAA,MACrC;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,aAAa;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,YAAY,MAAM;AAAA,QACtB,CAAC,MACC,EAAE,SAAS,gBAAgB,EAAE,KAAK,WAAW,KAAK,EAAE,KAAK,MAAM,CAAC,QAAQ,IAAI,SAAS,CAAC;AAAA,MAC1F;AACA,aAAO,YACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,UAAU;AAAA,MAC3B,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,eAAe;AAC5D,aAAO,SACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ,sBAAsB,OAAO,QAAQ,aAAa,mBAAmB;AAAA,QAC7E,gBAAgB,OAAO;AAAA,MACzB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,IAAI,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,kBAAkB;AAC1D,aAAO,IACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,EAAE;AAAA,MACpB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,IAAI,MAAM;AAAA,QACd,CAAC,MAAM,EAAE,SAAS,aAAa,OAAO,EAAE,aAAa,YAAY,EAAE,aAAa;AAAA,MAClF;AACA,UAAI,CAAC,EAAG,QAAO;AACf,aAAO;AAAA,QACL,cAAc;AAAA,QACd,QAAQ,kBAAmB,EAAyC,QAAQ;AAAA,QAC5E,eAAe,EAAE;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,OAAO,MAAM;AAC1B,UAAI,IAAI,WAAW,UAAW,QAAO;AACrC,YAAM,aAAa,OAAO;AAAA,QACxB,CAAC,MACC,EAAE,SAAS,WACX,OAAO,EAAE,QAAQ,UAAU,EAAE,EAC1B,YAAY,EACZ,SAAS,SAAS;AAAA,MACzB;AACA,YAAM,QAAQ,IAAI,SAAS,SAAS,IAAI,YAAY;AACpD,UAAI,cAAc,KAAK,SAAS,SAAS,KAAK,KAAK,SAAS,UAAU,GAAG;AACvE,eAAO,EAAE,cAAc,WAAW,QAAQ,0BAA0B;AAAA,MACtE;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,YAAM,SAAS,oBAAI,IAAoB;AACvC,iBAAW,KAAK,OAAO;AACrB,cAAM,OAAQ,EAAsC;AACpD,cAAM,MAAM,OAAO,IAAI,IAAI,KAAK,CAAC;AACjC,YAAI,KAAK,CAAC;AACV,eAAO,IAAI,MAAM,GAAG;AAAA,MACtB;AACA,iBAAW,CAAC,MAAM,GAAG,KAAK,QAAQ;AAChC,cAAM,OAAO,IAAI,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnD,YAAI,KAAK,UAAU,KAAK,KAAK,WAAW,IAAI,QAAQ;AAClD,iBAAO;AAAA,YACL,cAAc;AAAA,YACd,QAAQ,GAAG,KAAK,MAAM,gCAAgC,IAAI;AAAA,YAC1D,eAAe,KAAK,KAAK,SAAS,CAAC,EAAG;AAAA,UACxC;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,oBAAoB,MAAM;AAAA,QAC9B,CAAC,MACC,EAAE,SAAS,WACV,EAAE,YAAY,mBAA0C,UACxD,EAAE,YAAY,iBAA4B;AAAA,MAC/C;AACA,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,UAAI,qBAAqB,MAAM,WAAW,GAAG;AAC3C,eAAO;AAAA,UACL,cAAc;AAAA,UACd,QAAQ;AAAA,QACV;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM;AAAA,QAClB,CAAC,MACC,EAAE,SAAS,WACV,EAAuC,cAAc,YACrD,EAAuC,QAAQ;AAAA,MACpD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,MAAM;AAAA,MACvB,IACA;AAAA,IACN;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,SAAkC,QAAyB;AACtF,MAAI,WAAW,wBAAwB,YAAY,QAAQ,kBAAkB,EAAE,SAAS;AACtF,WAAO;AACT,SAAO,gBAAgB,OAAO,EAAE,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM;AACvE;AAEA,SAAS,iBAAiB,SAA2C;AACnE,MAAI,YAAY,QAAQ,aAAa,EAAE,SAAS,EAAG,QAAO;AAC1D,SAAO,gBAAgB,OAAO,EAAE;AAAA,IAC9B,CAAC,SAAS,MAAM,QAAQ,KAAK,aAAa,KAAK,KAAK,cAAc,SAAS;AAAA,EAC7E;AACF;AAEA,SAAS,gBAAgB,SAAkE;AACzF,SAAO;AAAA,IACL,GAAG,QAAQ,QAAQ,OAAO;AAAA,IAC1B,GAAG,QAAQ,QAAQ,eAAe;AAAA,IAClC,GAAG,QAAQ,QAAQ,KAAK;AAAA,EAC1B;AACF;AAEA,SAAS,QAAQ,OAAgD;AAC/D,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,SAAO,MAAM;AAAA,IACX,CAAC,SACC,QAAQ,IAAI,KAAK,OAAO,SAAS,YAAY,CAAC,MAAM,QAAQ,IAAI;AAAA,EACpE;AACF;AAEA,SAAS,YAAY,OAA0B;AAC7C,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,SAAyB,OAAO,SAAS,QAAQ,IAC/D,CAAC;AACP;AAGO,SAAS,gBACd,KACA,QAAuB,eACA;AACvB,MAAI,IAAI,IAAI,SAAS,SAAS,SAAS,IAAI,IAAI,WAAW,aAAa;AACrE,WAAO,EAAE,cAAc,WAAW,QAAQ,qDAAqD;AAAA,EACjG;AACA,aAAW,QAAQ,OAAO;AACxB,UAAM,MAAM,KAAK,MAAM,GAAG;AAC1B,QAAI,IAAK,QAAO;AAAA,EAClB;AACA,SAAO,EAAE,cAAc,WAAW,QAAQ,sDAAsD;AAClG;;;ACtaA,eAAsB,mBACpB,OACA,UAA8D,CAAC,GAChC;AAC/B,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,UAAU,QAAQ,kBAAkB;AAC1C,QAAM,OAAO,MAAM,MAAM,SAAS;AAGlC,QAAM,WAAW,oBAAI,IAAyB;AAC9C,MAAI,gBAAgB;AAEpB,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,WAAW,eAAe,IAAI,SAAS,SAAS,MAAO;AAC/D;AACA,UAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,IAAI,MAAM,CAAC;AACpD,UAAM,SAAS,MAAM,MAAM,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC;AACtD,UAAM,MAAM,gBAAgB,EAAE,KAAK,OAAO,OAAO,GAAG,KAAK;AAEzD,QAAI;AACJ,QAAI;AACJ,QAAI;AACJ,QAAI,IAAI,eAAe;AACrB,YAAM,OAAO,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,IAAI,aAAa;AAC7D,UAAI,MAAM,SAAS,QAAQ;AACzB,mBAAW,KAAK;AAChB,oBAAY,QAAQ,KAAK,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC5C,WAAW,MAAM,SAAS,SAAS;AACjC,oBAAY,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,YAAM,KAAK,MAAM,UAAU,OAAO,IAAI,KAAK;AAC3C,YAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO,EAAE,IAAI;AAC3D,UAAI,SAAS;AACX,mBAAW,QAAQ;AACnB,oBAAY,QAAQ,QAAQ,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC/C;AAAA,IACF;AAIA,QAAI,CAAC,WAAW;AACd,YAAM,QAAQ,MAAM,KAAK,CAAC,MAAM,EAAE,SAAS,WAAW,OAAO,EAAE,cAAc,QAAQ;AACrF,UAAI,OAAO,SAAS,QAAS,aAAY,MAAM;AAAA,IACjD;AAEA,UAAM,MAAM,GAAG,IAAI,YAAY,IAAI,YAAY,EAAE,IAAI,aAAa,EAAE,IAAI,aAAa,EAAE;AACvF,QAAI,UAAU,SAAS,IAAI,GAAG;AAC9B,QAAI,CAAC,SAAS;AACZ,gBAAU;AAAA,QACR,cAAc,IAAI;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,QACA,UAAU;AAAA,QACV,aAAa,CAAC;AAAA,QACd,cAAc,IAAI;AAAA,QAClB,cAAc,kBAAkB,KAAK,KAAK,IAAI;AAAA,MAChD;AACA,eAAS,IAAI,KAAK,OAAO;AAAA,IAC3B;AACA,YAAQ;AACR,QAAI,CAAC,QAAQ,YAAY,SAAS,IAAI,UAAU,EAAG,SAAQ,YAAY,KAAK,IAAI,UAAU;AAAA,EAC5F;AAEA,QAAM,MAAM,CAAC,GAAG,SAAS,OAAO,CAAC,EAC9B,OAAO,CAAC,MAAM,EAAE,YAAY,OAAO,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ;AAEzC,SAAO,EAAE,UAAU,KAAK,eAAe,WAAW,KAAK,OAAO;AAChE;AAEA,SAAS,kBAAkB,OAAmC;AAC5D,QAAM,UAAU,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,OAAO;AACtD,SAAO,SAAS;AAClB;;;AC9EA,eAAsB,sBACpB,OACA,OACA,UAA0B,CAAC,GACF;AACzB,QAAM,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC1C,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO,EAAE,OAAO,YAAY,GAAG,QAAQ,CAAC,GAAG,WAAW,GAAG,eAAe,GAAG,WAAW,EAAE;AAAA,EAC1F;AAEA,QAAM,SAAoC,CAAC;AAC3C,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,QAAM,cAAc,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AACvE,QAAM,iBAAiB,oBAAI,IAAY;AAGvC,aAAW,KAAK,aAAa;AAC3B,UAAM,OAAQ,OAAO,EAAE,QAAQ,MAAM,EAAE,OAAO,GAAG,QAAQ,GAAG,cAAc,GAAG,YAAY,EAAE;AAC3F,SAAK,SAAS;AACd,QAAI,EAAE,WAAW,SAAS;AACxB,WAAK,UAAU;AACf,qBAAe;AAAA,IACjB;AACA,QAAI,OAAO,EAAE,cAAc,SAAU,MAAK,gBAAgB,EAAE;AAC5D,UAAM,MAAM,GAAG,EAAE,QAAQ,IAAI,QAAQ,EAAE,IAAI,CAAC;AAC5C,QAAI,eAAe,IAAI,GAAG,GAAG;AAC3B,WAAK,cAAc;AACnB,yBAAmB;AAAA,IACrB;AACA,mBAAe,IAAI,GAAG;AAAA,EACxB;AAEA,aAAW,QAAQ,OAAO,OAAO,MAAM,GAAG;AACxC,SAAK,eAAe,KAAK,QAAQ,IAAI,KAAK,eAAe,KAAK,QAAQ;AAAA,EACxE;AAGA,MAAI,qBAAqB;AACzB,MAAI,kBAAkB;AACtB,aAAW,CAAC,EAAE,GAAG,KAAK,QAAQ,aAAa,CAAC,MAAM,EAAE,QAAQ,GAAG;AAC7D,aAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACnC,UAAI,IAAI,CAAC,EAAG,WAAW,QAAS;AAChC,4BAAsB;AACtB,UAAI,IAAI,IAAI,CAAC,EAAG,oBAAmB;AAAA,IACrC;AAAA,EACF;AACA,QAAM,YAAY,qBAAqB,IAAI,kBAAkB,qBAAqB;AAElF,MAAI;AACJ,MAAI,QAAQ,iBAAiB;AAC3B,UAAM,UAAU,YAAY,OAAO,CAAC,MAAM,EAAE,UAAU,QAAQ,eAAgB;AAC9E,QAAI,QAAQ,SAAS,GAAG;AACtB,0BACE,QAAQ,OAAO,CAAC,MAAM,QAAQ,gBAAiB,EAAE,MAAM,CAAC,EAAE,SAAS,QAAQ;AAAA,IAC/E;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,YAAY;AAAA,IACxB;AAAA,IACA,WAAW,cAAc,YAAY;AAAA,IACrC,eAAe,kBAAkB,YAAY;AAAA,IAC7C;AAAA,IACA;AAAA,EACF;AACF;;;ACxCO,SAAS,kBACd,SACA,UAA2B,CAAC,GACZ;AAChB,QAAM,kBAAkB,QAAQ,mBAAmB;AACnD,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,cAAc,QAAQ,uBAAuB;AAEnD,QAAM,UAA2B,QAAQ,IAAI,CAAC,MAAM;AAClD,QAAI,EAAE,SAAS,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACnD,YAAM,IAAI,MAAM,yDAAoD,EAAE,MAAM,GAAG;AAAA,IACjF;AACA,UAAM,QAAQ,KAAK,EAAE,QAAQ;AAC7B,UAAM,QAAQ,KAAK,EAAE,SAAS;AAC9B,UAAM,QAAQ,QAAQ;AACtB,UAAM,IAAI,QAAQ,EAAE,UAAU,EAAE,SAAS;AACzC,UAAM,EAAE,GAAG,IAAI,EAAE,IAAI,YAAY,EAAE,UAAU,EAAE,SAAS;AAKxD,UAAM,cAAc,IAAI,EAAE,QAAQ;AAClC,UAAM,eAAe,IAAI,EAAE,SAAS;AACpC,UAAM,iBAAiB,cAAc,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AACxE,UAAM,kBAAkB,eAAe,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AAC1E,UAAM,SAAS,kBAAkB;AACjC,UAAM,cAAc,KAAK,IAAI,aAAa,YAAY;AAEtD,QAAI;AACJ,QAAI,CAAC,QAAQ;AACX,gBAAU;AAAA,IACZ,WAAW,IAAI,SAAS,KAAK,IAAI,CAAC,KAAK,iBAAiB;AACtD,YAAM,oBAAoB,EAAE,iBAAiB,QAAQ,IAAI,QAAQ;AACjE,gBAAU,oBAAoB,aAAa;AAAA,IAC7C,OAAO;AACL,gBAAU;AAAA,IACZ;AAEA,WAAO;AAAA,MACL,QAAQ,EAAE;AAAA,MACV,cAAc;AAAA,MACd,eAAe;AAAA,MACf;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,QAAQ;AAAA,MACR;AAAA,MACA,KAAK;AAAA,MACL;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,eAAe,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,WAAW;AAAA,IAC5D,aAAa,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,UAAU;AAAA,EAC3D;AACF;AAEA,SAAS,KAAK,IAAsB;AAClC,SAAO,GAAG,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,GAAG;AAC5C;AAGO,SAAS,IAAI,IAAsB;AACxC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,QAAM,IAAI,CAAC,MAAc;AACvB,UAAM,MAAM,KAAK,OAAO,SAAS;AACjC,UAAM,KAAK,KAAK,MAAM,GAAG;AACzB,UAAM,KAAK,KAAK,KAAK,GAAG;AACxB,WAAO,OAAO,EAAE,KAAM,OAAO,EAAE,IAAK,OAAO,EAAE,MAAO,MAAM;AAAA,EAC5D;AACA,SAAO,EAAE,IAAI,IAAI,EAAE,IAAI;AACzB;AAOO,SAAS,YAAY,GAAa,GAAmD;AAC1F,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAC7D,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,YAAY,KAAK,EAAE,SAAS,KAAK,EAAE;AACzC,MAAI,cAAc,EAAG,QAAO,EAAE,GAAG,OAAO,KAAK,IAAI,UAAU,IAAI,GAAG,GAAG,OAAO,KAAK,IAAI,EAAE;AACvF,QAAM,KAAK,KAAK,MAAM,KAAK,KAAK,SAAS;AACzC,QAAM,KACH,YAAY,cACX,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS,MAAM,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS;AAC9E,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAEA,SAAS,SAAS,IAAc,GAAmB;AACjD,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,MAAM,GAAG,CAAC,KAAK,GAAG,SAAS;AACrE;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,KAAK,eAAe,GAAG,KAAK,GAAG,GAAG;AACxC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAEA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,KAAK,KAAK;AAC7B,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,KAAM;AAAA,EAClC;AACA,SAAO,QAAQ;AACjB;AAEA,SAAS,QAAQ,GAAmB;AAClC,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,IAAK,QAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAC7E,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AAClD,QAAM,IAAI,IAAI;AACd,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAEA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AACV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAC9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/trace/store.ts","../src/trace/otel.ts","../src/trace/redact.ts","../src/replay.ts"],"sourcesContent":["/**\n * TraceStore — persistence + query over the TraceSchema v1 corpus.\n *\n * Two implementations ship in the core:\n * - InMemoryTraceStore: dev + tests, fully in-process\n * - FileSystemTraceStore: NDJSON append-only files per entity, suitable\n * for long-running CI jobs; rolled over at 32MB\n *\n * Downstream adapters (DuckDB, Langfuse, R2 parquet) implement this same\n * interface — the rest of the framework is storage-agnostic.\n */\n\nimport type {\n Artifact,\n BudgetLedgerEntry,\n EventKind,\n Run,\n RunStatus,\n Span,\n SpanKind,\n TraceEvent,\n} from './schema'\n\nexport interface RunFilter {\n scenarioId?: string\n variantId?: string\n status?: RunStatus\n since?: number\n until?: number\n tag?: { key: string; value: string }\n parentRunId?: string\n projectId?: string\n chatId?: string\n layer?: import('./schema').RunLayer\n}\n\nexport interface SpanFilter {\n runId?: string\n parentSpanId?: string\n kind?: SpanKind\n name?: string\n toolName?: string\n judgeId?: string\n since?: number\n until?: number\n}\n\nexport interface EventFilter {\n runId?: string\n spanId?: string\n kind?: EventKind\n since?: number\n until?: number\n}\n\nexport interface TraceStore {\n appendRun(run: Run): Promise<void>\n updateRun(runId: string, patch: Partial<Run>): Promise<void>\n appendSpan(span: Span): Promise<void>\n updateSpan(spanId: string, patch: Partial<Span>): Promise<void>\n appendEvent(event: TraceEvent): Promise<void>\n appendArtifact(artifact: Artifact): Promise<void>\n appendBudgetEntry(entry: BudgetLedgerEntry): Promise<void>\n\n getRun(runId: string): Promise<Run | undefined>\n listRuns(filter?: RunFilter): Promise<Run[]>\n spans(filter?: SpanFilter): Promise<Span[]>\n events(filter?: EventFilter): Promise<TraceEvent[]>\n budget(runId: string): Promise<BudgetLedgerEntry[]>\n artifacts(runId: string): Promise<Artifact[]>\n}\n\n// ── In-memory ────────────────────────────────────────────────────────\n\nexport class InMemoryTraceStore implements TraceStore {\n private runs = new Map<string, Run>()\n private allSpans: Span[] = []\n private allEvents: TraceEvent[] = []\n private allArtifacts: Artifact[] = []\n private allBudget: BudgetLedgerEntry[] = []\n\n async appendRun(run: Run): Promise<void> {\n if (this.runs.has(run.runId)) throw new Error(`run ${run.runId} already exists`)\n this.runs.set(run.runId, { ...run })\n }\n\n async updateRun(runId: string, patch: Partial<Run>): Promise<void> {\n const existing = this.runs.get(runId)\n if (!existing) throw new Error(`run ${runId} not found`)\n this.runs.set(runId, { ...existing, ...patch })\n }\n\n async appendSpan(span: Span): Promise<void> {\n this.allSpans.push({ ...span })\n }\n\n async updateSpan(spanId: string, patch: Partial<Span>): Promise<void> {\n const idx = this.allSpans.findIndex((s) => s.spanId === spanId)\n if (idx < 0) throw new Error(`span ${spanId} not found`)\n this.allSpans[idx] = { ...this.allSpans[idx], ...patch } as Span\n }\n\n async appendEvent(event: TraceEvent): Promise<void> {\n this.allEvents.push({ ...event })\n }\n\n async appendArtifact(artifact: Artifact): Promise<void> {\n this.allArtifacts.push({ ...artifact })\n }\n\n async appendBudgetEntry(entry: BudgetLedgerEntry): Promise<void> {\n this.allBudget.push({ ...entry })\n }\n\n async getRun(runId: string): Promise<Run | undefined> {\n const r = this.runs.get(runId)\n return r ? { ...r } : undefined\n }\n\n async listRuns(filter: RunFilter = {}): Promise<Run[]> {\n return [...this.runs.values()].filter((r) => matchesRun(r, filter))\n }\n\n async spans(filter: SpanFilter = {}): Promise<Span[]> {\n return this.allSpans.filter((s) => matchesSpan(s, filter)).map((s) => ({ ...s }))\n }\n\n async events(filter: EventFilter = {}): Promise<TraceEvent[]> {\n return this.allEvents.filter((e) => matchesEvent(e, filter)).map((e) => ({ ...e }))\n }\n\n async budget(runId: string): Promise<BudgetLedgerEntry[]> {\n return this.allBudget.filter((b) => b.runId === runId).map((b) => ({ ...b }))\n }\n\n async artifacts(runId: string): Promise<Artifact[]> {\n return this.allArtifacts.filter((a) => a.runId === runId).map((a) => ({ ...a }))\n }\n}\n\nfunction matchesRun(r: Run, f: RunFilter): boolean {\n if (f.scenarioId && r.scenarioId !== f.scenarioId) return false\n if (f.variantId && r.variantId !== f.variantId) return false\n if (f.status && r.status !== f.status) return false\n if (f.since !== undefined && r.startedAt < f.since) return false\n if (f.until !== undefined && r.startedAt > f.until) return false\n if (f.tag && r.tags?.[f.tag.key] !== f.tag.value) return false\n if (f.parentRunId && r.parentRunId !== f.parentRunId) return false\n if (f.projectId && r.projectId !== f.projectId) return false\n if (f.chatId && r.chatId !== f.chatId) return false\n if (f.layer && r.layer !== f.layer) return false\n return true\n}\n\nfunction matchesSpan(s: Span, f: SpanFilter): boolean {\n if (f.runId && s.runId !== f.runId) return false\n if (f.parentSpanId && s.parentSpanId !== f.parentSpanId) return false\n if (f.kind && s.kind !== f.kind) return false\n if (f.name && s.name !== f.name) return false\n if (f.toolName && (s.kind !== 'tool' || s.toolName !== f.toolName)) return false\n if (f.judgeId && (s.kind !== 'judge' || s.judgeId !== f.judgeId)) return false\n if (f.since !== undefined && s.startedAt < f.since) return false\n if (f.until !== undefined && s.startedAt > f.until) return false\n return true\n}\n\nfunction matchesEvent(e: TraceEvent, f: EventFilter): boolean {\n if (f.runId && e.runId !== f.runId) return false\n if (f.spanId && e.spanId !== f.spanId) return false\n if (f.kind && e.kind !== f.kind) return false\n if (f.since !== undefined && e.timestamp < f.since) return false\n if (f.until !== undefined && e.timestamp > f.until) return false\n return true\n}\n\n// ── Filesystem (NDJSON append-only, one file per entity) ─────────────\n\nexport interface FileSystemTraceStoreOptions {\n dir: string\n /** Roll over NDJSON files when they exceed this size in bytes. Default 32 MB. */\n maxBytes?: number\n}\n\nexport class FileSystemTraceStore implements TraceStore {\n private dir: string\n private maxBytes: number\n /** Lazy in-memory index for queries — populated on first read. */\n private index?: InMemoryTraceStore\n private loaded = false\n\n constructor(options: FileSystemTraceStoreOptions) {\n this.dir = options.dir\n this.maxBytes = options.maxBytes ?? 32 * 1024 * 1024\n }\n\n private async ensureDir(): Promise<void> {\n const fs = await import('node:fs/promises')\n await fs.mkdir(this.dir, { recursive: true })\n }\n\n private async append(name: string, record: unknown): Promise<void> {\n await this.ensureDir()\n const fs = await import('node:fs/promises')\n const path = await import('node:path')\n const active = path.join(this.dir, `${name}.ndjson`)\n try {\n const stat = await fs.stat(active)\n if (stat.size >= this.maxBytes) {\n const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`)\n await fs.rename(active, rolled)\n }\n } catch {\n /* file doesn't exist yet */\n }\n await fs.appendFile(active, `${JSON.stringify(record)}\\n`, 'utf8')\n // Mirror genuinely-new rows into the lazy index. Update rows (marked\n // with `_update: true` by updateRun/updateSpan) are applied by those\n // methods directly via the index's update* APIs — re-inserting them\n // here would trigger a duplicate-id error after the first read has\n // populated the index. Regression: 0.23.0 endRun on the second\n // variant against a shared store threw \"run X already exists\".\n if (this.index && !(record as { _update?: boolean })?._update) {\n void this.insertInto(name, record)\n }\n }\n\n private async insertInto(name: string, record: unknown): Promise<void> {\n if (!this.index) return\n switch (name) {\n case 'runs':\n await this.index.appendRun(record as Run)\n break\n case 'spans':\n await this.index.appendSpan(record as Span)\n break\n case 'events':\n await this.index.appendEvent(record as TraceEvent)\n break\n case 'artifacts':\n await this.index.appendArtifact(record as Artifact)\n break\n case 'budget':\n await this.index.appendBudgetEntry(record as BudgetLedgerEntry)\n break\n }\n }\n\n private async load(): Promise<InMemoryTraceStore> {\n if (this.loaded && this.index) return this.index\n const fs = await import('node:fs/promises')\n const path = await import('node:path')\n const store = new InMemoryTraceStore()\n try {\n const entries = await fs.readdir(this.dir)\n for (const file of entries) {\n if (!file.endsWith('.ndjson')) continue\n const full = path.join(this.dir, file)\n const content = await fs.readFile(full, 'utf8')\n const base = file.split('.')[0]\n for (const line of content.split('\\n')) {\n if (!line.trim()) continue\n const record = JSON.parse(line)\n if (base === 'runs') {\n // Allow re-loading without duplicate error\n try {\n await store.appendRun(record)\n } catch {\n await store.updateRun(record.runId, record)\n }\n } else if (base === 'spans') {\n // `updateSpan` appends an `_update: true` patch row instead of\n // rewriting the original span. On reload we must collapse those\n // patches into the prior span — otherwise a fresh\n // FileSystemTraceStore reading the same dir reports duplicate\n // spans (one full, one fragment with no runId/kind/name), which\n // breaks any downstream consumer that re-opens the store\n // cross-process (e.g. the canonical eval's OTLP converter).\n if (record?._update) {\n try {\n await store.updateSpan(record.spanId, record)\n } catch {\n // Patch row arrived before the original — should not happen\n // with locked append order, but fall through to append so we\n // don't lose data.\n await store.appendSpan(record)\n }\n } else {\n await store.appendSpan(record)\n }\n } else if (base === 'events') {\n await store.appendEvent(record)\n } else if (base === 'artifacts') {\n await store.appendArtifact(record)\n } else if (base === 'budget') {\n await store.appendBudgetEntry(record)\n }\n }\n }\n } catch {\n /* empty dir, first run */\n }\n this.index = store\n this.loaded = true\n return store\n }\n\n async appendRun(run: Run): Promise<void> {\n await this.append('runs', run)\n }\n async updateRun(runId: string, patch: Partial<Run>): Promise<void> {\n // NDJSON is append-only; record updates as new rows with the same runId —\n // readers collapse by last-write-wins on load.\n await this.append('runs', { runId, ...patch, _update: true })\n if (this.index) await this.index.updateRun(runId, patch)\n }\n async appendSpan(span: Span): Promise<void> {\n await this.append('spans', span)\n }\n async updateSpan(spanId: string, patch: Partial<Span>): Promise<void> {\n await this.append('spans', { spanId, ...patch, _update: true })\n if (this.index) await this.index.updateSpan(spanId, patch)\n }\n async appendEvent(event: TraceEvent): Promise<void> {\n await this.append('events', event)\n }\n async appendArtifact(artifact: Artifact): Promise<void> {\n await this.append('artifacts', artifact)\n }\n async appendBudgetEntry(entry: BudgetLedgerEntry): Promise<void> {\n await this.append('budget', entry)\n }\n\n async getRun(runId: string): Promise<Run | undefined> {\n return (await this.load()).getRun(runId)\n }\n async listRuns(filter?: RunFilter): Promise<Run[]> {\n return (await this.load()).listRuns(filter)\n }\n async spans(filter?: SpanFilter): Promise<Span[]> {\n return (await this.load()).spans(filter)\n }\n async events(filter?: EventFilter): Promise<TraceEvent[]> {\n return (await this.load()).events(filter)\n }\n async budget(runId: string): Promise<BudgetLedgerEntry[]> {\n return (await this.load()).budget(runId)\n }\n async artifacts(runId: string): Promise<Artifact[]> {\n return (await this.load()).artifacts(runId)\n }\n}\n","/**\n * OpenTelemetry JSON export — maps TraceSchema v1 to OTLP/JSON so\n * traces render natively in Jaeger / Honeycomb / Langfuse / Grafana.\n *\n * Wire format only. We do NOT depend on the @opentelemetry SDK — that\n * would drag in polyfills incompatible with Workers/Edge. Consumers\n * push the JSON to their collector of choice via HTTP.\n *\n * Reference: OTLP 1.3.2 (ResourceSpans / ScopeSpans / Span).\n */\n\nimport type { Run, Span, TraceEvent } from './schema'\nimport type { TraceStore } from './store'\n\nexport const OTEL_AGENT_EVAL_SCOPE = { name: '@tangle-network/agent-eval', version: '0.3.0' }\n\nexport interface OtlpSpan {\n traceId: string\n spanId: string\n parentSpanId?: string\n name: string\n kind: number\n startTimeUnixNano: string\n endTimeUnixNano: string\n attributes: Array<{\n key: string\n value: { stringValue?: string; intValue?: string; doubleValue?: number; boolValue?: boolean }\n }>\n events?: Array<{ timeUnixNano: string; name: string; attributes?: OtlpSpan['attributes'] }>\n status?: { code: number; message?: string }\n}\n\nexport interface OtlpResourceSpans {\n resource: { attributes: OtlpSpan['attributes'] }\n scopeSpans: Array<{ scope: typeof OTEL_AGENT_EVAL_SCOPE; spans: OtlpSpan[] }>\n}\n\nexport interface OtlpExport {\n resourceSpans: OtlpResourceSpans[]\n}\n\n/** Export a single run's spans + events in OTLP/JSON. */\nexport async function exportRunAsOtlp(\n store: TraceStore,\n runId: string,\n resourceAttrs: Record<string, string | number | boolean> = {},\n): Promise<OtlpExport> {\n const run = await store.getRun(runId)\n if (!run) throw new Error(`run ${runId} not found`)\n const spans = await store.spans({ runId })\n const events = await store.events({ runId })\n const eventsBySpan = new Map<string, TraceEvent[]>()\n for (const e of events) {\n if (!e.spanId) continue\n const arr = eventsBySpan.get(e.spanId) ?? []\n arr.push(e)\n eventsBySpan.set(e.spanId, arr)\n }\n const traceId = runToTraceId(run)\n const otlpSpans: OtlpSpan[] = spans.map((s) =>\n spanToOtlp(s, traceId, eventsBySpan.get(s.spanId) ?? []),\n )\n return {\n resourceSpans: [\n {\n resource: {\n attributes: toAttributes({\n 'service.name': 'agent-eval',\n 'run.id': run.runId,\n 'run.scenario_id': run.scenarioId,\n 'run.variant_id': run.variantId ?? '',\n 'run.dataset_version': run.datasetVersion ?? '',\n 'run.code_sha': run.codeSha ?? '',\n 'run.model_fingerprint': run.modelFingerprint ?? '',\n ...resourceAttrs,\n }),\n },\n scopeSpans: [{ scope: OTEL_AGENT_EVAL_SCOPE, spans: otlpSpans }],\n },\n ],\n }\n}\n\nfunction spanToOtlp(span: Span, traceId: string, events: TraceEvent[]): OtlpSpan {\n const endedAt = span.endedAt ?? span.startedAt\n return {\n traceId,\n spanId: padSpanId(span.spanId),\n parentSpanId: span.parentSpanId ? padSpanId(span.parentSpanId) : undefined,\n name: span.name,\n kind: 1, // SPAN_KIND_INTERNAL\n startTimeUnixNano: msToNs(span.startedAt),\n endTimeUnixNano: msToNs(endedAt),\n attributes: toAttributes(flattenSpanAttributes(span)),\n events: events.map((e) => ({\n timeUnixNano: msToNs(e.timestamp),\n name: e.kind,\n attributes: toAttributes(flattenPayload(e.payload)),\n })),\n status: span.status === 'error' ? { code: 2, message: span.error } : { code: 1 },\n }\n}\n\nfunction flattenSpanAttributes(span: Span): Record<string, string | number | boolean> {\n const base: Record<string, string | number | boolean> = {\n 'span.kind': span.kind,\n }\n if (span.kind === 'llm') {\n base['llm.model'] = span.model\n if (span.inputTokens !== undefined) base['llm.input_tokens'] = span.inputTokens\n if (span.outputTokens !== undefined) base['llm.output_tokens'] = span.outputTokens\n if (span.costUsd !== undefined) base['llm.cost_usd'] = span.costUsd\n if (span.finishReason) base['llm.finish_reason'] = span.finishReason\n } else if (span.kind === 'tool') {\n base['tool.name'] = span.toolName\n if (span.latencyMs !== undefined) base['tool.latency_ms'] = span.latencyMs\n } else if (span.kind === 'retrieval') {\n base['retrieval.query'] = span.query\n base['retrieval.hits'] = span.hits.length\n } else if (span.kind === 'judge') {\n base['judge.id'] = span.judgeId\n base['judge.dimension'] = span.dimension\n base['judge.score'] = span.score\n base['judge.target_span_id'] = span.targetSpanId\n } else if (span.kind === 'sandbox') {\n if (span.image) base['sandbox.image'] = span.image\n if (span.exitCode !== undefined) base['sandbox.exit_code'] = span.exitCode\n if (span.testsPassed !== undefined) base['sandbox.tests_passed'] = span.testsPassed\n if (span.testsTotal !== undefined) base['sandbox.tests_total'] = span.testsTotal\n }\n if (span.attributes) {\n for (const [k, v] of Object.entries(span.attributes)) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') base[k] = v\n }\n }\n return base\n}\n\nfunction flattenPayload(\n payload: Record<string, unknown>,\n): Record<string, string | number | boolean> {\n const out: Record<string, string | number | boolean> = {}\n for (const [k, v] of Object.entries(payload)) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') out[k] = v\n else out[k] = JSON.stringify(v)\n }\n return out\n}\n\nfunction toAttributes(record: Record<string, string | number | boolean>): OtlpSpan['attributes'] {\n return Object.entries(record).map(([key, value]) => ({\n key,\n value:\n typeof value === 'number'\n ? Number.isInteger(value)\n ? { intValue: value.toString() }\n : { doubleValue: value }\n : typeof value === 'boolean'\n ? { boolValue: value }\n : { stringValue: value },\n }))\n}\n\nfunction msToNs(ms: number): string {\n return (BigInt(Math.floor(ms)) * 1_000_000n).toString()\n}\n\nfunction padSpanId(id: string): string {\n // OTLP wants 16-hex spanIds. UUIDs are 32-hex; strip dashes and take first 16.\n const cleaned = id.replace(/-/g, '')\n return cleaned.slice(0, 16).padEnd(16, '0')\n}\n\nfunction runToTraceId(run: Run): string {\n // OTLP wants 32-hex traceIds. Use runId directly when it's 32-hex already,\n // else SHA-ish truncate.\n const cleaned = run.runId.replace(/-/g, '')\n return cleaned.slice(0, 32).padEnd(32, '0')\n}\n","/**\n * Redaction — remove PII / secrets from trace payloads before persist.\n *\n * Pre-persistence rules mean raw traces in storage are already scrubbed.\n * Unredacted variants (for debugging / post-mortems) live in a separate\n * storage layer with stricter access controls; this module only covers\n * the default scrub-then-persist path.\n *\n * Rules compose: pass an array of `RedactionRule`, each is applied in\n * order. Strings that match get replaced with a tagged sentinel so the\n * eval framework can count how many redactions happened per run\n * (surfaced via `redaction_applied` events).\n */\n\nexport interface RedactionRule {\n id: string\n pattern: RegExp\n /** Replacement — e.g. '[PII:email]'. Defaults to `[redacted:{id}]`. */\n replacement?: string\n}\n\nexport interface RedactionReport {\n redactionCount: number\n byRule: Record<string, number>\n}\n\n/** OWASP / common-sense defaults — extend per-domain. */\nexport const DEFAULT_REDACTION_RULES: RedactionRule[] = [\n { id: 'email', pattern: /\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b/gi },\n { id: 'ssn', pattern: /\\b\\d{3}-\\d{2}-\\d{4}\\b/g },\n { id: 'credit-card', pattern: /\\b(?:\\d[ -]*?){13,16}\\b/g },\n { id: 'phone-us', pattern: /\\b(?:\\+?1[-.\\s]?)?\\(?\\d{3}\\)?[-.\\s]?\\d{3}[-.\\s]?\\d{4}\\b/g },\n { id: 'ipv4', pattern: /\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b/g },\n { id: 'aws-access-key', pattern: /\\bAKIA[0-9A-Z]{16}\\b/g },\n { id: 'bearer', pattern: /\\bBearer\\s+[A-Za-z0-9._~+/=-]{10,}/gi },\n { id: 'sk-key', pattern: /\\bsk-[A-Za-z0-9_-]{10,}\\b/g },\n {\n id: 'private-key-block',\n pattern: /-----BEGIN (?:RSA |EC |OPENSSH |DSA )?PRIVATE KEY-----[\\s\\S]*?-----END[^-]*-----/g,\n },\n]\n\nexport const REDACTION_VERSION = '1.0.0'\n\n/**\n * Redact a single string. Returns the new string and a per-rule count of\n * how many substitutions fired.\n */\nexport function redactString(\n input: string,\n rules: RedactionRule[] = DEFAULT_REDACTION_RULES,\n): { output: string; report: RedactionReport } {\n const byRule: Record<string, number> = {}\n let redactionCount = 0\n let output = input\n for (const rule of rules) {\n let hits = 0\n output = output.replace(rule.pattern, () => {\n hits++\n return rule.replacement ?? `[redacted:${rule.id}]`\n })\n if (hits > 0) {\n byRule[rule.id] = hits\n redactionCount += hits\n }\n }\n return { output, report: { redactionCount, byRule } }\n}\n\n/**\n * Walk a JSON-ish value applying `redactString` to every string leaf.\n * Arrays and plain objects are recursed; other types pass through\n * untouched. Circular references throw — traces should be tree-shaped.\n */\nexport function redactValue(\n value: unknown,\n rules: RedactionRule[] = DEFAULT_REDACTION_RULES,\n report: RedactionReport = { redactionCount: 0, byRule: {} },\n): { value: unknown; report: RedactionReport } {\n if (typeof value === 'string') {\n const { output, report: r } = redactString(value, rules)\n report.redactionCount += r.redactionCount\n for (const [k, v] of Object.entries(r.byRule)) {\n report.byRule[k] = (report.byRule[k] ?? 0) + v\n }\n return { value: output, report }\n }\n if (Array.isArray(value)) {\n return {\n value: value.map((v) => redactValue(v, rules, report).value),\n report,\n }\n }\n if (value !== null && typeof value === 'object') {\n const next: Record<string, unknown> = {}\n for (const [k, v] of Object.entries(value)) {\n next[k] = redactValue(v, rules, report).value\n }\n return { value: next, report }\n }\n return { value, report }\n}\n","/**\n * Replay-from-raw-events — turn every captured campaign run into a\n * re-runnable artifact.\n *\n * The premise: 0.21 made `RawProviderSink` capture every provider HTTP\n * envelope. 0.22's `runEvalCampaign` makes capture the default. Together\n * they mean every past run is a complete fingerprint of what happened on\n * the wire — and that fingerprint is enough to replay the run without\n * burning new LLM cost.\n *\n * Three use cases this primitive enables:\n *\n * 1. **Post-hoc judging** — apply a new judge / rubric / scoring callback\n * to last week's runs without re-calling any LLM. The cost of trying\n * a new rubric drops from \"another full sweep\" to a CPU-bound replay.\n * 2. **Determinism audits** — replay the same campaign and verify the\n * raw responses match byte-for-byte. Any drift is a non-determinism\n * bug (in the harness, the prompt builder, the sandbox, …).\n * 3. **Free judge calibration** — run two judges on identical responses\n * and measure inter-judge agreement without doubling LLM spend.\n *\n * The interface is deliberately fetch-shaped. Inject `createReplayFetch`\n * into `LlmClientOptions.fetch` and every `callLlm` transparently reads\n * from the cache instead of calling the network. No new code path through\n * the LLM client is needed; the cache hit is invisible to the runner.\n */\n\nimport { ReplayError } from './errors'\nimport { canonicalize, hashJson } from './pre-registration'\nimport type { RawProviderEvent, RawProviderSink } from './trace/raw-provider-sink'\n\nexport class ReplayCacheMissError extends ReplayError {\n constructor(\n public readonly url: string,\n public readonly requestKey: string,\n message?: string,\n ) {\n super(message ?? `replay cache miss for ${url} (key=${requestKey})`)\n }\n}\n\nexport interface ReplayCacheEntry {\n request: RawProviderEvent\n response: RawProviderEvent\n}\n\nexport interface ReplayCacheStats {\n total: number\n byProvider: Record<string, number>\n byModel: Record<string, number>\n /** Spans for which we have a request but no response (run aborted mid-call). */\n orphanRequests: number\n}\n\n/**\n * In-memory deterministic cache of (request → response) keyed on a stable\n * hash of the request body. Built from a `RawProviderSink` containing\n * paired `request` and `response` events from a previous run.\n *\n * The cache is the source of truth for replay; `createReplayFetch` is a\n * thin wrapper that reads from it.\n */\nexport class ReplayCache {\n private byKey = new Map<string, ReplayCacheEntry>()\n private orphans = 0\n private byProvider: Record<string, number> = {}\n private byModel: Record<string, number> = {}\n\n /**\n * Build a cache from a sink's events. The sink must implement `list()`.\n * Filter by `runId` / `spanId` to scope to a specific replay.\n */\n static async fromSink(\n sink: RawProviderSink,\n filter: { runId?: string; spanId?: string } = {},\n ): Promise<ReplayCache> {\n if (!sink.list) {\n throw new ReplayError('ReplayCache.fromSink: sink must implement list() to be replayable.')\n }\n const events = await sink.list(filter)\n return ReplayCache.fromEvents(events)\n }\n\n /** Build a cache from an in-memory event list. */\n static async fromEvents(events: RawProviderEvent[]): Promise<ReplayCache> {\n const cache = new ReplayCache()\n // Group by (runId, spanId, attemptIndex) so request/response/error pairs are matched.\n // A cell can have many spans; each retry attempt within a span is a separate group.\n type GroupKey = string\n const groups = new Map<GroupKey, { req?: RawProviderEvent; res?: RawProviderEvent }>()\n for (const e of events) {\n const k = `${e.runId ?? ''}::${e.spanId ?? ''}::${e.attemptIndex}`\n const g = groups.get(k) ?? {}\n if (e.direction === 'request') g.req = e\n else g.res = e\n groups.set(k, g)\n }\n for (const g of groups.values()) {\n if (!g.req) continue\n if (!g.res) {\n cache.orphans += 1\n continue\n }\n const key = await requestKey(g.req)\n cache.byKey.set(key, { request: g.req, response: g.res })\n cache.byProvider[g.req.provider] = (cache.byProvider[g.req.provider] ?? 0) + 1\n cache.byModel[g.req.model] = (cache.byModel[g.req.model] ?? 0) + 1\n }\n return cache\n }\n\n /** Number of cacheable (request, response) pairs in the cache. */\n size(): number {\n return this.byKey.size\n }\n\n stats(): ReplayCacheStats {\n return {\n total: this.byKey.size,\n byProvider: { ...this.byProvider },\n byModel: { ...this.byModel },\n orphanRequests: this.orphans,\n }\n }\n\n /** Iterate every cached `(request, response)` pair in insertion order. */\n *entries(): IterableIterator<ReplayCacheEntry> {\n for (const entry of this.byKey.values()) yield entry\n }\n\n /**\n * Look up a cached response by hashing the (model, messages, temperature,\n * maxTokens, response_format) shape. Returns `undefined` on miss; the\n * caller decides whether to throw, fall back to the network, or skip.\n */\n async lookup(requestBody: unknown): Promise<ReplayCacheEntry | undefined> {\n const key = await keyFromBody(requestBody)\n return this.byKey.get(key)\n }\n}\n\nexport interface ReplayFetchOptions {\n /**\n * Behaviour on cache miss. Default `'throw'`. `'fallback'` calls the\n * `fallbackFetch` (typically `globalThis.fetch`) so a partial replay can\n * still complete; `'fail-closed'` returns a synthetic 599 response so the\n * call site sees a non-retriable failure.\n */\n onMiss?: 'throw' | 'fallback' | 'fail-closed'\n fallbackFetch?: typeof fetch\n /** Optional callback fired once per replayed call (for telemetry / counters). */\n onHit?: (info: { url: string; provider: string; model: string }) => void\n /** Optional callback fired on cache miss before the `onMiss` policy applies. */\n onMissNotify?: (info: { url: string; requestBody: unknown }) => void\n}\n\n/**\n * Build a `fetch`-shaped function that serves cached responses out of a\n * `ReplayCache` for any URL ending in `/chat/completions`. Pass through\n * `LlmClientOptions.fetch` and `callLlm` becomes free.\n *\n * Non-`/chat/completions` URLs are passed straight to the fallback fetch\n * (default: `globalThis.fetch`). This matters because non-LLM HTTP work\n * (judge HTTP servers, sandbox callbacks) sometimes flows through the same\n * `fetch` and shouldn't be intercepted.\n */\nexport function createReplayFetch(cache: ReplayCache, opts: ReplayFetchOptions = {}): typeof fetch {\n const onMiss = opts.onMiss ?? 'throw'\n const fallback = opts.fallbackFetch ?? globalThis.fetch?.bind(globalThis)\n\n return (async (input: RequestInfo | URL, init?: RequestInit) => {\n const url =\n typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url\n if (!/\\/chat\\/completions(?:[?#].*)?$/.test(url)) {\n if (!fallback)\n throw new ReplayError(\n `replay fetch: non-completions URL ${url} but no fallbackFetch configured`,\n )\n return fallback(input as RequestInfo, init)\n }\n let bodyParsed: unknown\n if (init?.body && typeof init.body === 'string') {\n try {\n bodyParsed = JSON.parse(init.body)\n } catch {\n /* raw body, not JSON */\n }\n }\n const hit = bodyParsed === undefined ? undefined : await cache.lookup(bodyParsed)\n if (hit) {\n opts.onHit?.({ url, provider: hit.request.provider, model: hit.request.model })\n const status = hit.response.statusCode ?? 200\n const headers = new Headers(\n Object.entries(hit.response.responseHeaders ?? { 'Content-Type': 'application/json' }),\n )\n const bodyText =\n typeof hit.response.responseBody === 'string'\n ? hit.response.responseBody\n : JSON.stringify(hit.response.responseBody ?? {})\n return new Response(bodyText, { status, headers })\n }\n opts.onMissNotify?.({ url, requestBody: bodyParsed })\n if (onMiss === 'throw') {\n const key = bodyParsed === undefined ? '<unparseable>' : await keyFromBody(bodyParsed)\n throw new ReplayCacheMissError(url, key)\n }\n if (onMiss === 'fail-closed') {\n return new Response(JSON.stringify({ error: 'replay_cache_miss' }), { status: 599 })\n }\n if (!fallback)\n throw new ReplayError('replay fetch: onMiss=fallback but no fallbackFetch configured')\n return fallback(input as RequestInfo, init)\n }) as typeof fetch\n}\n\n/**\n * Convenience iterator over `(request, response)` pairs in a sink — for\n * post-hoc scoring that doesn't need a `fetch` shim. The judge or scorer\n * runs purely in-process over cached LLM outputs.\n */\nexport async function* iterateRawCalls(\n sink: RawProviderSink,\n filter: { runId?: string; spanId?: string } = {},\n): AsyncGenerator<ReplayCacheEntry> {\n if (!sink.list) {\n throw new ReplayError('iterateRawCalls: sink must implement list().')\n }\n const events = await sink.list(filter)\n const cache = await ReplayCache.fromEvents(events)\n for (const entry of cache.entries()) yield entry\n}\n\n// ── Hashing ──────────────────────────────────────────────────────────────\n\n/**\n * Canonical request key.\n *\n * `model + messages + temperature + max_tokens|max_completion_tokens +\n * response_format` are the dimensions that affect the response shape.\n * Other fields (timestamp headers, provider-specific metadata) are\n * intentionally excluded so a request hashes the same across re-runs.\n */\nasync function requestKey(event: RawProviderEvent): Promise<string> {\n return keyFromBody(event.requestBody)\n}\n\nasync function keyFromBody(body: unknown): Promise<string> {\n if (body == null || typeof body !== 'object') return hashJson({ raw: String(body) })\n const b = body as Record<string, unknown>\n const reduced = canonicalize({\n model: b.model ?? null,\n messages: b.messages ?? null,\n temperature: b.temperature ?? null,\n max_tokens: b.max_tokens ?? null,\n max_completion_tokens: b.max_completion_tokens ?? null,\n response_format: b.response_format ?? null,\n })\n return hashJson(reduced)\n}\n"],"mappings":";;;;;;;;;AA0EO,IAAM,qBAAN,MAA+C;AAAA,EAC5C,OAAO,oBAAI,IAAiB;AAAA,EAC5B,WAAmB,CAAC;AAAA,EACpB,YAA0B,CAAC;AAAA,EAC3B,eAA2B,CAAC;AAAA,EAC5B,YAAiC,CAAC;AAAA,EAE1C,MAAM,UAAU,KAAyB;AACvC,QAAI,KAAK,KAAK,IAAI,IAAI,KAAK,EAAG,OAAM,IAAI,MAAM,OAAO,IAAI,KAAK,iBAAiB;AAC/E,SAAK,KAAK,IAAI,IAAI,OAAO,EAAE,GAAG,IAAI,CAAC;AAAA,EACrC;AAAA,EAEA,MAAM,UAAU,OAAe,OAAoC;AACjE,UAAM,WAAW,KAAK,KAAK,IAAI,KAAK;AACpC,QAAI,CAAC,SAAU,OAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AACvD,SAAK,KAAK,IAAI,OAAO,EAAE,GAAG,UAAU,GAAG,MAAM,CAAC;AAAA,EAChD;AAAA,EAEA,MAAM,WAAW,MAA2B;AAC1C,SAAK,SAAS,KAAK,EAAE,GAAG,KAAK,CAAC;AAAA,EAChC;AAAA,EAEA,MAAM,WAAW,QAAgB,OAAqC;AACpE,UAAM,MAAM,KAAK,SAAS,UAAU,CAAC,MAAM,EAAE,WAAW,MAAM;AAC9D,QAAI,MAAM,EAAG,OAAM,IAAI,MAAM,QAAQ,MAAM,YAAY;AACvD,SAAK,SAAS,GAAG,IAAI,EAAE,GAAG,KAAK,SAAS,GAAG,GAAG,GAAG,MAAM;AAAA,EACzD;AAAA,EAEA,MAAM,YAAY,OAAkC;AAClD,SAAK,UAAU,KAAK,EAAE,GAAG,MAAM,CAAC;AAAA,EAClC;AAAA,EAEA,MAAM,eAAe,UAAmC;AACtD,SAAK,aAAa,KAAK,EAAE,GAAG,SAAS,CAAC;AAAA,EACxC;AAAA,EAEA,MAAM,kBAAkB,OAAyC;AAC/D,SAAK,UAAU,KAAK,EAAE,GAAG,MAAM,CAAC;AAAA,EAClC;AAAA,EAEA,MAAM,OAAO,OAAyC;AACpD,UAAM,IAAI,KAAK,KAAK,IAAI,KAAK;AAC7B,WAAO,IAAI,EAAE,GAAG,EAAE,IAAI;AAAA,EACxB;AAAA,EAEA,MAAM,SAAS,SAAoB,CAAC,GAAmB;AACrD,WAAO,CAAC,GAAG,KAAK,KAAK,OAAO,CAAC,EAAE,OAAO,CAAC,MAAM,WAAW,GAAG,MAAM,CAAC;AAAA,EACpE;AAAA,EAEA,MAAM,MAAM,SAAqB,CAAC,GAAoB;AACpD,WAAO,KAAK,SAAS,OAAO,CAAC,MAAM,YAAY,GAAG,MAAM,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,EAAE;AAAA,EAClF;AAAA,EAEA,MAAM,OAAO,SAAsB,CAAC,GAA0B;AAC5D,WAAO,KAAK,UAAU,OAAO,CAAC,MAAM,aAAa,GAAG,MAAM,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,EAAE;AAAA,EACpF;AAAA,EAEA,MAAM,OAAO,OAA6C;AACxD,WAAO,KAAK,UAAU,OAAO,CAAC,MAAM,EAAE,UAAU,KAAK,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,EAAE;AAAA,EAC9E;AAAA,EAEA,MAAM,UAAU,OAAoC;AAClD,WAAO,KAAK,aAAa,OAAO,CAAC,MAAM,EAAE,UAAU,KAAK,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,EAAE;AAAA,EACjF;AACF;AAEA,SAAS,WAAW,GAAQ,GAAuB;AACjD,MAAI,EAAE,cAAc,EAAE,eAAe,EAAE,WAAY,QAAO;AAC1D,MAAI,EAAE,aAAa,EAAE,cAAc,EAAE,UAAW,QAAO;AACvD,MAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAQ,QAAO;AAC9C,MAAI,EAAE,UAAU,UAAa,EAAE,YAAY,EAAE,MAAO,QAAO;AAC3D,MAAI,EAAE,UAAU,UAAa,EAAE,YAAY,EAAE,MAAO,QAAO;AAC3D,MAAI,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,GAAG,MAAM,EAAE,IAAI,MAAO,QAAO;AACzD,MAAI,EAAE,eAAe,EAAE,gBAAgB,EAAE,YAAa,QAAO;AAC7D,MAAI,EAAE,aAAa,EAAE,cAAc,EAAE,UAAW,QAAO;AACvD,MAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAQ,QAAO;AAC9C,MAAI,EAAE,SAAS,EAAE,UAAU,EAAE,MAAO,QAAO;AAC3C,SAAO;AACT;AAEA,SAAS,YAAY,GAAS,GAAwB;AACpD,MAAI,EAAE,SAAS,EAAE,UAAU,EAAE,MAAO,QAAO;AAC3C,MAAI,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,aAAc,QAAO;AAChE,MAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAM,QAAO;AACxC,MAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAM,QAAO;AACxC,MAAI,EAAE,aAAa,EAAE,SAAS,UAAU,EAAE,aAAa,EAAE,UAAW,QAAO;AAC3E,MAAI,EAAE,YAAY,EAAE,SAAS,WAAW,EAAE,YAAY,EAAE,SAAU,QAAO;AACzE,MAAI,EAAE,UAAU,UAAa,EAAE,YAAY,EAAE,MAAO,QAAO;AAC3D,MAAI,EAAE,UAAU,UAAa,EAAE,YAAY,EAAE,MAAO,QAAO;AAC3D,SAAO;AACT;AAEA,SAAS,aAAa,GAAe,GAAyB;AAC5D,MAAI,EAAE,SAAS,EAAE,UAAU,EAAE,MAAO,QAAO;AAC3C,MAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAQ,QAAO;AAC9C,MAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAM,QAAO;AACxC,MAAI,EAAE,UAAU,UAAa,EAAE,YAAY,EAAE,MAAO,QAAO;AAC3D,MAAI,EAAE,UAAU,UAAa,EAAE,YAAY,EAAE,MAAO,QAAO;AAC3D,SAAO;AACT;AAUO,IAAM,uBAAN,MAAiD;AAAA,EAC9C;AAAA,EACA;AAAA;AAAA,EAEA;AAAA,EACA,SAAS;AAAA,EAEjB,YAAY,SAAsC;AAChD,SAAK,MAAM,QAAQ;AACnB,SAAK,WAAW,QAAQ,YAAY,KAAK,OAAO;AAAA,EAClD;AAAA,EAEA,MAAc,YAA2B;AACvC,UAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,UAAM,GAAG,MAAM,KAAK,KAAK,EAAE,WAAW,KAAK,CAAC;AAAA,EAC9C;AAAA,EAEA,MAAc,OAAO,MAAc,QAAgC;AACjE,UAAM,KAAK,UAAU;AACrB,UAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,UAAM,OAAO,MAAM,OAAO,MAAW;AACrC,UAAM,SAAS,KAAK,KAAK,KAAK,KAAK,GAAG,IAAI,SAAS;AACnD,QAAI;AACF,YAAM,OAAO,MAAM,GAAG,KAAK,MAAM;AACjC,UAAI,KAAK,QAAQ,KAAK,UAAU;AAC9B,cAAM,SAAS,KAAK,KAAK,KAAK,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,CAAC,SAAS;AACjE,cAAM,GAAG,OAAO,QAAQ,MAAM;AAAA,MAChC;AAAA,IACF,QAAQ;AAAA,IAER;AACA,UAAM,GAAG,WAAW,QAAQ,GAAG,KAAK,UAAU,MAAM,CAAC;AAAA,GAAM,MAAM;AAOjE,QAAI,KAAK,SAAS,CAAE,QAAkC,SAAS;AAC7D,WAAK,KAAK,WAAW,MAAM,MAAM;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,MAAc,WAAW,MAAc,QAAgC;AACrE,QAAI,CAAC,KAAK,MAAO;AACjB,YAAQ,MAAM;AAAA,MACZ,KAAK;AACH,cAAM,KAAK,MAAM,UAAU,MAAa;AACxC;AAAA,MACF,KAAK;AACH,cAAM,KAAK,MAAM,WAAW,MAAc;AAC1C;AAAA,MACF,KAAK;AACH,cAAM,KAAK,MAAM,YAAY,MAAoB;AACjD;AAAA,MACF,KAAK;AACH,cAAM,KAAK,MAAM,eAAe,MAAkB;AAClD;AAAA,MACF,KAAK;AACH,cAAM,KAAK,MAAM,kBAAkB,MAA2B;AAC9D;AAAA,IACJ;AAAA,EACF;AAAA,EAEA,MAAc,OAAoC;AAChD,QAAI,KAAK,UAAU,KAAK,MAAO,QAAO,KAAK;AAC3C,UAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,UAAM,OAAO,MAAM,OAAO,MAAW;AACrC,UAAM,QAAQ,IAAI,mBAAmB;AACrC,QAAI;AACF,YAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,GAAG;AACzC,iBAAW,QAAQ,SAAS;AAC1B,YAAI,CAAC,KAAK,SAAS,SAAS,EAAG;AAC/B,cAAM,OAAO,KAAK,KAAK,KAAK,KAAK,IAAI;AACrC,cAAM,UAAU,MAAM,GAAG,SAAS,MAAM,MAAM;AAC9C,cAAM,OAAO,KAAK,MAAM,GAAG,EAAE,CAAC;AAC9B,mBAAW,QAAQ,QAAQ,MAAM,IAAI,GAAG;AACtC,cAAI,CAAC,KAAK,KAAK,EAAG;AAClB,gBAAM,SAAS,KAAK,MAAM,IAAI;AAC9B,cAAI,SAAS,QAAQ;AAEnB,gBAAI;AACF,oBAAM,MAAM,UAAU,MAAM;AAAA,YAC9B,QAAQ;AACN,oBAAM,MAAM,UAAU,OAAO,OAAO,MAAM;AAAA,YAC5C;AAAA,UACF,WAAW,SAAS,SAAS;AAQ3B,gBAAI,QAAQ,SAAS;AACnB,kBAAI;AACF,sBAAM,MAAM,WAAW,OAAO,QAAQ,MAAM;AAAA,cAC9C,QAAQ;AAIN,sBAAM,MAAM,WAAW,MAAM;AAAA,cAC/B;AAAA,YACF,OAAO;AACL,oBAAM,MAAM,WAAW,MAAM;AAAA,YAC/B;AAAA,UACF,WAAW,SAAS,UAAU;AAC5B,kBAAM,MAAM,YAAY,MAAM;AAAA,UAChC,WAAW,SAAS,aAAa;AAC/B,kBAAM,MAAM,eAAe,MAAM;AAAA,UACnC,WAAW,SAAS,UAAU;AAC5B,kBAAM,MAAM,kBAAkB,MAAM;AAAA,UACtC;AAAA,QACF;AAAA,MACF;AAAA,IACF,QAAQ;AAAA,IAER;AACA,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,KAAyB;AACvC,UAAM,KAAK,OAAO,QAAQ,GAAG;AAAA,EAC/B;AAAA,EACA,MAAM,UAAU,OAAe,OAAoC;AAGjE,UAAM,KAAK,OAAO,QAAQ,EAAE,OAAO,GAAG,OAAO,SAAS,KAAK,CAAC;AAC5D,QAAI,KAAK,MAAO,OAAM,KAAK,MAAM,UAAU,OAAO,KAAK;AAAA,EACzD;AAAA,EACA,MAAM,WAAW,MAA2B;AAC1C,UAAM,KAAK,OAAO,SAAS,IAAI;AAAA,EACjC;AAAA,EACA,MAAM,WAAW,QAAgB,OAAqC;AACpE,UAAM,KAAK,OAAO,SAAS,EAAE,QAAQ,GAAG,OAAO,SAAS,KAAK,CAAC;AAC9D,QAAI,KAAK,MAAO,OAAM,KAAK,MAAM,WAAW,QAAQ,KAAK;AAAA,EAC3D;AAAA,EACA,MAAM,YAAY,OAAkC;AAClD,UAAM,KAAK,OAAO,UAAU,KAAK;AAAA,EACnC;AAAA,EACA,MAAM,eAAe,UAAmC;AACtD,UAAM,KAAK,OAAO,aAAa,QAAQ;AAAA,EACzC;AAAA,EACA,MAAM,kBAAkB,OAAyC;AAC/D,UAAM,KAAK,OAAO,UAAU,KAAK;AAAA,EACnC;AAAA,EAEA,MAAM,OAAO,OAAyC;AACpD,YAAQ,MAAM,KAAK,KAAK,GAAG,OAAO,KAAK;AAAA,EACzC;AAAA,EACA,MAAM,SAAS,QAAoC;AACjD,YAAQ,MAAM,KAAK,KAAK,GAAG,SAAS,MAAM;AAAA,EAC5C;AAAA,EACA,MAAM,MAAM,QAAsC;AAChD,YAAQ,MAAM,KAAK,KAAK,GAAG,MAAM,MAAM;AAAA,EACzC;AAAA,EACA,MAAM,OAAO,QAA6C;AACxD,YAAQ,MAAM,KAAK,KAAK,GAAG,OAAO,MAAM;AAAA,EAC1C;AAAA,EACA,MAAM,OAAO,OAA6C;AACxD,YAAQ,MAAM,KAAK,KAAK,GAAG,OAAO,KAAK;AAAA,EACzC;AAAA,EACA,MAAM,UAAU,OAAoC;AAClD,YAAQ,MAAM,KAAK,KAAK,GAAG,UAAU,KAAK;AAAA,EAC5C;AACF;;;AChVO,IAAM,wBAAwB,EAAE,MAAM,8BAA8B,SAAS,QAAQ;AA4B5F,eAAsB,gBACpB,OACA,OACA,gBAA2D,CAAC,GACvC;AACrB,QAAM,MAAM,MAAM,MAAM,OAAO,KAAK;AACpC,MAAI,CAAC,IAAK,OAAM,IAAI,MAAM,OAAO,KAAK,YAAY;AAClD,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,MAAM,CAAC;AACzC,QAAM,SAAS,MAAM,MAAM,OAAO,EAAE,MAAM,CAAC;AAC3C,QAAM,eAAe,oBAAI,IAA0B;AACnD,aAAW,KAAK,QAAQ;AACtB,QAAI,CAAC,EAAE,OAAQ;AACf,UAAM,MAAM,aAAa,IAAI,EAAE,MAAM,KAAK,CAAC;AAC3C,QAAI,KAAK,CAAC;AACV,iBAAa,IAAI,EAAE,QAAQ,GAAG;AAAA,EAChC;AACA,QAAM,UAAU,aAAa,GAAG;AAChC,QAAM,YAAwB,MAAM;AAAA,IAAI,CAAC,MACvC,WAAW,GAAG,SAAS,aAAa,IAAI,EAAE,MAAM,KAAK,CAAC,CAAC;AAAA,EACzD;AACA,SAAO;AAAA,IACL,eAAe;AAAA,MACb;AAAA,QACE,UAAU;AAAA,UACR,YAAY,aAAa;AAAA,YACvB,gBAAgB;AAAA,YAChB,UAAU,IAAI;AAAA,YACd,mBAAmB,IAAI;AAAA,YACvB,kBAAkB,IAAI,aAAa;AAAA,YACnC,uBAAuB,IAAI,kBAAkB;AAAA,YAC7C,gBAAgB,IAAI,WAAW;AAAA,YAC/B,yBAAyB,IAAI,oBAAoB;AAAA,YACjD,GAAG;AAAA,UACL,CAAC;AAAA,QACH;AAAA,QACA,YAAY,CAAC,EAAE,OAAO,uBAAuB,OAAO,UAAU,CAAC;AAAA,MACjE;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,WAAW,MAAY,SAAiB,QAAgC;AAC/E,QAAM,UAAU,KAAK,WAAW,KAAK;AACrC,SAAO;AAAA,IACL;AAAA,IACA,QAAQ,UAAU,KAAK,MAAM;AAAA,IAC7B,cAAc,KAAK,eAAe,UAAU,KAAK,YAAY,IAAI;AAAA,IACjE,MAAM,KAAK;AAAA,IACX,MAAM;AAAA;AAAA,IACN,mBAAmB,OAAO,KAAK,SAAS;AAAA,IACxC,iBAAiB,OAAO,OAAO;AAAA,IAC/B,YAAY,aAAa,sBAAsB,IAAI,CAAC;AAAA,IACpD,QAAQ,OAAO,IAAI,CAAC,OAAO;AAAA,MACzB,cAAc,OAAO,EAAE,SAAS;AAAA,MAChC,MAAM,EAAE;AAAA,MACR,YAAY,aAAa,eAAe,EAAE,OAAO,CAAC;AAAA,IACpD,EAAE;AAAA,IACF,QAAQ,KAAK,WAAW,UAAU,EAAE,MAAM,GAAG,SAAS,KAAK,MAAM,IAAI,EAAE,MAAM,EAAE;AAAA,EACjF;AACF;AAEA,SAAS,sBAAsB,MAAuD;AACpF,QAAM,OAAkD;AAAA,IACtD,aAAa,KAAK;AAAA,EACpB;AACA,MAAI,KAAK,SAAS,OAAO;AACvB,SAAK,WAAW,IAAI,KAAK;AACzB,QAAI,KAAK,gBAAgB,OAAW,MAAK,kBAAkB,IAAI,KAAK;AACpE,QAAI,KAAK,iBAAiB,OAAW,MAAK,mBAAmB,IAAI,KAAK;AACtE,QAAI,KAAK,YAAY,OAAW,MAAK,cAAc,IAAI,KAAK;AAC5D,QAAI,KAAK,aAAc,MAAK,mBAAmB,IAAI,KAAK;AAAA,EAC1D,WAAW,KAAK,SAAS,QAAQ;AAC/B,SAAK,WAAW,IAAI,KAAK;AACzB,QAAI,KAAK,cAAc,OAAW,MAAK,iBAAiB,IAAI,KAAK;AAAA,EACnE,WAAW,KAAK,SAAS,aAAa;AACpC,SAAK,iBAAiB,IAAI,KAAK;AAC/B,SAAK,gBAAgB,IAAI,KAAK,KAAK;AAAA,EACrC,WAAW,KAAK,SAAS,SAAS;AAChC,SAAK,UAAU,IAAI,KAAK;AACxB,SAAK,iBAAiB,IAAI,KAAK;AAC/B,SAAK,aAAa,IAAI,KAAK;AAC3B,SAAK,sBAAsB,IAAI,KAAK;AAAA,EACtC,WAAW,KAAK,SAAS,WAAW;AAClC,QAAI,KAAK,MAAO,MAAK,eAAe,IAAI,KAAK;AAC7C,QAAI,KAAK,aAAa,OAAW,MAAK,mBAAmB,IAAI,KAAK;AAClE,QAAI,KAAK,gBAAgB,OAAW,MAAK,sBAAsB,IAAI,KAAK;AACxE,QAAI,KAAK,eAAe,OAAW,MAAK,qBAAqB,IAAI,KAAK;AAAA,EACxE;AACA,MAAI,KAAK,YAAY;AACnB,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,KAAK,UAAU,GAAG;AACpD,UAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,UAAW,MAAK,CAAC,IAAI;AAAA,IAC1F;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,eACP,SAC2C;AAC3C,QAAM,MAAiD,CAAC;AACxD,aAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,OAAO,GAAG;AAC5C,QAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,UAAW,KAAI,CAAC,IAAI;AAAA,QAClF,KAAI,CAAC,IAAI,KAAK,UAAU,CAAC;AAAA,EAChC;AACA,SAAO;AACT;AAEA,SAAS,aAAa,QAA2E;AAC/F,SAAO,OAAO,QAAQ,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,KAAK,OAAO;AAAA,IACnD;AAAA,IACA,OACE,OAAO,UAAU,WACb,OAAO,UAAU,KAAK,IACpB,EAAE,UAAU,MAAM,SAAS,EAAE,IAC7B,EAAE,aAAa,MAAM,IACvB,OAAO,UAAU,YACf,EAAE,WAAW,MAAM,IACnB,EAAE,aAAa,MAAM;AAAA,EAC/B,EAAE;AACJ;AAEA,SAAS,OAAO,IAAoB;AAClC,UAAQ,OAAO,KAAK,MAAM,EAAE,CAAC,IAAI,UAAY,SAAS;AACxD;AAEA,SAAS,UAAU,IAAoB;AAErC,QAAM,UAAU,GAAG,QAAQ,MAAM,EAAE;AACnC,SAAO,QAAQ,MAAM,GAAG,EAAE,EAAE,OAAO,IAAI,GAAG;AAC5C;AAEA,SAAS,aAAa,KAAkB;AAGtC,QAAM,UAAU,IAAI,MAAM,QAAQ,MAAM,EAAE;AAC1C,SAAO,QAAQ,MAAM,GAAG,EAAE,EAAE,OAAO,IAAI,GAAG;AAC5C;;;ACvJO,IAAM,0BAA2C;AAAA,EACtD,EAAE,IAAI,SAAS,SAAS,8CAA8C;AAAA,EACtE,EAAE,IAAI,OAAO,SAAS,yBAAyB;AAAA,EAC/C,EAAE,IAAI,eAAe,SAAS,2BAA2B;AAAA,EACzD,EAAE,IAAI,YAAY,SAAS,2DAA2D;AAAA,EACtF,EAAE,IAAI,QAAQ,SAAS,+BAA+B;AAAA,EACtD,EAAE,IAAI,kBAAkB,SAAS,wBAAwB;AAAA,EACzD,EAAE,IAAI,UAAU,SAAS,uCAAuC;AAAA,EAChE,EAAE,IAAI,UAAU,SAAS,6BAA6B;AAAA,EACtD;AAAA,IACE,IAAI;AAAA,IACJ,SAAS;AAAA,EACX;AACF;AAEO,IAAM,oBAAoB;AAM1B,SAAS,aACd,OACA,QAAyB,yBACoB;AAC7C,QAAM,SAAiC,CAAC;AACxC,MAAI,iBAAiB;AACrB,MAAI,SAAS;AACb,aAAW,QAAQ,OAAO;AACxB,QAAI,OAAO;AACX,aAAS,OAAO,QAAQ,KAAK,SAAS,MAAM;AAC1C;AACA,aAAO,KAAK,eAAe,aAAa,KAAK,EAAE;AAAA,IACjD,CAAC;AACD,QAAI,OAAO,GAAG;AACZ,aAAO,KAAK,EAAE,IAAI;AAClB,wBAAkB;AAAA,IACpB;AAAA,EACF;AACA,SAAO,EAAE,QAAQ,QAAQ,EAAE,gBAAgB,OAAO,EAAE;AACtD;AAOO,SAAS,YACd,OACA,QAAyB,yBACzB,SAA0B,EAAE,gBAAgB,GAAG,QAAQ,CAAC,EAAE,GACb;AAC7C,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,EAAE,QAAQ,QAAQ,EAAE,IAAI,aAAa,OAAO,KAAK;AACvD,WAAO,kBAAkB,EAAE;AAC3B,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,EAAE,MAAM,GAAG;AAC7C,aAAO,OAAO,CAAC,KAAK,OAAO,OAAO,CAAC,KAAK,KAAK;AAAA,IAC/C;AACA,WAAO,EAAE,OAAO,QAAQ,OAAO;AAAA,EACjC;AACA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO;AAAA,MACL,OAAO,MAAM,IAAI,CAAC,MAAM,YAAY,GAAG,OAAO,MAAM,EAAE,KAAK;AAAA,MAC3D;AAAA,IACF;AAAA,EACF;AACA,MAAI,UAAU,QAAQ,OAAO,UAAU,UAAU;AAC/C,UAAM,OAAgC,CAAC;AACvC,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,KAAK,GAAG;AAC1C,WAAK,CAAC,IAAI,YAAY,GAAG,OAAO,MAAM,EAAE;AAAA,IAC1C;AACA,WAAO,EAAE,OAAO,MAAM,OAAO;AAAA,EAC/B;AACA,SAAO,EAAE,OAAO,OAAO;AACzB;;;ACtEO,IAAM,uBAAN,cAAmC,YAAY;AAAA,EACpD,YACkB,KACAA,aAChB,SACA;AACA,UAAM,WAAW,yBAAyB,GAAG,SAASA,WAAU,GAAG;AAJnD;AACA,sBAAAA;AAAA,EAIlB;AAAA,EALkB;AAAA,EACA;AAKpB;AAuBO,IAAM,cAAN,MAAM,aAAY;AAAA,EACf,QAAQ,oBAAI,IAA8B;AAAA,EAC1C,UAAU;AAAA,EACV,aAAqC,CAAC;AAAA,EACtC,UAAkC,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA,EAM3C,aAAa,SACX,MACA,SAA8C,CAAC,GACzB;AACtB,QAAI,CAAC,KAAK,MAAM;AACd,YAAM,IAAI,YAAY,oEAAoE;AAAA,IAC5F;AACA,UAAM,SAAS,MAAM,KAAK,KAAK,MAAM;AACrC,WAAO,aAAY,WAAW,MAAM;AAAA,EACtC;AAAA;AAAA,EAGA,aAAa,WAAW,QAAkD;AACxE,UAAM,QAAQ,IAAI,aAAY;AAI9B,UAAM,SAAS,oBAAI,IAAkE;AACrF,eAAW,KAAK,QAAQ;AACtB,YAAM,IAAI,GAAG,EAAE,SAAS,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,YAAY;AAChE,YAAM,IAAI,OAAO,IAAI,CAAC,KAAK,CAAC;AAC5B,UAAI,EAAE,cAAc,UAAW,GAAE,MAAM;AAAA,UAClC,GAAE,MAAM;AACb,aAAO,IAAI,GAAG,CAAC;AAAA,IACjB;AACA,eAAW,KAAK,OAAO,OAAO,GAAG;AAC/B,UAAI,CAAC,EAAE,IAAK;AACZ,UAAI,CAAC,EAAE,KAAK;AACV,cAAM,WAAW;AACjB;AAAA,MACF;AACA,YAAM,MAAM,MAAM,WAAW,EAAE,GAAG;AAClC,YAAM,MAAM,IAAI,KAAK,EAAE,SAAS,EAAE,KAAK,UAAU,EAAE,IAAI,CAAC;AACxD,YAAM,WAAW,EAAE,IAAI,QAAQ,KAAK,MAAM,WAAW,EAAE,IAAI,QAAQ,KAAK,KAAK;AAC7E,YAAM,QAAQ,EAAE,IAAI,KAAK,KAAK,MAAM,QAAQ,EAAE,IAAI,KAAK,KAAK,KAAK;AAAA,IACnE;AACA,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,OAAe;AACb,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,QAA0B;AACxB,WAAO;AAAA,MACL,OAAO,KAAK,MAAM;AAAA,MAClB,YAAY,EAAE,GAAG,KAAK,WAAW;AAAA,MACjC,SAAS,EAAE,GAAG,KAAK,QAAQ;AAAA,MAC3B,gBAAgB,KAAK;AAAA,IACvB;AAAA,EACF;AAAA;AAAA,EAGA,CAAC,UAA8C;AAC7C,eAAW,SAAS,KAAK,MAAM,OAAO,EAAG,OAAM;AAAA,EACjD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAO,aAA6D;AACxE,UAAM,MAAM,MAAM,YAAY,WAAW;AACzC,WAAO,KAAK,MAAM,IAAI,GAAG;AAAA,EAC3B;AACF;AA2BO,SAAS,kBAAkB,OAAoB,OAA2B,CAAC,GAAiB;AACjG,QAAM,SAAS,KAAK,UAAU;AAC9B,QAAM,WAAW,KAAK,iBAAiB,WAAW,OAAO,KAAK,UAAU;AAExE,UAAQ,OAAO,OAA0B,SAAuB;AAC9D,UAAM,MACJ,OAAO,UAAU,WAAW,QAAQ,iBAAiB,MAAM,MAAM,SAAS,IAAI,MAAM;AACtF,QAAI,CAAC,kCAAkC,KAAK,GAAG,GAAG;AAChD,UAAI,CAAC;AACH,cAAM,IAAI;AAAA,UACR,qCAAqC,GAAG;AAAA,QAC1C;AACF,aAAO,SAAS,OAAsB,IAAI;AAAA,IAC5C;AACA,QAAI;AACJ,QAAI,MAAM,QAAQ,OAAO,KAAK,SAAS,UAAU;AAC/C,UAAI;AACF,qBAAa,KAAK,MAAM,KAAK,IAAI;AAAA,MACnC,QAAQ;AAAA,MAER;AAAA,IACF;AACA,UAAM,MAAM,eAAe,SAAY,SAAY,MAAM,MAAM,OAAO,UAAU;AAChF,QAAI,KAAK;AACP,WAAK,QAAQ,EAAE,KAAK,UAAU,IAAI,QAAQ,UAAU,OAAO,IAAI,QAAQ,MAAM,CAAC;AAC9E,YAAM,SAAS,IAAI,SAAS,cAAc;AAC1C,YAAM,UAAU,IAAI;AAAA,QAClB,OAAO,QAAQ,IAAI,SAAS,mBAAmB,EAAE,gBAAgB,mBAAmB,CAAC;AAAA,MACvF;AACA,YAAM,WACJ,OAAO,IAAI,SAAS,iBAAiB,WACjC,IAAI,SAAS,eACb,KAAK,UAAU,IAAI,SAAS,gBAAgB,CAAC,CAAC;AACpD,aAAO,IAAI,SAAS,UAAU,EAAE,QAAQ,QAAQ,CAAC;AAAA,IACnD;AACA,SAAK,eAAe,EAAE,KAAK,aAAa,WAAW,CAAC;AACpD,QAAI,WAAW,SAAS;AACtB,YAAM,MAAM,eAAe,SAAY,kBAAkB,MAAM,YAAY,UAAU;AACrF,YAAM,IAAI,qBAAqB,KAAK,GAAG;AAAA,IACzC;AACA,QAAI,WAAW,eAAe;AAC5B,aAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,oBAAoB,CAAC,GAAG,EAAE,QAAQ,IAAI,CAAC;AAAA,IACrF;AACA,QAAI,CAAC;AACH,YAAM,IAAI,YAAY,+DAA+D;AACvF,WAAO,SAAS,OAAsB,IAAI;AAAA,EAC5C;AACF;AAOA,gBAAuB,gBACrB,MACA,SAA8C,CAAC,GACb;AAClC,MAAI,CAAC,KAAK,MAAM;AACd,UAAM,IAAI,YAAY,8CAA8C;AAAA,EACtE;AACA,QAAM,SAAS,MAAM,KAAK,KAAK,MAAM;AACrC,QAAM,QAAQ,MAAM,YAAY,WAAW,MAAM;AACjD,aAAW,SAAS,MAAM,QAAQ,EAAG,OAAM;AAC7C;AAYA,eAAe,WAAW,OAA0C;AAClE,SAAO,YAAY,MAAM,WAAW;AACtC;AAEA,eAAe,YAAY,MAAgC;AACzD,MAAI,QAAQ,QAAQ,OAAO,SAAS,SAAU,QAAO,SAAS,EAAE,KAAK,OAAO,IAAI,EAAE,CAAC;AACnF,QAAM,IAAI;AACV,QAAM,UAAU,aAAa;AAAA,IAC3B,OAAO,EAAE,SAAS;AAAA,IAClB,UAAU,EAAE,YAAY;AAAA,IACxB,aAAa,EAAE,eAAe;AAAA,IAC9B,YAAY,EAAE,cAAc;AAAA,IAC5B,uBAAuB,EAAE,yBAAyB;AAAA,IAClD,iBAAiB,EAAE,mBAAmB;AAAA,EACxC,CAAC;AACD,SAAO,SAAS,OAAO;AACzB;","names":["requestKey"]}
|