@tangle-network/agent-eval 0.23.1 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -0
- package/README.md +141 -79
- package/dist/baseline-4R5deP0N.d.ts +108 -0
- package/dist/benchmarks/index.d.ts +3 -2
- package/dist/benchmarks/index.js +1 -1
- package/dist/builder-eval/index.d.ts +249 -0
- package/dist/builder-eval/index.js +391 -0
- package/dist/builder-eval/index.js.map +1 -0
- package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
- package/dist/chunk-2A5XJB43.js.map +1 -0
- package/dist/chunk-47X6LRCE.js +76 -0
- package/dist/chunk-47X6LRCE.js.map +1 -0
- package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
- package/dist/chunk-4F5DQN55.js.map +1 -0
- package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
- package/dist/chunk-4S4BM3QQ.js.map +1 -0
- package/dist/chunk-5BKGXME7.js +65 -0
- package/dist/chunk-5BKGXME7.js.map +1 -0
- package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
- package/dist/chunk-6QDKWHLS.js.map +1 -0
- package/dist/chunk-I4MBDTY5.js +272 -0
- package/dist/chunk-I4MBDTY5.js.map +1 -0
- package/dist/chunk-K2TPS5LB.js +569 -0
- package/dist/chunk-K2TPS5LB.js.map +1 -0
- package/dist/chunk-KKHDIONI.js +414 -0
- package/dist/chunk-KKHDIONI.js.map +1 -0
- package/dist/chunk-KMPRBJK4.js +74 -0
- package/dist/chunk-KMPRBJK4.js.map +1 -0
- package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
- package/dist/chunk-KTGTIOFD.js.map +1 -0
- package/dist/chunk-LSH4MMOZ.js +838 -0
- package/dist/chunk-LSH4MMOZ.js.map +1 -0
- package/dist/chunk-NG236HPC.js +57 -0
- package/dist/chunk-NG236HPC.js.map +1 -0
- package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
- package/dist/chunk-NLMNWKVM.js.map +1 -0
- package/dist/chunk-NU65VQ7M.js +99 -0
- package/dist/chunk-NU65VQ7M.js.map +1 -0
- package/dist/chunk-OHEPNJQN.js +554 -0
- package/dist/chunk-OHEPNJQN.js.map +1 -0
- package/dist/chunk-OWLAAMME.js +250 -0
- package/dist/chunk-OWLAAMME.js.map +1 -0
- package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
- package/dist/chunk-PC4UYEBM.js.map +1 -0
- package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
- package/dist/chunk-RAF443UI.js.map +1 -0
- package/dist/chunk-RZTMDUO7.js +49 -0
- package/dist/chunk-RZTMDUO7.js.map +1 -0
- package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
- package/dist/chunk-SESZDQPX.js.map +1 -0
- package/dist/{chunk-6KQG5HAH.js → chunk-SY6WAAAD.js} +84 -71
- package/dist/chunk-SY6WAAAD.js.map +1 -0
- package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
- package/dist/chunk-TVVP3ZZQ.js.map +1 -0
- package/dist/{chunk-VQQSPGSM.js → chunk-VRJVTXRV.js} +169 -111
- package/dist/chunk-VRJVTXRV.js.map +1 -0
- package/dist/chunk-WWYCWKUM.js +196 -0
- package/dist/chunk-WWYCWKUM.js.map +1 -0
- package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
- package/dist/chunk-YRZ4M5GS.js.map +1 -0
- package/dist/chunk-ZN274SWR.js +613 -0
- package/dist/chunk-ZN274SWR.js.map +1 -0
- package/dist/cli.js +10 -6
- package/dist/cli.js.map +1 -1
- package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
- package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
- package/dist/control.d.ts +8 -6
- package/dist/control.js +10 -7
- package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
- package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
- package/dist/errors-BZ9sTdz7.d.ts +70 -0
- package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
- package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
- package/dist/governance/index.d.ts +5 -0
- package/dist/governance/index.js +18 -0
- package/dist/governance/index.js.map +1 -0
- package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
- package/dist/index-Oj9fAPPN.d.ts +270 -0
- package/dist/index.d.ts +1866 -3151
- package/dist/index.js +5457 -7809
- package/dist/index.js.map +1 -1
- package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
- package/dist/knowledge/index.d.ts +102 -0
- package/dist/knowledge/index.js +18 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/meta-eval/index.d.ts +99 -0
- package/dist/meta-eval/index.js +324 -0
- package/dist/meta-eval/index.js.map +1 -0
- package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +11 -8
- package/dist/optimization.js +11 -9
- package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
- package/dist/pipelines/index.d.ts +172 -0
- package/dist/pipelines/index.js +409 -0
- package/dist/pipelines/index.js.map +1 -0
- package/dist/prm/index.d.ts +99 -0
- package/dist/prm/index.js +222 -0
- package/dist/prm/index.js.map +1 -0
- package/dist/query-DODUYdPg.d.ts +30 -0
- package/dist/release-report-TDPn1cxq.d.ts +292 -0
- package/dist/replay-BL96gCEP.d.ts +226 -0
- package/dist/reporting.d.ts +10 -295
- package/dist/reporting.js +10 -6
- package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-CUOiGcGv.d.ts} +148 -146
- package/dist/rl.d.ts +1762 -8
- package/dist/rl.js +2035 -58
- package/dist/rl.js.map +1 -1
- package/dist/rubric-D5tjHNJQ.d.ts +72 -0
- package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
- package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
- package/dist/sequential-Dgz1n51-.d.ts +139 -0
- package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
- package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-BXGs_9V0.d.ts} +3 -76
- package/dist/telemetry/file.js +4 -1
- package/dist/telemetry/file.js.map +1 -1
- package/dist/telemetry/index.js +57 -57
- package/dist/telemetry/index.js.map +1 -1
- package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
- package/dist/traces.d.ts +142 -387
- package/dist/traces.js +1302 -40
- package/dist/traces.js.map +1 -1
- package/dist/trajectory-CnoBo-JY.d.ts +32 -0
- package/dist/wire/index.d.ts +22 -22
- package/dist/wire/index.js +4 -3
- package/package.json +44 -18
- package/dist/chunk-42I2QC2L.js.map +0 -1
- package/dist/chunk-5IIQKMD5.js.map +0 -1
- package/dist/chunk-6KQG5HAH.js.map +0 -1
- package/dist/chunk-6M774GY6.js.map +0 -1
- package/dist/chunk-7EAUOUQS.js.map +0 -1
- package/dist/chunk-AXHNWLIX.js.map +0 -1
- package/dist/chunk-EXGR4XEM.js.map +0 -1
- package/dist/chunk-IOXMGMHQ.js.map +0 -1
- package/dist/chunk-KAO3Q65R.js.map +0 -1
- package/dist/chunk-LZKIOBG2.js +0 -2026
- package/dist/chunk-LZKIOBG2.js.map +0 -1
- package/dist/chunk-QBW3YBTR.js.map +0 -1
- package/dist/chunk-QUKKGHTZ.js.map +0 -1
- package/dist/chunk-SQQLHODJ.js.map +0 -1
- package/dist/chunk-V5QSWN7L.js +0 -1310
- package/dist/chunk-V5QSWN7L.js.map +0 -1
- package/dist/chunk-VQQSPGSM.js.map +0 -1
- package/dist/chunk-XPHOZPOM.js +0 -1947
- package/dist/chunk-XPHOZPOM.js.map +0 -1
- package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
- package/dist/index-ekBXweiQ.d.ts +0 -1894
- package/dist/sequential-DgU2mFsE.d.ts +0 -304
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
summaryTable
|
|
3
|
-
} from "./chunk-IOXMGMHQ.js";
|
|
3
|
+
} from "./chunk-2A5XJB43.js";
|
|
4
|
+
import {
|
|
5
|
+
VerificationError
|
|
6
|
+
} from "./chunk-NG236HPC.js";
|
|
4
7
|
|
|
5
8
|
// src/release-confidence.ts
|
|
6
9
|
var DEFAULT_THRESHOLDS = {
|
|
@@ -58,10 +61,18 @@ function evaluateReleaseConfidence(input) {
|
|
|
58
61
|
searchMeanScore,
|
|
59
62
|
holdoutMeanScore,
|
|
60
63
|
overfitGap: safeDiff(searchMeanScore, holdoutMeanScore),
|
|
61
|
-
meanCostUsd: mean([
|
|
62
|
-
|
|
64
|
+
meanCostUsd: mean([
|
|
65
|
+
...runs.map((r) => r.costUsd),
|
|
66
|
+
...traces.map((t) => t.costUsd).filter(isFiniteNumber)
|
|
67
|
+
]),
|
|
68
|
+
p95WallMs: percentile(
|
|
69
|
+
[...runs.map((r) => r.wallMs), ...traces.map((t) => t.durationMs).filter(isFiniteNumber)],
|
|
70
|
+
0.95
|
|
71
|
+
),
|
|
63
72
|
failedRows: failedRows(runs, traces, thresholds.failureScoreThreshold).length,
|
|
64
|
-
failuresWithAsi: failedRows(runs, traces, thresholds.failureScoreThreshold).filter(
|
|
73
|
+
failuresWithAsi: failedRows(runs, traces, thresholds.failureScoreThreshold).filter(
|
|
74
|
+
(row) => row.hasAsi
|
|
75
|
+
).length,
|
|
65
76
|
singleShotTraces: traces.filter((t) => t.turnCount === 1).length,
|
|
66
77
|
multiShotTraces: traces.filter((t) => (t.turnCount ?? 0) > 1).length,
|
|
67
78
|
splitCounts,
|
|
@@ -94,7 +105,7 @@ function evaluateReleaseConfidence(input) {
|
|
|
94
105
|
function assertReleaseConfidence(input) {
|
|
95
106
|
const scorecard = evaluateReleaseConfidence(input);
|
|
96
107
|
if (scorecard.status === "fail") {
|
|
97
|
-
throw new Error(scorecard.summary);
|
|
108
|
+
throw new VerificationError(scorecard.summary);
|
|
98
109
|
}
|
|
99
110
|
return scorecard;
|
|
100
111
|
}
|
|
@@ -104,41 +115,88 @@ function filterCandidate(runs, candidateId, baselineId) {
|
|
|
104
115
|
return [...runs];
|
|
105
116
|
}
|
|
106
117
|
function filterTraceCandidate(traces, candidateId, baselineId) {
|
|
107
|
-
if (candidateId)
|
|
108
|
-
|
|
118
|
+
if (candidateId)
|
|
119
|
+
return traces.filter((t) => t.candidateId === void 0 || t.candidateId === candidateId);
|
|
120
|
+
if (baselineId)
|
|
121
|
+
return traces.filter((t) => t.candidateId === void 0 || t.candidateId !== baselineId);
|
|
109
122
|
return [...traces];
|
|
110
123
|
}
|
|
111
124
|
function checkCorpus(input, thresholds, metrics, issues) {
|
|
112
125
|
if (thresholds.requireCorpus && !input.dataset && (input.scenarios?.length ?? 0) === 0) {
|
|
113
|
-
issues.push({
|
|
126
|
+
issues.push({
|
|
127
|
+
axis: "corpus",
|
|
128
|
+
severity: "critical",
|
|
129
|
+
code: "missing_corpus",
|
|
130
|
+
detail: "No Dataset manifest or scenarios supplied."
|
|
131
|
+
});
|
|
114
132
|
}
|
|
115
133
|
if (metrics.scenarioCount < thresholds.minScenarioCount) {
|
|
116
|
-
issues.push({
|
|
134
|
+
issues.push({
|
|
135
|
+
axis: "corpus",
|
|
136
|
+
severity: "critical",
|
|
137
|
+
code: "few_scenarios",
|
|
138
|
+
detail: `${metrics.scenarioCount} scenario(s) < min ${thresholds.minScenarioCount}.`
|
|
139
|
+
});
|
|
117
140
|
}
|
|
118
141
|
if (thresholds.requireHoldout && metrics.splitCounts.holdout === 0) {
|
|
119
|
-
issues.push({
|
|
142
|
+
issues.push({
|
|
143
|
+
axis: "corpus",
|
|
144
|
+
severity: "critical",
|
|
145
|
+
code: "missing_holdout_split",
|
|
146
|
+
detail: "Corpus has no holdout scenarios."
|
|
147
|
+
});
|
|
120
148
|
}
|
|
121
149
|
}
|
|
122
150
|
function checkQuality(thresholds, metrics, issues) {
|
|
123
151
|
if (metrics.searchRuns < thresholds.minSearchRuns) {
|
|
124
|
-
issues.push({
|
|
152
|
+
issues.push({
|
|
153
|
+
axis: "quality",
|
|
154
|
+
severity: "critical",
|
|
155
|
+
code: "few_search_runs",
|
|
156
|
+
detail: `${metrics.searchRuns} search run(s) < min ${thresholds.minSearchRuns}.`
|
|
157
|
+
});
|
|
125
158
|
}
|
|
126
159
|
if (metrics.passRate < thresholds.minPassRate) {
|
|
127
|
-
issues.push({
|
|
160
|
+
issues.push({
|
|
161
|
+
axis: "quality",
|
|
162
|
+
severity: "critical",
|
|
163
|
+
code: "low_pass_rate",
|
|
164
|
+
detail: `passRate ${fmt(metrics.passRate)} < ${fmt(thresholds.minPassRate)}.`
|
|
165
|
+
});
|
|
128
166
|
}
|
|
129
167
|
if (metrics.meanScore < thresholds.minMeanScore) {
|
|
130
|
-
issues.push({
|
|
168
|
+
issues.push({
|
|
169
|
+
axis: "quality",
|
|
170
|
+
severity: "critical",
|
|
171
|
+
code: "low_mean_score",
|
|
172
|
+
detail: `meanScore ${fmt(metrics.meanScore)} < ${fmt(thresholds.minMeanScore)}.`
|
|
173
|
+
});
|
|
131
174
|
}
|
|
132
175
|
}
|
|
133
176
|
function checkGeneralization(gateDecision, thresholds, metrics, issues) {
|
|
134
177
|
if (thresholds.requireHoldout && metrics.holdoutRuns < thresholds.minHoldoutRuns) {
|
|
135
|
-
issues.push({
|
|
178
|
+
issues.push({
|
|
179
|
+
axis: "generalization",
|
|
180
|
+
severity: "critical",
|
|
181
|
+
code: "few_holdout_runs",
|
|
182
|
+
detail: `${metrics.holdoutRuns} holdout run(s) < min ${thresholds.minHoldoutRuns}.`
|
|
183
|
+
});
|
|
136
184
|
}
|
|
137
185
|
if (Number.isFinite(metrics.overfitGap) && metrics.overfitGap > thresholds.maxOverfitGap) {
|
|
138
|
-
issues.push({
|
|
186
|
+
issues.push({
|
|
187
|
+
axis: "generalization",
|
|
188
|
+
severity: "critical",
|
|
189
|
+
code: "overfit_gap",
|
|
190
|
+
detail: `search-holdout gap ${fmt(metrics.overfitGap)} > ${fmt(thresholds.maxOverfitGap)}.`
|
|
191
|
+
});
|
|
139
192
|
}
|
|
140
193
|
if (gateDecision && !gateDecision.promote) {
|
|
141
|
-
issues.push({
|
|
194
|
+
issues.push({
|
|
195
|
+
axis: "generalization",
|
|
196
|
+
severity: "critical",
|
|
197
|
+
code: `gate_${gateDecision.rejectionCode ?? "reject"}`,
|
|
198
|
+
detail: gateDecision.reason
|
|
199
|
+
});
|
|
142
200
|
}
|
|
143
201
|
}
|
|
144
202
|
function checkDiagnostics(thresholds, metrics, issues) {
|
|
@@ -154,19 +212,54 @@ function checkDiagnostics(thresholds, metrics, issues) {
|
|
|
154
212
|
}
|
|
155
213
|
function checkEfficiency(thresholds, metrics, issues) {
|
|
156
214
|
if (metrics.meanCostUsd > thresholds.maxMeanCostUsd) {
|
|
157
|
-
issues.push({
|
|
215
|
+
issues.push({
|
|
216
|
+
axis: "efficiency",
|
|
217
|
+
severity: "critical",
|
|
218
|
+
code: "cost_budget",
|
|
219
|
+
detail: `meanCostUsd ${fmt(metrics.meanCostUsd)} > ${fmt(thresholds.maxMeanCostUsd)}.`
|
|
220
|
+
});
|
|
158
221
|
}
|
|
159
222
|
if (metrics.p95WallMs > thresholds.maxP95WallMs) {
|
|
160
|
-
issues.push({
|
|
223
|
+
issues.push({
|
|
224
|
+
axis: "efficiency",
|
|
225
|
+
severity: "critical",
|
|
226
|
+
code: "latency_budget",
|
|
227
|
+
detail: `p95WallMs ${fmt(metrics.p95WallMs)} > ${fmt(thresholds.maxP95WallMs)}.`
|
|
228
|
+
});
|
|
161
229
|
}
|
|
162
230
|
}
|
|
163
231
|
function buildAxes(metrics, thresholds, gateDecision, issues) {
|
|
164
232
|
return [
|
|
165
|
-
axis(
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
233
|
+
axis(
|
|
234
|
+
"corpus",
|
|
235
|
+
issues,
|
|
236
|
+
bounded(metrics.scenarioCount / Math.max(1, thresholds.minScenarioCount)),
|
|
237
|
+
`${metrics.scenarioCount} scenarios; holdout=${metrics.splitCounts.holdout}`
|
|
238
|
+
),
|
|
239
|
+
axis(
|
|
240
|
+
"quality",
|
|
241
|
+
issues,
|
|
242
|
+
Math.min(metrics.passRate, metrics.meanScore),
|
|
243
|
+
`passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`
|
|
244
|
+
),
|
|
245
|
+
axis(
|
|
246
|
+
"generalization",
|
|
247
|
+
issues,
|
|
248
|
+
gateDecision && !gateDecision.promote ? 0 : gapScore(metrics.overfitGap, thresholds.maxOverfitGap),
|
|
249
|
+
`holdoutRuns=${metrics.holdoutRuns} overfitGap=${fmt(metrics.overfitGap)}`
|
|
250
|
+
),
|
|
251
|
+
axis(
|
|
252
|
+
"diagnostics",
|
|
253
|
+
issues,
|
|
254
|
+
metrics.failedRows === 0 ? 1 : metrics.failuresWithAsi / metrics.failedRows,
|
|
255
|
+
`failuresWithAsi=${metrics.failuresWithAsi}/${metrics.failedRows}`
|
|
256
|
+
),
|
|
257
|
+
axis(
|
|
258
|
+
"efficiency",
|
|
259
|
+
issues,
|
|
260
|
+
efficiencyScore(metrics, thresholds),
|
|
261
|
+
`meanCostUsd=${fmt(metrics.meanCostUsd)} p95WallMs=${fmt(metrics.p95WallMs)}`
|
|
262
|
+
)
|
|
170
263
|
];
|
|
171
264
|
}
|
|
172
265
|
function axis(name, issues, score, detail) {
|
|
@@ -236,7 +329,9 @@ function passRate(runs, traces, threshold) {
|
|
|
236
329
|
const score = run.outcome.holdoutScore ?? run.outcome.searchScore;
|
|
237
330
|
return !run.failureMode && score !== void 0 && score >= threshold;
|
|
238
331
|
}),
|
|
239
|
-
...traces.map(
|
|
332
|
+
...traces.map(
|
|
333
|
+
(trace) => trace.ok !== false && (trace.score === void 0 || trace.score >= threshold)
|
|
334
|
+
)
|
|
240
335
|
];
|
|
241
336
|
if (outcomes.length === 0) return 0;
|
|
242
337
|
return outcomes.filter(Boolean).length / outcomes.length;
|
|
@@ -285,94 +380,6 @@ function fmt(x) {
|
|
|
285
380
|
return x.toFixed(4);
|
|
286
381
|
}
|
|
287
382
|
|
|
288
|
-
// src/release-report.ts
|
|
289
|
-
function renderReleaseReport(scorecard, options = {}) {
|
|
290
|
-
const title = options.title ?? `Release Report: ${scorecard.target}`;
|
|
291
|
-
const lines = [];
|
|
292
|
-
lines.push(`# ${title}`);
|
|
293
|
-
lines.push("");
|
|
294
|
-
lines.push(`Status: **${scorecard.status.toUpperCase()}**`);
|
|
295
|
-
lines.push(`Promote: **${scorecard.promote ? "yes" : "no"}**`);
|
|
296
|
-
if (scorecard.candidateId) lines.push(`Candidate: \`${scorecard.candidateId}\``);
|
|
297
|
-
if (scorecard.baselineId) lines.push(`Baseline: \`${scorecard.baselineId}\``);
|
|
298
|
-
lines.push("");
|
|
299
|
-
lines.push(scorecard.summary);
|
|
300
|
-
lines.push("");
|
|
301
|
-
lines.push("## Metrics");
|
|
302
|
-
lines.push("");
|
|
303
|
-
lines.push("| Metric | Value |");
|
|
304
|
-
lines.push("|---|---:|");
|
|
305
|
-
lines.push(`| Scenarios | ${scorecard.metrics.scenarioCount} |`);
|
|
306
|
-
lines.push(`| Search runs | ${scorecard.metrics.searchRuns} |`);
|
|
307
|
-
lines.push(`| Holdout runs | ${scorecard.metrics.holdoutRuns} |`);
|
|
308
|
-
lines.push(`| Pass rate | ${pct(scorecard.metrics.passRate)} |`);
|
|
309
|
-
lines.push(`| Mean score | ${num(scorecard.metrics.meanScore)} |`);
|
|
310
|
-
lines.push(`| Search mean | ${num(scorecard.metrics.searchMeanScore)} |`);
|
|
311
|
-
lines.push(`| Holdout mean | ${num(scorecard.metrics.holdoutMeanScore)} |`);
|
|
312
|
-
lines.push(`| Overfit gap | ${num(scorecard.metrics.overfitGap)} |`);
|
|
313
|
-
lines.push(`| Mean cost | $${num(scorecard.metrics.meanCostUsd)} |`);
|
|
314
|
-
lines.push(`| p95 wall time | ${Math.round(scorecard.metrics.p95WallMs)} ms |`);
|
|
315
|
-
lines.push("");
|
|
316
|
-
if (scorecard.issues.length > 0) {
|
|
317
|
-
lines.push("## Issues");
|
|
318
|
-
lines.push("");
|
|
319
|
-
for (const issue of scorecard.issues) {
|
|
320
|
-
lines.push(`- **${issue.severity}** \`${issue.code}\` (${issue.axis}): ${issue.detail}`);
|
|
321
|
-
}
|
|
322
|
-
lines.push("");
|
|
323
|
-
}
|
|
324
|
-
const surfaces = entries(scorecard.metrics.responsibleSurfaceCounts);
|
|
325
|
-
if (surfaces.length > 0) {
|
|
326
|
-
lines.push("## Responsible Surfaces");
|
|
327
|
-
lines.push("");
|
|
328
|
-
for (const [surface, count] of surfaces) lines.push(`- ${surface}: ${count}`);
|
|
329
|
-
lines.push("");
|
|
330
|
-
}
|
|
331
|
-
const failures = entries(scorecard.metrics.failureModeCounts);
|
|
332
|
-
if (failures.length > 0) {
|
|
333
|
-
lines.push("## Failure Modes");
|
|
334
|
-
lines.push("");
|
|
335
|
-
for (const [mode, count] of failures) lines.push(`- ${mode}: ${count}`);
|
|
336
|
-
lines.push("");
|
|
337
|
-
}
|
|
338
|
-
if (options.runs && options.runs.length > 0) {
|
|
339
|
-
lines.push("## Run Summary");
|
|
340
|
-
lines.push("");
|
|
341
|
-
lines.push(summaryTable([...options.runs], {
|
|
342
|
-
comparator: options.comparator ?? scorecard.baselineId ?? void 0,
|
|
343
|
-
split: "holdout"
|
|
344
|
-
}).markdown);
|
|
345
|
-
lines.push("");
|
|
346
|
-
}
|
|
347
|
-
if (options.traceAnalystFindings && options.traceAnalystFindings.length > 0) {
|
|
348
|
-
lines.push("## TraceAnalyst Findings");
|
|
349
|
-
lines.push("");
|
|
350
|
-
for (const finding of options.traceAnalystFindings) lines.push(`- ${finding}`);
|
|
351
|
-
lines.push("");
|
|
352
|
-
}
|
|
353
|
-
const nextActions = options.nextActions ?? defaultNextActions(scorecard);
|
|
354
|
-
if (nextActions.length > 0) {
|
|
355
|
-
lines.push("## Next Actions");
|
|
356
|
-
lines.push("");
|
|
357
|
-
for (const action of nextActions) lines.push(`- ${action}`);
|
|
358
|
-
lines.push("");
|
|
359
|
-
}
|
|
360
|
-
return lines.join("\n").trimEnd() + "\n";
|
|
361
|
-
}
|
|
362
|
-
function defaultNextActions(scorecard) {
|
|
363
|
-
if (scorecard.promote) return ["Promote the candidate and keep canaries enabled."];
|
|
364
|
-
return scorecard.issues.filter((issue) => issue.severity === "critical").map((issue) => `Resolve ${issue.code}: ${issue.detail}`);
|
|
365
|
-
}
|
|
366
|
-
function entries(values) {
|
|
367
|
-
return Object.entries(values).filter(([, count]) => count > 0).sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
|
|
368
|
-
}
|
|
369
|
-
function pct(value) {
|
|
370
|
-
return Number.isFinite(value) ? `${(value * 100).toFixed(1)}%` : "n/a";
|
|
371
|
-
}
|
|
372
|
-
function num(value) {
|
|
373
|
-
return Number.isFinite(value) ? value.toFixed(3) : "n/a";
|
|
374
|
-
}
|
|
375
|
-
|
|
376
383
|
// src/promotion-gate.ts
|
|
377
384
|
function bootstrapCi(baseline, candidate, options = {}) {
|
|
378
385
|
const alpha = options.alpha ?? 0.05;
|
|
@@ -484,12 +491,103 @@ async function scoreAll(outputs, judge, concurrency) {
|
|
|
484
491
|
return results;
|
|
485
492
|
}
|
|
486
493
|
|
|
494
|
+
// src/release-report.ts
|
|
495
|
+
function renderReleaseReport(scorecard, options = {}) {
|
|
496
|
+
const title = options.title ?? `Release Report: ${scorecard.target}`;
|
|
497
|
+
const lines = [];
|
|
498
|
+
lines.push(`# ${title}`);
|
|
499
|
+
lines.push("");
|
|
500
|
+
lines.push(`Status: **${scorecard.status.toUpperCase()}**`);
|
|
501
|
+
lines.push(`Promote: **${scorecard.promote ? "yes" : "no"}**`);
|
|
502
|
+
if (scorecard.candidateId) lines.push(`Candidate: \`${scorecard.candidateId}\``);
|
|
503
|
+
if (scorecard.baselineId) lines.push(`Baseline: \`${scorecard.baselineId}\``);
|
|
504
|
+
lines.push("");
|
|
505
|
+
lines.push(scorecard.summary);
|
|
506
|
+
lines.push("");
|
|
507
|
+
lines.push("## Metrics");
|
|
508
|
+
lines.push("");
|
|
509
|
+
lines.push("| Metric | Value |");
|
|
510
|
+
lines.push("|---|---:|");
|
|
511
|
+
lines.push(`| Scenarios | ${scorecard.metrics.scenarioCount} |`);
|
|
512
|
+
lines.push(`| Search runs | ${scorecard.metrics.searchRuns} |`);
|
|
513
|
+
lines.push(`| Holdout runs | ${scorecard.metrics.holdoutRuns} |`);
|
|
514
|
+
lines.push(`| Pass rate | ${pct(scorecard.metrics.passRate)} |`);
|
|
515
|
+
lines.push(`| Mean score | ${num(scorecard.metrics.meanScore)} |`);
|
|
516
|
+
lines.push(`| Search mean | ${num(scorecard.metrics.searchMeanScore)} |`);
|
|
517
|
+
lines.push(`| Holdout mean | ${num(scorecard.metrics.holdoutMeanScore)} |`);
|
|
518
|
+
lines.push(`| Overfit gap | ${num(scorecard.metrics.overfitGap)} |`);
|
|
519
|
+
lines.push(`| Mean cost | $${num(scorecard.metrics.meanCostUsd)} |`);
|
|
520
|
+
lines.push(`| p95 wall time | ${Math.round(scorecard.metrics.p95WallMs)} ms |`);
|
|
521
|
+
lines.push("");
|
|
522
|
+
if (scorecard.issues.length > 0) {
|
|
523
|
+
lines.push("## Issues");
|
|
524
|
+
lines.push("");
|
|
525
|
+
for (const issue of scorecard.issues) {
|
|
526
|
+
lines.push(`- **${issue.severity}** \`${issue.code}\` (${issue.axis}): ${issue.detail}`);
|
|
527
|
+
}
|
|
528
|
+
lines.push("");
|
|
529
|
+
}
|
|
530
|
+
const surfaces = entries(scorecard.metrics.responsibleSurfaceCounts);
|
|
531
|
+
if (surfaces.length > 0) {
|
|
532
|
+
lines.push("## Responsible Surfaces");
|
|
533
|
+
lines.push("");
|
|
534
|
+
for (const [surface, count] of surfaces) lines.push(`- ${surface}: ${count}`);
|
|
535
|
+
lines.push("");
|
|
536
|
+
}
|
|
537
|
+
const failures = entries(scorecard.metrics.failureModeCounts);
|
|
538
|
+
if (failures.length > 0) {
|
|
539
|
+
lines.push("## Failure Modes");
|
|
540
|
+
lines.push("");
|
|
541
|
+
for (const [mode, count] of failures) lines.push(`- ${mode}: ${count}`);
|
|
542
|
+
lines.push("");
|
|
543
|
+
}
|
|
544
|
+
if (options.runs && options.runs.length > 0) {
|
|
545
|
+
lines.push("## Run Summary");
|
|
546
|
+
lines.push("");
|
|
547
|
+
lines.push(
|
|
548
|
+
summaryTable([...options.runs], {
|
|
549
|
+
comparator: options.comparator ?? scorecard.baselineId ?? void 0,
|
|
550
|
+
split: "holdout"
|
|
551
|
+
}).markdown
|
|
552
|
+
);
|
|
553
|
+
lines.push("");
|
|
554
|
+
}
|
|
555
|
+
if (options.traceAnalystFindings && options.traceAnalystFindings.length > 0) {
|
|
556
|
+
lines.push("## TraceAnalyst Findings");
|
|
557
|
+
lines.push("");
|
|
558
|
+
for (const finding of options.traceAnalystFindings) lines.push(`- ${finding}`);
|
|
559
|
+
lines.push("");
|
|
560
|
+
}
|
|
561
|
+
const nextActions = options.nextActions ?? defaultNextActions(scorecard);
|
|
562
|
+
if (nextActions.length > 0) {
|
|
563
|
+
lines.push("## Next Actions");
|
|
564
|
+
lines.push("");
|
|
565
|
+
for (const action of nextActions) lines.push(`- ${action}`);
|
|
566
|
+
lines.push("");
|
|
567
|
+
}
|
|
568
|
+
return `${lines.join("\n").trimEnd()}
|
|
569
|
+
`;
|
|
570
|
+
}
|
|
571
|
+
function defaultNextActions(scorecard) {
|
|
572
|
+
if (scorecard.promote) return ["Promote the candidate and keep canaries enabled."];
|
|
573
|
+
return scorecard.issues.filter((issue) => issue.severity === "critical").map((issue) => `Resolve ${issue.code}: ${issue.detail}`);
|
|
574
|
+
}
|
|
575
|
+
function entries(values) {
|
|
576
|
+
return Object.entries(values).filter(([, count]) => count > 0).sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]));
|
|
577
|
+
}
|
|
578
|
+
function pct(value) {
|
|
579
|
+
return Number.isFinite(value) ? `${(value * 100).toFixed(1)}%` : "n/a";
|
|
580
|
+
}
|
|
581
|
+
function num(value) {
|
|
582
|
+
return Number.isFinite(value) ? value.toFixed(3) : "n/a";
|
|
583
|
+
}
|
|
584
|
+
|
|
487
585
|
export {
|
|
488
586
|
releaseTraceEvidenceFromMultiShotTrials,
|
|
489
587
|
evaluateReleaseConfidence,
|
|
490
588
|
assertReleaseConfidence,
|
|
491
|
-
renderReleaseReport,
|
|
492
589
|
bootstrapCi,
|
|
493
|
-
judgeReplayGate
|
|
590
|
+
judgeReplayGate,
|
|
591
|
+
renderReleaseReport
|
|
494
592
|
};
|
|
495
|
-
//# sourceMappingURL=chunk-7EAUOUQS.js.map
|
|
593
|
+
//# sourceMappingURL=chunk-RAF443UI.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/release-confidence.ts","../src/promotion-gate.ts","../src/release-report.ts"],"sourcesContent":["/**\n * Release confidence gate.\n *\n * This is the production-facing composition layer over the lower-level\n * primitives:\n * - Dataset manifests prove corpus/version coverage.\n * - RunRecord rows prove reproducible search/holdout outcomes.\n * - Multi-shot trace evidence carries turn counts and ASI diagnostics.\n * - HeldOutGate decisions remain the paired promotion authority.\n *\n * The gate is intentionally pure and conservative. Missing declared evidence\n * fails closed instead of being treated as a neutral zero.\n */\n\nimport type { DatasetManifest, DatasetScenario, DatasetSplit } from './dataset'\nimport { VerificationError } from './errors'\nimport type { GateDecision } from './held-out-gate'\nimport type { ActionableSideInfo, MultiShotTrialResult } from './multi-shot-optimization'\nimport type { RunRecord, RunSplitTag } from './run-record'\n\nexport type ReleaseConfidenceStatus = 'pass' | 'warn' | 'fail'\nexport type ReleaseConfidenceAxisName =\n | 'corpus'\n | 'quality'\n | 'generalization'\n | 'diagnostics'\n | 'efficiency'\n\nexport interface ReleaseTraceEvidence {\n scenarioId: string\n candidateId?: string\n split?: RunSplitTag\n score?: number\n ok?: boolean\n turnCount?: number\n costUsd?: number\n durationMs?: number\n failureMode?: string\n asi?: ActionableSideInfo[]\n metadata?: Record<string, unknown>\n}\n\nexport interface ReleaseConfidenceThresholds {\n /** Require a Dataset manifest or explicit scenarios. Default true. */\n requireCorpus?: boolean\n minScenarioCount?: number\n minSearchRuns?: number\n minHoldoutRuns?: number\n /** Require at least one holdout scenario/run. Default true. */\n requireHoldout?: boolean\n minPassRate?: number\n minMeanScore?: number\n /** Search mean may exceed holdout mean by at most this much. 
*/\n maxOverfitGap?: number\n maxMeanCostUsd?: number\n maxP95WallMs?: number\n /** Low-score/failed rows must carry ASI. Default true. */\n requireAsiForFailures?: boolean\n /** Score below this is considered a failure for ASI coverage. Default 0.5. */\n failureScoreThreshold?: number\n}\n\nexport interface ReleaseConfidenceInput {\n target: string\n candidateId?: string\n baselineId?: string\n dataset?: DatasetManifest\n scenarios?: readonly DatasetScenario[]\n runs?: readonly RunRecord[]\n traces?: readonly ReleaseTraceEvidence[]\n gateDecision?: GateDecision | null\n thresholds?: ReleaseConfidenceThresholds\n}\n\nexport interface ReleaseConfidenceAxis {\n name: ReleaseConfidenceAxisName\n status: ReleaseConfidenceStatus\n score: number\n detail: string\n}\n\nexport interface ReleaseConfidenceIssue {\n axis: ReleaseConfidenceAxisName\n severity: 'critical' | 'warning'\n code: string\n detail: string\n}\n\nexport interface ReleaseConfidenceMetrics {\n scenarioCount: number\n searchRuns: number\n holdoutRuns: number\n passRate: number\n meanScore: number\n searchMeanScore: number\n holdoutMeanScore: number\n overfitGap: number\n meanCostUsd: number\n p95WallMs: number\n failedRows: number\n failuresWithAsi: number\n singleShotTraces: number\n multiShotTraces: number\n splitCounts: Record<DatasetSplit, number>\n domainCounts: Record<string, number>\n failureModeCounts: Record<string, number>\n responsibleSurfaceCounts: Record<string, number>\n}\n\nexport interface ReleaseConfidenceScorecard {\n target: string\n candidateId: string | null\n baselineId: string | null\n status: ReleaseConfidenceStatus\n promote: boolean\n axes: ReleaseConfidenceAxis[]\n issues: ReleaseConfidenceIssue[]\n metrics: ReleaseConfidenceMetrics\n dataset: DatasetManifest | null\n gateDecision: GateDecision | null\n summary: string\n}\n\nconst DEFAULT_THRESHOLDS: Required<ReleaseConfidenceThresholds> = {\n requireCorpus: true,\n minScenarioCount: 1,\n minSearchRuns: 1,\n minHoldoutRuns: 1,\n 
requireHoldout: true,\n minPassRate: 0.8,\n minMeanScore: 0.7,\n maxOverfitGap: 0.15,\n maxMeanCostUsd: Number.POSITIVE_INFINITY,\n maxP95WallMs: Number.POSITIVE_INFINITY,\n requireAsiForFailures: true,\n failureScoreThreshold: 0.5,\n}\n\nexport function releaseTraceEvidenceFromMultiShotTrials(\n trials: readonly MultiShotTrialResult[],\n): ReleaseTraceEvidence[] {\n return trials.map((trial) => ({\n scenarioId: trial.scenarioId,\n candidateId: trial.variantId,\n split: trial.split === 'holdout' ? 'holdout' : trial.split === 'dev' ? 'dev' : 'search',\n score: trial.score,\n ok: trial.ok,\n turnCount: Array.isArray(trial.trace?.turns) ? trial.trace.turns.length : undefined,\n costUsd: trial.cost,\n durationMs: trial.durationMs,\n failureMode: trial.error ? 'runtime_error' : undefined,\n asi: trial.asi,\n metadata: trial.metadata,\n }))\n}\n\nexport function evaluateReleaseConfidence(\n input: ReleaseConfidenceInput,\n): ReleaseConfidenceScorecard {\n const thresholds = { ...DEFAULT_THRESHOLDS, ...input.thresholds }\n const candidateId = input.candidateId ?? null\n const runs = filterCandidate(input.runs ?? [], candidateId, input.baselineId)\n const traces = filterTraceCandidate(input.traces ?? [], candidateId, input.baselineId)\n const scenarios = input.scenarios ?? []\n const scenarioCount = input.dataset?.scenarioCount ?? scenarios.length\n const splitCounts = input.dataset?.splitCounts ?? countScenarioSplits(scenarios)\n const searchScores = scoresFor(runs, 'search')\n const holdoutScores = scoresFor(runs, 'holdout')\n const allScores = [...searchScores, ...holdoutScores]\n const traceScores = traces.map((t) => t.score).filter(isFiniteNumber)\n const scoreUniverse = allScores.length > 0 ? 
allScores : traceScores\n const searchRuns = runs.filter((r) => r.splitTag === 'search').length\n const holdoutRuns = runs.filter((r) => r.splitTag === 'holdout').length\n const searchMeanScore = mean(searchScores)\n const holdoutMeanScore = mean(holdoutScores)\n const metrics: ReleaseConfidenceMetrics = {\n scenarioCount,\n searchRuns,\n holdoutRuns,\n passRate: passRate(runs, traces, thresholds.failureScoreThreshold),\n meanScore: mean(scoreUniverse),\n searchMeanScore,\n holdoutMeanScore,\n overfitGap: safeDiff(searchMeanScore, holdoutMeanScore),\n meanCostUsd: mean([\n ...runs.map((r) => r.costUsd),\n ...traces.map((t) => t.costUsd).filter(isFiniteNumber),\n ]),\n p95WallMs: percentile(\n [...runs.map((r) => r.wallMs), ...traces.map((t) => t.durationMs).filter(isFiniteNumber)],\n 0.95,\n ),\n failedRows: failedRows(runs, traces, thresholds.failureScoreThreshold).length,\n failuresWithAsi: failedRows(runs, traces, thresholds.failureScoreThreshold).filter(\n (row) => row.hasAsi,\n ).length,\n singleShotTraces: traces.filter((t) => t.turnCount === 1).length,\n multiShotTraces: traces.filter((t) => (t.turnCount ?? 0) > 1).length,\n splitCounts,\n domainCounts: countDomains(scenarios),\n failureModeCounts: countFailureModes(runs, traces, thresholds.failureScoreThreshold),\n responsibleSurfaceCounts: countResponsibleSurfaces(traces),\n }\n\n const issues: ReleaseConfidenceIssue[] = []\n checkCorpus(input, thresholds, metrics, issues)\n checkQuality(thresholds, metrics, issues)\n checkGeneralization(input.gateDecision ?? null, thresholds, metrics, issues)\n checkDiagnostics(thresholds, metrics, issues)\n checkEfficiency(thresholds, metrics, issues)\n\n const axes = buildAxes(metrics, thresholds, input.gateDecision ?? null, issues)\n const status = issues.some((i) => i.severity === 'critical')\n ? 'fail'\n : issues.length > 0\n ? 'warn'\n : 'pass'\n\n return {\n target: input.target,\n candidateId,\n baselineId: input.baselineId ?? 
null,\n status,\n promote: status === 'pass' && (input.gateDecision ? input.gateDecision.promote : true),\n axes,\n issues,\n metrics,\n dataset: input.dataset ?? null,\n gateDecision: input.gateDecision ?? null,\n summary: renderSummary(input.target, status, metrics, issues),\n }\n}\n\nexport function assertReleaseConfidence(input: ReleaseConfidenceInput): ReleaseConfidenceScorecard {\n const scorecard = evaluateReleaseConfidence(input)\n if (scorecard.status === 'fail') {\n throw new VerificationError(scorecard.summary)\n }\n return scorecard\n}\n\nfunction filterCandidate(\n runs: readonly RunRecord[],\n candidateId: string | null,\n baselineId?: string,\n): RunRecord[] {\n if (candidateId) return runs.filter((r) => r.candidateId === candidateId)\n if (baselineId) return runs.filter((r) => r.candidateId !== baselineId)\n return [...runs]\n}\n\nfunction filterTraceCandidate(\n traces: readonly ReleaseTraceEvidence[],\n candidateId: string | null,\n baselineId?: string,\n): ReleaseTraceEvidence[] {\n if (candidateId)\n return traces.filter((t) => t.candidateId === undefined || t.candidateId === candidateId)\n if (baselineId)\n return traces.filter((t) => t.candidateId === undefined || t.candidateId !== baselineId)\n return [...traces]\n}\n\nfunction checkCorpus(\n input: ReleaseConfidenceInput,\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (thresholds.requireCorpus && !input.dataset && (input.scenarios?.length ?? 
0) === 0) {\n issues.push({\n axis: 'corpus',\n severity: 'critical',\n code: 'missing_corpus',\n detail: 'No Dataset manifest or scenarios supplied.',\n })\n }\n if (metrics.scenarioCount < thresholds.minScenarioCount) {\n issues.push({\n axis: 'corpus',\n severity: 'critical',\n code: 'few_scenarios',\n detail: `${metrics.scenarioCount} scenario(s) < min ${thresholds.minScenarioCount}.`,\n })\n }\n if (thresholds.requireHoldout && metrics.splitCounts.holdout === 0) {\n issues.push({\n axis: 'corpus',\n severity: 'critical',\n code: 'missing_holdout_split',\n detail: 'Corpus has no holdout scenarios.',\n })\n }\n}\n\nfunction checkQuality(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (metrics.searchRuns < thresholds.minSearchRuns) {\n issues.push({\n axis: 'quality',\n severity: 'critical',\n code: 'few_search_runs',\n detail: `${metrics.searchRuns} search run(s) < min ${thresholds.minSearchRuns}.`,\n })\n }\n if (metrics.passRate < thresholds.minPassRate) {\n issues.push({\n axis: 'quality',\n severity: 'critical',\n code: 'low_pass_rate',\n detail: `passRate ${fmt(metrics.passRate)} < ${fmt(thresholds.minPassRate)}.`,\n })\n }\n if (metrics.meanScore < thresholds.minMeanScore) {\n issues.push({\n axis: 'quality',\n severity: 'critical',\n code: 'low_mean_score',\n detail: `meanScore ${fmt(metrics.meanScore)} < ${fmt(thresholds.minMeanScore)}.`,\n })\n }\n}\n\nfunction checkGeneralization(\n gateDecision: GateDecision | null,\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (thresholds.requireHoldout && metrics.holdoutRuns < thresholds.minHoldoutRuns) {\n issues.push({\n axis: 'generalization',\n severity: 'critical',\n code: 'few_holdout_runs',\n detail: `${metrics.holdoutRuns} holdout run(s) < min ${thresholds.minHoldoutRuns}.`,\n })\n }\n if 
(Number.isFinite(metrics.overfitGap) && metrics.overfitGap > thresholds.maxOverfitGap) {\n issues.push({\n axis: 'generalization',\n severity: 'critical',\n code: 'overfit_gap',\n detail: `search-holdout gap ${fmt(metrics.overfitGap)} > ${fmt(thresholds.maxOverfitGap)}.`,\n })\n }\n if (gateDecision && !gateDecision.promote) {\n issues.push({\n axis: 'generalization',\n severity: 'critical',\n code: `gate_${gateDecision.rejectionCode ?? 'reject'}`,\n detail: gateDecision.reason,\n })\n }\n}\n\nfunction checkDiagnostics(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (!thresholds.requireAsiForFailures) return\n if (metrics.failedRows > metrics.failuresWithAsi) {\n issues.push({\n axis: 'diagnostics',\n severity: 'critical',\n code: 'missing_failure_asi',\n detail: `${metrics.failedRows - metrics.failuresWithAsi} failed row(s) have no actionable side information.`,\n })\n }\n}\n\nfunction checkEfficiency(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (metrics.meanCostUsd > thresholds.maxMeanCostUsd) {\n issues.push({\n axis: 'efficiency',\n severity: 'critical',\n code: 'cost_budget',\n detail: `meanCostUsd ${fmt(metrics.meanCostUsd)} > ${fmt(thresholds.maxMeanCostUsd)}.`,\n })\n }\n if (metrics.p95WallMs > thresholds.maxP95WallMs) {\n issues.push({\n axis: 'efficiency',\n severity: 'critical',\n code: 'latency_budget',\n detail: `p95WallMs ${fmt(metrics.p95WallMs)} > ${fmt(thresholds.maxP95WallMs)}.`,\n })\n }\n}\n\nfunction buildAxes(\n metrics: ReleaseConfidenceMetrics,\n thresholds: Required<ReleaseConfidenceThresholds>,\n gateDecision: GateDecision | null,\n issues: ReleaseConfidenceIssue[],\n): ReleaseConfidenceAxis[] {\n return [\n axis(\n 'corpus',\n issues,\n bounded(metrics.scenarioCount / Math.max(1, thresholds.minScenarioCount)),\n `${metrics.scenarioCount} scenarios; 
holdout=${metrics.splitCounts.holdout}`,\n ),\n axis(\n 'quality',\n issues,\n Math.min(metrics.passRate, metrics.meanScore),\n `passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`,\n ),\n axis(\n 'generalization',\n issues,\n gateDecision && !gateDecision.promote\n ? 0\n : gapScore(metrics.overfitGap, thresholds.maxOverfitGap),\n `holdoutRuns=${metrics.holdoutRuns} overfitGap=${fmt(metrics.overfitGap)}`,\n ),\n axis(\n 'diagnostics',\n issues,\n metrics.failedRows === 0 ? 1 : metrics.failuresWithAsi / metrics.failedRows,\n `failuresWithAsi=${metrics.failuresWithAsi}/${metrics.failedRows}`,\n ),\n axis(\n 'efficiency',\n issues,\n efficiencyScore(metrics, thresholds),\n `meanCostUsd=${fmt(metrics.meanCostUsd)} p95WallMs=${fmt(metrics.p95WallMs)}`,\n ),\n ]\n}\n\nfunction axis(\n name: ReleaseConfidenceAxisName,\n issues: ReleaseConfidenceIssue[],\n score: number,\n detail: string,\n): ReleaseConfidenceAxis {\n const own = issues.filter((i) => i.axis === name)\n const status = own.some((i) => i.severity === 'critical')\n ? 'fail'\n : own.length > 0\n ? 'warn'\n : 'pass'\n return { name, status, score: bounded(score), detail }\n}\n\nfunction countScenarioSplits(scenarios: readonly DatasetScenario[]): Record<DatasetSplit, number> {\n const counts: Record<DatasetSplit, number> = { train: 0, dev: 0, test: 0, holdout: 0 }\n for (const scenario of scenarios) counts[scenario.split ?? 'train']++\n return counts\n}\n\nfunction countDomains(scenarios: readonly DatasetScenario[]): Record<string, number> {\n const out: Record<string, number> = {}\n for (const scenario of scenarios) {\n const domain = scenario.tags?.domain ?? scenario.tags?.category ?? 'uncategorized'\n out[domain] = (out[domain] ?? 
0) + 1\n }\n return out\n}\n\nfunction countFailureModes(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): Record<string, number> {\n const out: Record<string, number> = {}\n for (const run of runs) {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n if (run.failureMode || (score !== undefined && score < threshold)) {\n const mode = run.failureMode ?? 'low_score'\n out[mode] = (out[mode] ?? 0) + 1\n }\n }\n for (const trace of traces) {\n if (\n trace.failureMode ||\n trace.ok === false ||\n (trace.score !== undefined && trace.score < threshold)\n ) {\n const mode = trace.failureMode ?? (trace.ok === false ? 'not_ok' : 'low_score')\n out[mode] = (out[mode] ?? 0) + 1\n }\n }\n return out\n}\n\nfunction countResponsibleSurfaces(traces: readonly ReleaseTraceEvidence[]): Record<string, number> {\n const out: Record<string, number> = {}\n for (const trace of traces) {\n for (const asi of trace.asi ?? []) {\n const surface = asi.responsibleSurface ?? 'unknown'\n out[surface] = (out[surface] ?? 0) + 1\n }\n }\n return out\n}\n\nfunction failedRows(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): Array<{ hasAsi: boolean }> {\n const out: Array<{ hasAsi: boolean }> = []\n for (const run of runs) {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n if (run.failureMode || (score !== undefined && score < threshold)) {\n const asiMetric = run.outcome.raw.asi\n out.push({ hasAsi: typeof asiMetric === 'number' && asiMetric > 0 })\n }\n }\n for (const trace of traces) {\n if (\n trace.failureMode ||\n trace.ok === false ||\n (trace.score !== undefined && trace.score < threshold)\n ) {\n out.push({ hasAsi: (trace.asi?.length ?? 
0) > 0 })\n }\n }\n return out\n}\n\nfunction passRate(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): number {\n const outcomes = [\n ...runs.map((run) => {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n return !run.failureMode && score !== undefined && score >= threshold\n }),\n ...traces.map(\n (trace) => trace.ok !== false && (trace.score === undefined || trace.score >= threshold),\n ),\n ]\n if (outcomes.length === 0) return 0\n return outcomes.filter(Boolean).length / outcomes.length\n}\n\nfunction scoresFor(runs: readonly RunRecord[], split: RunSplitTag): number[] {\n return runs\n .filter((run) => run.splitTag === split)\n .map((run) => (split === 'holdout' ? run.outcome.holdoutScore : run.outcome.searchScore))\n .filter(isFiniteNumber)\n}\n\nfunction mean(xs: readonly number[]): number {\n if (xs.length === 0) return Number.NaN\n return xs.reduce((sum, x) => sum + x, 0) / xs.length\n}\n\nfunction percentile(xs: readonly number[], p: number): number {\n if (xs.length === 0) return Number.NaN\n const sorted = [...xs].sort((a, b) => a - b)\n return sorted[Math.min(sorted.length - 1, Math.max(0, Math.ceil(p * sorted.length) - 1))]!\n}\n\nfunction isFiniteNumber(value: unknown): value is number {\n return typeof value === 'number' && Number.isFinite(value)\n}\n\nfunction safeDiff(a: number, b: number): number {\n if (!Number.isFinite(a) || !Number.isFinite(b)) return Number.NaN\n return a - b\n}\n\nfunction gapScore(gap: number, maxGap: number): number {\n if (!Number.isFinite(gap)) return 0\n if (maxGap <= 0) return gap <= 0 ? 1 : 0\n return bounded(1 - Math.max(0, gap) / maxGap)\n}\n\nfunction efficiencyScore(\n metrics: ReleaseConfidenceMetrics,\n thresholds: Required<ReleaseConfidenceThresholds>,\n): number {\n const cost =\n Number.isFinite(thresholds.maxMeanCostUsd) && Number.isFinite(metrics.meanCostUsd)\n ? 
bounded(thresholds.maxMeanCostUsd / Math.max(metrics.meanCostUsd, 1e-12))\n : 1\n const latency =\n Number.isFinite(thresholds.maxP95WallMs) && Number.isFinite(metrics.p95WallMs)\n ? bounded(thresholds.maxP95WallMs / Math.max(metrics.p95WallMs, 1e-12))\n : 1\n return Math.min(cost, latency)\n}\n\nfunction bounded(x: number): number {\n if (!Number.isFinite(x)) return 0\n return Math.max(0, Math.min(1, x))\n}\n\nfunction renderSummary(\n target: string,\n status: ReleaseConfidenceStatus,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): string {\n const prefix = `release confidence ${status}: ${target}`\n const metricText = `scenarios=${metrics.scenarioCount} searchRuns=${metrics.searchRuns} holdoutRuns=${metrics.holdoutRuns} passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`\n if (issues.length === 0) return `${prefix}; ${metricText}`\n return `${prefix}; ${metricText}; issues=${issues.map((i) => i.code).join(',')}`\n}\n\nfunction fmt(x: number): string {\n if (!Number.isFinite(x)) return String(x)\n return x.toFixed(4)\n}\n","/**\n * Bootstrap-CI promotion gate.\n *\n * In any iterative-improvement loop (GEPA, prompt evolution, dataset\n * curation), the question is \"did this generation actually improve, or are\n * we celebrating noise?\". With small N and noisy outcomes, point-estimate\n * deltas lie. Bootstrap confidence intervals tell the operator whether the\n * delta is real before code or prompts get promoted.\n *\n * This module is pure functions — no I/O, no model calls. 
Easy to unit-test\n * and to compose into any verdict gate.\n *\n * Default gate:\n * - Bootstrap mean baseline vs candidate (1k resamples).\n * - Compute the delta distribution; pass if the lower CI bound > 0.\n * - Tunable confidence (default 95%) and resample count.\n *\n * Verdict semantics intentionally match the existing `experiments.jsonl`\n * vocabulary:\n * - ADVANCE: candidate's CI lower bound > baseline mean (real win)\n * - KEEP: overlap, but candidate point estimate >= baseline (neutral)\n * - REVERT: candidate's CI upper bound < baseline mean (real regression)\n * - INCONCLUSIVE: not enough samples or CI straddles zero with no signal\n */\n\nexport type Verdict = 'ADVANCE' | 'KEEP' | 'REVERT' | 'INCONCLUSIVE'\n\nexport interface BootstrapResult {\n baselineMean: number\n candidateMean: number\n /** candidateMean - baselineMean, point estimate. */\n delta: number\n /** Lower bound of the (1 - alpha) CI on the delta. */\n ciLower: number\n /** Upper bound of the (1 - alpha) CI on the delta. */\n ciUpper: number\n /** Number of bootstrap resamples used. */\n iterations: number\n alpha: number\n verdict: Verdict\n}\n\nexport interface BootstrapOptions {\n /** Confidence level alpha (default 0.05 → 95% CI). */\n alpha?: number\n /** Number of resamples (default 1000). */\n iterations?: number\n /**\n * Minimum total samples (baseline + candidate) below which we always\n * return INCONCLUSIVE — bootstrap with too few samples is meaningless.\n * Default 6 (combined).\n */\n minTotalSamples?: number\n /** RNG seed for reproducibility. Default: Math.random. 
*/\n seed?: number\n}\n\n/**\n * Compute the bootstrap CI on (candidateMean - baselineMean) and a verdict.\n *\n * Uses simple percentile bootstrap on the difference of resampled means.\n * That's the standard non-parametric primitive — no distributional\n * assumptions, robust to skew, easy to reason about.\n */\nexport function bootstrapCi(\n baseline: number[],\n candidate: number[],\n options: BootstrapOptions = {},\n): BootstrapResult {\n const alpha = options.alpha ?? 0.05\n const iterations = options.iterations ?? 1000\n const minTotal = options.minTotalSamples ?? 6\n const rng = mulberry32(options.seed ?? hashSeed(baseline, candidate))\n\n const baselineMean = mean(baseline)\n const candidateMean = mean(candidate)\n const delta = candidateMean - baselineMean\n\n if (\n baseline.length + candidate.length < minTotal ||\n baseline.length === 0 ||\n candidate.length === 0\n ) {\n return {\n baselineMean,\n candidateMean,\n delta,\n ciLower: -Infinity,\n ciUpper: Infinity,\n iterations: 0,\n alpha,\n verdict: 'INCONCLUSIVE',\n }\n }\n\n const deltas: number[] = new Array(iterations)\n for (let i = 0; i < iterations; i++) {\n const bResample = resample(baseline, rng)\n const cResample = resample(candidate, rng)\n deltas[i] = mean(cResample) - mean(bResample)\n }\n deltas.sort((a, b) => a - b)\n const lowerIdx = Math.floor((alpha / 2) * iterations)\n const upperIdx = Math.floor((1 - alpha / 2) * iterations) - 1\n const ciLower = deltas[Math.max(0, lowerIdx)]!\n const ciUpper = deltas[Math.min(iterations - 1, upperIdx)]!\n\n let verdict: Verdict\n if (ciLower > 0) verdict = 'ADVANCE'\n else if (ciUpper < 0) verdict = 'REVERT'\n else if (delta >= 0) verdict = 'KEEP'\n else verdict = 'INCONCLUSIVE'\n\n return {\n baselineMean,\n candidateMean,\n delta,\n ciLower,\n ciUpper,\n iterations,\n alpha,\n verdict,\n }\n}\n\nfunction mean(xs: number[]): number {\n if (xs.length === 0) return 0\n let s = 0\n for (const x of xs) s += x\n return s / xs.length\n}\n\nfunction 
resample(xs: number[], rng: () => number): number[] {\n const out = new Array(xs.length)\n for (let i = 0; i < xs.length; i++) out[i] = xs[Math.floor(rng() * xs.length)]\n return out\n}\n\n/** Mulberry32 — fast deterministic PRNG. Stable across runs given the same seed. */\nfunction mulberry32(seed: number): () => number {\n let t = seed >>> 0\n return () => {\n t += 0x6d2b79f5\n let r = t\n r = Math.imul(r ^ (r >>> 15), r | 1)\n r ^= r + Math.imul(r ^ (r >>> 7), r | 61)\n return ((r ^ (r >>> 14)) >>> 0) / 4294967296\n }\n}\n\n/** Stable seed derived from the inputs — same data → same CI bounds. */\nfunction hashSeed(a: number[], b: number[]): number {\n let h = 2166136261\n for (const x of [...a, ...b]) {\n const view = new Float64Array([x])\n const bytes = new Uint8Array(view.buffer)\n for (const byte of bytes) {\n h ^= byte\n h = Math.imul(h, 16777619)\n }\n }\n return h >>> 0\n}\n\n/**\n * Judge-replay promotion gate.\n *\n * The cheap inner-loop judge that drives an evolution run is by definition\n * fast and noisy. When you're about to promote a winning variant to the\n * canonical default, you want a STRONGER judge (a more expensive model, a\n * human grader, a separately-trained reward model) to confirm the win\n * generalises beyond the inner loop.\n *\n * This helper takes raw winner + baseline outputs, scores both through the\n * stronger judge, and applies `bootstrapCi`. ADVANCE means the stronger\n * judge agrees the winner is real with the configured confidence. Doesn't\n * matter what shape your \"output\" is — pass a string, an object, anything\n * the judge can read.\n */\nexport interface JudgeReplayGateArgs<TOutput> {\n baselineOutputs: TOutput[]\n candidateOutputs: TOutput[]\n /** Stronger judge — async to allow LLM calls. Return a 0..N scalar score. */\n judge: (output: TOutput) => Promise<number> | number\n alpha?: number\n iterations?: number\n /** RNG seed for reproducibility. */\n seed?: number\n /** Maximum concurrent judge calls. 
Default 4. */\n judgeConcurrency?: number\n}\n\nexport async function judgeReplayGate<TOutput>(\n args: JudgeReplayGateArgs<TOutput>,\n): Promise<BootstrapResult & { baselineSamples: number; candidateSamples: number }> {\n const concurrency = args.judgeConcurrency ?? 4\n const baselineScores = await scoreAll(args.baselineOutputs, args.judge, concurrency)\n const candidateScores = await scoreAll(args.candidateOutputs, args.judge, concurrency)\n const ci = bootstrapCi(baselineScores, candidateScores, {\n ...(args.alpha !== undefined ? { alpha: args.alpha } : {}),\n ...(args.iterations !== undefined ? { iterations: args.iterations } : {}),\n ...(args.seed !== undefined ? { seed: args.seed } : {}),\n })\n return {\n ...ci,\n baselineSamples: baselineScores.length,\n candidateSamples: candidateScores.length,\n }\n}\n\nasync function scoreAll<TOutput>(\n outputs: TOutput[],\n judge: (output: TOutput) => Promise<number> | number,\n concurrency: number,\n): Promise<number[]> {\n const results: number[] = new Array(outputs.length)\n let next = 0\n async function worker(): Promise<void> {\n while (true) {\n const i = next++\n if (i >= outputs.length) return\n const v = await judge(outputs[i]!)\n results[i] = Number.isFinite(v) ? v : 0\n }\n }\n await Promise.all(Array.from({ length: Math.max(1, concurrency) }, () => worker()))\n return results\n}\n","import type { ReleaseConfidenceScorecard } from './release-confidence'\nimport type { RunRecord } from './run-record'\nimport { summaryTable } from './summary-report'\n\nexport interface RenderReleaseReportOptions {\n title?: string\n runs?: readonly RunRecord[]\n comparator?: string\n traceAnalystFindings?: readonly string[]\n nextActions?: readonly string[]\n}\n\nexport function renderReleaseReport(\n scorecard: ReleaseConfidenceScorecard,\n options: RenderReleaseReportOptions = {},\n): string {\n const title = options.title ?? 
`Release Report: ${scorecard.target}`\n const lines: string[] = []\n lines.push(`# ${title}`)\n lines.push('')\n lines.push(`Status: **${scorecard.status.toUpperCase()}**`)\n lines.push(`Promote: **${scorecard.promote ? 'yes' : 'no'}**`)\n if (scorecard.candidateId) lines.push(`Candidate: \\`${scorecard.candidateId}\\``)\n if (scorecard.baselineId) lines.push(`Baseline: \\`${scorecard.baselineId}\\``)\n lines.push('')\n lines.push(scorecard.summary)\n lines.push('')\n\n lines.push('## Metrics')\n lines.push('')\n lines.push('| Metric | Value |')\n lines.push('|---|---:|')\n lines.push(`| Scenarios | ${scorecard.metrics.scenarioCount} |`)\n lines.push(`| Search runs | ${scorecard.metrics.searchRuns} |`)\n lines.push(`| Holdout runs | ${scorecard.metrics.holdoutRuns} |`)\n lines.push(`| Pass rate | ${pct(scorecard.metrics.passRate)} |`)\n lines.push(`| Mean score | ${num(scorecard.metrics.meanScore)} |`)\n lines.push(`| Search mean | ${num(scorecard.metrics.searchMeanScore)} |`)\n lines.push(`| Holdout mean | ${num(scorecard.metrics.holdoutMeanScore)} |`)\n lines.push(`| Overfit gap | ${num(scorecard.metrics.overfitGap)} |`)\n lines.push(`| Mean cost | $${num(scorecard.metrics.meanCostUsd)} |`)\n lines.push(`| p95 wall time | ${Math.round(scorecard.metrics.p95WallMs)} ms |`)\n lines.push('')\n\n if (scorecard.issues.length > 0) {\n lines.push('## Issues')\n lines.push('')\n for (const issue of scorecard.issues) {\n lines.push(`- **${issue.severity}** \\`${issue.code}\\` (${issue.axis}): ${issue.detail}`)\n }\n lines.push('')\n }\n\n const surfaces = entries(scorecard.metrics.responsibleSurfaceCounts)\n if (surfaces.length > 0) {\n lines.push('## Responsible Surfaces')\n lines.push('')\n for (const [surface, count] of surfaces) lines.push(`- ${surface}: ${count}`)\n lines.push('')\n }\n\n const failures = entries(scorecard.metrics.failureModeCounts)\n if (failures.length > 0) {\n lines.push('## Failure Modes')\n lines.push('')\n for (const [mode, count] of failures) 
lines.push(`- ${mode}: ${count}`)\n lines.push('')\n }\n\n if (options.runs && options.runs.length > 0) {\n lines.push('## Run Summary')\n lines.push('')\n lines.push(\n summaryTable([...options.runs], {\n comparator: options.comparator ?? scorecard.baselineId ?? undefined,\n split: 'holdout',\n }).markdown,\n )\n lines.push('')\n }\n\n if (options.traceAnalystFindings && options.traceAnalystFindings.length > 0) {\n lines.push('## TraceAnalyst Findings')\n lines.push('')\n for (const finding of options.traceAnalystFindings) lines.push(`- ${finding}`)\n lines.push('')\n }\n\n const nextActions = options.nextActions ?? defaultNextActions(scorecard)\n if (nextActions.length > 0) {\n lines.push('## Next Actions')\n lines.push('')\n for (const action of nextActions) lines.push(`- ${action}`)\n lines.push('')\n }\n\n return `${lines.join('\\n').trimEnd()}\\n`\n}\n\nfunction defaultNextActions(scorecard: ReleaseConfidenceScorecard): string[] {\n if (scorecard.promote) return ['Promote the candidate and keep canaries enabled.']\n return scorecard.issues\n .filter((issue) => issue.severity === 'critical')\n .map((issue) => `Resolve ${issue.code}: ${issue.detail}`)\n}\n\nfunction entries(values: Record<string, number>): Array<[string, number]> {\n return Object.entries(values)\n .filter(([, count]) => count > 0)\n .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))\n}\n\nfunction pct(value: number): string {\n return Number.isFinite(value) ? `${(value * 100).toFixed(1)}%` : 'n/a'\n}\n\nfunction num(value: number): string {\n return Number.isFinite(value) ? 
value.toFixed(3) : 'n/a'\n}\n"],"mappings":";;;;;;;;AA2HA,IAAM,qBAA4D;AAAA,EAChE,eAAe;AAAA,EACf,kBAAkB;AAAA,EAClB,eAAe;AAAA,EACf,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,eAAe;AAAA,EACf,gBAAgB,OAAO;AAAA,EACvB,cAAc,OAAO;AAAA,EACrB,uBAAuB;AAAA,EACvB,uBAAuB;AACzB;AAEO,SAAS,wCACd,QACwB;AACxB,SAAO,OAAO,IAAI,CAAC,WAAW;AAAA,IAC5B,YAAY,MAAM;AAAA,IAClB,aAAa,MAAM;AAAA,IACnB,OAAO,MAAM,UAAU,YAAY,YAAY,MAAM,UAAU,QAAQ,QAAQ;AAAA,IAC/E,OAAO,MAAM;AAAA,IACb,IAAI,MAAM;AAAA,IACV,WAAW,MAAM,QAAQ,MAAM,OAAO,KAAK,IAAI,MAAM,MAAM,MAAM,SAAS;AAAA,IAC1E,SAAS,MAAM;AAAA,IACf,YAAY,MAAM;AAAA,IAClB,aAAa,MAAM,QAAQ,kBAAkB;AAAA,IAC7C,KAAK,MAAM;AAAA,IACX,UAAU,MAAM;AAAA,EAClB,EAAE;AACJ;AAEO,SAAS,0BACd,OAC4B;AAC5B,QAAM,aAAa,EAAE,GAAG,oBAAoB,GAAG,MAAM,WAAW;AAChE,QAAM,cAAc,MAAM,eAAe;AACzC,QAAM,OAAO,gBAAgB,MAAM,QAAQ,CAAC,GAAG,aAAa,MAAM,UAAU;AAC5E,QAAM,SAAS,qBAAqB,MAAM,UAAU,CAAC,GAAG,aAAa,MAAM,UAAU;AACrF,QAAM,YAAY,MAAM,aAAa,CAAC;AACtC,QAAM,gBAAgB,MAAM,SAAS,iBAAiB,UAAU;AAChE,QAAM,cAAc,MAAM,SAAS,eAAe,oBAAoB,SAAS;AAC/E,QAAM,eAAe,UAAU,MAAM,QAAQ;AAC7C,QAAM,gBAAgB,UAAU,MAAM,SAAS;AAC/C,QAAM,YAAY,CAAC,GAAG,cAAc,GAAG,aAAa;AACpD,QAAM,cAAc,OAAO,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,cAAc;AACpE,QAAM,gBAAgB,UAAU,SAAS,IAAI,YAAY;AACzD,QAAM,aAAa,KAAK,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ,EAAE;AAC/D,QAAM,cAAc,KAAK,OAAO,CAAC,MAAM,EAAE,aAAa,SAAS,EAAE;AACjE,QAAM,kBAAkB,KAAK,YAAY;AACzC,QAAM,mBAAmB,KAAK,aAAa;AAC3C,QAAM,UAAoC;AAAA,IACxC;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,SAAS,MAAM,QAAQ,WAAW,qBAAqB;AAAA,IACjE,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA;AAAA,IACA,YAAY,SAAS,iBAAiB,gBAAgB;AAAA,IACtD,aAAa,KAAK;AAAA,MAChB,GAAG,KAAK,IAAI,CAAC,MAAM,EAAE,OAAO;AAAA,MAC5B,GAAG,OAAO,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,cAAc;AAAA,IACvD,CAAC;AAAA,IACD,WAAW;AAAA,MACT,CAAC,GAAG,KAAK,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,OAAO,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,cAAc,CAAC;AAAA,MACxF;AAAA,IACF;AAAA,IACA,YAAY,WAAW,MAAM,QAAQ,WAAW,qBAAqB,EAAE;AAAA,IACvE,iBAAiB,WAAW,MAAM,QAAQ,WAAW,qBAAqB,EAAE;AAAA,MAC1E,CAAC,QAAQ,IAAI;AAAA,IACf,EAAE;AAAA,IACF,kBAAkB,OAAO,OAA
O,CAAC,MAAM,EAAE,cAAc,CAAC,EAAE;AAAA,IAC1D,iBAAiB,OAAO,OAAO,CAAC,OAAO,EAAE,aAAa,KAAK,CAAC,EAAE;AAAA,IAC9D;AAAA,IACA,cAAc,aAAa,SAAS;AAAA,IACpC,mBAAmB,kBAAkB,MAAM,QAAQ,WAAW,qBAAqB;AAAA,IACnF,0BAA0B,yBAAyB,MAAM;AAAA,EAC3D;AAEA,QAAM,SAAmC,CAAC;AAC1C,cAAY,OAAO,YAAY,SAAS,MAAM;AAC9C,eAAa,YAAY,SAAS,MAAM;AACxC,sBAAoB,MAAM,gBAAgB,MAAM,YAAY,SAAS,MAAM;AAC3E,mBAAiB,YAAY,SAAS,MAAM;AAC5C,kBAAgB,YAAY,SAAS,MAAM;AAE3C,QAAM,OAAO,UAAU,SAAS,YAAY,MAAM,gBAAgB,MAAM,MAAM;AAC9E,QAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU,IACvD,SACA,OAAO,SAAS,IACd,SACA;AAEN,SAAO;AAAA,IACL,QAAQ,MAAM;AAAA,IACd;AAAA,IACA,YAAY,MAAM,cAAc;AAAA,IAChC;AAAA,IACA,SAAS,WAAW,WAAW,MAAM,eAAe,MAAM,aAAa,UAAU;AAAA,IACjF;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,MAAM,WAAW;AAAA,IAC1B,cAAc,MAAM,gBAAgB;AAAA,IACpC,SAAS,cAAc,MAAM,QAAQ,QAAQ,SAAS,MAAM;AAAA,EAC9D;AACF;AAEO,SAAS,wBAAwB,OAA2D;AACjG,QAAM,YAAY,0BAA0B,KAAK;AACjD,MAAI,UAAU,WAAW,QAAQ;AAC/B,UAAM,IAAI,kBAAkB,UAAU,OAAO;AAAA,EAC/C;AACA,SAAO;AACT;AAEA,SAAS,gBACP,MACA,aACA,YACa;AACb,MAAI,YAAa,QAAO,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,WAAW;AACxE,MAAI,WAAY,QAAO,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAU;AACtE,SAAO,CAAC,GAAG,IAAI;AACjB;AAEA,SAAS,qBACP,QACA,aACA,YACwB;AACxB,MAAI;AACF,WAAO,OAAO,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAa,EAAE,gBAAgB,WAAW;AAC1F,MAAI;AACF,WAAO,OAAO,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAa,EAAE,gBAAgB,UAAU;AACzF,SAAO,CAAC,GAAG,MAAM;AACnB;AAEA,SAAS,YACP,OACA,YACA,SACA,QACM;AACN,MAAI,WAAW,iBAAiB,CAAC,MAAM,YAAY,MAAM,WAAW,UAAU,OAAO,GAAG;AACtF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACA,MAAI,QAAQ,gBAAgB,WAAW,kBAAkB;AACvD,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,GAAG,QAAQ,aAAa,sBAAsB,WAAW,gBAAgB;AAAA,IACnF,CAAC;AAAA,EACH;AACA,MAAI,WAAW,kBAAkB,QAAQ,YAAY,YAAY,GAAG;AAClE,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,SAAS,aACP,YACA,SACA,QACM;AACN,MAAI,QAAQ,aAAa,WAAW,eAAe;AACjD,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,GAAG,QAAQ,UAAU,
wBAAwB,WAAW,aAAa;AAAA,IAC/E,CAAC;AAAA,EACH;AACA,MAAI,QAAQ,WAAW,WAAW,aAAa;AAC7C,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,YAAY,IAAI,QAAQ,QAAQ,CAAC,MAAM,IAAI,WAAW,WAAW,CAAC;AAAA,IAC5E,CAAC;AAAA,EACH;AACA,MAAI,QAAQ,YAAY,WAAW,cAAc;AAC/C,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,aAAa,IAAI,QAAQ,SAAS,CAAC,MAAM,IAAI,WAAW,YAAY,CAAC;AAAA,IAC/E,CAAC;AAAA,EACH;AACF;AAEA,SAAS,oBACP,cACA,YACA,SACA,QACM;AACN,MAAI,WAAW,kBAAkB,QAAQ,cAAc,WAAW,gBAAgB;AAChF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,GAAG,QAAQ,WAAW,yBAAyB,WAAW,cAAc;AAAA,IAClF,CAAC;AAAA,EACH;AACA,MAAI,OAAO,SAAS,QAAQ,UAAU,KAAK,QAAQ,aAAa,WAAW,eAAe;AACxF,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,sBAAsB,IAAI,QAAQ,UAAU,CAAC,MAAM,IAAI,WAAW,aAAa,CAAC;AAAA,IAC1F,CAAC;AAAA,EACH;AACA,MAAI,gBAAgB,CAAC,aAAa,SAAS;AACzC,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM,QAAQ,aAAa,iBAAiB,QAAQ;AAAA,MACpD,QAAQ,aAAa;AAAA,IACvB,CAAC;AAAA,EACH;AACF;AAEA,SAAS,iBACP,YACA,SACA,QACM;AACN,MAAI,CAAC,WAAW,sBAAuB;AACvC,MAAI,QAAQ,aAAa,QAAQ,iBAAiB;AAChD,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,GAAG,QAAQ,aAAa,QAAQ,eAAe;AAAA,IACzD,CAAC;AAAA,EACH;AACF;AAEA,SAAS,gBACP,YACA,SACA,QACM;AACN,MAAI,QAAQ,cAAc,WAAW,gBAAgB;AACnD,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,eAAe,IAAI,QAAQ,WAAW,CAAC,MAAM,IAAI,WAAW,cAAc,CAAC;AAAA,IACrF,CAAC;AAAA,EACH;AACA,MAAI,QAAQ,YAAY,WAAW,cAAc;AAC/C,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,aAAa,IAAI,QAAQ,SAAS,CAAC,MAAM,IAAI,WAAW,YAAY,CAAC;AAAA,IAC/E,CAAC;AAAA,EACH;AACF;AAEA,SAAS,UACP,SACA,YACA,cACA,QACyB;AACzB,SAAO;AAAA,IACL;AAAA,MACE;AAAA,MACA;AAAA,MACA,QAAQ,QAAQ,gBAAgB,KAAK,IAAI,GAAG,WAAW,gBAAgB,CAAC;AAAA,MACxE,GAAG,QAAQ,aAAa,uBAAuB,QAAQ,YAAY,OAAO;AAAA,IAC5E;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA,KAAK,IAAI,QAAQ,UAAU,QAAQ,SAAS;AAAA,MAC5C,YAAY,IAAI,QAAQ,QAAQ,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC;AAAA,IACvE;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,M
ACA,gBAAgB,CAAC,aAAa,UAC1B,IACA,SAAS,QAAQ,YAAY,WAAW,aAAa;AAAA,MACzD,eAAe,QAAQ,WAAW,eAAe,IAAI,QAAQ,UAAU,CAAC;AAAA,IAC1E;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA,QAAQ,eAAe,IAAI,IAAI,QAAQ,kBAAkB,QAAQ;AAAA,MACjE,mBAAmB,QAAQ,eAAe,IAAI,QAAQ,UAAU;AAAA,IAClE;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA,gBAAgB,SAAS,UAAU;AAAA,MACnC,eAAe,IAAI,QAAQ,WAAW,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC;AAAA,IAC7E;AAAA,EACF;AACF;AAEA,SAAS,KACP,MACA,QACA,OACA,QACuB;AACvB,QAAM,MAAM,OAAO,OAAO,CAAC,MAAM,EAAE,SAAS,IAAI;AAChD,QAAM,SAAS,IAAI,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU,IACpD,SACA,IAAI,SAAS,IACX,SACA;AACN,SAAO,EAAE,MAAM,QAAQ,OAAO,QAAQ,KAAK,GAAG,OAAO;AACvD;AAEA,SAAS,oBAAoB,WAAqE;AAChG,QAAM,SAAuC,EAAE,OAAO,GAAG,KAAK,GAAG,MAAM,GAAG,SAAS,EAAE;AACrF,aAAW,YAAY,UAAW,QAAO,SAAS,SAAS,OAAO;AAClE,SAAO;AACT;AAEA,SAAS,aAAa,WAA+D;AACnF,QAAM,MAA8B,CAAC;AACrC,aAAW,YAAY,WAAW;AAChC,UAAM,SAAS,SAAS,MAAM,UAAU,SAAS,MAAM,YAAY;AACnE,QAAI,MAAM,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,EACrC;AACA,SAAO;AACT;AAEA,SAAS,kBACP,MACA,QACA,WACwB;AACxB,QAAM,MAA8B,CAAC;AACrC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,QAAI,IAAI,eAAgB,UAAU,UAAa,QAAQ,WAAY;AACjE,YAAM,OAAO,IAAI,eAAe;AAChC,UAAI,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AACA,aAAW,SAAS,QAAQ;AAC1B,QACE,MAAM,eACN,MAAM,OAAO,SACZ,MAAM,UAAU,UAAa,MAAM,QAAQ,WAC5C;AACA,YAAM,OAAO,MAAM,gBAAgB,MAAM,OAAO,QAAQ,WAAW;AACnE,UAAI,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,yBAAyB,QAAiE;AACjG,QAAM,MAA8B,CAAC;AACrC,aAAW,SAAS,QAAQ;AAC1B,eAAW,OAAO,MAAM,OAAO,CAAC,GAAG;AACjC,YAAM,UAAU,IAAI,sBAAsB;AAC1C,UAAI,OAAO,KAAK,IAAI,OAAO,KAAK,KAAK;AAAA,IACvC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WACP,MACA,QACA,WAC4B;AAC5B,QAAM,MAAkC,CAAC;AACzC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,QAAI,IAAI,eAAgB,UAAU,UAAa,QAAQ,WAAY;AACjE,YAAM,YAAY,IAAI,QAAQ,IAAI;AAClC,UAAI,KAAK,EAAE,QAAQ,OAAO,cAAc,YAAY,YAAY,EAAE,CAAC;AAAA,IACrE;AAAA,EACF;AACA,aAAW,SAAS,QAAQ;AAC1B,QACE,MAAM,eACN,MAAM,OAAO,SACZ,MAAM,UAAU,UAAa,MAAM,QAAQ,WAC5C;AACA,UAAI,KAAK,EAAE,SAAS,MAAM,KAAK,UAAU,KA
AK,EAAE,CAAC;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,SACP,MACA,QACA,WACQ;AACR,QAAM,WAAW;AAAA,IACf,GAAG,KAAK,IAAI,CAAC,QAAQ;AACnB,YAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,aAAO,CAAC,IAAI,eAAe,UAAU,UAAa,SAAS;AAAA,IAC7D,CAAC;AAAA,IACD,GAAG,OAAO;AAAA,MACR,CAAC,UAAU,MAAM,OAAO,UAAU,MAAM,UAAU,UAAa,MAAM,SAAS;AAAA,IAChF;AAAA,EACF;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,SAAO,SAAS,OAAO,OAAO,EAAE,SAAS,SAAS;AACpD;AAEA,SAAS,UAAU,MAA4B,OAA8B;AAC3E,SAAO,KACJ,OAAO,CAAC,QAAQ,IAAI,aAAa,KAAK,EACtC,IAAI,CAAC,QAAS,UAAU,YAAY,IAAI,QAAQ,eAAe,IAAI,QAAQ,WAAY,EACvF,OAAO,cAAc;AAC1B;AAEA,SAAS,KAAK,IAA+B;AAC3C,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC,IAAI,GAAG;AAChD;AAEA,SAAS,WAAW,IAAuB,GAAmB;AAC5D,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,SAAO,OAAO,KAAK,IAAI,OAAO,SAAS,GAAG,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,OAAO,MAAM,IAAI,CAAC,CAAC,CAAC;AAC1F;AAEA,SAAS,eAAe,OAAiC;AACvD,SAAO,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAC3D;AAEA,SAAS,SAAS,GAAW,GAAmB;AAC9C,MAAI,CAAC,OAAO,SAAS,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO;AAC9D,SAAO,IAAI;AACb;AAEA,SAAS,SAAS,KAAa,QAAwB;AACrD,MAAI,CAAC,OAAO,SAAS,GAAG,EAAG,QAAO;AAClC,MAAI,UAAU,EAAG,QAAO,OAAO,IAAI,IAAI;AACvC,SAAO,QAAQ,IAAI,KAAK,IAAI,GAAG,GAAG,IAAI,MAAM;AAC9C;AAEA,SAAS,gBACP,SACA,YACQ;AACR,QAAM,OACJ,OAAO,SAAS,WAAW,cAAc,KAAK,OAAO,SAAS,QAAQ,WAAW,IAC7E,QAAQ,WAAW,iBAAiB,KAAK,IAAI,QAAQ,aAAa,KAAK,CAAC,IACxE;AACN,QAAM,UACJ,OAAO,SAAS,WAAW,YAAY,KAAK,OAAO,SAAS,QAAQ,SAAS,IACzE,QAAQ,WAAW,eAAe,KAAK,IAAI,QAAQ,WAAW,KAAK,CAAC,IACpE;AACN,SAAO,KAAK,IAAI,MAAM,OAAO;AAC/B;AAEA,SAAS,QAAQ,GAAmB;AAClC,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO;AAChC,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,CAAC,CAAC;AACnC;AAEA,SAAS,cACP,QACA,QACA,SACA,QACQ;AACR,QAAM,SAAS,sBAAsB,MAAM,KAAK,MAAM;AACtD,QAAM,aAAa,aAAa,QAAQ,aAAa,eAAe,QAAQ,UAAU,gBAAgB,QAAQ,WAAW,aAAa,IAAI,QAAQ,QAAQ,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC;AAC/L,MAAI,OAAO,WAAW,EAAG,QAAO,GAAG,MAAM,KAAK,UAAU;AACxD,SAAO,GAAG,MAAM,KAAK,UAAU,YAAY,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KA
AK,GAAG,CAAC;AAChF;AAEA,SAAS,IAAI,GAAmB;AAC9B,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO,CAAC;AACxC,SAAO,EAAE,QAAQ,CAAC;AACpB;;;ACziBO,SAAS,YACd,UACA,WACA,UAA4B,CAAC,GACZ;AACjB,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,WAAW,QAAQ,mBAAmB;AAC5C,QAAM,MAAM,WAAW,QAAQ,QAAQ,SAAS,UAAU,SAAS,CAAC;AAEpE,QAAM,eAAeA,MAAK,QAAQ;AAClC,QAAM,gBAAgBA,MAAK,SAAS;AACpC,QAAM,QAAQ,gBAAgB;AAE9B,MACE,SAAS,SAAS,UAAU,SAAS,YACrC,SAAS,WAAW,KACpB,UAAU,WAAW,GACrB;AACA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,SAAS;AAAA,MACT,YAAY;AAAA,MACZ;AAAA,MACA,SAAS;AAAA,IACX;AAAA,EACF;AAEA,QAAM,SAAmB,IAAI,MAAM,UAAU;AAC7C,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,UAAM,YAAY,SAAS,UAAU,GAAG;AACxC,UAAM,YAAY,SAAS,WAAW,GAAG;AACzC,WAAO,CAAC,IAAIA,MAAK,SAAS,IAAIA,MAAK,SAAS;AAAA,EAC9C;AACA,SAAO,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3B,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,UAAU;AACpD,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,UAAU,IAAI;AAC5D,QAAM,UAAU,OAAO,KAAK,IAAI,GAAG,QAAQ,CAAC;AAC5C,QAAM,UAAU,OAAO,KAAK,IAAI,aAAa,GAAG,QAAQ,CAAC;AAEzD,MAAI;AACJ,MAAI,UAAU,EAAG,WAAU;AAAA,WAClB,UAAU,EAAG,WAAU;AAAA,WACvB,SAAS,EAAG,WAAU;AAAA,MAC1B,WAAU;AAEf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAASA,MAAK,IAAsB;AAClC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,MAAI,IAAI;AACR,aAAW,KAAK,GAAI,MAAK;AACzB,SAAO,IAAI,GAAG;AAChB;AAEA,SAAS,SAAS,IAAc,KAA6B;AAC3D,QAAM,MAAM,IAAI,MAAM,GAAG,MAAM;AAC/B,WAAS,IAAI,GAAG,IAAI,GAAG,QAAQ,IAAK,KAAI,CAAC,IAAI,GAAG,KAAK,MAAM,IAAI,IAAI,GAAG,MAAM,CAAC;AAC7E,SAAO;AACT;AAGA,SAAS,WAAW,MAA4B;AAC9C,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,SAAK;AACL,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;AAGA,SAAS,SAAS,GAAa,GAAqB;AAClD,MAAI,IAAI;AACR,aAAW,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG;AAC5B,UAAM,OAAO,IAAI,aAAa,CAAC,CAAC,CAAC;AACjC,UAAM,QAAQ,IAAI,WAAW,KAAK,MAAM;AACxC,eAAW,QAAQ,OAAO;AACxB,WAAK;AACL,UAAI,KAAK,KAAK,GAAG,QAAQ;AAAA,IAC3B;AAAA,EACF;AACA,SAAO,MAAM;AACf;AA8BA,eAA
sB,gBACpB,MACkF;AAClF,QAAM,cAAc,KAAK,oBAAoB;AAC7C,QAAM,iBAAiB,MAAM,SAAS,KAAK,iBAAiB,KAAK,OAAO,WAAW;AACnF,QAAM,kBAAkB,MAAM,SAAS,KAAK,kBAAkB,KAAK,OAAO,WAAW;AACrF,QAAM,KAAK,YAAY,gBAAgB,iBAAiB;AAAA,IACtD,GAAI,KAAK,UAAU,SAAY,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IACxD,GAAI,KAAK,eAAe,SAAY,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,IACvE,GAAI,KAAK,SAAS,SAAY,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,EACvD,CAAC;AACD,SAAO;AAAA,IACL,GAAG;AAAA,IACH,iBAAiB,eAAe;AAAA,IAChC,kBAAkB,gBAAgB;AAAA,EACpC;AACF;AAEA,eAAe,SACb,SACA,OACA,aACmB;AACnB,QAAM,UAAoB,IAAI,MAAM,QAAQ,MAAM;AAClD,MAAI,OAAO;AACX,iBAAe,SAAwB;AACrC,WAAO,MAAM;AACX,YAAM,IAAI;AACV,UAAI,KAAK,QAAQ,OAAQ;AACzB,YAAM,IAAI,MAAM,MAAM,QAAQ,CAAC,CAAE;AACjC,cAAQ,CAAC,IAAI,OAAO,SAAS,CAAC,IAAI,IAAI;AAAA,IACxC;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,GAAG,WAAW,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AAClF,SAAO;AACT;;;ACvNO,SAAS,oBACd,WACA,UAAsC,CAAC,GAC/B;AACR,QAAM,QAAQ,QAAQ,SAAS,mBAAmB,UAAU,MAAM;AAClE,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,EAAE;AACvB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,aAAa,UAAU,OAAO,YAAY,CAAC,IAAI;AAC1D,QAAM,KAAK,cAAc,UAAU,UAAU,QAAQ,IAAI,IAAI;AAC7D,MAAI,UAAU,YAAa,OAAM,KAAK,gBAAgB,UAAU,WAAW,IAAI;AAC/E,MAAI,UAAU,WAAY,OAAM,KAAK,eAAe,UAAU,UAAU,IAAI;AAC5E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,UAAU,OAAO;AAC5B,QAAM,KAAK,EAAE;AAEb,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,oBAAoB;AAC/B,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,iBAAiB,UAAU,QAAQ,aAAa,IAAI;AAC/D,QAAM,KAAK,mBAAmB,UAAU,QAAQ,UAAU,IAAI;AAC9D,QAAM,KAAK,oBAAoB,UAAU,QAAQ,WAAW,IAAI;AAChE,QAAM,KAAK,iBAAiB,IAAI,UAAU,QAAQ,QAAQ,CAAC,IAAI;AAC/D,QAAM,KAAK,kBAAkB,IAAI,UAAU,QAAQ,SAAS,CAAC,IAAI;AACjE,QAAM,KAAK,mBAAmB,IAAI,UAAU,QAAQ,eAAe,CAAC,IAAI;AACxE,QAAM,KAAK,oBAAoB,IAAI,UAAU,QAAQ,gBAAgB,CAAC,IAAI;AAC1E,QAAM,KAAK,mBAAmB,IAAI,UAAU,QAAQ,UAAU,CAAC,IAAI;AACnE,QAAM,KAAK,kBAAkB,IAAI,UAAU,QAAQ,WAAW,CAAC,IAAI;AACnE,QAAM,KAAK,qBAAqB,KAAK,MAAM,UAAU,QAAQ,SAAS,CAAC,OAAO;AAC9E,QAAM,KAAK,EAAE;AAEb,MAAI,UAAU,OAAO,SAAS,GAAG;AAC/B,UAAM,KAAK,WAAW;AACtB,UAAM,KAAK,EAAE;AACb,eAAW,SAAS,UAAU,QAAQ;AACpC,YAAM,KAAK,OAAO,MAAM
,QAAQ,QAAQ,MAAM,IAAI,OAAO,MAAM,IAAI,MAAM,MAAM,MAAM,EAAE;AAAA,IACzF;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,WAAW,QAAQ,UAAU,QAAQ,wBAAwB;AACnE,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,yBAAyB;AACpC,UAAM,KAAK,EAAE;AACb,eAAW,CAAC,SAAS,KAAK,KAAK,SAAU,OAAM,KAAK,KAAK,OAAO,KAAK,KAAK,EAAE;AAC5E,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,WAAW,QAAQ,UAAU,QAAQ,iBAAiB;AAC5D,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,kBAAkB;AAC7B,UAAM,KAAK,EAAE;AACb,eAAW,CAAC,MAAM,KAAK,KAAK,SAAU,OAAM,KAAK,KAAK,IAAI,KAAK,KAAK,EAAE;AACtE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,QAAQ,QAAQ,QAAQ,KAAK,SAAS,GAAG;AAC3C,UAAM,KAAK,gBAAgB;AAC3B,UAAM,KAAK,EAAE;AACb,UAAM;AAAA,MACJ,aAAa,CAAC,GAAG,QAAQ,IAAI,GAAG;AAAA,QAC9B,YAAY,QAAQ,cAAc,UAAU,cAAc;AAAA,QAC1D,OAAO;AAAA,MACT,CAAC,EAAE;AAAA,IACL;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,QAAQ,wBAAwB,QAAQ,qBAAqB,SAAS,GAAG;AAC3E,UAAM,KAAK,0BAA0B;AACrC,UAAM,KAAK,EAAE;AACb,eAAW,WAAW,QAAQ,qBAAsB,OAAM,KAAK,KAAK,OAAO,EAAE;AAC7E,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,cAAc,QAAQ,eAAe,mBAAmB,SAAS;AACvE,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,eAAW,UAAU,YAAa,OAAM,KAAK,KAAK,MAAM,EAAE;AAC1D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,SAAO,GAAG,MAAM,KAAK,IAAI,EAAE,QAAQ,CAAC;AAAA;AACtC;AAEA,SAAS,mBAAmB,WAAiD;AAC3E,MAAI,UAAU,QAAS,QAAO,CAAC,kDAAkD;AACjF,SAAO,UAAU,OACd,OAAO,CAAC,UAAU,MAAM,aAAa,UAAU,EAC/C,IAAI,CAAC,UAAU,WAAW,MAAM,IAAI,KAAK,MAAM,MAAM,EAAE;AAC5D;AAEA,SAAS,QAAQ,QAAyD;AACxE,SAAO,OAAO,QAAQ,MAAM,EACzB,OAAO,CAAC,CAAC,EAAE,KAAK,MAAM,QAAQ,CAAC,EAC/B,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC;AAC3D;AAEA,SAAS,IAAI,OAAuB;AAClC,SAAO,OAAO,SAAS,KAAK,IAAI,IAAI,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM;AACnE;AAEA,SAAS,IAAI,OAAuB;AAClC,SAAO,OAAO,SAAS,KAAK,IAAI,MAAM,QAAQ,CAAC,IAAI;AACrD;","names":["mean"]}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// src/trajectory.ts
|
|
2
|
+
/**
 * Build an ordered, depth-annotated view of one run's spans.
 *
 * Spans are grouped by `parentSpanId` and walked depth-first starting from the
 * spans that have no parent, so every parent appears before its children and
 * siblings appear in ascending `startedAt` order. Spans that cannot be reached
 * from a parentless span are not included in `steps`.
 *
 * @param {{ spans: Function, events: Function }} store - Trace store; both
 *   methods are called with `{ runId }` and awaited.
 * @param {string} runId - Identifier of the run to load.
 * @returns {Promise<{
 *   runId: string,
 *   steps: Array<{ index: number, span: object, depth: number, events: object[] }>,
 *   llmTurns: number,
 *   toolCalls: number,
 *   judgeVerdicts: number,
 *   retrievals: number,
 *   totalDurationMs: number
 * }>} The ordered steps, per-kind step counts, and the distance between the
 *   earliest `startedAt` and the latest `endedAt` (falling back to `startedAt`
 *   when `endedAt` is nullish) across all spans returned by the store;
 *   `totalDurationMs` is 0 when there are no steps.
 */
async function buildTrajectory(store, runId) {
  const spans = await store.spans({ runId });
  const events = await store.events({ runId });

  // Group spans by parent id. A missing parent is keyed as `undefined`; `null`
  // is folded into the same key so such spans are still found as roots below.
  const childrenOf = /* @__PURE__ */ new Map();
  for (const span of spans) {
    const parentId = span.parentSpanId ?? undefined;
    const siblings = childrenOf.get(parentId);
    if (siblings) {
      siblings.push(span);
    } else {
      childrenOf.set(parentId, [span]);
    }
  }
  // Sort siblings by start time so the depth-first walk is chronological
  // within each parent.
  for (const siblings of childrenOf.values()) {
    siblings.sort((a, b) => a.startedAt - b.startedAt);
  }

  // Index events by the span they are attached to; events without a spanId
  // are skipped.
  const eventsBySpan = /* @__PURE__ */ new Map();
  for (const event of events) {
    if (!event.spanId) continue;
    const attached = eventsBySpan.get(event.spanId);
    if (attached) {
      attached.push(event);
    } else {
      eventsBySpan.set(event.spanId, [event]);
    }
  }

  const steps = [];
  const walk = (spanId, depth) => {
    for (const child of childrenOf.get(spanId) ?? []) {
      steps.push({
        index: steps.length,
        span: child,
        depth,
        events: eventsBySpan.get(child.spanId) ?? []
      });
      walk(child.spanId, depth + 1);
    }
  };
  walk(void 0, 0);

  const countKind = (kind) => steps.filter((step) => step.span.kind === kind).length;
  const llmTurns = countKind("llm");
  const toolCalls = countKind("tool");
  const judgeVerdicts = countKind("judge");
  const retrievals = countKind("retrieval");

  let totalDurationMs = 0;
  if (steps.length > 0) {
    // Running min/max instead of Math.min(...starts) / Math.max(...ends):
    // spreading one argument per span throws a RangeError on very large runs.
    let earliestStart = Infinity;
    let latestEnd = -Infinity;
    for (const span of spans) {
      earliestStart = Math.min(earliestStart, span.startedAt);
      latestEnd = Math.max(latestEnd, span.endedAt ?? span.startedAt);
    }
    totalDurationMs = latestEnd - earliestStart;
  }

  return { runId, steps, llmTurns, toolCalls, judgeVerdicts, retrievals, totalDurationMs };
}
|
|
45
|
+
|
|
46
|
+
export {
|
|
47
|
+
buildTrajectory
|
|
48
|
+
};
|
|
49
|
+
//# sourceMappingURL=chunk-RZTMDUO7.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/trajectory.ts"],"sourcesContent":["/**\n * Trajectory — ordered, structured view over a run's spans.\n *\n * A pure function `buildTrajectory(store, runId) → Trajectory` returns\n * a topologically ordered list of `TrajectoryStep` with parent-child\n * grouping collapsed into a single line-of-agent-work. Separate\n * analyzers (stuck-loop detection, waste ratio) live in\n * `pipelines/` and consume the trajectory.\n */\n\nimport type { Span, TraceEvent } from './trace/schema'\nimport type { TraceStore } from './trace/store'\n\nexport interface TrajectoryStep {\n index: number\n span: Span\n /** Depth in the span tree from the root. 0 = top-level. */\n depth: number\n /** Events attached to this span. */\n events: TraceEvent[]\n}\n\nexport interface Trajectory {\n runId: string\n steps: TrajectoryStep[]\n llmTurns: number\n toolCalls: number\n judgeVerdicts: number\n retrievals: number\n totalDurationMs: number\n}\n\nexport async function buildTrajectory(store: TraceStore, runId: string): Promise<Trajectory> {\n const spans = await store.spans({ runId })\n const events = await store.events({ runId })\n const childrenOf = new Map<string | undefined, Span[]>()\n for (const s of spans) {\n const arr = childrenOf.get(s.parentSpanId) ?? []\n arr.push(s)\n childrenOf.set(s.parentSpanId, arr)\n }\n // Sort children by startedAt so DFS yields chronological order within siblings.\n for (const arr of childrenOf.values()) arr.sort((a, b) => a.startedAt - b.startedAt)\n\n const eventsBySpan = new Map<string, TraceEvent[]>()\n for (const e of events) {\n if (!e.spanId) continue\n const arr = eventsBySpan.get(e.spanId) ?? []\n arr.push(e)\n eventsBySpan.set(e.spanId, arr)\n }\n\n const steps: TrajectoryStep[] = []\n const walk = (spanId: string | undefined, depth: number): void => {\n const kids = childrenOf.get(spanId) ?? 
[]\n for (const child of kids) {\n steps.push({\n index: steps.length,\n span: child,\n depth,\n events: eventsBySpan.get(child.spanId) ?? [],\n })\n walk(child.spanId, depth + 1)\n }\n }\n walk(undefined, 0)\n\n const llmTurns = steps.filter((s) => s.span.kind === 'llm').length\n const toolCalls = steps.filter((s) => s.span.kind === 'tool').length\n const judgeVerdicts = steps.filter((s) => s.span.kind === 'judge').length\n const retrievals = steps.filter((s) => s.span.kind === 'retrieval').length\n\n let totalDurationMs = 0\n if (steps.length > 0) {\n const starts = spans.map((s) => s.startedAt)\n const ends = spans.map((s) => s.endedAt ?? s.startedAt)\n totalDurationMs = Math.max(...ends) - Math.min(...starts)\n }\n\n return { runId, steps, llmTurns, toolCalls, judgeVerdicts, retrievals, totalDurationMs }\n}\n\n// Re-export core types for convenience so consumers don't import from two paths.\nexport type { Span, TraceEvent } from './trace/schema'\nexport type { TraceStore } from './trace/store'\n"],"mappings":";AAgCA,eAAsB,gBAAgB,OAAmB,OAAoC;AAC3F,QAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,MAAM,CAAC;AACzC,QAAM,SAAS,MAAM,MAAM,OAAO,EAAE,MAAM,CAAC;AAC3C,QAAM,aAAa,oBAAI,IAAgC;AACvD,aAAW,KAAK,OAAO;AACrB,UAAM,MAAM,WAAW,IAAI,EAAE,YAAY,KAAK,CAAC;AAC/C,QAAI,KAAK,CAAC;AACV,eAAW,IAAI,EAAE,cAAc,GAAG;AAAA,EACpC;AAEA,aAAW,OAAO,WAAW,OAAO,EAAG,KAAI,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AAEnF,QAAM,eAAe,oBAAI,IAA0B;AACnD,aAAW,KAAK,QAAQ;AACtB,QAAI,CAAC,EAAE,OAAQ;AACf,UAAM,MAAM,aAAa,IAAI,EAAE,MAAM,KAAK,CAAC;AAC3C,QAAI,KAAK,CAAC;AACV,iBAAa,IAAI,EAAE,QAAQ,GAAG;AAAA,EAChC;AAEA,QAAM,QAA0B,CAAC;AACjC,QAAM,OAAO,CAAC,QAA4B,UAAwB;AAChE,UAAM,OAAO,WAAW,IAAI,MAAM,KAAK,CAAC;AACxC,eAAW,SAAS,MAAM;AACxB,YAAM,KAAK;AAAA,QACT,OAAO,MAAM;AAAA,QACb,MAAM;AAAA,QACN;AAAA,QACA,QAAQ,aAAa,IAAI,MAAM,MAAM,KAAK,CAAC;AAAA,MAC7C,CAAC;AACD,WAAK,MAAM,QAAQ,QAAQ,CAAC;AAAA,IAC9B;AAAA,EACF;AACA,OAAK,QAAW,CAAC;AAEjB,QAAM,WAAW,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,SAAS,KAAK,EAAE;AAC5D,QAAM,YAAY,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,SAAS,MAAM,EAAE;A
AC9D,QAAM,gBAAgB,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,SAAS,OAAO,EAAE;AACnE,QAAM,aAAa,MAAM,OAAO,CAAC,MAAM,EAAE,KAAK,SAAS,WAAW,EAAE;AAEpE,MAAI,kBAAkB;AACtB,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,SAAS,MAAM,IAAI,CAAC,MAAM,EAAE,SAAS;AAC3C,UAAM,OAAO,MAAM,IAAI,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS;AACtD,sBAAkB,KAAK,IAAI,GAAG,IAAI,IAAI,KAAK,IAAI,GAAG,MAAM;AAAA,EAC1D;AAEA,SAAO,EAAE,OAAO,OAAO,UAAU,WAAW,eAAe,YAAY,gBAAgB;AACzF;","names":[]}
|