@tangle-network/agent-eval 0.23.1 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +80 -0
- package/README.md +141 -79
- package/dist/baseline-4R5deP0N.d.ts +108 -0
- package/dist/benchmarks/index.d.ts +3 -2
- package/dist/benchmarks/index.js +1 -1
- package/dist/builder-eval/index.d.ts +249 -0
- package/dist/builder-eval/index.js +391 -0
- package/dist/builder-eval/index.js.map +1 -0
- package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
- package/dist/chunk-2A5XJB43.js.map +1 -0
- package/dist/chunk-47X6LRCE.js +76 -0
- package/dist/chunk-47X6LRCE.js.map +1 -0
- package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
- package/dist/chunk-4F5DQN55.js.map +1 -0
- package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
- package/dist/chunk-4S4BM3QQ.js.map +1 -0
- package/dist/chunk-5BKGXME7.js +65 -0
- package/dist/chunk-5BKGXME7.js.map +1 -0
- package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
- package/dist/chunk-6QDKWHLS.js.map +1 -0
- package/dist/chunk-I4MBDTY5.js +272 -0
- package/dist/chunk-I4MBDTY5.js.map +1 -0
- package/dist/chunk-K2TPS5LB.js +569 -0
- package/dist/chunk-K2TPS5LB.js.map +1 -0
- package/dist/chunk-KKHDIONI.js +414 -0
- package/dist/chunk-KKHDIONI.js.map +1 -0
- package/dist/chunk-KMPRBJK4.js +74 -0
- package/dist/chunk-KMPRBJK4.js.map +1 -0
- package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
- package/dist/chunk-KTGTIOFD.js.map +1 -0
- package/dist/chunk-LSH4MMOZ.js +838 -0
- package/dist/chunk-LSH4MMOZ.js.map +1 -0
- package/dist/chunk-NG236HPC.js +57 -0
- package/dist/chunk-NG236HPC.js.map +1 -0
- package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
- package/dist/chunk-NLMNWKVM.js.map +1 -0
- package/dist/chunk-NU65VQ7M.js +99 -0
- package/dist/chunk-NU65VQ7M.js.map +1 -0
- package/dist/chunk-OHEPNJQN.js +554 -0
- package/dist/chunk-OHEPNJQN.js.map +1 -0
- package/dist/chunk-OWLAAMME.js +250 -0
- package/dist/chunk-OWLAAMME.js.map +1 -0
- package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
- package/dist/chunk-PC4UYEBM.js.map +1 -0
- package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
- package/dist/chunk-RAF443UI.js.map +1 -0
- package/dist/chunk-RZTMDUO7.js +49 -0
- package/dist/chunk-RZTMDUO7.js.map +1 -0
- package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
- package/dist/chunk-SESZDQPX.js.map +1 -0
- package/dist/{chunk-6KQG5HAH.js → chunk-SY6WAAAD.js} +84 -71
- package/dist/chunk-SY6WAAAD.js.map +1 -0
- package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
- package/dist/chunk-TVVP3ZZQ.js.map +1 -0
- package/dist/{chunk-VQQSPGSM.js → chunk-VRJVTXRV.js} +169 -111
- package/dist/chunk-VRJVTXRV.js.map +1 -0
- package/dist/chunk-WWYCWKUM.js +196 -0
- package/dist/chunk-WWYCWKUM.js.map +1 -0
- package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
- package/dist/chunk-YRZ4M5GS.js.map +1 -0
- package/dist/chunk-ZN274SWR.js +613 -0
- package/dist/chunk-ZN274SWR.js.map +1 -0
- package/dist/cli.js +10 -6
- package/dist/cli.js.map +1 -1
- package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
- package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
- package/dist/control.d.ts +8 -6
- package/dist/control.js +10 -7
- package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
- package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
- package/dist/errors-BZ9sTdz7.d.ts +70 -0
- package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
- package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
- package/dist/governance/index.d.ts +5 -0
- package/dist/governance/index.js +18 -0
- package/dist/governance/index.js.map +1 -0
- package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
- package/dist/index-Oj9fAPPN.d.ts +270 -0
- package/dist/index.d.ts +1866 -3151
- package/dist/index.js +5457 -7809
- package/dist/index.js.map +1 -1
- package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
- package/dist/knowledge/index.d.ts +102 -0
- package/dist/knowledge/index.js +18 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/meta-eval/index.d.ts +99 -0
- package/dist/meta-eval/index.js +324 -0
- package/dist/meta-eval/index.js.map +1 -0
- package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +11 -8
- package/dist/optimization.js +11 -9
- package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
- package/dist/pipelines/index.d.ts +172 -0
- package/dist/pipelines/index.js +409 -0
- package/dist/pipelines/index.js.map +1 -0
- package/dist/prm/index.d.ts +99 -0
- package/dist/prm/index.js +222 -0
- package/dist/prm/index.js.map +1 -0
- package/dist/query-DODUYdPg.d.ts +30 -0
- package/dist/release-report-TDPn1cxq.d.ts +292 -0
- package/dist/replay-BL96gCEP.d.ts +226 -0
- package/dist/reporting.d.ts +10 -295
- package/dist/reporting.js +10 -6
- package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-CUOiGcGv.d.ts} +148 -146
- package/dist/rl.d.ts +1762 -8
- package/dist/rl.js +2035 -58
- package/dist/rl.js.map +1 -1
- package/dist/rubric-D5tjHNJQ.d.ts +72 -0
- package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
- package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
- package/dist/sequential-Dgz1n51-.d.ts +139 -0
- package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
- package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-BXGs_9V0.d.ts} +3 -76
- package/dist/telemetry/file.js +4 -1
- package/dist/telemetry/file.js.map +1 -1
- package/dist/telemetry/index.js +57 -57
- package/dist/telemetry/index.js.map +1 -1
- package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
- package/dist/traces.d.ts +142 -387
- package/dist/traces.js +1302 -40
- package/dist/traces.js.map +1 -1
- package/dist/trajectory-CnoBo-JY.d.ts +32 -0
- package/dist/wire/index.d.ts +22 -22
- package/dist/wire/index.js +4 -3
- package/package.json +44 -18
- package/dist/chunk-42I2QC2L.js.map +0 -1
- package/dist/chunk-5IIQKMD5.js.map +0 -1
- package/dist/chunk-6KQG5HAH.js.map +0 -1
- package/dist/chunk-6M774GY6.js.map +0 -1
- package/dist/chunk-7EAUOUQS.js.map +0 -1
- package/dist/chunk-AXHNWLIX.js.map +0 -1
- package/dist/chunk-EXGR4XEM.js.map +0 -1
- package/dist/chunk-IOXMGMHQ.js.map +0 -1
- package/dist/chunk-KAO3Q65R.js.map +0 -1
- package/dist/chunk-LZKIOBG2.js +0 -2026
- package/dist/chunk-LZKIOBG2.js.map +0 -1
- package/dist/chunk-QBW3YBTR.js.map +0 -1
- package/dist/chunk-QUKKGHTZ.js.map +0 -1
- package/dist/chunk-SQQLHODJ.js.map +0 -1
- package/dist/chunk-V5QSWN7L.js +0 -1310
- package/dist/chunk-V5QSWN7L.js.map +0 -1
- package/dist/chunk-VQQSPGSM.js.map +0 -1
- package/dist/chunk-XPHOZPOM.js +0 -1947
- package/dist/chunk-XPHOZPOM.js.map +0 -1
- package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
- package/dist/index-ekBXweiQ.d.ts +0 -1894
- package/dist/sequential-DgU2mFsE.d.ts +0 -304
|
@@ -152,95 +152,7 @@ function makeRng(seed) {
|
|
|
152
152
|
};
|
|
153
153
|
}
|
|
154
154
|
|
|
155
|
-
// src/sequential.ts
|
|
156
|
-
function pairedEvalueSequence(deltas, opts = {}) {
|
|
157
|
-
const c = opts.bound ?? 1;
|
|
158
|
-
const alpha = opts.alpha ?? 0.05;
|
|
159
|
-
const initialShrink = opts.initialBetShrinkage ?? 0.5;
|
|
160
|
-
const rope = opts.rope ?? null;
|
|
161
|
-
if (c <= 0) throw new Error("pairedEvalueSequence: bound must be > 0");
|
|
162
|
-
if (alpha <= 0 || alpha >= 1) throw new Error("pairedEvalueSequence: alpha must be in (0,1)");
|
|
163
|
-
if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {
|
|
164
|
-
throw new Error("pairedEvalueSequence: rope must satisfy low \u2264 high");
|
|
165
|
-
}
|
|
166
|
-
const steps = [];
|
|
167
|
-
let clipped = false;
|
|
168
|
-
let evalue = 1;
|
|
169
|
-
let decisionFiredAt = null;
|
|
170
|
-
let sum = 0;
|
|
171
|
-
let sumSq = 0;
|
|
172
|
-
let count = 0;
|
|
173
|
-
for (let i = 0; i < deltas.length; i++) {
|
|
174
|
-
let d = deltas[i];
|
|
175
|
-
if (d < -c || d > c) {
|
|
176
|
-
d = Math.max(-c, Math.min(c, d));
|
|
177
|
-
clipped = true;
|
|
178
|
-
}
|
|
179
|
-
const muHat = count === 0 ? 0 : sum / count;
|
|
180
|
-
const varHat = count === 0 ? c * c : Math.max(1e-12, sumSq / count - muHat * muHat);
|
|
181
|
-
const t = i + 1;
|
|
182
|
-
const shrink = initialShrink * Math.min(1, count / 32);
|
|
183
|
-
let lambda = muHat / (varHat + c * c) * shrink;
|
|
184
|
-
const lambdaMax = 0.99 / c;
|
|
185
|
-
if (lambda > lambdaMax) lambda = lambdaMax;
|
|
186
|
-
if (lambda < -lambdaMax) lambda = -lambdaMax;
|
|
187
|
-
evalue = evalue * (1 + lambda * d);
|
|
188
|
-
if (!Number.isFinite(evalue) || evalue < 0) evalue = 0;
|
|
189
|
-
sum += d;
|
|
190
|
-
sumSq += d * d;
|
|
191
|
-
count += 1;
|
|
192
|
-
const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300));
|
|
193
|
-
const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha);
|
|
194
|
-
let decision = "continue";
|
|
195
|
-
if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = "equivalent";
|
|
196
|
-
else if (evalue >= 2 / alpha && muHat > 0) decision = "promote_now";
|
|
197
|
-
else if (evalue >= 2 / alpha && muHat < 0) decision = "reject_now";
|
|
198
|
-
else if (rope && cs.high < rope.low) decision = "reject_now";
|
|
199
|
-
if (decision !== "continue" && decisionFiredAt === null) decisionFiredAt = t;
|
|
200
|
-
steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision });
|
|
201
|
-
}
|
|
202
|
-
const finalDecision = steps.length === 0 ? "continue" : steps[steps.length - 1].decision;
|
|
203
|
-
return { steps, finalDecision, decisionFiredAt, clipped };
|
|
204
|
-
}
|
|
205
|
-
function evaluateInterimReleaseConfidence(input) {
|
|
206
|
-
const candidates = input.deltaSeries.map((s) => {
|
|
207
|
-
const seq = pairedEvalueSequence(s.deltas, {
|
|
208
|
-
alpha: input.alpha,
|
|
209
|
-
bound: input.bound,
|
|
210
|
-
rope: input.rope
|
|
211
|
-
});
|
|
212
|
-
const last = seq.steps[seq.steps.length - 1];
|
|
213
|
-
return {
|
|
214
|
-
candidateId: s.candidateId,
|
|
215
|
-
decision: seq.finalDecision,
|
|
216
|
-
decisionFiredAt: seq.decisionFiredAt,
|
|
217
|
-
finalEvalue: last?.evalue ?? 1,
|
|
218
|
-
finalPValue: last?.pValue ?? 1,
|
|
219
|
-
pairs: seq.steps.length,
|
|
220
|
-
csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,
|
|
221
|
-
csHigh: last?.csHigh ?? Number.POSITIVE_INFINITY
|
|
222
|
-
};
|
|
223
|
-
});
|
|
224
|
-
const promote = candidates.find((c) => c.decision === "promote_now");
|
|
225
|
-
if (promote) return { candidates, recommendation: { decision: "promote_now", candidateId: promote.candidateId } };
|
|
226
|
-
const live = candidates.find((c) => c.decision === "continue");
|
|
227
|
-
if (live) return { candidates, recommendation: { decision: "continue", candidateId: null } };
|
|
228
|
-
const equiv = candidates.find((c) => c.decision === "equivalent");
|
|
229
|
-
if (equiv) return { candidates, recommendation: { decision: "equivalent", candidateId: equiv.candidateId } };
|
|
230
|
-
return { candidates, recommendation: { decision: "reject_now", candidateId: null } };
|
|
231
|
-
}
|
|
232
|
-
function empiricalBernsteinCs(sum, sumSq, n, bound, alpha) {
|
|
233
|
-
if (n === 0) return { low: -bound, high: bound };
|
|
234
|
-
const mean = sum / n;
|
|
235
|
-
const variance = Math.max(0, sumSq / n - mean * mean);
|
|
236
|
-
const psi = Math.log(2 / alpha) + 1.7 * Math.log(Math.log(Math.max(Math.E, n)) + 1);
|
|
237
|
-
const radius = Math.sqrt(2 * variance * psi / n) + 3 * bound * psi / n;
|
|
238
|
-
return { low: mean - radius, high: mean + radius };
|
|
239
|
-
}
|
|
240
|
-
|
|
241
155
|
export {
|
|
242
|
-
rubricPredictiveValidity
|
|
243
|
-
pairedEvalueSequence,
|
|
244
|
-
evaluateInterimReleaseConfidence
|
|
156
|
+
rubricPredictiveValidity
|
|
245
157
|
};
|
|
246
|
-
//# sourceMappingURL=chunk-
|
|
158
|
+
//# sourceMappingURL=chunk-YRZ4M5GS.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/meta-eval/rubric-predictive-validity.ts"],"sourcesContent":["/**\n * Rubric predictive validity — does our eval rubric predict deployment\n * outcomes?\n *\n * `correlationStudy` (already in this package) joins a `TraceStore` to an\n * `OutcomeStore` and computes Pearson + Spearman + bootstrap CI for each\n * (eval-metric, outcome-metric) pair. That answers \"does X correlate with\n * Y at all.\" `rubricPredictiveValidity` is the campaign-shaped wrapper\n * around it: take a sequence of `RunRecord`s (the canonical campaign\n * artifact) and a `DeploymentOutcomeStore`, join on `runId`, return a\n * ranked verdict on every rubric whose dimension scores were captured in\n * `outcome.raw`.\n *\n * The point — quoting the methodology doc — is that **without this loop\n * every rubric is faith-based**. Once it's wired, you know which rubrics\n * have earned their promotion power and which ones are decoration.\n *\n * const validity = await rubricPredictiveValidity({\n * runs: lastQuarter,\n * outcomes: shipFlagOutcomeStore,\n * outcomeMetrics: ['revenue_lift', 'retention_30d', 'csat'],\n * rubrics: ['anti_slop', 'semantic_concept', 'tool_recovery'],\n * })\n * for (const r of validity.ranked) {\n * console.log(`${r.rubric} → ${r.bestOutcome}: ρ=${r.spearman.toFixed(2)}`)\n * }\n *\n * The function is intentionally read-only. Use the verdict to deprecate\n * decorative rubrics, re-weight composite scores, or trigger a\n * recalibration sweep when predictive validity drops below a threshold.\n */\n\nimport type { RunRecord } from '../run-record'\nimport type { DeploymentOutcome, OutcomeStore } from './outcome-store'\n\nexport interface RubricPredictiveValidityInput {\n /**\n * Canonical campaign output. Each record's `outcome.raw[<rubricId>]`\n * provides the eval score; missing keys are silently skipped per pair.\n */\n runs: RunRecord[]\n outcomes: OutcomeStore\n /**\n * Outcome metric names to evaluate against. Each must appear in at\n * least one `DeploymentOutcome.metrics` keyspace; pairs with too few\n * joined samples are excluded from the result.\n */\n outcomeMetrics: string[]\n /**\n * Rubric ids to evaluate. Must appear as keys in `RunRecord.outcome.raw`.\n * If omitted, every numeric key in `outcome.raw` across the run set is\n * treated as a rubric.\n */\n rubrics?: string[]\n /** Minimum joined-sample count before a pair is reported. Default 8. */\n minSamples?: number\n /** Bootstrap resamples for CI. Default 500. */\n bootstrapResamples?: number\n /** Random seed for the bootstrap (mulberry32). Default unset (Math.random). */\n seed?: number\n /**\n * Reduction when multiple outcomes attach to one runId. Default `'latest'`\n * (most recently captured).\n */\n reduction?: 'latest' | 'mean' | 'max'\n}\n\nexport interface RubricOutcomePair {\n rubric: string\n outcome: string\n n: number\n pearson: number\n spearman: number\n ci95: { low: number; high: number }\n /**\n * Verdict bucket. `load_bearing` ≥ 0.7, `informative` ≥ 0.4,\n * `decorative` < 0.4 in absolute correlation. A negative correlation\n * with a desired outcome is also `decorative` — actively misleading\n * is worse than uninformative.\n */\n verdict: 'load_bearing' | 'informative' | 'decorative'\n}\n\nexport interface RubricRanking {\n rubric: string\n /** Outcome metric this rubric correlated best with. */\n bestOutcome: string\n spearman: number\n pearson: number\n n: number\n verdict: RubricOutcomePair['verdict']\n}\n\nexport interface RubricPredictiveValidityReport {\n pairs: RubricOutcomePair[]\n /** Per-rubric best pair, sorted descending by |spearman|. */\n ranked: RubricRanking[]\n joinedSamples: number\n skippedRuns: number\n /** Rubrics that were declared but never produced a usable score. */\n rubricsWithoutData: string[]\n}\n\nexport async function rubricPredictiveValidity(\n input: RubricPredictiveValidityInput,\n): Promise<RubricPredictiveValidityReport> {\n const minSamples = input.minSamples ?? 8\n const reduction = input.reduction ?? 'latest'\n const resamples = input.bootstrapResamples ?? 500\n const rng = makeRng(input.seed)\n\n const outcomes = await input.outcomes.list()\n const outcomesByRun = new Map<string, DeploymentOutcome[]>()\n for (const o of outcomes) {\n const arr = outcomesByRun.get(o.runId) ?? []\n arr.push(o)\n outcomesByRun.set(o.runId, arr)\n }\n\n // Discover rubrics: caller-declared OR every numeric key in outcome.raw\n // observed across runs.\n const observedRubrics = new Set<string>()\n for (const r of input.runs) {\n for (const k of Object.keys(r.outcome.raw)) observedRubrics.add(k)\n }\n const rubrics = input.rubrics ?? [...observedRubrics]\n\n // Collect aligned (x, y) pairs per (rubric, outcome).\n type Bucket = { rubric: string; outcome: string; xs: number[]; ys: number[] }\n const buckets: Bucket[] = []\n for (const r of rubrics) {\n for (const o of input.outcomeMetrics) {\n buckets.push({ rubric: r, outcome: o, xs: [], ys: [] })\n }\n }\n\n let joined = 0\n let skipped = 0\n for (const run of input.runs) {\n const os = outcomesByRun.get(run.runId)\n if (!os || os.length === 0) {\n skipped++\n continue\n }\n let joinedThisRun = false\n for (const r of rubrics) {\n const x = run.outcome.raw[r]\n if (typeof x !== 'number' || !Number.isFinite(x)) continue\n for (const o of input.outcomeMetrics) {\n const values = os\n .map((row) => row.metrics[o])\n .filter((v): v is number => typeof v === 'number' && Number.isFinite(v))\n if (values.length === 0) continue\n const y = reduce(values, os, o, reduction)\n if (y === null) continue\n const bucket = buckets.find((b) => b.rubric === r && b.outcome === o)!\n bucket.xs.push(x)\n bucket.ys.push(y)\n joinedThisRun = true\n }\n }\n if (joinedThisRun) joined++\n }\n\n const pairs: RubricOutcomePair[] = []\n for (const b of buckets) {\n if (b.xs.length < minSamples) continue\n const pearson = pearsonR(b.xs, b.ys)\n const spearman = pearsonR(rankWithTies(b.xs), rankWithTies(b.ys))\n const ci = bootstrapCi(b.xs, b.ys, resamples, rng)\n const verdict: RubricOutcomePair['verdict'] =\n Math.abs(spearman) >= 0.7\n ? 'load_bearing'\n : Math.abs(spearman) >= 0.4\n ? 'informative'\n : 'decorative'\n pairs.push({\n rubric: b.rubric,\n outcome: b.outcome,\n n: b.xs.length,\n pearson,\n spearman,\n ci95: ci,\n verdict,\n })\n }\n\n const byRubric = new Map<string, RubricOutcomePair[]>()\n for (const p of pairs) {\n const arr = byRubric.get(p.rubric) ?? []\n arr.push(p)\n byRubric.set(p.rubric, arr)\n }\n const ranked: RubricRanking[] = [...byRubric.entries()]\n .map(([rubric, ps]) => {\n const best = ps.reduce((a, b) => (Math.abs(b.spearman) > Math.abs(a.spearman) ? b : a))\n return {\n rubric,\n bestOutcome: best.outcome,\n spearman: best.spearman,\n pearson: best.pearson,\n n: best.n,\n verdict: best.verdict,\n }\n })\n .sort((a, b) => Math.abs(b.spearman) - Math.abs(a.spearman))\n\n const rubricsWithoutData = rubrics.filter((r) => !byRubric.has(r))\n\n return { pairs, ranked, joinedSamples: joined, skippedRuns: skipped, rubricsWithoutData }\n}\n\n// ── Helpers ──────────────────────────────────────────────────────────────\n\nfunction reduce(\n values: number[],\n outcomes: DeploymentOutcome[],\n metric: string,\n kind: 'latest' | 'mean' | 'max',\n): number | null {\n if (values.length === 0) return null\n if (kind === 'mean') return values.reduce((s, v) => s + v, 0) / values.length\n if (kind === 'max') return Math.max(...values)\n // 'latest'\n const sorted = [...outcomes]\n .filter((o) => typeof o.metrics[metric] === 'number')\n .sort((a, b) => b.capturedAt - a.capturedAt)\n return sorted[0]?.metrics[metric] ?? null\n}\n\nfunction pearsonR(a: number[], b: number[]): number {\n if (a.length !== b.length || a.length < 2) return Number.NaN\n const ma = a.reduce((s, v) => s + v, 0) / a.length\n const mb = b.reduce((s, v) => s + v, 0) / b.length\n let num = 0,\n da = 0,\n db = 0\n for (let i = 0; i < a.length; i++) {\n const xa = a[i]! - ma\n const xb = b[i]! - mb\n num += xa * xb\n da += xa * xa\n db += xb * xb\n }\n if (da === 0 || db === 0) return da === 0 && db === 0 ? 1 : 0\n return num / Math.sqrt(da * db)\n}\n\nfunction rankWithTies(xs: number[]): number[] {\n const indexed = xs.map((v, i) => ({ v, i })).sort((a, b) => a.v - b.v)\n const r = new Array<number>(xs.length)\n for (let i = 0; i < indexed.length; ) {\n let j = i\n while (j + 1 < indexed.length && indexed[j + 1]!.v === indexed[i]!.v) j++\n const avg = (i + j + 2) / 2\n for (let k = i; k <= j; k++) r[indexed[k]!.i] = avg\n i = j + 1\n }\n return r\n}\n\nfunction bootstrapCi(\n xs: number[],\n ys: number[],\n iterations: number,\n rng: () => number,\n): { low: number; high: number } {\n const n = xs.length\n if (n < 3) return { low: Number.NaN, high: Number.NaN }\n const samples: number[] = []\n for (let b = 0; b < iterations; b++) {\n const rx = new Array<number>(n)\n const ry = new Array<number>(n)\n for (let i = 0; i < n; i++) {\n const idx = Math.floor(rng() * n)\n rx[i] = xs[idx]!\n ry[i] = ys[idx]!\n }\n const r = pearsonR(rx, ry)\n if (Number.isFinite(r)) samples.push(r)\n }\n samples.sort((a, b) => a - b)\n if (samples.length === 0) return { low: Number.NaN, high: Number.NaN }\n return {\n low: samples[Math.floor(0.025 * samples.length)]!,\n high: samples[Math.min(samples.length - 1, Math.floor(0.975 * samples.length))]!,\n }\n}\n\nfunction makeRng(seed?: number): () => number {\n if (seed === undefined) return Math.random\n let s = seed >>> 0\n return () => {\n s = (s + 0x6d2b79f5) >>> 0\n let t = s\n t = Math.imul(t ^ (t >>> 15), t | 1)\n t ^= t + Math.imul(t ^ (t >>> 7), t | 61)\n return ((t ^ (t >>> 14)) >>> 0) / 4294967296\n }\n}\n"],"mappings":";AAuGA,eAAsB,yBACpB,OACyC;AACzC,QAAM,aAAa,MAAM,cAAc;AACvC,QAAM,YAAY,MAAM,aAAa;AACrC,QAAM,YAAY,MAAM,sBAAsB;AAC9C,QAAM,MAAM,QAAQ,MAAM,IAAI;AAE9B,QAAM,WAAW,MAAM,MAAM,SAAS,KAAK;AAC3C,QAAM,gBAAgB,oBAAI,IAAiC;AAC3D,aAAW,KAAK,UAAU;AACxB,UAAM,MAAM,cAAc,IAAI,EAAE,KAAK,KAAK,CAAC;AAC3C,QAAI,KAAK,CAAC;AACV,kBAAc,IAAI,EAAE,OAAO,GAAG;AAAA,EAChC;AAIA,QAAM,kBAAkB,oBAAI,IAAY;AACxC,aAAW,KAAK,MAAM,MAAM;AAC1B,eAAW,KAAK,OAAO,KAAK,EAAE,QAAQ,GAAG,EAAG,iBAAgB,IAAI,CAAC;AAAA,EACnE;AACA,QAAM,UAAU,MAAM,WAAW,CAAC,GAAG,eAAe;AAIpD,QAAM,UAAoB,CAAC;AAC3B,aAAW,KAAK,SAAS;AACvB,eAAW,KAAK,MAAM,gBAAgB;AACpC,cAAQ,KAAK,EAAE,QAAQ,GAAG,SAAS,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;AAAA,IACxD;AAAA,EACF;AAEA,MAAI,SAAS;AACb,MAAI,UAAU;AACd,aAAW,OAAO,MAAM,MAAM;AAC5B,UAAM,KAAK,cAAc,IAAI,IAAI,KAAK;AACtC,QAAI,CAAC,MAAM,GAAG,WAAW,GAAG;AAC1B;AACA;AAAA,IACF;AACA,QAAI,gBAAgB;AACpB,eAAW,KAAK,SAAS;AACvB,YAAM,IAAI,IAAI,QAAQ,IAAI,CAAC;AAC3B,UAAI,OAAO,MAAM,YAAY,CAAC,OAAO,SAAS,CAAC,EAAG;AAClD,iBAAW,KAAK,MAAM,gBAAgB;AACpC,cAAM,SAAS,GACZ,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,CAAC,EAC3B,OAAO,CAAC,MAAmB,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,CAAC;AACzE,YAAI,OAAO,WAAW,EAAG;AACzB,cAAM,IAAI,OAAO,QAAQ,IAAI,GAAG,SAAS;AACzC,YAAI,MAAM,KAAM;AAChB,cAAM,SAAS,QAAQ,KAAK,CAAC,MAAM,EAAE,WAAW,KAAK,EAAE,YAAY,CAAC;AACpE,eAAO,GAAG,KAAK,CAAC;AAChB,eAAO,GAAG,KAAK,CAAC;AAChB,wBAAgB;AAAA,MAClB;AAAA,IACF;AACA,QAAI,cAAe;AAAA,EACrB;AAEA,QAAM,QAA6B,CAAC;AACpC,aAAW,KAAK,SAAS;AACvB,QAAI,EAAE,GAAG,SAAS,WAAY;AAC9B,UAAM,UAAU,SAAS,EAAE,IAAI,EAAE,EAAE;AACnC,UAAM,WAAW,SAAS,aAAa,EAAE,EAAE,GAAG,aAAa,EAAE,EAAE,CAAC;AAChE,UAAM,KAAK,YAAY,EAAE,IAAI,EAAE,IAAI,WAAW,GAAG;AACjD,UAAM,UACJ,KAAK,IAAI,QAAQ,KAAK,MAClB,iBACA,KAAK,IAAI,QAAQ,KAAK,MACpB,gBACA;AACR,UAAM,KAAK;AAAA,MACT,QAAQ,EAAE;AAAA,MACV,SAAS,EAAE;AAAA,MACX,GAAG,EAAE,GAAG;AAAA,MACR;AAAA,MACA;AAAA,MACA,MAAM;AAAA,MACN;AAAA,IACF,CAAC;AAAA,EACH;AAEA,QAAM,WAAW,oBAAI,IAAiC;AACtD,aAAW,KAAK,OAAO;AACrB,UAAM,MAAM,SAAS,IAAI,EAAE,MAAM,KAAK,CAAC;AACvC,QAAI,KAAK,CAAC;AACV,aAAS,IAAI,EAAE,QAAQ,GAAG;AAAA,EAC5B;AACA,QAAM,SAA0B,CAAC,GAAG,SAAS,QAAQ,CAAC,EACnD,IAAI,CAAC,CAAC,QAAQ,EAAE,MAAM;AACrB,UAAM,OAAO,GAAG,OAAO,CAAC,GAAG,MAAO,KAAK,IAAI,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,QAAQ,IAAI,IAAI,CAAE;AACtF,WAAO;AAAA,MACL;AAAA,MACA,aAAa,KAAK;AAAA,MAClB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK;AAAA,MACd,GAAG,KAAK;AAAA,MACR,SAAS,KAAK;AAAA,IAChB;AAAA,EACF,CAAC,EACA,KAAK,CAAC,GAAG,MAAM,KAAK,IAAI,EAAE,QAAQ,IAAI,KAAK,IAAI,EAAE,QAAQ,CAAC;AAE7D,QAAM,qBAAqB,QAAQ,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;AAEjE,SAAO,EAAE,OAAO,QAAQ,eAAe,QAAQ,aAAa,SAAS,mBAAmB;AAC1F;AAIA,SAAS,OACP,QACA,UACA,QACA,MACe;AACf,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,MAAI,SAAS,OAAQ,QAAO,OAAO,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,OAAO;AACvE,MAAI,SAAS,MAAO,QAAO,KAAK,IAAI,GAAG,MAAM;AAE7C,QAAM,SAAS,CAAC,GAAG,QAAQ,EACxB,OAAO,CAAC,MAAM,OAAO,EAAE,QAAQ,MAAM,MAAM,QAAQ,EACnD,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,EAAE,UAAU;AAC7C,SAAO,OAAO,CAAC,GAAG,QAAQ,MAAM,KAAK;AACvC;AAEA,SAAS,SAAS,GAAa,GAAqB;AAClD,MAAI,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAG,QAAO,OAAO;AACzD,QAAM,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC5C,QAAM,KAAK,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE;AAC5C,MAAI,MAAM,GACR,KAAK,GACL,KAAK;AACP,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,UAAM,KAAK,EAAE,CAAC,IAAK;AACnB,UAAM,KAAK,EAAE,CAAC,IAAK;AACnB,WAAO,KAAK;AACZ,UAAM,KAAK;AACX,UAAM,KAAK;AAAA,EACb;AACA,MAAI,OAAO,KAAK,OAAO,EAAG,QAAO,OAAO,KAAK,OAAO,IAAI,IAAI;AAC5D,SAAO,MAAM,KAAK,KAAK,KAAK,EAAE;AAChC;AAEA,SAAS,aAAa,IAAwB;AAC5C,QAAM,UAAU,GAAG,IAAI,CAAC,GAAG,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AACrE,QAAM,IAAI,IAAI,MAAc,GAAG,MAAM;AACrC,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU;AACpC,QAAI,IAAI;AACR,WAAO,IAAI,IAAI,QAAQ,UAAU,QAAQ,IAAI,CAAC,EAAG,MAAM,QAAQ,CAAC,EAAG,EAAG;AACtE,UAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,aAAS,IAAI,GAAG,KAAK,GAAG,IAAK,GAAE,QAAQ,CAAC,EAAG,CAAC,IAAI;AAChD,QAAI,IAAI;AAAA,EACV;AACA,SAAO;AACT;AAEA,SAAS,YACP,IACA,IACA,YACA,KAC+B;AAC/B,QAAM,IAAI,GAAG;AACb,MAAI,IAAI,EAAG,QAAO,EAAE,KAAK,OAAO,KAAK,MAAM,OAAO,IAAI;AACtD,QAAM,UAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,UAAM,KAAK,IAAI,MAAc,CAAC;AAC9B,UAAM,KAAK,IAAI,MAAc,CAAC;AAC9B,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAM,MAAM,KAAK,MAAM,IAAI,IAAI,CAAC;AAChC,SAAG,CAAC,IAAI,GAAG,GAAG;AACd,SAAG,CAAC,IAAI,GAAG,GAAG;AAAA,IAChB;AACA,UAAM,IAAI,SAAS,IAAI,EAAE;AACzB,QAAI,OAAO,SAAS,CAAC,EAAG,SAAQ,KAAK,CAAC;AAAA,EACxC;AACA,UAAQ,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC5B,MAAI,QAAQ,WAAW,EAAG,QAAO,EAAE,KAAK,OAAO,KAAK,MAAM,OAAO,IAAI;AACrE,SAAO;AAAA,IACL,KAAK,QAAQ,KAAK,MAAM,QAAQ,QAAQ,MAAM,CAAC;AAAA,IAC/C,MAAM,QAAQ,KAAK,IAAI,QAAQ,SAAS,GAAG,KAAK,MAAM,QAAQ,QAAQ,MAAM,CAAC,CAAC;AAAA,EAChF;AACF;AAEA,SAAS,QAAQ,MAA6B;AAC5C,MAAI,SAAS,OAAW,QAAO,KAAK;AACpC,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,QAAK,IAAI,eAAgB;AACzB,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;","names":[]}
|