@tangle-network/agent-eval 0.22.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +156 -0
  2. package/README.md +13 -3
  3. package/dist/benchmarks/index.d.ts +2 -2
  4. package/dist/{chunk-UAND2LOT.js → chunk-7EAUOUQS.js} +4 -247
  5. package/dist/chunk-7EAUOUQS.js.map +1 -0
  6. package/dist/chunk-AXHNWLIX.js +246 -0
  7. package/dist/chunk-AXHNWLIX.js.map +1 -0
  8. package/dist/chunk-EXGR4XEM.js +283 -0
  9. package/dist/chunk-EXGR4XEM.js.map +1 -0
  10. package/dist/chunk-LZKIOBG2.js +2026 -0
  11. package/dist/chunk-LZKIOBG2.js.map +1 -0
  12. package/dist/{chunk-YUFXO3TU.js → chunk-QBW3YBTR.js} +1 -1
  13. package/dist/chunk-QBW3YBTR.js.map +1 -0
  14. package/dist/{chunk-ARZ6BEV6.js → chunk-V5QSWN7L.js} +2 -2
  15. package/dist/{chunk-USHQBPMH.js → chunk-VQQSPGSM.js} +7 -283
  16. package/dist/chunk-VQQSPGSM.js.map +1 -0
  17. package/dist/{chunk-4W4NCYM2.js → chunk-XPHOZPOM.js} +4 -2
  18. package/dist/chunk-XPHOZPOM.js.map +1 -0
  19. package/dist/{control-cxwMOAsy.d.ts → control-DvkH87qJ.d.ts} +2 -2
  20. package/dist/control.d.ts +3 -3
  21. package/dist/control.js +2 -2
  22. package/dist/{optimization-UVDNKaO6.d.ts → eval-campaign-Ds5QljIh.d.ts} +4 -5
  23. package/dist/{feedback-trajectory-CB0A32o3.d.ts → feedback-trajectory-c43WGtTX.d.ts} +1 -1
  24. package/dist/{index-c5saLbKD.d.ts → index-DDTlbHEK.d.ts} +1 -1
  25. package/dist/index-ekBXweiQ.d.ts +1894 -0
  26. package/dist/index.d.ts +18 -154
  27. package/dist/index.js +126 -26
  28. package/dist/index.js.map +1 -1
  29. package/dist/{integrity-K2oVlF57.d.ts → integrity-Cr5YodSY.d.ts} +1 -1
  30. package/dist/openapi.json +1 -1
  31. package/dist/optimization.d.ts +5 -5
  32. package/dist/optimization.js +7 -5
  33. package/dist/reporting.d.ts +294 -4
  34. package/dist/reporting.js +6 -4
  35. package/dist/rl.d.ts +8 -0
  36. package/dist/rl.js +113 -0
  37. package/dist/rl.js.map +1 -0
  38. package/dist/{run-record-CX_jcAyr.d.ts → run-record-DNiOMBrZ.d.ts} +10 -1
  39. package/dist/sequential-DgU2mFsE.d.ts +304 -0
  40. package/dist/{summary-report-D4p7RlDu.d.ts → summary-report-Ce1r4EYo.d.ts} +2 -2
  41. package/dist/traces.d.ts +2 -2
  42. package/dist/traces.js +6 -6
  43. package/docs/auto-research-loop-end-to-end.md +186 -0
  44. package/docs/three-package-architecture.md +180 -0
  45. package/package.json +22 -10
  46. package/dist/chunk-4W4NCYM2.js.map +0 -1
  47. package/dist/chunk-UAND2LOT.js.map +0 -1
  48. package/dist/chunk-USHQBPMH.js.map +0 -1
  49. package/dist/chunk-YUFXO3TU.js.map +0 -1
  50. package/dist/reporting-B82RSv9C.d.ts +0 -593
  51. /package/dist/{chunk-ARZ6BEV6.js.map → chunk-V5QSWN7L.js.map} +0 -0
package/CHANGELOG.md CHANGED
@@ -1,5 +1,161 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.23.1 — FileSystemTraceStore.updateRun no longer double-appends
4
+
5
+ ### Fixed
6
+
7
+ - **`FileSystemTraceStore.updateRun` / `updateSpan`** — once the lazy
8
+ in-memory index had been populated (by any prior `getRun` / `listRuns` /
9
+ `spans` / `events` query), an `updateRun` would mirror the synthetic
10
+ update row back into the index via `appendRun`, throwing
11
+ `run X already exists`. Same root cause for `updateSpan`, which would
12
+ silently insert a phantom duplicate span row. The `append()` helper now
13
+ skips `insertInto` for rows carrying the internal `_update: true` marker;
14
+ `updateRun` / `updateSpan` continue to apply the patch directly via the
15
+ index's `updateRun` / `updateSpan` APIs.
16
+
17
+ Surfaced by tax-agent's canonical eval running multiple variants per
18
+ persona against a shared store: the second variant's `endRun`
19
+ consistently threw, forcing callers to instantiate one store per
20
+ (persona × variant) cell and stitch results back together post-hoc.
21
+ After this fix, a single `FileSystemTraceStore` can fan out runs across
22
+ arbitrarily many cells with interleaved reads, which is the intended
23
+ usage pattern. Regression test added in `tests/trace-store.test.ts`.
24
+
25
+ ## 0.23.0 — RL primitives + auto-research worked example
26
+
27
+ In addition to the RL bridge primitives below, this release ships the
28
+ canonical worked example of the auto-research loop end-to-end against
29
+ agent-builder, plus a concrete prime-rl SFT integration. The auto-research
30
+ thesis — capture → score → preferences → mutate → improved candidate —
31
+ is now demonstrably real, not aspirational.
32
+
33
+ ### Added (worked examples)
34
+
35
+ - **`examples/auto-research-with-agent-builder/`** — runnable demo of the
36
+ closed loop: a synthetic agent-builder driver iterates 4 generations
37
+ of prompt variants, with each generation's runs feeding
38
+ `analyzeOptimizationResult` for preferences + reward-hacking + sequential
39
+ verdict, and the next generation proposed via a deterministic mutator.
40
+ The demo shows score climbing from 0.739 → 0.973 over 4 iterations on
41
+ the synthetic environment. Real-driver mode (replace the synthetic
42
+ runner with `runForgeBuilderSim` from `agent-builder`) is documented
43
+ inline.
44
+ - **`examples/fine-tune-with-prime-rl/`** — concrete integration with
45
+ Prime Intellect's prime-rl SFT trainer. Reads `RunRecord[]` (NDJSON),
46
+ filters to high-quality runs, projects via `toSftRows` to messages-list
47
+ JSONL, writes a 15-line prime-rl SFT TOML config, prints the runnable
48
+ command. ~150 LoC of glue. SFT was chosen as the first integration
49
+ because it's the cleanest fit between agent-eval's exporters and
50
+ prime-rl's entrypoints (DPO/PRM go to TRL; offline GRPO requires a
51
+ custom verifiers env — both called out in the README).
52
+ - **`docs/three-package-architecture.md`** — the contracts between
53
+ agent-eval, agent-knowledge, agent-runtime. Dependency direction (both
54
+ consume agent-eval; agent-eval imports neither), shared data
55
+ interchange (RunRecord, Scenario, KnowledgeBundle), and known
56
+ contract gaps tracked as follow-ups.
57
+ - **`docs/auto-research-loop-end-to-end.md`** — the runnable composition
58
+ pattern with the explicit invariants every iteration must preserve
59
+ (canonical RunRecord with scenarioId, capture wired by construction,
60
+ stable comparator, deterministic mutator).
61
+
62
+ ### Added (RL primitives)
63
+
64
+ 0.22 made eval rigorous and integrated; 0.23 closes the loop back to RL training. The package now ships the canonical primitives a working RL-on-LLM-agents team needs — verifiable rewards, preference extraction, off-policy evaluation, process reward scaffolding, contamination probing, Bradley-Terry / Elo tournaments, adversarial scenario search, and test-time compute scaling — all designed to consume the standardised `RunRecord` artifact 0.22 produced. The auto-research loop is now coherent end-to-end.
65
+
66
+ #### RL barrel — `@tangle-network/agent-eval/rl` (new subpath)
67
+
68
+ A single subpath for every RL-shaped primitive, importable as a unit. The 9 modules:
69
+
70
+ 1. **`run-record-adapters.ts`** — convert `TrialResult[]` (from `runPromptEvolution` / `runMultiShotOptimization`), `VerificationReport` (from `MultiLayerVerifier`), and `VariantAggregate` into canonical `RunRecord[]`. Closes the integration gap between the pre-0.22 optimization stack and the post-0.22 campaign artifact. Existing optimization runs become `replayCache`-able and `rubricPredictiveValidity`-scorable for free.
71
+
72
+ 2. **`verifiable-reward.ts`** — extract a clean `VerifiableReward` from `VerificationReport` or `RunRecord`. Distinguishes `'deterministic'` (compile, test, schema, sandbox) from `'probabilistic'` (judge) reward sources. The seam every credible 2025-2026 frontier RL result on coding agents leans on (DeepSeek-R1 GRPO on test pass-rate, AlphaProof on Lean kernel checking).
73
+
74
+ 3. **`preferences.ts`** — `extractPreferences(runRecords)` produces DPO/PPO/KTO-shape `(chosen, rejected)` triples with three documented strategies (`paired-by-scenario-and-seed`, `paired-by-scenario`, `top-vs-bottom`). Bridge from campaign artifact to RL training. Includes `toTRLFormat` and `toAnthropicFormat` adapters.
75
+
76
+ 4. **`off-policy.ts`** — IPS, SNIPS, doubly-robust off-policy estimators (Dudík–Langford–Li 2011 for DR, Owen 2013 for SNIPS SE). Caller supplies behavior + target propensity scores (typically from token log-probs). All three return matched-shape `OffPolicyEstimate` with effective-sample-size and max-importance-weight diagnostics. `offPolicyEstimateAll` runs all three side-by-side — agreement across estimators is a much stronger signal than any one alone.
77
+
78
+ 5. **`process-reward.ts`** — step-level credit assignment from trace spans. `extractStepRewards(store, runId, scorers)` produces `StepReward[]`; `prmTrainingPairs(stepRewardsByRun)` produces `(prefix, chosen_step, rejected_step)` triples in the canonical Lightman et al. / DeepSeek-R1 process supervision shape. We ship the data extraction, not the trainer — gradient descent over a transformer is out of scope for a TS package.
79
+
80
+ 6. **`contamination.ts`** — held-out perturbation contamination probe. `runContaminationProbe({ originals, perturbation, scoreFn })` runs the policy against original + perturbed scenarios, computes paired Wilcoxon on the deltas, and flags suspected contamination when median drop ≥ 5pp at p < 0.05. Stock perturbations: `renameVariables`, `shuffleOrder`, `injectIrrelevantClause`. Catches the SWE-Bench → SWE-Bench-Verified failure mode upstream.
81
+
82
+ 7. **`tournament.ts`** — `fitBradleyTerry(outcomes)` uses Hunter's MM algorithm to recover candidate strengths from pairwise outcomes; `applyEloUpdate(ratings, outcome)` for online updates with FIDE-style K-factor. `buildPairwiseFromCampaign` extracts pairwise outcomes from per-scenario campaign runs. Sample-efficient ranking for many-candidate sweeps; the methodology Chatbot Arena and AlpacaEval converged on.
83
+
84
+ 8. **`adversarial.ts`** — `adversarialScenarioSearch({ seeds, mutations, scoreFn })` actively searches for inputs the policy fails on. Hill-climb-against-failure-indicator loop (the simplest version of AdA / POET / auto-jailbreak rigs). Caller supplies mutation strategies; the harness deduplicates, budgets, and reports per-generation statistics.
85
+
86
+ 9. **`compute-curves.ts`** — characterize a candidate as a *curve* across compute budgets, not a point. `runComputeCurve` produces `(cost, score)` points + log-slope. `bestOfN`, `selfConsistency` are the canonical test-time-scaling primitives (Snell et al. 2024). `paretoFrontier` removes dominated (candidate, compute) combinations. Required for honest cost-quality reporting in the o1-era.
87
+
88
+ #### RL barrel — additional experimental modules
89
+
90
+ The 9 modules above are stable and tested. The following modules are also shipped under `@tangle-network/agent-eval/rl` as **experimental** — interfaces are reasonable but may evolve based on real production consumer feedback. Marked clearly in the barrel docstring; flagged here so consumers know the contract may shift.
91
+
92
+ 10. **`active-curriculum.ts`** — adaptive scenario allocation. `varianceBasedCurriculum` (Neyman 1934 optimal allocation: weight ∝ √variance + 1/√n for under-sampled-cell tie-break) and `thompsonCurriculum` (Beta-Bernoulli posterior + decision-threshold-weighted sampling) reallocate next-round budget toward cells whose outcome is uncertain.
93
+
94
+ 11. **`reward-hacking.ts`** — `detectRewardHacking({ runs, truthOf })` watches four signature signals (proxy-vs-truth divergence, distributional shift, reward disagreement between independent rewards, judge drift relative to deterministic reward) and returns a structured `'clean' | 'suspect' | 'gaming'` verdict with per-signal severity. Krakovna et al. + Skalse et al. 2022 + Kim et al. 2023 lineage.
95
+
96
+ 12. **`adaptation-eval.ts`** — `runAdaptationCurve` and `compareAdaptationCurves` for sample-efficient adaptation evaluation. The metric a foundation-model-based agent should be measured on isn't end-state performance but the curve of score vs k (k=0, 1, 2, 4, 8, 16 demonstrations). Returns area-under-curve summary + per-k bootstrap CIs.
97
+
98
+ 13. **`exporters.ts`** — trainer-format export functions. `toDpoRows` (HuggingFace TRL DPO/IPO/KTO format), `toGrpoRows` (offline GRPO `{prompt, completions[], rewards[]}`), `toSftRows` (TRL/prime-rl SFT messages list), `toPrmRows` (Lightman-style PRM training shape), `stepRewardsToJsonl` (step-level rewards for value-function regression). **Honest scope:** `toSftRows` is the only one that maps directly onto a prime-rl entrypoint; the others target TRL or custom trainers — see `examples/fine-tune-with-prime-rl/README.md` for the explicit fit table.
99
+
100
+ 14. **`rl-campaign.ts`** — `runRLCampaign(opts)` wraps `runEvalCampaign` and runs the full RL bridge (verifiable rewards + preferences + sequential interim verdict + reward-hacking + optional predictive validity + optional trainer export) in one call. The single top-level orchestrator the pre-0.23 audit panel called out as missing.
101
+
102
+ 15. **`auto-research.ts`** — `analyzeOptimizationResult({ result, ctx, comparator })` takes a `PromptEvolutionResult` or `MultiShotOptimizationResult` (the existing GEPA/AxRLM stack outputs) and runs the same RL bridge on top, producing a unified artifact. Closes the architectural fragmentation between the optimization primitives and the RL bridge.
103
+
104
+ 16. **`predictive-validity-researcher.ts`** — `PredictiveValidityResearcher` is a concrete `Researcher` interface implementation (the interface had been a placeholder + `NoopResearcher` until now). Drives steering changes from outcome-anchored predictive validity: rubrics that don't predict deployment outcomes get down-weighted; load-bearing rubrics get up-weighted.
105
+
106
+ 17. **`run-record.ts`** — `RunRecord.scenarioId` is now an optional canonical field (was previously inferred from `outcome.raw.scenario_id`). Populated automatically by `runEvalCampaign` and the optimization adapters; legacy `RunRecord[]` arrays without it fall back to the `outcome.raw.scenario_id` convention. Closes the fragility called out by the 0.23 audit.
107
+
108
+ #### Build / surface
109
+
110
+ - New build entry: `dist/rl.{js,d.ts}` exposed via the `@tangle-network/agent-eval/rl` package subpath.
111
+ - All RL primitives also re-exported from the root barrel for ergonomic single-import use.
112
+ - Default `BradleyTerry` smoothing raised from 0 to 0.1 — Hunter's MM degenerates when a candidate has zero wins; 0.1 keeps the iteration well-conditioned without meaningfully biasing real win counts.
113
+
114
+ ### Why
115
+
116
+ The previous release shipped EvalCampaign + replay + sequential + outcome calibration as parallel infrastructure to the existing optimization primitives. That left a real gap: `runMultiShotOptimization` and `runPromptEvolution` produced their own trial shapes that didn't compose with the new artifacts. 0.23 closes that gap with the adapter layer, and ships the eight downstream primitives that turn the unified artifact into RL training data, OPE estimates, contamination probes, tournament rankings, adversarial scenarios, and compute curves.
117
+
118
+ After 0.23, the auto-research loop is coherent end-to-end:
119
+
120
+ ```
121
+ mutate (existing primitives)
122
+ → trial outcomes (TrialResult)
123
+ → adapter (run-record-adapters)
124
+ → RunRecord[] (canonical artifact)
125
+ → preferences / verifiable rewards / OPE / step rewards
126
+ → policy update (consumer's choice of TRL / GRPO / PPO / DPO)
127
+ → next sweep
128
+ ```
129
+
130
+ ### References
131
+
132
+ - Dudík, M., Langford, J., Li, L. (2011). Doubly Robust Policy Evaluation and Learning. *ICML*.
133
+ - Owen, A. B. (2013). *Monte Carlo Theory, Methods and Examples*. Ch. 9 — Importance Sampling.
134
+ - Hunter, D. R. (2004). MM algorithms for generalized Bradley-Terry models. *Annals of Statistics*, 32(1), 384–406.
135
+ - Bradley, R. A., Terry, M. E. (1952). Rank analysis of incomplete block designs. *Biometrika*, 39(3/4).
136
+ - Lightman, H. et al. (2023). Let's Verify Step by Step. *arXiv:2305.20050*.
137
+ - Snell, C. et al. (2024). Scaling LLM Test-Time Compute Optimally. *arXiv:2408.03314*.
138
+ - Plus the foundational citations from 0.21 / 0.22.
139
+
140
+ ### Migration
141
+
142
+ All 0.23 primitives are additive. Existing consumers don't need to change. Recommended adoption sequence:
143
+
144
+ 1. Add `trialsToRunRecords(trials, ctx)` after every existing optimization sweep — every old run becomes replay-able and predictive-validity-scorable for free.
145
+ 2. Wire `extractVerifiableReward` into your scoring pipeline; route deterministic and probabilistic rewards into separate training batches.
146
+ 3. Use `extractPreferences` to produce DPO/PPO triples for any RL training the consumer runs.
147
+ 4. Run `rubricPredictiveValidity` quarterly + `runContaminationProbe` per release to keep the rubric weights honest.
148
+ 5. Replace fixed-comparator HeldOutGate with `fitBradleyTerry` once you have ≥ 5 candidates running on shared scenarios.
149
+ 6. Replace single-budget evaluation with `runComputeCurve` for any candidate where compute scaling is a question.
150
+
151
+ ### Caveats and out-of-scope
152
+
153
+ - The DR estimator's Q-function is caller-supplied. We don't ship a learned Q-function trainer — that's a regression problem with too many domain-specific choices to ship a default.
154
+ - PRM training itself (gradient descent over a transformer) is out of scope; we ship the data extraction shape.
155
+ - The contamination probe's per-scenario q-values use a heuristic pseudo-p (the load-bearing test is the global Wilcoxon).
156
+ - `prmTrainingPairs` matches trajectories by step name + kind; production use should replace this with a token-level prefix hash.
157
+ - Adversarial scenario search is a simple hill-climb; novel scenario synthesis (compositional, language-model-driven) is future work.
158
+
3
159
  ## 0.22.0 — EvalCampaign + replay + always-valid + outcome calibration
4
160
 
5
161
  0.21 shipped the four capture-integrity primitives as opt-in. Every consumer still had to wire them by hand, and the bug class blueprint-agent reported (forgotten wiring → silent partial-capture) reappears the moment a new consumer adopts agent-eval cold. **0.22 makes the right thing the default path** — and adds three primitives that compound on top of standardized capture: replay-from-raw-events, anytime-valid sequential evaluation, and rubric predictive validity. The four primitives together turn agent-eval from a TS framework into research-grade evaluation infrastructure.
package/README.md CHANGED
@@ -96,9 +96,10 @@ import { renderReleaseReport } from '@tangle-network/agent-eval/reporting'
96
96
  | Subpath | Use for |
97
97
  | --- | --- |
98
98
  | `@tangle-network/agent-eval/control` | `observe -> validate -> decide -> act`, action policy, propose/review loops |
99
- | `@tangle-network/agent-eval/traces` | trace stores, emitters, TraceAnalyst |
100
- | `@tangle-network/agent-eval/optimization` | feedback trajectories, multi-shot optimization, prompt evolution |
101
- | `@tangle-network/agent-eval/reporting` | release confidence, paired stats, report/table/chart specs |
99
+ | `@tangle-network/agent-eval/traces` | trace stores, emitters, TraceAnalyst, replay |
100
+ | `@tangle-network/agent-eval/optimization` | feedback trajectories, multi-shot optimization, prompt evolution, EvalCampaign |
101
+ | `@tangle-network/agent-eval/reporting` | release confidence, paired stats, sequential e-values, report/table/chart specs, predictive validity |
102
+ | `@tangle-network/agent-eval/rl` | RL bridge: adapters, verifiable rewards, preferences, OPE, PRM, contamination, tournaments, adversarial, compute curves |
102
103
  | `@tangle-network/agent-eval/wire` | HTTP/RPC judge server and schemas |
103
104
  | `@tangle-network/agent-eval/benchmarks` | benchmark adapter contracts and reference wrappers |
104
105
 
@@ -120,6 +121,15 @@ import { renderReleaseReport } from '@tangle-network/agent-eval/reporting'
120
121
  | Re-judge / determinism-audit a past campaign for free | `ReplayCache`, `createReplayFetch` |
121
122
  | Ship-when-decisive with anytime-valid α across rolling looks | `pairedEvalueSequence`, `evaluateInterimReleaseConfidence` |
122
123
  | Tell load-bearing rubrics from decorative ones using deployment outcomes | `rubricPredictiveValidity` |
124
+ | Bridge legacy optimization output to canonical `RunRecord[]` | `trialsToRunRecords`, `verificationReportToRunRecord` |
125
+ | Extract a clean reward signal for RL training (compile/test/schema vs judge) | `extractVerifiableReward`, `filterDeterministicallyRewarded` |
126
+ | Produce DPO / PPO / KTO `(chosen, rejected)` triples | `extractPreferences` |
127
+ | Estimate a new policy's value on old trajectories without re-running | `offPolicyEstimateAll` (IPS + SNIPS + DR) |
128
+ | Step-level credit assignment / PRM training data | `extractStepRewards`, `prmTrainingPairs` |
129
+ | Detect benchmark contamination via held-out perturbations | `runContaminationProbe` |
130
+ | Pairwise tournament ratings for many-candidate sweeps | `fitBradleyTerry`, `applyEloUpdate` |
131
+ | Active search for inputs the policy fails on | `adversarialScenarioSearch` |
132
+ | Characterise a candidate across compute budgets | `runComputeCurve`, `bestOfN`, `selfConsistency`, `paretoFrontier` |
123
133
  | Model missing context separately from bad reasoning | `KnowledgeRequirement`, `KnowledgeBundle` |
124
134
 
125
135
  ### Capture integrity (0.21+)
@@ -1,2 +1,2 @@
1
- export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as deterministicSplit, e as routing } from '../index-c5saLbKD.js';
2
- import '../run-record-CX_jcAyr.js';
1
+ export { B as BENCHMARK_SPLIT_SEED, a as BenchmarkAdapter, b as BenchmarkDatasetItem, c as BenchmarkEvaluation, d as deterministicSplit, e as routing } from '../index-DDTlbHEK.js';
2
+ import '../run-record-DNiOMBrZ.js';
@@ -285,246 +285,6 @@ function fmt(x) {
285
285
  return x.toFixed(4);
286
286
  }
287
287
 
288
- // src/meta-eval/rubric-predictive-validity.ts
289
- async function rubricPredictiveValidity(input) {
290
- const minSamples = input.minSamples ?? 8;
291
- const reduction = input.reduction ?? "latest";
292
- const resamples = input.bootstrapResamples ?? 500;
293
- const rng = makeRng(input.seed);
294
- const outcomes = await input.outcomes.list();
295
- const outcomesByRun = /* @__PURE__ */ new Map();
296
- for (const o of outcomes) {
297
- const arr = outcomesByRun.get(o.runId) ?? [];
298
- arr.push(o);
299
- outcomesByRun.set(o.runId, arr);
300
- }
301
- const observedRubrics = /* @__PURE__ */ new Set();
302
- for (const r of input.runs) {
303
- for (const k of Object.keys(r.outcome.raw)) observedRubrics.add(k);
304
- }
305
- const rubrics = input.rubrics ?? [...observedRubrics];
306
- const buckets = [];
307
- for (const r of rubrics) {
308
- for (const o of input.outcomeMetrics) {
309
- buckets.push({ rubric: r, outcome: o, xs: [], ys: [] });
310
- }
311
- }
312
- let joined = 0;
313
- let skipped = 0;
314
- for (const run of input.runs) {
315
- const os = outcomesByRun.get(run.runId);
316
- if (!os || os.length === 0) {
317
- skipped++;
318
- continue;
319
- }
320
- let joinedThisRun = false;
321
- for (const r of rubrics) {
322
- const x = run.outcome.raw[r];
323
- if (typeof x !== "number" || !Number.isFinite(x)) continue;
324
- for (const o of input.outcomeMetrics) {
325
- const values = os.map((row) => row.metrics[o]).filter((v) => typeof v === "number" && Number.isFinite(v));
326
- if (values.length === 0) continue;
327
- const y = reduce(values, os, o, reduction);
328
- if (y === null) continue;
329
- const bucket = buckets.find((b) => b.rubric === r && b.outcome === o);
330
- bucket.xs.push(x);
331
- bucket.ys.push(y);
332
- joinedThisRun = true;
333
- }
334
- }
335
- if (joinedThisRun) joined++;
336
- }
337
- const pairs = [];
338
- for (const b of buckets) {
339
- if (b.xs.length < minSamples) continue;
340
- const pearson = pearsonR(b.xs, b.ys);
341
- const spearman = pearsonR(rankWithTies(b.xs), rankWithTies(b.ys));
342
- const ci = bootstrapCi(b.xs, b.ys, resamples, rng);
343
- const verdict = Math.abs(spearman) >= 0.7 ? "load_bearing" : Math.abs(spearman) >= 0.4 ? "informative" : "decorative";
344
- pairs.push({
345
- rubric: b.rubric,
346
- outcome: b.outcome,
347
- n: b.xs.length,
348
- pearson,
349
- spearman,
350
- ci95: ci,
351
- verdict
352
- });
353
- }
354
- const byRubric = /* @__PURE__ */ new Map();
355
- for (const p of pairs) {
356
- const arr = byRubric.get(p.rubric) ?? [];
357
- arr.push(p);
358
- byRubric.set(p.rubric, arr);
359
- }
360
- const ranked = [...byRubric.entries()].map(([rubric, ps]) => {
361
- const best = ps.reduce((a, b) => Math.abs(b.spearman) > Math.abs(a.spearman) ? b : a);
362
- return {
363
- rubric,
364
- bestOutcome: best.outcome,
365
- spearman: best.spearman,
366
- pearson: best.pearson,
367
- n: best.n,
368
- verdict: best.verdict
369
- };
370
- }).sort((a, b) => Math.abs(b.spearman) - Math.abs(a.spearman));
371
- const rubricsWithoutData = rubrics.filter((r) => !byRubric.has(r));
372
- return { pairs, ranked, joinedSamples: joined, skippedRuns: skipped, rubricsWithoutData };
373
- }
374
- function reduce(values, outcomes, metric, kind) {
375
- if (values.length === 0) return null;
376
- if (kind === "mean") return values.reduce((s, v) => s + v, 0) / values.length;
377
- if (kind === "max") return Math.max(...values);
378
- const sorted = [...outcomes].filter((o) => typeof o.metrics[metric] === "number").sort((a, b) => b.capturedAt - a.capturedAt);
379
- return sorted[0]?.metrics[metric] ?? null;
380
- }
381
- function pearsonR(a, b) {
382
- if (a.length !== b.length || a.length < 2) return Number.NaN;
383
- const ma = a.reduce((s, v) => s + v, 0) / a.length;
384
- const mb = b.reduce((s, v) => s + v, 0) / b.length;
385
- let num2 = 0, da = 0, db = 0;
386
- for (let i = 0; i < a.length; i++) {
387
- const xa = a[i] - ma;
388
- const xb = b[i] - mb;
389
- num2 += xa * xb;
390
- da += xa * xa;
391
- db += xb * xb;
392
- }
393
- if (da === 0 || db === 0) return da === 0 && db === 0 ? 1 : 0;
394
- return num2 / Math.sqrt(da * db);
395
- }
396
- function rankWithTies(xs) {
397
- const indexed = xs.map((v, i) => ({ v, i })).sort((a, b) => a.v - b.v);
398
- const r = new Array(xs.length);
399
- for (let i = 0; i < indexed.length; ) {
400
- let j = i;
401
- while (j + 1 < indexed.length && indexed[j + 1].v === indexed[i].v) j++;
402
- const avg = (i + j + 2) / 2;
403
- for (let k = i; k <= j; k++) r[indexed[k].i] = avg;
404
- i = j + 1;
405
- }
406
- return r;
407
- }
408
- function bootstrapCi(xs, ys, iterations, rng) {
409
- const n = xs.length;
410
- if (n < 3) return { low: Number.NaN, high: Number.NaN };
411
- const samples = [];
412
- for (let b = 0; b < iterations; b++) {
413
- const rx = new Array(n);
414
- const ry = new Array(n);
415
- for (let i = 0; i < n; i++) {
416
- const idx = Math.floor(rng() * n);
417
- rx[i] = xs[idx];
418
- ry[i] = ys[idx];
419
- }
420
- const r = pearsonR(rx, ry);
421
- if (Number.isFinite(r)) samples.push(r);
422
- }
423
- samples.sort((a, b) => a - b);
424
- if (samples.length === 0) return { low: Number.NaN, high: Number.NaN };
425
- return {
426
- low: samples[Math.floor(0.025 * samples.length)],
427
- high: samples[Math.min(samples.length - 1, Math.floor(0.975 * samples.length))]
428
- };
429
- }
430
- function makeRng(seed) {
431
- if (seed === void 0) return Math.random;
432
- let s = seed >>> 0;
433
- return () => {
434
- s = s + 1831565813 >>> 0;
435
- let t = s;
436
- t = Math.imul(t ^ t >>> 15, t | 1);
437
- t ^= t + Math.imul(t ^ t >>> 7, t | 61);
438
- return ((t ^ t >>> 14) >>> 0) / 4294967296;
439
- };
440
- }
441
-
442
- // src/sequential.ts
443
- function pairedEvalueSequence(deltas, opts = {}) {
444
- const c = opts.bound ?? 1;
445
- const alpha = opts.alpha ?? 0.05;
446
- const initialShrink = opts.initialBetShrinkage ?? 0.5;
447
- const rope = opts.rope ?? null;
448
- if (c <= 0) throw new Error("pairedEvalueSequence: bound must be > 0");
449
- if (alpha <= 0 || alpha >= 1) throw new Error("pairedEvalueSequence: alpha must be in (0,1)");
450
- if (rope && !(Number.isFinite(rope.low) && Number.isFinite(rope.high) && rope.low <= rope.high)) {
451
- throw new Error("pairedEvalueSequence: rope must satisfy low \u2264 high");
452
- }
453
- const steps = [];
454
- let clipped = false;
455
- let evalue = 1;
456
- let decisionFiredAt = null;
457
- let sum = 0;
458
- let sumSq = 0;
459
- let count = 0;
460
- for (let i = 0; i < deltas.length; i++) {
461
- let d = deltas[i];
462
- if (d < -c || d > c) {
463
- d = Math.max(-c, Math.min(c, d));
464
- clipped = true;
465
- }
466
- const muHat = count === 0 ? 0 : sum / count;
467
- const varHat = count === 0 ? c * c : Math.max(1e-12, sumSq / count - muHat * muHat);
468
- const t = i + 1;
469
- const shrink = initialShrink * Math.min(1, count / 32);
470
- let lambda = muHat / (varHat + c * c) * shrink;
471
- const lambdaMax = 0.99 / c;
472
- if (lambda > lambdaMax) lambda = lambdaMax;
473
- if (lambda < -lambdaMax) lambda = -lambdaMax;
474
- evalue = evalue * (1 + lambda * d);
475
- if (!Number.isFinite(evalue) || evalue < 0) evalue = 0;
476
- sum += d;
477
- sumSq += d * d;
478
- count += 1;
479
- const pValue = Math.min(1, 1 / Math.max(evalue, 1e-300));
480
- const cs = empiricalBernsteinCs(sum, sumSq, count, c, alpha);
481
- let decision = "continue";
482
- if (rope && cs.low >= rope.low && cs.high <= rope.high) decision = "equivalent";
483
- else if (evalue >= 2 / alpha && muHat > 0) decision = "promote_now";
484
- else if (evalue >= 2 / alpha && muHat < 0) decision = "reject_now";
485
- else if (rope && cs.high < rope.low) decision = "reject_now";
486
- if (decision !== "continue" && decisionFiredAt === null) decisionFiredAt = t;
487
- steps.push({ t, delta: d, evalue, pValue, csLow: cs.low, csHigh: cs.high, decision });
488
- }
489
- const finalDecision = steps.length === 0 ? "continue" : steps[steps.length - 1].decision;
490
- return { steps, finalDecision, decisionFiredAt, clipped };
491
- }
492
- function evaluateInterimReleaseConfidence(input) {
493
- const candidates = input.deltaSeries.map((s) => {
494
- const seq = pairedEvalueSequence(s.deltas, {
495
- alpha: input.alpha,
496
- bound: input.bound,
497
- rope: input.rope
498
- });
499
- const last = seq.steps[seq.steps.length - 1];
500
- return {
501
- candidateId: s.candidateId,
502
- decision: seq.finalDecision,
503
- decisionFiredAt: seq.decisionFiredAt,
504
- finalEvalue: last?.evalue ?? 1,
505
- finalPValue: last?.pValue ?? 1,
506
- pairs: seq.steps.length,
507
- csLow: last?.csLow ?? Number.NEGATIVE_INFINITY,
508
- csHigh: last?.csHigh ?? Number.POSITIVE_INFINITY
509
- };
510
- });
511
- const promote = candidates.find((c) => c.decision === "promote_now");
512
- if (promote) return { candidates, recommendation: { decision: "promote_now", candidateId: promote.candidateId } };
513
- const live = candidates.find((c) => c.decision === "continue");
514
- if (live) return { candidates, recommendation: { decision: "continue", candidateId: null } };
515
- const equiv = candidates.find((c) => c.decision === "equivalent");
516
- if (equiv) return { candidates, recommendation: { decision: "equivalent", candidateId: equiv.candidateId } };
517
- return { candidates, recommendation: { decision: "reject_now", candidateId: null } };
518
- }
519
- function empiricalBernsteinCs(sum, sumSq, n, bound, alpha) {
520
- if (n === 0) return { low: -bound, high: bound };
521
- const mean3 = sum / n;
522
- const variance = Math.max(0, sumSq / n - mean3 * mean3);
523
- const psi = Math.log(2 / alpha) + 1.7 * Math.log(Math.log(Math.max(Math.E, n)) + 1);
524
- const radius = Math.sqrt(2 * variance * psi / n) + 3 * bound * psi / n;
525
- return { low: mean3 - radius, high: mean3 + radius };
526
- }
527
-
528
288
  // src/release-report.ts
529
289
  function renderReleaseReport(scorecard, options = {}) {
530
290
  const title = options.title ?? `Release Report: ${scorecard.target}`;
@@ -614,7 +374,7 @@ function num(value) {
614
374
  }
615
375
 
616
376
  // src/promotion-gate.ts
617
- function bootstrapCi2(baseline, candidate, options = {}) {
377
+ function bootstrapCi(baseline, candidate, options = {}) {
618
378
  const alpha = options.alpha ?? 0.05;
619
379
  const iterations = options.iterations ?? 1e3;
620
380
  const minTotal = options.minTotalSamples ?? 6;
@@ -698,7 +458,7 @@ async function judgeReplayGate(args) {
698
458
  const concurrency = args.judgeConcurrency ?? 4;
699
459
  const baselineScores = await scoreAll(args.baselineOutputs, args.judge, concurrency);
700
460
  const candidateScores = await scoreAll(args.candidateOutputs, args.judge, concurrency);
701
- const ci = bootstrapCi2(baselineScores, candidateScores, {
461
+ const ci = bootstrapCi(baselineScores, candidateScores, {
702
462
  ...args.alpha !== void 0 ? { alpha: args.alpha } : {},
703
463
  ...args.iterations !== void 0 ? { iterations: args.iterations } : {},
704
464
  ...args.seed !== void 0 ? { seed: args.seed } : {}
@@ -728,11 +488,8 @@ export {
728
488
  releaseTraceEvidenceFromMultiShotTrials,
729
489
  evaluateReleaseConfidence,
730
490
  assertReleaseConfidence,
731
- rubricPredictiveValidity,
732
- pairedEvalueSequence,
733
- evaluateInterimReleaseConfidence,
734
491
  renderReleaseReport,
735
- bootstrapCi2 as bootstrapCi,
492
+ bootstrapCi,
736
493
  judgeReplayGate
737
494
  };
738
- //# sourceMappingURL=chunk-UAND2LOT.js.map
495
+ //# sourceMappingURL=chunk-7EAUOUQS.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/release-confidence.ts","../src/release-report.ts","../src/promotion-gate.ts"],"sourcesContent":["/**\n * Release confidence gate.\n *\n * This is the production-facing composition layer over the lower-level\n * primitives:\n * - Dataset manifests prove corpus/version coverage.\n * - RunRecord rows prove reproducible search/holdout outcomes.\n * - Multi-shot trace evidence carries turn counts and ASI diagnostics.\n * - HeldOutGate decisions remain the paired promotion authority.\n *\n * The gate is intentionally pure and conservative. Missing declared evidence\n * fails closed instead of being treated as a neutral zero.\n */\n\nimport type { DatasetManifest, DatasetScenario, DatasetSplit } from './dataset'\nimport type { GateDecision } from './held-out-gate'\nimport type { ActionableSideInfo, MultiShotTrialResult } from './multi-shot-optimization'\nimport type { RunRecord, RunSplitTag } from './run-record'\n\nexport type ReleaseConfidenceStatus = 'pass' | 'warn' | 'fail'\nexport type ReleaseConfidenceAxisName =\n | 'corpus'\n | 'quality'\n | 'generalization'\n | 'diagnostics'\n | 'efficiency'\n\nexport interface ReleaseTraceEvidence {\n scenarioId: string\n candidateId?: string\n split?: RunSplitTag\n score?: number\n ok?: boolean\n turnCount?: number\n costUsd?: number\n durationMs?: number\n failureMode?: string\n asi?: ActionableSideInfo[]\n metadata?: Record<string, unknown>\n}\n\nexport interface ReleaseConfidenceThresholds {\n /** Require a Dataset manifest or explicit scenarios. Default true. */\n requireCorpus?: boolean\n minScenarioCount?: number\n minSearchRuns?: number\n minHoldoutRuns?: number\n /** Require at least one holdout scenario/run. Default true. */\n requireHoldout?: boolean\n minPassRate?: number\n minMeanScore?: number\n /** Search mean may exceed holdout mean by at most this much. */\n maxOverfitGap?: number\n maxMeanCostUsd?: number\n maxP95WallMs?: number\n /** Low-score/failed rows must carry ASI. Default true. */\n requireAsiForFailures?: boolean\n /** Score below this is considered a failure for ASI coverage. Default 0.5. */\n failureScoreThreshold?: number\n}\n\nexport interface ReleaseConfidenceInput {\n target: string\n candidateId?: string\n baselineId?: string\n dataset?: DatasetManifest\n scenarios?: readonly DatasetScenario[]\n runs?: readonly RunRecord[]\n traces?: readonly ReleaseTraceEvidence[]\n gateDecision?: GateDecision | null\n thresholds?: ReleaseConfidenceThresholds\n}\n\nexport interface ReleaseConfidenceAxis {\n name: ReleaseConfidenceAxisName\n status: ReleaseConfidenceStatus\n score: number\n detail: string\n}\n\nexport interface ReleaseConfidenceIssue {\n axis: ReleaseConfidenceAxisName\n severity: 'critical' | 'warning'\n code: string\n detail: string\n}\n\nexport interface ReleaseConfidenceMetrics {\n scenarioCount: number\n searchRuns: number\n holdoutRuns: number\n passRate: number\n meanScore: number\n searchMeanScore: number\n holdoutMeanScore: number\n overfitGap: number\n meanCostUsd: number\n p95WallMs: number\n failedRows: number\n failuresWithAsi: number\n singleShotTraces: number\n multiShotTraces: number\n splitCounts: Record<DatasetSplit, number>\n domainCounts: Record<string, number>\n failureModeCounts: Record<string, number>\n responsibleSurfaceCounts: Record<string, number>\n}\n\nexport interface ReleaseConfidenceScorecard {\n target: string\n candidateId: string | null\n baselineId: string | null\n status: ReleaseConfidenceStatus\n promote: boolean\n axes: ReleaseConfidenceAxis[]\n issues: ReleaseConfidenceIssue[]\n metrics: ReleaseConfidenceMetrics\n dataset: DatasetManifest | null\n gateDecision: GateDecision | null\n summary: string\n}\n\nconst DEFAULT_THRESHOLDS: Required<ReleaseConfidenceThresholds> = {\n requireCorpus: true,\n minScenarioCount: 1,\n minSearchRuns: 1,\n minHoldoutRuns: 1,\n requireHoldout: true,\n minPassRate: 0.8,\n minMeanScore: 0.7,\n maxOverfitGap: 0.15,\n maxMeanCostUsd: Number.POSITIVE_INFINITY,\n maxP95WallMs: Number.POSITIVE_INFINITY,\n requireAsiForFailures: true,\n failureScoreThreshold: 0.5,\n}\n\nexport function releaseTraceEvidenceFromMultiShotTrials(\n trials: readonly MultiShotTrialResult[],\n): ReleaseTraceEvidence[] {\n return trials.map((trial) => ({\n scenarioId: trial.scenarioId,\n candidateId: trial.variantId,\n split: trial.split === 'holdout' ? 'holdout' : trial.split === 'dev' ? 'dev' : 'search',\n score: trial.score,\n ok: trial.ok,\n turnCount: Array.isArray(trial.trace?.turns) ? trial.trace.turns.length : undefined,\n costUsd: trial.cost,\n durationMs: trial.durationMs,\n failureMode: trial.error ? 'runtime_error' : undefined,\n asi: trial.asi,\n metadata: trial.metadata,\n }))\n}\n\nexport function evaluateReleaseConfidence(input: ReleaseConfidenceInput): ReleaseConfidenceScorecard {\n const thresholds = { ...DEFAULT_THRESHOLDS, ...input.thresholds }\n const candidateId = input.candidateId ?? null\n const runs = filterCandidate(input.runs ?? [], candidateId, input.baselineId)\n const traces = filterTraceCandidate(input.traces ?? [], candidateId, input.baselineId)\n const scenarios = input.scenarios ?? []\n const scenarioCount = input.dataset?.scenarioCount ?? scenarios.length\n const splitCounts = input.dataset?.splitCounts ?? countScenarioSplits(scenarios)\n const searchScores = scoresFor(runs, 'search')\n const holdoutScores = scoresFor(runs, 'holdout')\n const allScores = [...searchScores, ...holdoutScores]\n const traceScores = traces.map((t) => t.score).filter(isFiniteNumber)\n const scoreUniverse = allScores.length > 0 ? allScores : traceScores\n const searchRuns = runs.filter((r) => r.splitTag === 'search').length\n const holdoutRuns = runs.filter((r) => r.splitTag === 'holdout').length\n const searchMeanScore = mean(searchScores)\n const holdoutMeanScore = mean(holdoutScores)\n const metrics: ReleaseConfidenceMetrics = {\n scenarioCount,\n searchRuns,\n holdoutRuns,\n passRate: passRate(runs, traces, thresholds.failureScoreThreshold),\n meanScore: mean(scoreUniverse),\n searchMeanScore,\n holdoutMeanScore,\n overfitGap: safeDiff(searchMeanScore, holdoutMeanScore),\n meanCostUsd: mean([...runs.map((r) => r.costUsd), ...traces.map((t) => t.costUsd).filter(isFiniteNumber)]),\n p95WallMs: percentile([...runs.map((r) => r.wallMs), ...traces.map((t) => t.durationMs).filter(isFiniteNumber)], 0.95),\n failedRows: failedRows(runs, traces, thresholds.failureScoreThreshold).length,\n failuresWithAsi: failedRows(runs, traces, thresholds.failureScoreThreshold).filter((row) => row.hasAsi).length,\n singleShotTraces: traces.filter((t) => t.turnCount === 1).length,\n multiShotTraces: traces.filter((t) => (t.turnCount ?? 0) > 1).length,\n splitCounts,\n domainCounts: countDomains(scenarios),\n failureModeCounts: countFailureModes(runs, traces, thresholds.failureScoreThreshold),\n responsibleSurfaceCounts: countResponsibleSurfaces(traces),\n }\n\n const issues: ReleaseConfidenceIssue[] = []\n checkCorpus(input, thresholds, metrics, issues)\n checkQuality(thresholds, metrics, issues)\n checkGeneralization(input.gateDecision ?? null, thresholds, metrics, issues)\n checkDiagnostics(thresholds, metrics, issues)\n checkEfficiency(thresholds, metrics, issues)\n\n const axes = buildAxes(metrics, thresholds, input.gateDecision ?? null, issues)\n const status = issues.some((i) => i.severity === 'critical') ? 'fail'\n : issues.length > 0 ? 'warn'\n : 'pass'\n\n return {\n target: input.target,\n candidateId,\n baselineId: input.baselineId ?? null,\n status,\n promote: status === 'pass' && (input.gateDecision ? input.gateDecision.promote : true),\n axes,\n issues,\n metrics,\n dataset: input.dataset ?? null,\n gateDecision: input.gateDecision ?? null,\n summary: renderSummary(input.target, status, metrics, issues),\n }\n}\n\nexport function assertReleaseConfidence(input: ReleaseConfidenceInput): ReleaseConfidenceScorecard {\n const scorecard = evaluateReleaseConfidence(input)\n if (scorecard.status === 'fail') {\n throw new Error(scorecard.summary)\n }\n return scorecard\n}\n\nfunction filterCandidate(\n runs: readonly RunRecord[],\n candidateId: string | null,\n baselineId?: string,\n): RunRecord[] {\n if (candidateId) return runs.filter((r) => r.candidateId === candidateId)\n if (baselineId) return runs.filter((r) => r.candidateId !== baselineId)\n return [...runs]\n}\n\nfunction filterTraceCandidate(\n traces: readonly ReleaseTraceEvidence[],\n candidateId: string | null,\n baselineId?: string,\n): ReleaseTraceEvidence[] {\n if (candidateId) return traces.filter((t) => t.candidateId === undefined || t.candidateId === candidateId)\n if (baselineId) return traces.filter((t) => t.candidateId === undefined || t.candidateId !== baselineId)\n return [...traces]\n}\n\nfunction checkCorpus(\n input: ReleaseConfidenceInput,\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (thresholds.requireCorpus && !input.dataset && (input.scenarios?.length ?? 0) === 0) {\n issues.push({ axis: 'corpus', severity: 'critical', code: 'missing_corpus', detail: 'No Dataset manifest or scenarios supplied.' })\n }\n if (metrics.scenarioCount < thresholds.minScenarioCount) {\n issues.push({ axis: 'corpus', severity: 'critical', code: 'few_scenarios', detail: `${metrics.scenarioCount} scenario(s) < min ${thresholds.minScenarioCount}.` })\n }\n if (thresholds.requireHoldout && metrics.splitCounts.holdout === 0) {\n issues.push({ axis: 'corpus', severity: 'critical', code: 'missing_holdout_split', detail: 'Corpus has no holdout scenarios.' })\n }\n}\n\nfunction checkQuality(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (metrics.searchRuns < thresholds.minSearchRuns) {\n issues.push({ axis: 'quality', severity: 'critical', code: 'few_search_runs', detail: `${metrics.searchRuns} search run(s) < min ${thresholds.minSearchRuns}.` })\n }\n if (metrics.passRate < thresholds.minPassRate) {\n issues.push({ axis: 'quality', severity: 'critical', code: 'low_pass_rate', detail: `passRate ${fmt(metrics.passRate)} < ${fmt(thresholds.minPassRate)}.` })\n }\n if (metrics.meanScore < thresholds.minMeanScore) {\n issues.push({ axis: 'quality', severity: 'critical', code: 'low_mean_score', detail: `meanScore ${fmt(metrics.meanScore)} < ${fmt(thresholds.minMeanScore)}.` })\n }\n}\n\nfunction checkGeneralization(\n gateDecision: GateDecision | null,\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (thresholds.requireHoldout && metrics.holdoutRuns < thresholds.minHoldoutRuns) {\n issues.push({ axis: 'generalization', severity: 'critical', code: 'few_holdout_runs', detail: `${metrics.holdoutRuns} holdout run(s) < min ${thresholds.minHoldoutRuns}.` })\n }\n if (Number.isFinite(metrics.overfitGap) && metrics.overfitGap > thresholds.maxOverfitGap) {\n issues.push({ axis: 'generalization', severity: 'critical', code: 'overfit_gap', detail: `search-holdout gap ${fmt(metrics.overfitGap)} > ${fmt(thresholds.maxOverfitGap)}.` })\n }\n if (gateDecision && !gateDecision.promote) {\n issues.push({ axis: 'generalization', severity: 'critical', code: `gate_${gateDecision.rejectionCode ?? 'reject'}`, detail: gateDecision.reason })\n }\n}\n\nfunction checkDiagnostics(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (!thresholds.requireAsiForFailures) return\n if (metrics.failedRows > metrics.failuresWithAsi) {\n issues.push({\n axis: 'diagnostics',\n severity: 'critical',\n code: 'missing_failure_asi',\n detail: `${metrics.failedRows - metrics.failuresWithAsi} failed row(s) have no actionable side information.`,\n })\n }\n}\n\nfunction checkEfficiency(\n thresholds: Required<ReleaseConfidenceThresholds>,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): void {\n if (metrics.meanCostUsd > thresholds.maxMeanCostUsd) {\n issues.push({ axis: 'efficiency', severity: 'critical', code: 'cost_budget', detail: `meanCostUsd ${fmt(metrics.meanCostUsd)} > ${fmt(thresholds.maxMeanCostUsd)}.` })\n }\n if (metrics.p95WallMs > thresholds.maxP95WallMs) {\n issues.push({ axis: 'efficiency', severity: 'critical', code: 'latency_budget', detail: `p95WallMs ${fmt(metrics.p95WallMs)} > ${fmt(thresholds.maxP95WallMs)}.` })\n }\n}\n\nfunction buildAxes(\n metrics: ReleaseConfidenceMetrics,\n thresholds: Required<ReleaseConfidenceThresholds>,\n gateDecision: GateDecision | null,\n issues: ReleaseConfidenceIssue[],\n): ReleaseConfidenceAxis[] {\n return [\n axis('corpus', issues, bounded(metrics.scenarioCount / Math.max(1, thresholds.minScenarioCount)), `${metrics.scenarioCount} scenarios; holdout=${metrics.splitCounts.holdout}`),\n axis('quality', issues, Math.min(metrics.passRate, metrics.meanScore), `passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`),\n axis('generalization', issues, gateDecision && !gateDecision.promote ? 0 : gapScore(metrics.overfitGap, thresholds.maxOverfitGap), `holdoutRuns=${metrics.holdoutRuns} overfitGap=${fmt(metrics.overfitGap)}`),\n axis('diagnostics', issues, metrics.failedRows === 0 ? 1 : metrics.failuresWithAsi / metrics.failedRows, `failuresWithAsi=${metrics.failuresWithAsi}/${metrics.failedRows}`),\n axis('efficiency', issues, efficiencyScore(metrics, thresholds), `meanCostUsd=${fmt(metrics.meanCostUsd)} p95WallMs=${fmt(metrics.p95WallMs)}`),\n ]\n}\n\nfunction axis(\n name: ReleaseConfidenceAxisName,\n issues: ReleaseConfidenceIssue[],\n score: number,\n detail: string,\n): ReleaseConfidenceAxis {\n const own = issues.filter((i) => i.axis === name)\n const status = own.some((i) => i.severity === 'critical') ? 'fail'\n : own.length > 0 ? 'warn'\n : 'pass'\n return { name, status, score: bounded(score), detail }\n}\n\nfunction countScenarioSplits(scenarios: readonly DatasetScenario[]): Record<DatasetSplit, number> {\n const counts: Record<DatasetSplit, number> = { train: 0, dev: 0, test: 0, holdout: 0 }\n for (const scenario of scenarios) counts[scenario.split ?? 'train']++\n return counts\n}\n\nfunction countDomains(scenarios: readonly DatasetScenario[]): Record<string, number> {\n const out: Record<string, number> = {}\n for (const scenario of scenarios) {\n const domain = scenario.tags?.domain ?? scenario.tags?.category ?? 'uncategorized'\n out[domain] = (out[domain] ?? 0) + 1\n }\n return out\n}\n\nfunction countFailureModes(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): Record<string, number> {\n const out: Record<string, number> = {}\n for (const run of runs) {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n if (run.failureMode || (score !== undefined && score < threshold)) {\n const mode = run.failureMode ?? 'low_score'\n out[mode] = (out[mode] ?? 0) + 1\n }\n }\n for (const trace of traces) {\n if (trace.failureMode || trace.ok === false || (trace.score !== undefined && trace.score < threshold)) {\n const mode = trace.failureMode ?? (trace.ok === false ? 'not_ok' : 'low_score')\n out[mode] = (out[mode] ?? 0) + 1\n }\n }\n return out\n}\n\nfunction countResponsibleSurfaces(traces: readonly ReleaseTraceEvidence[]): Record<string, number> {\n const out: Record<string, number> = {}\n for (const trace of traces) {\n for (const asi of trace.asi ?? []) {\n const surface = asi.responsibleSurface ?? 'unknown'\n out[surface] = (out[surface] ?? 0) + 1\n }\n }\n return out\n}\n\nfunction failedRows(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): Array<{ hasAsi: boolean }> {\n const out: Array<{ hasAsi: boolean }> = []\n for (const run of runs) {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n if (run.failureMode || (score !== undefined && score < threshold)) {\n const asiMetric = run.outcome.raw.asi\n out.push({ hasAsi: typeof asiMetric === 'number' && asiMetric > 0 })\n }\n }\n for (const trace of traces) {\n if (trace.failureMode || trace.ok === false || (trace.score !== undefined && trace.score < threshold)) {\n out.push({ hasAsi: (trace.asi?.length ?? 0) > 0 })\n }\n }\n return out\n}\n\nfunction passRate(\n runs: readonly RunRecord[],\n traces: readonly ReleaseTraceEvidence[],\n threshold: number,\n): number {\n const outcomes = [\n ...runs.map((run) => {\n const score = run.outcome.holdoutScore ?? run.outcome.searchScore\n return !run.failureMode && score !== undefined && score >= threshold\n }),\n ...traces.map((trace) => trace.ok !== false && (trace.score === undefined || trace.score >= threshold)),\n ]\n if (outcomes.length === 0) return 0\n return outcomes.filter(Boolean).length / outcomes.length\n}\n\nfunction scoresFor(runs: readonly RunRecord[], split: RunSplitTag): number[] {\n return runs\n .filter((run) => run.splitTag === split)\n .map((run) => split === 'holdout' ? run.outcome.holdoutScore : run.outcome.searchScore)\n .filter(isFiniteNumber)\n}\n\nfunction mean(xs: readonly number[]): number {\n if (xs.length === 0) return Number.NaN\n return xs.reduce((sum, x) => sum + x, 0) / xs.length\n}\n\nfunction percentile(xs: readonly number[], p: number): number {\n if (xs.length === 0) return Number.NaN\n const sorted = [...xs].sort((a, b) => a - b)\n return sorted[Math.min(sorted.length - 1, Math.max(0, Math.ceil(p * sorted.length) - 1))]!\n}\n\nfunction isFiniteNumber(value: unknown): value is number {\n return typeof value === 'number' && Number.isFinite(value)\n}\n\nfunction safeDiff(a: number, b: number): number {\n if (!Number.isFinite(a) || !Number.isFinite(b)) return Number.NaN\n return a - b\n}\n\nfunction gapScore(gap: number, maxGap: number): number {\n if (!Number.isFinite(gap)) return 0\n if (maxGap <= 0) return gap <= 0 ? 1 : 0\n return bounded(1 - Math.max(0, gap) / maxGap)\n}\n\nfunction efficiencyScore(\n metrics: ReleaseConfidenceMetrics,\n thresholds: Required<ReleaseConfidenceThresholds>,\n): number {\n const cost = Number.isFinite(thresholds.maxMeanCostUsd) && Number.isFinite(metrics.meanCostUsd)\n ? bounded(thresholds.maxMeanCostUsd / Math.max(metrics.meanCostUsd, 1e-12))\n : 1\n const latency = Number.isFinite(thresholds.maxP95WallMs) && Number.isFinite(metrics.p95WallMs)\n ? bounded(thresholds.maxP95WallMs / Math.max(metrics.p95WallMs, 1e-12))\n : 1\n return Math.min(cost, latency)\n}\n\nfunction bounded(x: number): number {\n if (!Number.isFinite(x)) return 0\n return Math.max(0, Math.min(1, x))\n}\n\nfunction renderSummary(\n target: string,\n status: ReleaseConfidenceStatus,\n metrics: ReleaseConfidenceMetrics,\n issues: ReleaseConfidenceIssue[],\n): string {\n const prefix = `release confidence ${status}: ${target}`\n const metricText = `scenarios=${metrics.scenarioCount} searchRuns=${metrics.searchRuns} holdoutRuns=${metrics.holdoutRuns} passRate=${fmt(metrics.passRate)} meanScore=${fmt(metrics.meanScore)}`\n if (issues.length === 0) return `${prefix}; ${metricText}`\n return `${prefix}; ${metricText}; issues=${issues.map((i) => i.code).join(',')}`\n}\n\nfunction fmt(x: number): string {\n if (!Number.isFinite(x)) return String(x)\n return x.toFixed(4)\n}\n","import type { ReleaseConfidenceScorecard } from './release-confidence'\nimport { summaryTable } from './summary-report'\nimport type { RunRecord } from './run-record'\n\nexport interface RenderReleaseReportOptions {\n title?: string\n runs?: readonly RunRecord[]\n comparator?: string\n traceAnalystFindings?: readonly string[]\n nextActions?: readonly string[]\n}\n\nexport function renderReleaseReport(\n scorecard: ReleaseConfidenceScorecard,\n options: RenderReleaseReportOptions = {},\n): string {\n const title = options.title ?? `Release Report: ${scorecard.target}`\n const lines: string[] = []\n lines.push(`# ${title}`)\n lines.push('')\n lines.push(`Status: **${scorecard.status.toUpperCase()}**`)\n lines.push(`Promote: **${scorecard.promote ? 'yes' : 'no'}**`)\n if (scorecard.candidateId) lines.push(`Candidate: \\`${scorecard.candidateId}\\``)\n if (scorecard.baselineId) lines.push(`Baseline: \\`${scorecard.baselineId}\\``)\n lines.push('')\n lines.push(scorecard.summary)\n lines.push('')\n\n lines.push('## Metrics')\n lines.push('')\n lines.push('| Metric | Value |')\n lines.push('|---|---:|')\n lines.push(`| Scenarios | ${scorecard.metrics.scenarioCount} |`)\n lines.push(`| Search runs | ${scorecard.metrics.searchRuns} |`)\n lines.push(`| Holdout runs | ${scorecard.metrics.holdoutRuns} |`)\n lines.push(`| Pass rate | ${pct(scorecard.metrics.passRate)} |`)\n lines.push(`| Mean score | ${num(scorecard.metrics.meanScore)} |`)\n lines.push(`| Search mean | ${num(scorecard.metrics.searchMeanScore)} |`)\n lines.push(`| Holdout mean | ${num(scorecard.metrics.holdoutMeanScore)} |`)\n lines.push(`| Overfit gap | ${num(scorecard.metrics.overfitGap)} |`)\n lines.push(`| Mean cost | $${num(scorecard.metrics.meanCostUsd)} |`)\n lines.push(`| p95 wall time | ${Math.round(scorecard.metrics.p95WallMs)} ms |`)\n lines.push('')\n\n if (scorecard.issues.length > 0) {\n lines.push('## Issues')\n lines.push('')\n for (const issue of scorecard.issues) {\n lines.push(`- **${issue.severity}** \\`${issue.code}\\` (${issue.axis}): ${issue.detail}`)\n }\n lines.push('')\n }\n\n const surfaces = entries(scorecard.metrics.responsibleSurfaceCounts)\n if (surfaces.length > 0) {\n lines.push('## Responsible Surfaces')\n lines.push('')\n for (const [surface, count] of surfaces) lines.push(`- ${surface}: ${count}`)\n lines.push('')\n }\n\n const failures = entries(scorecard.metrics.failureModeCounts)\n if (failures.length > 0) {\n lines.push('## Failure Modes')\n lines.push('')\n for (const [mode, count] of failures) lines.push(`- ${mode}: ${count}`)\n lines.push('')\n }\n\n if (options.runs && options.runs.length > 0) {\n lines.push('## Run Summary')\n lines.push('')\n lines.push(summaryTable([...options.runs], {\n comparator: options.comparator ?? scorecard.baselineId ?? undefined,\n split: 'holdout',\n }).markdown)\n lines.push('')\n }\n\n if (options.traceAnalystFindings && options.traceAnalystFindings.length > 0) {\n lines.push('## TraceAnalyst Findings')\n lines.push('')\n for (const finding of options.traceAnalystFindings) lines.push(`- ${finding}`)\n lines.push('')\n }\n\n const nextActions = options.nextActions ?? defaultNextActions(scorecard)\n if (nextActions.length > 0) {\n lines.push('## Next Actions')\n lines.push('')\n for (const action of nextActions) lines.push(`- ${action}`)\n lines.push('')\n }\n\n return lines.join('\\n').trimEnd() + '\\n'\n}\n\nfunction defaultNextActions(scorecard: ReleaseConfidenceScorecard): string[] {\n if (scorecard.promote) return ['Promote the candidate and keep canaries enabled.']\n return scorecard.issues\n .filter((issue) => issue.severity === 'critical')\n .map((issue) => `Resolve ${issue.code}: ${issue.detail}`)\n}\n\nfunction entries(values: Record<string, number>): Array<[string, number]> {\n return Object.entries(values)\n .filter(([, count]) => count > 0)\n .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))\n}\n\nfunction pct(value: number): string {\n return Number.isFinite(value) ? `${(value * 100).toFixed(1)}%` : 'n/a'\n}\n\nfunction num(value: number): string {\n return Number.isFinite(value) ? value.toFixed(3) : 'n/a'\n}\n","/**\n * Bootstrap-CI promotion gate.\n *\n * In any iterative-improvement loop (GEPA, prompt evolution, dataset\n * curation), the question is \"did this generation actually improve, or are\n * we celebrating noise?\". With small N and noisy outcomes, point-estimate\n * deltas lie. Bootstrap confidence intervals tell the operator whether the\n * delta is real before code or prompts get promoted.\n *\n * This module is pure functions — no I/O, no model calls. Easy to unit-test\n * and to compose into any verdict gate.\n *\n * Default gate:\n * - Bootstrap mean baseline vs candidate (1k resamples).\n * - Compute the delta distribution; pass if the lower CI bound > 0.\n * - Tunable confidence (default 95%) and resample count.\n *\n * Verdict semantics intentionally match the existing `experiments.jsonl`\n * vocabulary:\n * - ADVANCE: candidate's CI lower bound > baseline mean (real win)\n * - KEEP: overlap, but candidate point estimate >= baseline (neutral)\n * - REVERT: candidate's CI upper bound < baseline mean (real regression)\n * - INCONCLUSIVE: not enough samples or CI straddles zero with no signal\n */\n\nexport type Verdict = 'ADVANCE' | 'KEEP' | 'REVERT' | 'INCONCLUSIVE'\n\nexport interface BootstrapResult {\n baselineMean: number\n candidateMean: number\n /** candidateMean - baselineMean, point estimate. */\n delta: number\n /** Lower bound of the (1 - alpha) CI on the delta. */\n ciLower: number\n /** Upper bound of the (1 - alpha) CI on the delta. */\n ciUpper: number\n /** Number of bootstrap resamples used. */\n iterations: number\n alpha: number\n verdict: Verdict\n}\n\nexport interface BootstrapOptions {\n /** Confidence level alpha (default 0.05 → 95% CI). */\n alpha?: number\n /** Number of resamples (default 1000). */\n iterations?: number\n /**\n * Minimum total samples (baseline + candidate) below which we always\n * return INCONCLUSIVE — bootstrap with too few samples is meaningless.\n * Default 6 (combined).\n */\n minTotalSamples?: number\n /** RNG seed for reproducibility. Default: Math.random. */\n seed?: number\n}\n\n/**\n * Compute the bootstrap CI on (candidateMean - baselineMean) and a verdict.\n *\n * Uses simple percentile bootstrap on the difference of resampled means.\n * That's the standard non-parametric primitive — no distributional\n * assumptions, robust to skew, easy to reason about.\n */\nexport function bootstrapCi(\n baseline: number[],\n candidate: number[],\n options: BootstrapOptions = {},\n): BootstrapResult {\n const alpha = options.alpha ?? 0.05\n const iterations = options.iterations ?? 1000\n const minTotal = options.minTotalSamples ?? 6\n const rng = mulberry32(options.seed ?? hashSeed(baseline, candidate))\n\n const baselineMean = mean(baseline)\n const candidateMean = mean(candidate)\n const delta = candidateMean - baselineMean\n\n if (baseline.length + candidate.length < minTotal || baseline.length === 0 || candidate.length === 0) {\n return {\n baselineMean,\n candidateMean,\n delta,\n ciLower: -Infinity,\n ciUpper: Infinity,\n iterations: 0,\n alpha,\n verdict: 'INCONCLUSIVE',\n }\n }\n\n const deltas: number[] = new Array(iterations)\n for (let i = 0; i < iterations; i++) {\n const bResample = resample(baseline, rng)\n const cResample = resample(candidate, rng)\n deltas[i] = mean(cResample) - mean(bResample)\n }\n deltas.sort((a, b) => a - b)\n const lowerIdx = Math.floor((alpha / 2) * iterations)\n const upperIdx = Math.floor((1 - alpha / 2) * iterations) - 1\n const ciLower = deltas[Math.max(0, lowerIdx)]!\n const ciUpper = deltas[Math.min(iterations - 1, upperIdx)]!\n\n let verdict: Verdict\n if (ciLower > 0) verdict = 'ADVANCE'\n else if (ciUpper < 0) verdict = 'REVERT'\n else if (delta >= 0) verdict = 'KEEP'\n else verdict = 'INCONCLUSIVE'\n\n return {\n baselineMean,\n candidateMean,\n delta,\n ciLower,\n ciUpper,\n iterations,\n alpha,\n verdict,\n }\n}\n\nfunction mean(xs: number[]): number {\n if (xs.length === 0) return 0\n let s = 0\n for (const x of xs) s += x\n return s / xs.length\n}\n\nfunction resample(xs: number[], rng: () => number): number[] {\n const out = new Array(xs.length)\n for (let i = 0; i < xs.length; i++) out[i] = xs[Math.floor(rng() * xs.length)]\n return out\n}\n\n/** Mulberry32 — fast deterministic PRNG. Stable across runs given the same seed. */\nfunction mulberry32(seed: number): () => number {\n let t = seed >>> 0\n return () => {\n t += 0x6d2b79f5\n let r = t\n r = Math.imul(r ^ (r >>> 15), r | 1)\n r ^= r + Math.imul(r ^ (r >>> 7), r | 61)\n return ((r ^ (r >>> 14)) >>> 0) / 4294967296\n }\n}\n\n/** Stable seed derived from the inputs — same data → same CI bounds. */\nfunction hashSeed(a: number[], b: number[]): number {\n let h = 2166136261\n for (const x of [...a, ...b]) {\n const view = new Float64Array([x])\n const bytes = new Uint8Array(view.buffer)\n for (const byte of bytes) {\n h ^= byte\n h = Math.imul(h, 16777619)\n }\n }\n return h >>> 0\n}\n\n/**\n * Judge-replay promotion gate.\n *\n * The cheap inner-loop judge that drives an evolution run is by definition\n * fast and noisy. When you're about to promote a winning variant to the\n * canonical default, you want a STRONGER judge (a more expensive model, a\n * human grader, a separately-trained reward model) to confirm the win\n * generalises beyond the inner loop.\n *\n * This helper takes raw winner + baseline outputs, scores both through the\n * stronger judge, and applies `bootstrapCi`. ADVANCE means the stronger\n * judge agrees the winner is real with the configured confidence. Doesn't\n * matter what shape your \"output\" is — pass a string, an object, anything\n * the judge can read.\n */\nexport interface JudgeReplayGateArgs<TOutput> {\n baselineOutputs: TOutput[]\n candidateOutputs: TOutput[]\n /** Stronger judge — async to allow LLM calls. Return a 0..N scalar score. */\n judge: (output: TOutput) => Promise<number> | number\n alpha?: number\n iterations?: number\n /** RNG seed for reproducibility. */\n seed?: number\n /** Maximum concurrent judge calls. Default 4. */\n judgeConcurrency?: number\n}\n\nexport async function judgeReplayGate<TOutput>(\n args: JudgeReplayGateArgs<TOutput>,\n): Promise<BootstrapResult & { baselineSamples: number; candidateSamples: number }> {\n const concurrency = args.judgeConcurrency ?? 4\n const baselineScores = await scoreAll(args.baselineOutputs, args.judge, concurrency)\n const candidateScores = await scoreAll(args.candidateOutputs, args.judge, concurrency)\n const ci = bootstrapCi(baselineScores, candidateScores, {\n ...(args.alpha !== undefined ? { alpha: args.alpha } : {}),\n ...(args.iterations !== undefined ? { iterations: args.iterations } : {}),\n ...(args.seed !== undefined ? { seed: args.seed } : {}),\n })\n return {\n ...ci,\n baselineSamples: baselineScores.length,\n candidateSamples: candidateScores.length,\n }\n}\n\nasync function scoreAll<TOutput>(\n outputs: TOutput[],\n judge: (output: TOutput) => Promise<number> | number,\n concurrency: number,\n): Promise<number[]> {\n const results: number[] = new Array(outputs.length)\n let next = 0\n async function worker(): Promise<void> {\n while (true) {\n const i = next++\n if (i >= outputs.length) return\n const v = await judge(outputs[i]!)\n results[i] = Number.isFinite(v) ? v : 0\n }\n }\n await Promise.all(Array.from({ length: Math.max(1, concurrency) }, () => worker()))\n return results\n}\n"],"mappings":";;;;;AA0HA,IAAM,qBAA4D;AAAA,EAChE,eAAe;AAAA,EACf,kBAAkB;AAAA,EAClB,eAAe;AAAA,EACf,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,aAAa;AAAA,EACb,cAAc;AAAA,EACd,eAAe;AAAA,EACf,gBAAgB,OAAO;AAAA,EACvB,cAAc,OAAO;AAAA,EACrB,uBAAuB;AAAA,EACvB,uBAAuB;AACzB;AAEO,SAAS,wCACd,QACwB;AACxB,SAAO,OAAO,IAAI,CAAC,WAAW;AAAA,IAC5B,YAAY,MAAM;AAAA,IAClB,aAAa,MAAM;AAAA,IACnB,OAAO,MAAM,UAAU,YAAY,YAAY,MAAM,UAAU,QAAQ,QAAQ;AAAA,IAC/E,OAAO,MAAM;AAAA,IACb,IAAI,MAAM;AAAA,IACV,WAAW,MAAM,QAAQ,MAAM,OAAO,KAAK,IAAI,MAAM,MAAM,MAAM,SAAS;AAAA,IAC1E,SAAS,MAAM;AAAA,IACf,YAAY,MAAM;AAAA,IAClB,aAAa,MAAM,QAAQ,kBAAkB;AAAA,IAC7C,KAAK,MAAM;AAAA,IACX,UAAU,MAAM;AAAA,EAClB,EAAE;AACJ;AAEO,SAAS,0BAA0B,OAA2D;AACnG,QAAM,aAAa,EAAE,GAAG,oBAAoB,GAAG,MAAM,WAAW;AAChE,QAAM,cAAc,MAAM,eAAe;AACzC,QAAM,OAAO,gBAAgB,MAAM,QAAQ,CAAC,GAAG,aAAa,MAAM,UAAU;AAC5E,QAAM,SAAS,qBAAqB,MAAM,UAAU,CAAC,GAAG,aAAa,MAAM,UAAU;AACrF,QAAM,YAAY,MAAM,aAAa,CAAC;AACtC,QAAM,gBAAgB,MAAM,SAAS,iBAAiB,UAAU;AAChE,QAAM,cAAc,MAAM,SAAS,eAAe,oBAAoB,SAAS;AAC/E,QAAM,eAAe,UAAU,MAAM,QAAQ;AAC7C,QAAM,gBAAgB,UAAU,MAAM,SAAS;AAC/C,QAAM,YAAY,CAAC,GAAG,cAAc,GAAG,aAAa;AACpD,QAAM,cAAc,OAAO,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,cAAc;AACpE,QAAM,gBAAgB,UAAU,SAAS,IAAI,YAAY;AACzD,QAAM,aAAa,KAAK,OAAO,CAAC,MAAM,EAAE,aAAa,QAAQ,EAAE;AAC/D,QAAM,cAAc,KAAK,OAAO,CAAC,MAAM,EAAE,aAAa,SAAS,EAAE;AACjE,QAAM,kBAAkB,KAAK,YAAY;AACzC,QAAM,mBAAmB,KAAK,aAAa;AAC3C,QAAM,UAAoC;AAAA,IACxC;AAAA,IACA;AAAA,IACA;AAAA,IACA,UAAU,SAAS,MAAM,QAAQ,WAAW,qBAAqB;AAAA,IACjE,WAAW,KAAK,aAAa;AAAA,IAC7B;AAAA,IACA;AAAA,IACA,YAAY,SAAS,iBAAiB,gBAAgB;AAAA,IACtD,aAAa,KAAK,CAAC,GAAG,KAAK,IAAI,CAAC,MAAM,EAAE,OAAO,GAAG,GAAG,OAAO,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,cAAc,CAAC,CAAC;AAAA,IACzG,WAAW,WAAW,CAAC,GAAG,KAAK,IAAI,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,OAAO,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,cAAc,CAAC,GAAG,IAAI;AAAA,IACrH,YAAY,WAAW,MAAM,QAAQ,WAAW,qBAAqB,EAAE;AAAA,IACvE,iBAAiB,WAAW,MAAM,QAAQ,WAAW,qBAAqB,EAAE,OAAO,CAAC,QAAQ,IAAI,MAAM,EAAE;AAAA,IACxG,kBAAkB,OAAO,OAAO,CAAC,MAAM,EAAE,cAAc,CAAC,EAAE;AAAA,IAC1D,iBAAiB,OAAO,OAAO,CAAC,OAAO,EAAE,aAAa,KAAK,CAAC,EAAE;AAAA,IAC9D;AAAA,IACA,cAAc,aAAa,SAAS;AAAA,IACpC,mBAAmB,kBAAkB,MAAM,QAAQ,WAAW,qBAAqB;AAAA,IACnF,0BAA0B,yBAAyB,MAAM;AAAA,EAC3D;AAEA,QAAM,SAAmC,CAAC;AAC1C,cAAY,OAAO,YAAY,SAAS,MAAM;AAC9C,eAAa,YAAY,SAAS,MAAM;AACxC,sBAAoB,MAAM,gBAAgB,MAAM,YAAY,SAAS,MAAM;AAC3E,mBAAiB,YAAY,SAAS,MAAM;AAC5C,kBAAgB,YAAY,SAAS,MAAM;AAE3C,QAAM,OAAO,UAAU,SAAS,YAAY,MAAM,gBAAgB,MAAM,MAAM;AAC9E,QAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU,IAAI,SAC3D,OAAO,SAAS,IAAI,SACpB;AAEJ,SAAO;AAAA,IACL,QAAQ,MAAM;AAAA,IACd;AAAA,IACA,YAAY,MAAM,cAAc;AAAA,IAChC;AAAA,IACA,SAAS,WAAW,WAAW,MAAM,eAAe,MAAM,aAAa,UAAU;AAAA,IACjF;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,MAAM,WAAW;AAAA,IAC1B,cAAc,MAAM,gBAAgB;AAAA,IACpC,SAAS,cAAc,MAAM,QAAQ,QAAQ,SAAS,MAAM;AAAA,EAC9D;AACF;AAEO,SAAS,wBAAwB,OAA2D;AACjG,QAAM,YAAY,0BAA0B,KAAK;AACjD,MAAI,UAAU,WAAW,QAAQ;AAC/B,UAAM,IAAI,MAAM,UAAU,OAAO;AAAA,EACnC;AACA,SAAO;AACT;AAEA,SAAS,gBACP,MACA,aACA,YACa;AACb,MAAI,YAAa,QAAO,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,WAAW;AACxE,MAAI,WAAY,QAAO,KAAK,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAU;AACtE,SAAO,CAAC,GAAG,IAAI;AACjB;AAEA,SAAS,qBACP,QACA,aACA,YACwB;AACxB,MAAI,YAAa,QAAO,OAAO,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAa,EAAE,gBAAgB,WAAW;AACzG,MAAI,WAAY,QAAO,OAAO,OAAO,CAAC,MAAM,EAAE,gBAAgB,UAAa,EAAE,gBAAgB,UAAU;AACvG,SAAO,CAAC,GAAG,MAAM;AACnB;AAEA,SAAS,YACP,OACA,YACA,SACA,QACM;AACN,MAAI,WAAW,iBAAiB,CAAC,MAAM,YAAY,MAAM,WAAW,UAAU,OAAO,GAAG;AACtF,WAAO,KAAK,EAAE,MAAM,UAAU,UAAU,YAAY,MAAM,kBAAkB,QAAQ,6CAA6C,CAAC;AAAA,EACpI;AACA,MAAI,QAAQ,gBAAgB,WAAW,kBAAkB;AACvD,WAAO,KAAK,EAAE,MAAM,UAAU,UAAU,YAAY,MAAM,iBAAiB,QAAQ,GAAG,QAAQ,aAAa,sBAAsB,WAAW,gBAAgB,IAAI,CAAC;AAAA,EACnK;AACA,MAAI,WAAW,kBAAkB,QAAQ,YAAY,YAAY,GAAG;AAClE,WAAO,KAAK,EAAE,MAAM,UAAU,UAAU,YAAY,MAAM,yBAAyB,QAAQ,mCAAmC,CAAC;AAAA,EACjI;AACF;AAEA,SAAS,aACP,YACA,SACA,QACM;AACN,MAAI,QAAQ,aAAa,WAAW,eAAe;AACjD,WAAO,KAAK,EAAE,MAAM,WAAW,UAAU,YAAY,MAAM,mBAAmB,QAAQ,GAAG,QAAQ,UAAU,wBAAwB,WAAW,aAAa,IAAI,CAAC;AAAA,EAClK;AACA,MAAI,QAAQ,WAAW,WAAW,aAAa;AAC7C,WAAO,KAAK,EAAE,MAAM,WAAW,UAAU,YAAY,MAAM,iBAAiB,QAAQ,YAAY,IAAI,QAAQ,QAAQ,CAAC,MAAM,IAAI,WAAW,WAAW,CAAC,IAAI,CAAC;AAAA,EAC7J;AACA,MAAI,QAAQ,YAAY,WAAW,cAAc;AAC/C,WAAO,KAAK,EAAE,MAAM,WAAW,UAAU,YAAY,MAAM,kBAAkB,QAAQ,aAAa,IAAI,QAAQ,SAAS,CAAC,MAAM,IAAI,WAAW,YAAY,CAAC,IAAI,CAAC;AAAA,EACjK;AACF;AAEA,SAAS,oBACP,cACA,YACA,SACA,QACM;AACN,MAAI,WAAW,kBAAkB,QAAQ,cAAc,WAAW,gBAAgB;AAChF,WAAO,KAAK,EAAE,MAAM,kBAAkB,UAAU,YAAY,MAAM,oBAAoB,QAAQ,GAAG,QAAQ,WAAW,yBAAyB,WAAW,cAAc,IAAI,CAAC;AAAA,EAC7K;AACA,MAAI,OAAO,SAAS,QAAQ,UAAU,KAAK,QAAQ,aAAa,WAAW,eAAe;AACxF,WAAO,KAAK,EAAE,MAAM,kBAAkB,UAAU,YAAY,MAAM,eAAe,QAAQ,sBAAsB,IAAI,QAAQ,UAAU,CAAC,MAAM,IAAI,WAAW,aAAa,CAAC,IAAI,CAAC;AAAA,EAChL;AACA,MAAI,gBAAgB,CAAC,aAAa,SAAS;AACzC,WAAO,KAAK,EAAE,MAAM,kBAAkB,UAAU,YAAY,MAAM,QAAQ,aAAa,iBAAiB,QAAQ,IAAI,QAAQ,aAAa,OAAO,CAAC;AAAA,EACnJ;AACF;AAEA,SAAS,iBACP,YACA,SACA,QACM;AACN,MAAI,CAAC,WAAW,sBAAuB;AACvC,MAAI,QAAQ,aAAa,QAAQ,iBAAiB;AAChD,WAAO,KAAK;AAAA,MACV,MAAM;AAAA,MACN,UAAU;AAAA,MACV,MAAM;AAAA,MACN,QAAQ,GAAG,QAAQ,aAAa,QAAQ,eAAe;AAAA,IACzD,CAAC;AAAA,EACH;AACF;AAEA,SAAS,gBACP,YACA,SACA,QACM;AACN,MAAI,QAAQ,cAAc,WAAW,gBAAgB;AACnD,WAAO,KAAK,EAAE,MAAM,cAAc,UAAU,YAAY,MAAM,eAAe,QAAQ,eAAe,IAAI,QAAQ,WAAW,CAAC,MAAM,IAAI,WAAW,cAAc,CAAC,IAAI,CAAC;AAAA,EACvK;AACA,MAAI,QAAQ,YAAY,WAAW,cAAc;AAC/C,WAAO,KAAK,EAAE,MAAM,cAAc,UAAU,YAAY,MAAM,kBAAkB,QAAQ,aAAa,IAAI,QAAQ,SAAS,CAAC,MAAM,IAAI,WAAW,YAAY,CAAC,IAAI,CAAC;AAAA,EACpK;AACF;AAEA,SAAS,UACP,SACA,YACA,cACA,QACyB;AACzB,SAAO;AAAA,IACL,KAAK,UAAU,QAAQ,QAAQ,QAAQ,gBAAgB,KAAK,IAAI,GAAG,WAAW,gBAAgB,CAAC,GAAG,GAAG,QAAQ,aAAa,uBAAuB,QAAQ,YAAY,OAAO,EAAE;AAAA,IAC9K,KAAK,WAAW,QAAQ,KAAK,IAAI,QAAQ,UAAU,QAAQ,SAAS,GAAG,YAAY,IAAI,QAAQ,QAAQ,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC,EAAE;AAAA,IAC9I,KAAK,kBAAkB,QAAQ,gBAAgB,CAAC,aAAa,UAAU,IAAI,SAAS,QAAQ,YAAY,WAAW,aAAa,GAAG,eAAe,QAAQ,WAAW,eAAe,IAAI,QAAQ,UAAU,CAAC,EAAE;AAAA,IAC7M,KAAK,eAAe,QAAQ,QAAQ,eAAe,IAAI,IAAI,QAAQ,kBAAkB,QAAQ,YAAY,mBAAmB,QAAQ,eAAe,IAAI,QAAQ,UAAU,EAAE;AAAA,IAC3K,KAAK,cAAc,QAAQ,gBAAgB,SAAS,UAAU,GAAG,eAAe,IAAI,QAAQ,WAAW,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC,EAAE;AAAA,EAChJ;AACF;AAEA,SAAS,KACP,MACA,QACA,OACA,QACuB;AACvB,QAAM,MAAM,OAAO,OAAO,CAAC,MAAM,EAAE,SAAS,IAAI;AAChD,QAAM,SAAS,IAAI,KAAK,CAAC,MAAM,EAAE,aAAa,UAAU,IAAI,SACxD,IAAI,SAAS,IAAI,SACjB;AACJ,SAAO,EAAE,MAAM,QAAQ,OAAO,QAAQ,KAAK,GAAG,OAAO;AACvD;AAEA,SAAS,oBAAoB,WAAqE;AAChG,QAAM,SAAuC,EAAE,OAAO,GAAG,KAAK,GAAG,MAAM,GAAG,SAAS,EAAE;AACrF,aAAW,YAAY,UAAW,QAAO,SAAS,SAAS,OAAO;AAClE,SAAO;AACT;AAEA,SAAS,aAAa,WAA+D;AACnF,QAAM,MAA8B,CAAC;AACrC,aAAW,YAAY,WAAW;AAChC,UAAM,SAAS,SAAS,MAAM,UAAU,SAAS,MAAM,YAAY;AACnE,QAAI,MAAM,KAAK,IAAI,MAAM,KAAK,KAAK;AAAA,EACrC;AACA,SAAO;AACT;AAEA,SAAS,kBACP,MACA,QACA,WACwB;AACxB,QAAM,MAA8B,CAAC;AACrC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,QAAI,IAAI,eAAgB,UAAU,UAAa,QAAQ,WAAY;AACjE,YAAM,OAAO,IAAI,eAAe;AAChC,UAAI,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AACA,aAAW,SAAS,QAAQ;AAC1B,QAAI,MAAM,eAAe,MAAM,OAAO,SAAU,MAAM,UAAU,UAAa,MAAM,QAAQ,WAAY;AACrG,YAAM,OAAO,MAAM,gBAAgB,MAAM,OAAO,QAAQ,WAAW;AACnE,UAAI,IAAI,KAAK,IAAI,IAAI,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,yBAAyB,QAAiE;AACjG,QAAM,MAA8B,CAAC;AACrC,aAAW,SAAS,QAAQ;AAC1B,eAAW,OAAO,MAAM,OAAO,CAAC,GAAG;AACjC,YAAM,UAAU,IAAI,sBAAsB;AAC1C,UAAI,OAAO,KAAK,IAAI,OAAO,KAAK,KAAK;AAAA,IACvC;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,WACP,MACA,QACA,WAC4B;AAC5B,QAAM,MAAkC,CAAC;AACzC,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,QAAI,IAAI,eAAgB,UAAU,UAAa,QAAQ,WAAY;AACjE,YAAM,YAAY,IAAI,QAAQ,IAAI;AAClC,UAAI,KAAK,EAAE,QAAQ,OAAO,cAAc,YAAY,YAAY,EAAE,CAAC;AAAA,IACrE;AAAA,EACF;AACA,aAAW,SAAS,QAAQ;AAC1B,QAAI,MAAM,eAAe,MAAM,OAAO,SAAU,MAAM,UAAU,UAAa,MAAM,QAAQ,WAAY;AACrG,UAAI,KAAK,EAAE,SAAS,MAAM,KAAK,UAAU,KAAK,EAAE,CAAC;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,SACP,MACA,QACA,WACQ;AACR,QAAM,WAAW;AAAA,IACf,GAAG,KAAK,IAAI,CAAC,QAAQ;AACnB,YAAM,QAAQ,IAAI,QAAQ,gBAAgB,IAAI,QAAQ;AACtD,aAAO,CAAC,IAAI,eAAe,UAAU,UAAa,SAAS;AAAA,IAC7D,CAAC;AAAA,IACD,GAAG,OAAO,IAAI,CAAC,UAAU,MAAM,OAAO,UAAU,MAAM,UAAU,UAAa,MAAM,SAAS,UAAU;AAAA,EACxG;AACA,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,SAAO,SAAS,OAAO,OAAO,EAAE,SAAS,SAAS;AACpD;AAEA,SAAS,UAAU,MAA4B,OAA8B;AAC3E,SAAO,KACJ,OAAO,CAAC,QAAQ,IAAI,aAAa,KAAK,EACtC,IAAI,CAAC,QAAQ,UAAU,YAAY,IAAI,QAAQ,eAAe,IAAI,QAAQ,WAAW,EACrF,OAAO,cAAc;AAC1B;AAEA,SAAS,KAAK,IAA+B;AAC3C,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC,IAAI,GAAG;AAChD;AAEA,SAAS,WAAW,IAAuB,GAAmB;AAC5D,MAAI,GAAG,WAAW,EAAG,QAAO,OAAO;AACnC,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,SAAO,OAAO,KAAK,IAAI,OAAO,SAAS,GAAG,KAAK,IAAI,GAAG,KAAK,KAAK,IAAI,OAAO,MAAM,IAAI,CAAC,CAAC,CAAC;AAC1F;AAEA,SAAS,eAAe,OAAiC;AACvD,SAAO,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAC3D;AAEA,SAAS,SAAS,GAAW,GAAmB;AAC9C,MAAI,CAAC,OAAO,SAAS,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO;AAC9D,SAAO,IAAI;AACb;AAEA,SAAS,SAAS,KAAa,QAAwB;AACrD,MAAI,CAAC,OAAO,SAAS,GAAG,EAAG,QAAO;AAClC,MAAI,UAAU,EAAG,QAAO,OAAO,IAAI,IAAI;AACvC,SAAO,QAAQ,IAAI,KAAK,IAAI,GAAG,GAAG,IAAI,MAAM;AAC9C;AAEA,SAAS,gBACP,SACA,YACQ;AACR,QAAM,OAAO,OAAO,SAAS,WAAW,cAAc,KAAK,OAAO,SAAS,QAAQ,WAAW,IAC1F,QAAQ,WAAW,iBAAiB,KAAK,IAAI,QAAQ,aAAa,KAAK,CAAC,IACxE;AACJ,QAAM,UAAU,OAAO,SAAS,WAAW,YAAY,KAAK,OAAO,SAAS,QAAQ,SAAS,IACzF,QAAQ,WAAW,eAAe,KAAK,IAAI,QAAQ,WAAW,KAAK,CAAC,IACpE;AACJ,SAAO,KAAK,IAAI,MAAM,OAAO;AAC/B;AAEA,SAAS,QAAQ,GAAmB;AAClC,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO;AAChC,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,CAAC,CAAC;AACnC;AAEA,SAAS,cACP,QACA,QACA,SACA,QACQ;AACR,QAAM,SAAS,sBAAsB,MAAM,KAAK,MAAM;AACtD,QAAM,aAAa,aAAa,QAAQ,aAAa,eAAe,QAAQ,UAAU,gBAAgB,QAAQ,WAAW,aAAa,IAAI,QAAQ,QAAQ,CAAC,cAAc,IAAI,QAAQ,SAAS,CAAC;AAC/L,MAAI,OAAO,WAAW,EAAG,QAAO,GAAG,MAAM,KAAK,UAAU;AACxD,SAAO,GAAG,MAAM,KAAK,UAAU,YAAY,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC;AAChF;AAEA,SAAS,IAAI,GAAmB;AAC9B,MAAI,CAAC,OAAO,SAAS,CAAC,EAAG,QAAO,OAAO,CAAC;AACxC,SAAO,EAAE,QAAQ,CAAC;AACpB;;;AC9eO,SAAS,oBACd,WACA,UAAsC,CAAC,GAC/B;AACR,QAAM,QAAQ,QAAQ,SAAS,mBAAmB,UAAU,MAAM;AAClE,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,KAAK,KAAK,EAAE;AACvB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,aAAa,UAAU,OAAO,YAAY,CAAC,IAAI;AAC1D,QAAM,KAAK,cAAc,UAAU,UAAU,QAAQ,IAAI,IAAI;AAC7D,MAAI,UAAU,YAAa,OAAM,KAAK,gBAAgB,UAAU,WAAW,IAAI;AAC/E,MAAI,UAAU,WAAY,OAAM,KAAK,eAAe,UAAU,UAAU,IAAI;AAC5E,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,UAAU,OAAO;AAC5B,QAAM,KAAK,EAAE;AAEb,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,oBAAoB;AAC/B,QAAM,KAAK,YAAY;AACvB,QAAM,KAAK,iBAAiB,UAAU,QAAQ,aAAa,IAAI;AAC/D,QAAM,KAAK,mBAAmB,UAAU,QAAQ,UAAU,IAAI;AAC9D,QAAM,KAAK,oBAAoB,UAAU,QAAQ,WAAW,IAAI;AAChE,QAAM,KAAK,iBAAiB,IAAI,UAAU,QAAQ,QAAQ,CAAC,IAAI;AAC/D,QAAM,KAAK,kBAAkB,IAAI,UAAU,QAAQ,SAAS,CAAC,IAAI;AACjE,QAAM,KAAK,mBAAmB,IAAI,UAAU,QAAQ,eAAe,CAAC,IAAI;AACxE,QAAM,KAAK,oBAAoB,IAAI,UAAU,QAAQ,gBAAgB,CAAC,IAAI;AAC1E,QAAM,KAAK,mBAAmB,IAAI,UAAU,QAAQ,UAAU,CAAC,IAAI;AACnE,QAAM,KAAK,kBAAkB,IAAI,UAAU,QAAQ,WAAW,CAAC,IAAI;AACnE,QAAM,KAAK,qBAAqB,KAAK,MAAM,UAAU,QAAQ,SAAS,CAAC,OAAO;AAC9E,QAAM,KAAK,EAAE;AAEb,MAAI,UAAU,OAAO,SAAS,GAAG;AAC/B,UAAM,KAAK,WAAW;AACtB,UAAM,KAAK,EAAE;AACb,eAAW,SAAS,UAAU,QAAQ;AACpC,YAAM,KAAK,OAAO,MAAM,QAAQ,QAAQ,MAAM,IAAI,OAAO,MAAM,IAAI,MAAM,MAAM,MAAM,EAAE;AAAA,IACzF;AACA,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,WAAW,QAAQ,UAAU,QAAQ,wBAAwB;AACnE,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,yBAAyB;AACpC,UAAM,KAAK,EAAE;AACb,eAAW,CAAC,SAAS,KAAK,KAAK,SAAU,OAAM,KAAK,KAAK,OAAO,KAAK,KAAK,EAAE;AAC5E,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,WAAW,QAAQ,UAAU,QAAQ,iBAAiB;AAC5D,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,kBAAkB;AAC7B,UAAM,KAAK,EAAE;AACb,eAAW,CAAC,MAAM,KAAK,KAAK,SAAU,OAAM,KAAK,KAAK,IAAI,KAAK,KAAK,EAAE;AACtE,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,QAAQ,QAAQ,QAAQ,KAAK,SAAS,GAAG;AAC3C,UAAM,KAAK,gBAAgB;AAC3B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,aAAa,CAAC,GAAG,QAAQ,IAAI,GAAG;AAAA,MACzC,YAAY,QAAQ,cAAc,UAAU,cAAc;AAAA,MAC1D,OAAO;AAAA,IACT,CAAC,EAAE,QAAQ;AACX,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,MAAI,QAAQ,wBAAwB,QAAQ,qBAAqB,SAAS,GAAG;AAC3E,UAAM,KAAK,0BAA0B;AACrC,UAAM,KAAK,EAAE;AACb,eAAW,WAAW,QAAQ,qBAAsB,OAAM,KAAK,KAAK,OAAO,EAAE;AAC7E,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,QAAM,cAAc,QAAQ,eAAe,mBAAmB,SAAS;AACvE,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,KAAK,iBAAiB;AAC5B,UAAM,KAAK,EAAE;AACb,eAAW,UAAU,YAAa,OAAM,KAAK,KAAK,MAAM,EAAE;AAC1D,UAAM,KAAK,EAAE;AAAA,EACf;AAEA,SAAO,MAAM,KAAK,IAAI,EAAE,QAAQ,IAAI;AACtC;AAEA,SAAS,mBAAmB,WAAiD;AAC3E,MAAI,UAAU,QAAS,QAAO,CAAC,kDAAkD;AACjF,SAAO,UAAU,OACd,OAAO,CAAC,UAAU,MAAM,aAAa,UAAU,EAC/C,IAAI,CAAC,UAAU,WAAW,MAAM,IAAI,KAAK,MAAM,MAAM,EAAE;AAC5D;AAEA,SAAS,QAAQ,QAAyD;AACxE,SAAO,OAAO,QAAQ,MAAM,EACzB,OAAO,CAAC,CAAC,EAAE,KAAK,MAAM,QAAQ,CAAC,EAC/B,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC;AAC3D;AAEA,SAAS,IAAI,OAAuB;AAClC,SAAO,OAAO,SAAS,KAAK,IAAI,IAAI,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM;AACnE;AAEA,SAAS,IAAI,OAAuB;AAClC,SAAO,OAAO,SAAS,KAAK,IAAI,MAAM,QAAQ,CAAC,IAAI;AACrD;;;ACpDO,SAAS,YACd,UACA,WACA,UAA4B,CAAC,GACZ;AACjB,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,WAAW,QAAQ,mBAAmB;AAC5C,QAAM,MAAM,WAAW,QAAQ,QAAQ,SAAS,UAAU,SAAS,CAAC;AAEpE,QAAM,eAAeA,MAAK,QAAQ;AAClC,QAAM,gBAAgBA,MAAK,SAAS;AACpC,QAAM,QAAQ,gBAAgB;AAE9B,MAAI,SAAS,SAAS,UAAU,SAAS,YAAY,SAAS,WAAW,KAAK,UAAU,WAAW,GAAG;AACpG,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,SAAS;AAAA,MACT,YAAY;AAAA,MACZ;AAAA,MACA,SAAS;AAAA,IACX;AAAA,EACF;AAEA,QAAM,SAAmB,IAAI,MAAM,UAAU;AAC7C,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK;AACnC,UAAM,YAAY,SAAS,UAAU,GAAG;AACxC,UAAM,YAAY,SAAS,WAAW,GAAG;AACzC,WAAO,CAAC,IAAIA,MAAK,SAAS,IAAIA,MAAK,SAAS;AAAA,EAC9C;AACA,SAAO,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3B,QAAM,WAAW,KAAK,MAAO,QAAQ,IAAK,UAAU;AACpD,QAAM,WAAW,KAAK,OAAO,IAAI,QAAQ,KAAK,UAAU,IAAI;AAC5D,QAAM,UAAU,OAAO,KAAK,IAAI,GAAG,QAAQ,CAAC;AAC5C,QAAM,UAAU,OAAO,KAAK,IAAI,aAAa,GAAG,QAAQ,CAAC;AAEzD,MAAI;AACJ,MAAI,UAAU,EAAG,WAAU;AAAA,WAClB,UAAU,EAAG,WAAU;AAAA,WACvB,SAAS,EAAG,WAAU;AAAA,MAC1B,WAAU;AAEf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEA,SAASA,MAAK,IAAsB;AAClC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,MAAI,IAAI;AACR,aAAW,KAAK,GAAI,MAAK;AACzB,SAAO,IAAI,GAAG;AAChB;AAEA,SAAS,SAAS,IAAc,KAA6B;AAC3D,QAAM,MAAM,IAAI,MAAM,GAAG,MAAM;AAC/B,WAAS,IAAI,GAAG,IAAI,GAAG,QAAQ,IAAK,KAAI,CAAC,IAAI,GAAG,KAAK,MAAM,IAAI,IAAI,GAAG,MAAM,CAAC;AAC7E,SAAO;AACT;AAGA,SAAS,WAAW,MAA4B;AAC9C,MAAI,IAAI,SAAS;AACjB,SAAO,MAAM;AACX,SAAK;AACL,QAAI,IAAI;AACR,QAAI,KAAK,KAAK,IAAK,MAAM,IAAK,IAAI,CAAC;AACnC,SAAK,IAAI,KAAK,KAAK,IAAK,MAAM,GAAI,IAAI,EAAE;AACxC,aAAS,IAAK,MAAM,QAAS,KAAK;AAAA,EACpC;AACF;AAGA,SAAS,SAAS,GAAa,GAAqB;AAClD,MAAI,IAAI;AACR,aAAW,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG;AAC5B,UAAM,OAAO,IAAI,aAAa,CAAC,CAAC,CAAC;AACjC,UAAM,QAAQ,IAAI,WAAW,KAAK,MAAM;AACxC,eAAW,QAAQ,OAAO;AACxB,WAAK;AACL,UAAI,KAAK,KAAK,GAAG,QAAQ;AAAA,IAC3B;AAAA,EACF;AACA,SAAO,MAAM;AACf;AA8BA,eAAsB,gBACpB,MACkF;AAClF,QAAM,cAAc,KAAK,oBAAoB;AAC7C,QAAM,iBAAiB,MAAM,SAAS,KAAK,iBAAiB,KAAK,OAAO,WAAW;AACnF,QAAM,kBAAkB,MAAM,SAAS,KAAK,kBAAkB,KAAK,OAAO,WAAW;AACrF,QAAM,KAAK,YAAY,gBAAgB,iBAAiB;AAAA,IACtD,GAAI,KAAK,UAAU,SAAY,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,IACxD,GAAI,KAAK,eAAe,SAAY,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,IACvE,GAAI,KAAK,SAAS,SAAY,EAAE,MAAM,KAAK,KAAK,IAAI,CAAC;AAAA,EACvD,CAAC;AACD,SAAO;AAAA,IACL,GAAG;AAAA,IACH,iBAAiB,eAAe;AAAA,IAChC,kBAAkB,gBAAgB;AAAA,EACpC;AACF;AAEA,eAAe,SACb,SACA,OACA,aACmB;AACnB,QAAM,UAAoB,IAAI,MAAM,QAAQ,MAAM;AAClD,MAAI,OAAO;AACX,iBAAe,SAAwB;AACrC,WAAO,MAAM;AACX,YAAM,IAAI;AACV,UAAI,KAAK,QAAQ,OAAQ;AACzB,YAAM,IAAI,MAAM,MAAM,QAAQ,CAAC,CAAE;AACjC,cAAQ,CAAC,IAAI,OAAO,SAAS,CAAC,IAAI,IAAI;AAAA,IACxC;AAAA,EACF;AACA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,GAAG,WAAW,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AAClF,SAAO;AACT;","names":["mean"]}