@tangle-network/agent-eval 0.20.12 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +177 -0
- package/README.md +43 -1
- package/dist/{chunk-KWUAAIHR.js → chunk-4W4NCYM2.js} +182 -1
- package/dist/chunk-4W4NCYM2.js.map +1 -0
- package/dist/{chunk-PKCVBYTQ.js → chunk-5IIQKMD5.js} +38 -2
- package/dist/chunk-5IIQKMD5.js.map +1 -0
- package/dist/{chunk-HNJLMAJ2.js → chunk-6KQG5HAH.js} +2 -2
- package/dist/chunk-6M774GY6.js +53 -0
- package/dist/chunk-6M774GY6.js.map +1 -0
- package/dist/{chunk-MCMV7DUL.js → chunk-ARZ6BEV6.js} +2 -2
- package/dist/chunk-IOXMGMHQ.js +1226 -0
- package/dist/chunk-IOXMGMHQ.js.map +1 -0
- package/dist/{chunk-75MCTH7P.js → chunk-KAO3Q65R.js} +198 -3
- package/dist/chunk-KAO3Q65R.js.map +1 -0
- package/dist/chunk-QUKKGHTZ.js +121 -0
- package/dist/chunk-QUKKGHTZ.js.map +1 -0
- package/dist/chunk-SQQLHODJ.js +163 -0
- package/dist/chunk-SQQLHODJ.js.map +1 -0
- package/dist/{chunk-IKFVX537.js → chunk-UAND2LOT.js} +232 -211
- package/dist/chunk-UAND2LOT.js.map +1 -0
- package/dist/{chunk-HKYRWNHV.js → chunk-USHQBPMH.js} +283 -7
- package/dist/chunk-USHQBPMH.js.map +1 -0
- package/dist/cli.js +3 -2
- package/dist/cli.js.map +1 -1
- package/dist/{control-C8NKbF3w.d.ts → control-cxwMOAsy.d.ts} +3 -2
- package/dist/control.d.ts +4 -3
- package/dist/control.js +2 -2
- package/dist/emitter-B2XqDKFU.d.ts +121 -0
- package/dist/{feedback-trajectory-BGQ_ANCN.d.ts → feedback-trajectory-CB0A32o3.d.ts} +2 -1
- package/dist/index.d.ts +16 -302
- package/dist/index.js +70 -62
- package/dist/index.js.map +1 -1
- package/dist/integrity-K2oVlF57.d.ts +210 -0
- package/dist/openapi.json +1 -1
- package/dist/optimization-UVDNKaO6.d.ts +574 -0
- package/dist/optimization.d.ts +7 -144
- package/dist/optimization.js +9 -2
- package/dist/reporting-B82RSv9C.d.ts +593 -0
- package/dist/reporting.d.ts +5 -426
- package/dist/reporting.js +17 -6
- package/dist/{emitter-BYO2nSDA.d.ts → store-u47QaJ9G.d.ts} +1 -91
- package/dist/{multi-shot-optimization-Bvtz294B.d.ts → summary-report-D4p7RlDu.d.ts} +381 -1
- package/dist/traces.d.ts +179 -3
- package/dist/traces.js +35 -4
- package/dist/wire/index.js +3 -2
- package/docs/research-report-methodology.md +170 -0
- package/docs/wire-protocol.md +1 -1
- package/package.json +11 -13
- package/dist/chunk-75MCTH7P.js.map +0 -1
- package/dist/chunk-HKYRWNHV.js.map +0 -1
- package/dist/chunk-IKFVX537.js.map +0 -1
- package/dist/chunk-KWUAAIHR.js.map +0 -1
- package/dist/chunk-ODFINDLQ.js +0 -413
- package/dist/chunk-ODFINDLQ.js.map +0 -1
- package/dist/chunk-PKCVBYTQ.js.map +0 -1
- /package/dist/{chunk-HNJLMAJ2.js.map → chunk-6KQG5HAH.js.map} +0 -0
- /package/dist/{chunk-MCMV7DUL.js.map → chunk-ARZ6BEV6.js.map} +0 -0
package/dist/traces.js
CHANGED
|
@@ -7,6 +7,8 @@ import {
|
|
|
7
7
|
OTEL_AGENT_EVAL_SCOPE,
|
|
8
8
|
OtlpFileTraceStore,
|
|
9
9
|
REDACTION_VERSION,
|
|
10
|
+
ReplayCache,
|
|
11
|
+
ReplayCacheMissError,
|
|
10
12
|
SpanNotFoundError,
|
|
11
13
|
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
12
14
|
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
@@ -21,6 +23,7 @@ import {
|
|
|
21
23
|
buildTraceAnalystTools,
|
|
22
24
|
buildTraceInsightContext,
|
|
23
25
|
buildTraceInsightPrompt,
|
|
26
|
+
createReplayFetch,
|
|
24
27
|
defaultTraceInsightPanel,
|
|
25
28
|
describeTraceInsightScope,
|
|
26
29
|
domainEvidencePattern,
|
|
@@ -32,6 +35,7 @@ import {
|
|
|
32
35
|
isRetrievalSpan,
|
|
33
36
|
isSandboxSpan,
|
|
34
37
|
isToolSpan,
|
|
38
|
+
iterateRawCalls,
|
|
35
39
|
judgeSpans,
|
|
36
40
|
llmSpans,
|
|
37
41
|
planTraceInsightQuestions,
|
|
@@ -42,22 +46,42 @@ import {
|
|
|
42
46
|
scoreTraceInsightReadiness,
|
|
43
47
|
tokenizeDomainWords,
|
|
44
48
|
toolSpans,
|
|
45
|
-
traceAnalystFunctionGroup
|
|
46
|
-
|
|
49
|
+
traceAnalystFunctionGroup,
|
|
50
|
+
traceAnalystOnRunComplete
|
|
51
|
+
} from "./chunk-4W4NCYM2.js";
|
|
52
|
+
import {
|
|
53
|
+
RunIntegrityError,
|
|
54
|
+
assertRunCaptured,
|
|
55
|
+
throwIfRunIncomplete
|
|
56
|
+
} from "./chunk-QUKKGHTZ.js";
|
|
47
57
|
import {
|
|
48
58
|
TraceEmitter,
|
|
49
59
|
llmSpanFromProvider
|
|
50
|
-
} from "./chunk-PKCVBYTQ.js";
|
|
60
|
+
} from "./chunk-5IIQKMD5.js";
|
|
61
|
+
import "./chunk-6M774GY6.js";
|
|
62
|
+
import {
|
|
63
|
+
FileSystemRawProviderSink,
|
|
64
|
+
InMemoryRawProviderSink,
|
|
65
|
+
NoopRawProviderSink,
|
|
66
|
+
defaultProviderRedactor,
|
|
67
|
+
providerFromBaseUrl
|
|
68
|
+
} from "./chunk-SQQLHODJ.js";
|
|
51
69
|
import "./chunk-PZ5AY32C.js";
|
|
52
70
|
export {
|
|
53
71
|
DEFAULT_REDACTION_RULES,
|
|
54
72
|
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
55
73
|
FAILURE_CLASSES,
|
|
74
|
+
FileSystemRawProviderSink,
|
|
56
75
|
FileSystemTraceStore,
|
|
76
|
+
InMemoryRawProviderSink,
|
|
57
77
|
InMemoryTraceStore,
|
|
78
|
+
NoopRawProviderSink,
|
|
58
79
|
OTEL_AGENT_EVAL_SCOPE,
|
|
59
80
|
OtlpFileTraceStore,
|
|
60
81
|
REDACTION_VERSION,
|
|
82
|
+
ReplayCache,
|
|
83
|
+
ReplayCacheMissError,
|
|
84
|
+
RunIntegrityError,
|
|
61
85
|
SpanNotFoundError,
|
|
62
86
|
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
63
87
|
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
@@ -70,9 +94,12 @@ export {
|
|
|
70
94
|
aggregateLlm,
|
|
71
95
|
analyzeTraces,
|
|
72
96
|
argHash,
|
|
97
|
+
assertRunCaptured,
|
|
73
98
|
buildTraceAnalystTools,
|
|
74
99
|
buildTraceInsightContext,
|
|
75
100
|
buildTraceInsightPrompt,
|
|
101
|
+
createReplayFetch,
|
|
102
|
+
defaultProviderRedactor,
|
|
76
103
|
defaultTraceInsightPanel,
|
|
77
104
|
describeTraceInsightScope,
|
|
78
105
|
domainEvidencePattern,
|
|
@@ -84,17 +111,21 @@ export {
|
|
|
84
111
|
isRetrievalSpan,
|
|
85
112
|
isSandboxSpan,
|
|
86
113
|
isToolSpan,
|
|
114
|
+
iterateRawCalls,
|
|
87
115
|
judgeSpans,
|
|
88
116
|
llmSpanFromProvider,
|
|
89
117
|
llmSpans,
|
|
90
118
|
planTraceInsightQuestions,
|
|
119
|
+
providerFromBaseUrl,
|
|
91
120
|
redactString,
|
|
92
121
|
redactValue,
|
|
93
122
|
runFailureClass,
|
|
94
123
|
runsForScenario,
|
|
95
124
|
scoreTraceInsightReadiness,
|
|
125
|
+
throwIfRunIncomplete,
|
|
96
126
|
tokenizeDomainWords,
|
|
97
127
|
toolSpans,
|
|
98
|
-
traceAnalystFunctionGroup
|
|
128
|
+
traceAnalystFunctionGroup,
|
|
129
|
+
traceAnalystOnRunComplete
|
|
99
130
|
};
|
|
100
131
|
//# sourceMappingURL=traces.js.map
|
package/dist/wire/index.js
CHANGED
|
@@ -24,8 +24,9 @@ import {
|
|
|
24
24
|
runRpcBatch,
|
|
25
25
|
runRpcOnce,
|
|
26
26
|
startServer
|
|
27
|
-
} from "../chunk-HNJLMAJ2.js";
|
|
28
|
-
import "../chunk-75MCTH7P.js";
|
|
27
|
+
} from "../chunk-6KQG5HAH.js";
|
|
28
|
+
import "../chunk-KAO3Q65R.js";
|
|
29
|
+
import "../chunk-SQQLHODJ.js";
|
|
29
30
|
import "../chunk-PZ5AY32C.js";
|
|
30
31
|
export {
|
|
31
32
|
BUILTIN_RUBRICS,
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# researchReport — methodology
|
|
2
|
+
|
|
3
|
+
This document is the methodological brief for `researchReport` (exported from
|
|
4
|
+
`@tangle-network/agent-eval` and `@tangle-network/agent-eval/reporting`). It
|
|
5
|
+
exists so a launch reviewer, peer reviewer, or auditor can quickly verify that
|
|
6
|
+
the verdict embedded in any rendered report is defensible, reproducible, and
|
|
7
|
+
appropriate to the data.
|
|
8
|
+
|
|
9
|
+
The companion code is `src/summary-report.ts`. Each item below names the
|
|
10
|
+
corresponding function or option so the doc and the code don't drift.
|
|
11
|
+
|
|
12
|
+
## Inputs
|
|
13
|
+
|
|
14
|
+
- `runs: RunRecord[]` — every record carries `runId`, `candidateId`, `seed`,
|
|
15
|
+
`experimentId`, `splitTag`, and an `outcome` with the configured score.
|
|
16
|
+
- `comparator: string` — the candidate id treated as the null reference. Must
|
|
17
|
+
be selected before data inspection; `preregistrationHash` should pin this.
|
|
18
|
+
- `split: 'search' | 'holdout'` — defaults to `holdout`. Decisions on `search`
|
|
19
|
+
are descriptive only; promotion calls require the holdout.
|
|
20
|
+
- `rope: { low, high }` — Region of Practical Equivalence on the paired delta,
|
|
21
|
+
in score units. Must come from the domain owner — there is no
|
|
22
|
+
statistically-defensible default.
|
|
23
|
+
- `minPairs` (soft floor, default 20) and `RESEARCH_REPORT_HARD_PAIR_FLOOR`
|
|
24
|
+
(hard floor, 6). Below the soft floor, the verdict is `needs_more_data` and
|
|
25
|
+
the report carries the MDE at the current N.
|
|
26
|
+
- `fdr` (default 0.05), `confidence` (default 0.95), `mdePower` (default 0.8),
|
|
27
|
+
`mdeAlpha` (default = `fdr`).
|
|
28
|
+
|
|
29
|
+
## Pairing
|
|
30
|
+
|
|
31
|
+
Pairs are joined by `(experimentId, seed)` so the comparator and candidate
|
|
32
|
+
share scenario *and* seed. This is the same join `gainHistogram` uses; see
|
|
33
|
+
`pairScoresByKey` in `src/summary-report.ts`. Records on the wrong split or
|
|
34
|
+
with non-finite scores are dropped before pairing.
|
|
35
|
+
|
|
36
|
+
## Decision rule
|
|
37
|
+
|
|
38
|
+
In order — first match wins:
|
|
39
|
+
|
|
40
|
+
1. `comparator` itself → `hold` (baseline).
|
|
41
|
+
2. No comparator → `hold` if on the cost/quality Pareto frontier, else
|
|
42
|
+
`needs_more_data`. The verdict is descriptive, not causal.
|
|
43
|
+
3. Held-out gate verdict ≠ `promote` → `reject`. The gate is *necessary but
|
|
44
|
+
not sufficient*; even a `promote` gate must clear the paired test below.
|
|
45
|
+
4. Paired N < `RESEARCH_REPORT_HARD_PAIR_FLOOR` → `needs_more_data` with a
|
|
46
|
+
"below hard floor" reason. Bootstrap CIs degenerate at this size.
|
|
47
|
+
5. ROPE configured AND paired-delta CI ⊂ ROPE → `equivalent`.
|
|
48
|
+
6. Paired-delta CI upper bound < 0 → `reject` (CI excludes a non-negative
|
|
49
|
+
effect). Note: this uses **paired delta only** — not the marginal mean.
|
|
50
|
+
7. Paired N < `minPairs` (soft floor) → `needs_more_data` with the MDE at
|
|
51
|
+
current N attached so the verdict is actionable.
|
|
52
|
+
8. BH-adjusted q ≤ `fdr` AND CI lower bound > 0 → `promote`. The BH q-value
|
|
53
|
+
controls FDR across all candidates in the same sweep; the bootstrap CI
|
|
54
|
+
provides an effect-size guarantee independent of the test.
|
|
55
|
+
9. Otherwise → `hold`.
|
|
56
|
+
|
|
57
|
+
## Statistical primitives used
|
|
58
|
+
|
|
59
|
+
| Quantity | Function | Source file |
|
|
60
|
+
|---|---|---|
|
|
61
|
+
| Marginal CI on score mean | `confidenceInterval` | `statistics.ts` |
|
|
62
|
+
| Cohen's d vs comparator | `cohensD` | `statistics.ts` |
|
|
63
|
+
| Wilcoxon signed-rank (paired) | `wilcoxonSignedRank` | `statistics.ts` |
|
|
64
|
+
| BH-FDR q-values | `benjaminiHochberg` | `power-analysis.ts` |
|
|
65
|
+
| Paired bootstrap CI on median delta | `pairedBootstrap` | `paired-stats.ts` |
|
|
66
|
+
| Bayesian-bootstrap-style Pr(Δ>0), Pr(Δ∈ROPE) | `bootstrapMeanSamples` | `summary-report.ts` (private) |
|
|
67
|
+
| Minimum detectable paired effect | `pairedMde` | `power-analysis.ts` |
|
|
68
|
+
| Run fingerprint | `hashJson(canonicalize(...))` | `pre-registration.ts` |
|
|
69
|
+
|
|
70
|
+
The Pr(Δ>0) and Pr(Δ∈ROPE) summaries use the bootstrap-prior duality of
|
|
71
|
+
[Rubin 1981]: under a non-informative Dirichlet prior, the bootstrap
|
|
72
|
+
distribution of a sample statistic approximates its posterior. We expose these as
|
|
73
|
+
posterior summaries on the **mean** delta and the bootstrap CI on the
|
|
74
|
+
**median** delta — the median is more robust to the heavy-tailed score
|
|
75
|
+
distributions seen in agent benchmarks; the mean lets us read off the
|
|
76
|
+
Bayesian-style probability of superiority in a single number.
|
|
77
|
+
|
|
78
|
+
## MDE
|
|
79
|
+
|
|
80
|
+
The minimum detectable paired effect at N pairs, two-sided α, and power β (β denotes the power itself, e.g. 0.8 — not the type-II error rate):
|
|
81
|
+
|
|
82
|
+
$$d_\text{min} = \frac{z_{1-\alpha/2} + z_\beta}{\sqrt{N}}$$
|
|
83
|
+
|
|
84
|
+
reported on the standardised scale, then multiplied by the observed paired-
|
|
85
|
+
delta SD to get the MDE in score units. Consumers reading a `needs_more_data`
|
|
86
|
+
verdict can use the MDE to budget the next round of runs:
|
|
87
|
+
|
|
88
|
+
- Observed paired SD = 0.10 score units, paired N = 20, α = 0.05, β = 0.8 →
|
|
89
|
+
d_min ≈ 0.63 standardised → MDE ≈ 0.063 score units. If the smallest
|
|
90
|
+
effect that would change a launch decision is below this, run more pairs.
|
|
91
|
+
|
|
92
|
+
## Provenance
|
|
93
|
+
|
|
94
|
+
Every report carries:
|
|
95
|
+
|
|
96
|
+
- `runFingerprint`: SHA-256 over the canonicalised list of
|
|
97
|
+
`(runId, candidateId, splitTag)` triples (sorted by runId), plus the
|
|
98
|
+
comparator id and split. Same `(runs, comparator, split)` produces the same
|
|
99
|
+
fingerprint regardless of input order.
|
|
100
|
+
- `preregistrationHash`: the caller passes the hash of a signed
|
|
101
|
+
`HypothesisManifest` (see `pre-registration.ts`). The fingerprint and the
|
|
102
|
+
preregistration hash together let a reader verify both *what data the
|
|
103
|
+
report saw* and *what protocol it was supposed to run.*
|
|
104
|
+
|
|
105
|
+
Reports without a `preregistrationHash` carry a "post-hoc" warning in the
|
|
106
|
+
risks list and the executive summary. Treat them as descriptive only.
|
|
107
|
+
|
|
108
|
+
## Alternatives considered
|
|
109
|
+
|
|
110
|
+
- **Paired t-test instead of Wilcoxon + bootstrap.** Rejected: agent score
|
|
111
|
+
distributions are heavy-tailed (judges saturate near 0 and 1) and the t
|
|
112
|
+
approximation breaks down with the small N typical of holdouts.
|
|
113
|
+
- **Unpaired Mann–Whitney.** Rejected: matched scenarios make pairing free,
|
|
114
|
+
and unpaired tests throw away the variance reduction. Use the paired test
|
|
115
|
+
by default.
|
|
116
|
+
- **Sequential / always-valid inference (e-values, alpha-spending).**
|
|
117
|
+
**Shipped in 0.22.** `pairedEvalueSequence` and
|
|
118
|
+
`evaluateInterimReleaseConfidence` provide time-uniform inference using
|
|
119
|
+
the predictable plug-in betting martingale (Waudby-Smith & Ramdas 2024)
|
|
120
|
+
paired with the empirical Bernstein confidence sequence (Howard et al.
|
|
121
|
+
2021). For *rolling* analyses (interim looks at a campaign that's still
|
|
122
|
+
accumulating data) call those primitives directly; `researchReport`
|
|
123
|
+
remains the single-look summary. Paper-grade pre-registration covers the
|
|
124
|
+
static analysis; the sequential primitives cover the iterative one.
|
|
125
|
+
- **Hierarchical Bayesian shrinkage across many candidates.** Future work.
|
|
126
|
+
The current ranking is on raw paired statistics and over-credits the top
|
|
127
|
+
candidate when many are tested. A Bayesian hierarchical model with a
|
|
128
|
+
weakly informative prior would shrink each variant toward the grand mean,
|
|
129
|
+
reducing rank flips between near-tied candidates.
|
|
130
|
+
- **Calibration / coverage simulation on the bootstrap CI.** Future work; we
|
|
131
|
+
rely on the asymptotic guarantee plus the hard pair floor to keep coverage
|
|
132
|
+
reasonable.
|
|
133
|
+
- **Outcome-anchored calibration.** **Shipped in 0.22.**
|
|
134
|
+
`rubricPredictiveValidity` joins `RunRecord`s to a `DeploymentOutcomeStore`
|
|
135
|
+
and reports per-rubric Spearman against deployment outcomes (revenue,
|
|
136
|
+
retention, CSAT, …). Combined with the static methodology in this
|
|
137
|
+
document, the loop is: pre-register → measure with `researchReport` →
|
|
138
|
+
ship → observe outcomes → recalibrate rubric weights with
|
|
139
|
+
`rubricPredictiveValidity`.
|
|
140
|
+
|
|
141
|
+
## When NOT to apply
|
|
142
|
+
|
|
143
|
+
- Paired N below the hard floor (6) on any candidate.
|
|
144
|
+
- Comparator chosen by inspecting the data (post-hoc selection inflates
|
|
145
|
+
false-discovery rates beyond the BH guarantee).
|
|
146
|
+
- Mid-run distribution shift: judge model swap, rubric change, infrastructure
|
|
147
|
+
outage. Pair exchangeability is violated and the bootstrap is not valid.
|
|
148
|
+
- Scenarios drawn non-randomly from a stream the candidate can influence
|
|
149
|
+
(data-leak across runs). The pairing is no longer ignorable.
|
|
150
|
+
- Highly skewed cost distributions: the Pareto frontier still works but the
|
|
151
|
+
marginal CI on cost may be misleading.
|
|
152
|
+
|
|
153
|
+
## Citations
|
|
154
|
+
|
|
155
|
+
- Benjamini, Y. & Hochberg, Y. (1995). Controlling the false discovery rate:
|
|
156
|
+
a practical and powerful approach to multiple testing. *JRSS B*,
|
|
157
|
+
57(1), 289–300.
|
|
158
|
+
- Wilcoxon, F. (1945). Individual comparisons by ranking methods.
|
|
159
|
+
*Biometrics Bulletin*, 1(6), 80–83.
|
|
160
|
+
- Efron, B. (1979). Bootstrap methods: another look at the jackknife.
|
|
161
|
+
*Annals of Statistics*, 7(1), 1–26.
|
|
162
|
+
- Rubin, D. B. (1981). The Bayesian bootstrap.
|
|
163
|
+
*Annals of Statistics*, 9(1), 130–134.
|
|
164
|
+
- Kruschke, J. K. (2018). Rejecting or accepting parameter values in
|
|
165
|
+
Bayesian estimation. *Advances in Methods and Practices in
|
|
166
|
+
Psychological Science*, 1(2), 270–280. (ROPE.)
|
|
167
|
+
- Howard, S. R., Ramdas, A., McAuliffe, J., Sekhon, J. (2021).
|
|
168
|
+
Time-uniform, nonparametric, nonasymptotic confidence sequences.
|
|
169
|
+
*Annals of Statistics*, 49(2), 1055–1080. (Background reading on
|
|
170
|
+
always-valid inference for sequential extensions.)
|
package/docs/wire-protocol.md
CHANGED
|
@@ -188,7 +188,7 @@ Each invocation is one process — Node startup adds ~500 ms. For more than a fe
|
|
|
188
188
|
4. **RPC case** — add `case 'x':` in `dispatchRpc` in `src/wire/rpc.ts`.
|
|
189
189
|
5. **OpenAPI route** — register in `src/wire/openapi.ts` so it shows up in the spec.
|
|
190
190
|
6. **Test** — add to `tests/wire/`. At minimum: schema validation, happy-path, error-path.
|
|
191
|
-
7. **Python client** — add a method on `Client` in `clients/python/src/
|
|
191
|
+
7. **Python client** — add a method on `Client` in `clients/python/src/agent_eval_rpc/client.py`, plus pydantic models in `models.py` mirroring the new schemas.
|
|
192
192
|
|
|
193
193
|
The pattern is mechanical. When the surface grows past ~10 methods, swap the hand-written Python models for `datamodel-code-generator -i openapi.json -o models.py`.
|
|
194
194
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.20.12",
|
|
3
|
+
"version": "0.22.0",
|
|
4
4
|
"description": "Trace-first evaluation infrastructure for agent systems: traces, harnesses, verifier pipelines, judges, datasets, gates, optimization, and reporting.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-eval#readme",
|
|
6
6
|
"repository": {
|
|
@@ -74,21 +74,12 @@
|
|
|
74
74
|
"publishConfig": {
|
|
75
75
|
"access": "public"
|
|
76
76
|
},
|
|
77
|
-
"scripts": {
|
|
78
|
-
"build": "tsup && pnpm openapi",
|
|
79
|
-
"dev": "tsup --watch",
|
|
80
|
-
"prepare": "pnpm build",
|
|
81
|
-
"test": "vitest run",
|
|
82
|
-
"test:watch": "vitest",
|
|
83
|
-
"typecheck": "tsc --noEmit",
|
|
84
|
-
"openapi": "node dist/cli.js openapi --out dist/openapi.json"
|
|
85
|
-
},
|
|
86
77
|
"dependencies": {
|
|
87
78
|
"@asteasolutions/zod-to-openapi": "^8.5.0",
|
|
88
79
|
"@ax-llm/ax": "^19.0.25",
|
|
89
80
|
"@hono/node-server": "^2.0.0",
|
|
90
81
|
"@tangle-network/tcloud": "^0.4.6",
|
|
91
|
-
"hono": "^4.12.
|
|
82
|
+
"hono": "^4.12.16",
|
|
92
83
|
"zod": "^4.3.6"
|
|
93
84
|
},
|
|
94
85
|
"devDependencies": {
|
|
@@ -102,5 +93,12 @@
|
|
|
102
93
|
"node": ">=20"
|
|
103
94
|
},
|
|
104
95
|
"license": "MIT",
|
|
105
|
-
"
|
|
106
|
-
|
|
96
|
+
"scripts": {
|
|
97
|
+
"build": "tsup && pnpm openapi",
|
|
98
|
+
"dev": "tsup --watch",
|
|
99
|
+
"test": "vitest run",
|
|
100
|
+
"test:watch": "vitest",
|
|
101
|
+
"typecheck": "tsc --noEmit",
|
|
102
|
+
"openapi": "node dist/cli.js openapi --out dist/openapi.json"
|
|
103
|
+
}
|
|
104
|
+
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/llm-client.ts"],"sourcesContent":["/**\n * LLM client with graceful degrade.\n *\n * OpenAI-compatible `/v1/chat/completions` client with:\n * - Exponential-backoff retry on 429 + 5xx gateway errors (502/503/504).\n * - Retry on transient network errors (fetch failed, AbortError, ECONNRESET).\n * - Graceful json_schema → json_object degrade on 400 with schema-reject body.\n * - Fenced-JSON stripping (```json ... ```) for models that wrap structured output.\n * - Configurable base URL + api key / bearer, works with LiteLLM proxies, OpenAI\n * directly, cli-bridge subscriptions, and any router that speaks the spec.\n *\n * Usage:\n * const { value, result } = await callLlmJson<MyType>(\n * { model: 'gpt-4o', messages: [...], jsonSchema: { name: 'x', schema: {...} } },\n * { baseUrl: 'https://router.tangle.tools/v1', apiKey: process.env.KEY },\n * )\n *\n * This is THE llm-calling seam for agent-eval primitives that need structured\n * output (semantic concept judge, reviewer directives, critic scores). Primitives\n * that need free-form text use `callLlm` and parse output themselves.\n */\n\n// ─── Types ──────────────────────────────────────────────────────────────\n\nexport interface LlmMessage {\n role: 'system' | 'user' | 'assistant'\n /**\n * Either a plain text content string OR a multimodal content array\n * (text + image_url parts) for vision-capable models.\n */\n content:\n | string\n | Array<\n | { type: 'text'; text: string }\n | { type: 'image_url'; image_url: { url: string; detail?: 'auto' | 'low' | 'high' } }\n >\n}\n\nexport interface LlmCallRequest {\n model: string\n messages: LlmMessage[]\n /** Optional JSON-mode response format (response_format: json_object). */\n jsonMode?: boolean\n /** Optional structured output via JSON Schema. Falls back to json_object on 400. */\n jsonSchema?: { name: string; schema: Record<string, unknown> }\n temperature?: number\n maxTokens?: number\n /** Per-call timeout, default 60s. 
*/\n timeoutMs?: number\n}\n\nexport interface LlmUsage {\n promptTokens: number\n completionTokens: number\n totalTokens: number\n /** Proxies populate this when prompt caching is on. */\n cachedPromptTokens?: number\n}\n\nexport interface LlmCallResult {\n /** The text content of the first choice. Empty string if none. */\n content: string\n usage: LlmUsage\n /**\n * Cost in USD. Pulled from proxy's `_response_cost` field when present;\n * `null` when neither the proxy nor the caller can derive it.\n */\n costUsd: number | null\n /** Model name actually used (echoed from response). */\n model: string\n /** Wall-clock duration of the HTTP call (last attempt, if retried). */\n durationMs: number\n /** Raw response body. */\n raw: Record<string, unknown>\n}\n\nexport class LlmCallError extends Error {\n constructor(\n message: string,\n public readonly status: number,\n public readonly body: string,\n public readonly model: string,\n ) {\n super(message)\n this.name = 'LlmCallError'\n }\n}\n\nexport interface LlmClientOptions {\n /** Base URL (without trailing slash). Must end at the `/v1` prefix. */\n baseUrl?: string\n /** Bearer token — either `apiKey` or `bearer` populates `Authorization: Bearer ...`. */\n apiKey?: string\n bearer?: string\n /** Override for the `Authorization` header (e.g. `X-Auth: ...`). Takes precedence over apiKey/bearer. */\n authHeader?: { name: string; value: string }\n /** Default timeout in ms. Per-call can override. */\n defaultTimeoutMs?: number\n /** Max retry attempts on retriable errors. Default 3 (1 initial + 2 retries). */\n maxRetries?: number\n /** Fetch implementation — defaults to global `fetch`. Override for custom transport (e.g. tests). 
*/\n fetch?: typeof fetch\n}\n\n// ─── Internals ──────────────────────────────────────────────────────────\n\nconst DEFAULT_BASE_URL = 'https://router.tangle.tools/v1'\nconst DEFAULT_TIMEOUT_MS = 60_000\nconst DEFAULT_MAX_RETRIES = 3\n\nconst RETRYABLE_STATUS = new Set([429, 502, 503, 504])\n\nfunction isRetryableError(err: unknown): boolean {\n if (err instanceof LlmCallError) return RETRYABLE_STATUS.has(err.status)\n if (err instanceof Error) {\n return (\n err.name === 'AbortError' ||\n err.name === 'TimeoutError' ||\n /fetch failed|ECONNRESET|ETIMEDOUT|EAI_AGAIN/i.test(err.message)\n )\n }\n return false\n}\n\nfunction parseRetryAfter(headers: Headers): number | null {\n const h = headers.get('retry-after')\n if (!h) return null\n const asNumber = Number(h)\n if (Number.isFinite(asNumber) && asNumber > 0) return asNumber * 1000\n const asDate = Date.parse(h)\n if (Number.isFinite(asDate)) return Math.max(0, asDate - Date.now())\n return null\n}\n\nfunction backoffMs(attempt: number): number {\n // 500ms, 1s, 2s, 4s, ...\n return Math.min(500 * Math.pow(2, attempt), 16_000)\n}\n\nfunction buildHeaders(opts: LlmClientOptions): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n Accept: 'application/json',\n }\n if (opts.authHeader) {\n headers[opts.authHeader.name] = opts.authHeader.value\n } else if (opts.bearer || opts.apiKey) {\n headers.Authorization = `Bearer ${opts.bearer ?? 
opts.apiKey}`\n }\n return headers\n}\n\nfunction isSchemaRejection(status: number, body: string): boolean {\n if (status !== 400) return false\n const lower = body.toLowerCase()\n return (\n lower.includes('response_format') ||\n lower.includes('json_schema') ||\n lower.includes('is unavailable') ||\n lower.includes('not supported')\n )\n}\n\nfunction buildBody(req: LlmCallRequest, forceJsonObject: boolean): Record<string, unknown> {\n const body: Record<string, unknown> = {\n model: req.model,\n messages: req.messages,\n temperature: req.temperature ?? 0,\n }\n if (req.maxTokens != null) {\n if (usesMaxCompletionTokens(req.model)) body.max_completion_tokens = req.maxTokens\n else body.max_tokens = req.maxTokens\n }\n\n if (req.jsonSchema && !forceJsonObject) {\n body.response_format = {\n type: 'json_schema',\n json_schema: { name: req.jsonSchema.name, schema: req.jsonSchema.schema, strict: true },\n }\n } else if (req.jsonMode || req.jsonSchema) {\n body.response_format = { type: 'json_object' }\n }\n\n return body\n}\n\nfunction usesMaxCompletionTokens(model: string): boolean {\n return /^gpt-5(?:[.\\-]|$)/i.test(model)\n}\n\nasync function sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms))\n}\n\n// ─── Public API ─────────────────────────────────────────────────────────\n\n/**\n * Strip a ```json / ``` code fence if the model emitted one.\n * Idempotent for naked JSON. Some models (claude-code via router, certain\n * deepseek models) wrap output even under json_object.\n */\nexport function stripFencedJson(raw: string): string {\n const trimmed = raw.trim()\n const m = trimmed.match(/^```(?:json)?\\s*\\n?([\\s\\S]*?)\\n?```\\s*$/)\n return m ? 
m[1]!.trim() : trimmed\n}\n\nexport function extractJsonPayload(raw: string): string {\n const stripped = stripFencedJson(raw)\n try {\n JSON.parse(stripped)\n return stripped\n } catch {\n // Continue with balanced extraction below.\n }\n\n const starts = [...stripped.matchAll(/[\\[{]/g)].map((match) => match.index).filter((index) => index != null)\n for (const start of starts) {\n const candidate = extractBalancedJson(stripped, start)\n if (!candidate) continue\n try {\n JSON.parse(candidate)\n return candidate\n } catch {\n // Keep scanning; earlier braces may belong to prose.\n }\n }\n\n return stripped\n}\n\nfunction extractBalancedJson(input: string, start: number): string | null {\n const opener = input[start]\n const closer = opener === '{' ? '}' : opener === '[' ? ']' : null\n if (!closer) return null\n\n const stack: string[] = [closer]\n let isInString = false\n let isEscaped = false\n\n for (let i = start + 1; i < input.length; i++) {\n const char = input[i]!\n if (isEscaped) {\n isEscaped = false\n continue\n }\n if (char === '\\\\') {\n isEscaped = isInString\n continue\n }\n if (char === '\"') {\n isInString = !isInString\n continue\n }\n if (isInString) continue\n\n if (char === '{') stack.push('}')\n else if (char === '[') stack.push(']')\n else if (char === stack[stack.length - 1]) {\n stack.pop()\n if (stack.length === 0) return input.slice(start, i + 1)\n }\n }\n\n return null\n}\n\n/**\n * Low-level call. Returns raw content + usage + cost. Retries on transient\n * failures; does NOT degrade schema here — callers that want graceful\n * degrade use `callLlmJson`.\n */\nexport async function callLlm(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<LlmCallResult> {\n const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\\/+$/, '')\n const url = `${baseUrl}/chat/completions`\n const timeoutMs = req.timeoutMs ?? opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS\n const maxRetries = opts.maxRetries ?? 
DEFAULT_MAX_RETRIES\n const fetchFn = opts.fetch ?? globalThis.fetch\n const headers = buildHeaders(opts)\n\n let lastErr: unknown\n for (let attempt = 0; attempt < maxRetries; attempt++) {\n const controller = new AbortController()\n const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs)\n const started = Date.now()\n\n try {\n const res = await fetchFn(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(buildBody(req, false)),\n signal: controller.signal,\n })\n clearTimeout(timeoutHandle)\n\n if (!res.ok) {\n const body = await res.text()\n const err = new LlmCallError(\n `LLM call ${res.status}: ${body.slice(0, 300)}`,\n res.status,\n body,\n req.model,\n )\n if (RETRYABLE_STATUS.has(res.status) && attempt < maxRetries - 1) {\n lastErr = err\n const retryAfter = parseRetryAfter(res.headers)\n await sleep(retryAfter ?? backoffMs(attempt))\n continue\n }\n throw err\n }\n\n const json = (await res.json()) as Record<string, unknown>\n const choice = (json.choices as Array<{ message?: { content?: string } }> | undefined)?.[0]\n const usageRaw = (json.usage as Record<string, unknown> | undefined) ?? {}\n const costFromProxy = (json._response_cost ?? json.cost_usd) as number | undefined\n\n return {\n content: choice?.message?.content ?? '',\n usage: {\n promptTokens: Number(usageRaw.prompt_tokens ?? 0),\n completionTokens: Number(usageRaw.completion_tokens ?? 0),\n totalTokens: Number(usageRaw.total_tokens ?? 0),\n cachedPromptTokens:\n usageRaw.prompt_tokens_details &&\n typeof usageRaw.prompt_tokens_details === 'object'\n ? Number(\n (usageRaw.prompt_tokens_details as Record<string, unknown>).cached_tokens ?? 0,\n )\n : undefined,\n },\n costUsd: typeof costFromProxy === 'number' ? costFromProxy : null,\n model: (json.model as string) ?? 
req.model,\n durationMs: Date.now() - started,\n raw: json,\n }\n } catch (err) {\n clearTimeout(timeoutHandle)\n lastErr = err\n if (attempt < maxRetries - 1 && isRetryableError(err)) {\n await sleep(backoffMs(attempt))\n continue\n }\n throw err\n }\n }\n throw lastErr instanceof Error ? lastErr : new Error(String(lastErr))\n}\n\n/**\n * Structured-output call. Returns parsed JSON plus the raw result envelope.\n * Degrades `jsonSchema` → `jsonMode` on a 400 that names the schema param —\n * critical for deepseek-v3/v4, kimi-k2.6, and other models that don't accept\n * the `response_format.json_schema` shape but DO accept `json_object`.\n */\nexport async function callLlmJson<T = unknown>(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<{ value: T; result: LlmCallResult }> {\n try {\n const result = await callLlm({ ...req, jsonMode: req.jsonMode ?? !req.jsonSchema }, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n } catch (err) {\n if (err instanceof LlmCallError && isSchemaRejection(err.status, err.body) && req.jsonSchema) {\n // Degrade to json_object + retry.\n const degradedReq: LlmCallRequest = { ...req, jsonMode: true, jsonSchema: undefined }\n const result = await callLlm(degradedReq, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n }\n throw err\n }\n}\n\nfunction parseJsonSafely<T>(content: string, model: string): T {\n const stripped = extractJsonPayload(content)\n try {\n return JSON.parse(stripped) as T\n } catch (err) {\n throw new Error(\n `LLM returned non-JSON content (model=${model}): ${\n err instanceof Error ? err.message : String(err)\n }\\n--- raw content ---\\n${content.slice(0, 800)}`,\n )\n }\n}\n\n/**\n * Probe whether a model is reachable. Returns latency + null error on\n * success; `ok=false` + error message on any failure (HTTP, timeout,\n * network, parse). 
Designed for sweep preflights — fail loud at the\n * boundary before burning a 30-leaf run on a misconfigured router.\n *\n * Sends a tiny `ping` message with `maxTokens=64`. Reasoning models\n * (glm-5.1, deepseek-v4) can burn the entire budget on internal reasoning\n * for short prompts, so don't tighten this further. We don't validate\n * content; HTTP 200 means reachable.\n */\nexport async function probeLlm(\n model: string,\n opts: LlmClientOptions & { timeoutMs?: number } = {},\n): Promise<{ ok: boolean; latencyMs: number; error: string | null }> {\n const start = Date.now()\n try {\n await callLlm(\n {\n model,\n messages: [{ role: 'user', content: 'ping' }],\n maxTokens: 64,\n timeoutMs: opts.timeoutMs ?? 30_000,\n },\n opts,\n )\n return { ok: true, latencyMs: Date.now() - start, error: null }\n } catch (err) {\n return {\n ok: false,\n latencyMs: Date.now() - start,\n error: err instanceof Error ? err.message : String(err),\n }\n }\n}\n\n/**\n * Stateful client — construct once with defaults, call many times.\n * Thin wrapper around the free functions; exists for callers that want\n * to inject a single configured instance into multiple primitives.\n */\nexport class LlmClient {\n constructor(private readonly opts: LlmClientOptions = {}) {}\n\n call(req: LlmCallRequest, per?: LlmClientOptions): Promise<LlmCallResult> {\n return callLlm(req, { ...this.opts, ...per })\n }\n\n callJson<T = unknown>(\n req: LlmCallRequest,\n per?: LlmClientOptions,\n ): Promise<{ value: T; result: LlmCallResult }> {\n return callLlmJson<T>(req, { ...this.opts, ...per })\n 
}\n}\n"],"mappings":";AA4EO,IAAM,eAAN,cAA2B,MAAM;AAAA,EACtC,YACE,SACgB,QACA,MACA,OAChB;AACA,UAAM,OAAO;AAJG;AACA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EANkB;AAAA,EACA;AAAA,EACA;AAKpB;AAoBA,IAAM,mBAAmB;AACzB,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB;AAE5B,IAAM,mBAAmB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,GAAG,CAAC;AAErD,SAAS,iBAAiB,KAAuB;AAC/C,MAAI,eAAe,aAAc,QAAO,iBAAiB,IAAI,IAAI,MAAM;AACvE,MAAI,eAAe,OAAO;AACxB,WACE,IAAI,SAAS,gBACb,IAAI,SAAS,kBACb,+CAA+C,KAAK,IAAI,OAAO;AAAA,EAEnE;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,SAAiC;AACxD,QAAM,IAAI,QAAQ,IAAI,aAAa;AACnC,MAAI,CAAC,EAAG,QAAO;AACf,QAAM,WAAW,OAAO,CAAC;AACzB,MAAI,OAAO,SAAS,QAAQ,KAAK,WAAW,EAAG,QAAO,WAAW;AACjE,QAAM,SAAS,KAAK,MAAM,CAAC;AAC3B,MAAI,OAAO,SAAS,MAAM,EAAG,QAAO,KAAK,IAAI,GAAG,SAAS,KAAK,IAAI,CAAC;AACnE,SAAO;AACT;AAEA,SAAS,UAAU,SAAyB;AAE1C,SAAO,KAAK,IAAI,MAAM,KAAK,IAAI,GAAG,OAAO,GAAG,IAAM;AACpD;AAEA,SAAS,aAAa,MAAgD;AACpE,QAAM,UAAkC;AAAA,IACtC,gBAAgB;AAAA,IAChB,QAAQ;AAAA,EACV;AACA,MAAI,KAAK,YAAY;AACnB,YAAQ,KAAK,WAAW,IAAI,IAAI,KAAK,WAAW;AAAA,EAClD,WAAW,KAAK,UAAU,KAAK,QAAQ;AACrC,YAAQ,gBAAgB,UAAU,KAAK,UAAU,KAAK,MAAM;AAAA,EAC9D;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,QAAgB,MAAuB;AAChE,MAAI,WAAW,IAAK,QAAO;AAC3B,QAAM,QAAQ,KAAK,YAAY;AAC/B,SACE,MAAM,SAAS,iBAAiB,KAChC,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,gBAAgB,KAC/B,MAAM,SAAS,eAAe;AAElC;AAEA,SAAS,UAAU,KAAqB,iBAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,aAAa,IAAI,eAAe;AAAA,EAClC;AACA,MAAI,IAAI,aAAa,MAAM;AACzB,QAAI,wBAAwB,IAAI,KAAK,EAAG,MAAK,wBAAwB,IAAI;AAAA,QACpE,MAAK,aAAa,IAAI;AAAA,EAC7B;AAEA,MAAI,IAAI,cAAc,CAAC,iBAAiB;AACtC,SAAK,kBAAkB;AAAA,MACrB,MAAM;AAAA,MACN,aAAa,EAAE,MAAM,IAAI,WAAW,MAAM,QAAQ,IAAI,WAAW,QAAQ,QAAQ,KAAK;AAAA,IACxF;AAAA,EACF,WAAW,IAAI,YAAY,IAAI,YAAY;AACzC,SAAK,kBAAkB,EAAE,MAAM,cAAc;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,SAAS,wBAAwB,OAAwB;AACvD,SAAO,qBAAqB,KAAK,KAAK;AACxC;AAEA,eAAe,MAAM,IAA2B;AAC9C,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;AASO,SAAS,gBAAgB,KAAqB;AACnD,QAAM,UAAU,IAAI,KAAK;AACzB,QAAM,IAAI,QAAQ,MAAM,yCAAyC;AACjE,SAAO,IAAI,EAAE,CAAC,EAAG,KAAK,IAAI;AAC5B
;AAEO,SAAS,mBAAmB,KAAqB;AACtD,QAAM,WAAW,gBAAgB,GAAG;AACpC,MAAI;AACF,SAAK,MAAM,QAAQ;AACnB,WAAO;AAAA,EACT,QAAQ;AAAA,EAER;AAEA,QAAM,SAAS,CAAC,GAAG,SAAS,SAAS,QAAQ,CAAC,EAAE,IAAI,CAAC,UAAU,MAAM,KAAK,EAAE,OAAO,CAAC,UAAU,SAAS,IAAI;AAC3G,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAAY,oBAAoB,UAAU,KAAK;AACrD,QAAI,CAAC,UAAW;AAChB,QAAI;AACF,WAAK,MAAM,SAAS;AACpB,aAAO;AAAA,IACT,QAAQ;AAAA,IAER;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,oBAAoB,OAAe,OAA8B;AACxE,QAAM,SAAS,MAAM,KAAK;AAC1B,QAAM,SAAS,WAAW,MAAM,MAAM,WAAW,MAAM,MAAM;AAC7D,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAkB,CAAC,MAAM;AAC/B,MAAI,aAAa;AACjB,MAAI,YAAY;AAEhB,WAAS,IAAI,QAAQ,GAAG,IAAI,MAAM,QAAQ,KAAK;AAC7C,UAAM,OAAO,MAAM,CAAC;AACpB,QAAI,WAAW;AACb,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,SAAS,MAAM;AACjB,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,SAAS,KAAK;AAChB,mBAAa,CAAC;AACd;AAAA,IACF;AACA,QAAI,WAAY;AAEhB,QAAI,SAAS,IAAK,OAAM,KAAK,GAAG;AAAA,aACvB,SAAS,IAAK,OAAM,KAAK,GAAG;AAAA,aAC5B,SAAS,MAAM,MAAM,SAAS,CAAC,GAAG;AACzC,YAAM,IAAI;AACV,UAAI,MAAM,WAAW,EAAG,QAAO,MAAM,MAAM,OAAO,IAAI,CAAC;AAAA,IACzD;AAAA,EACF;AAEA,SAAO;AACT;AAOA,eAAsB,QACpB,KACA,OAAyB,CAAC,GACF;AACxB,QAAM,WAAW,KAAK,WAAW,kBAAkB,QAAQ,QAAQ,EAAE;AACrE,QAAM,MAAM,GAAG,OAAO;AACtB,QAAM,YAAY,IAAI,aAAa,KAAK,oBAAoB;AAC5D,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,UAAU,KAAK,SAAS,WAAW;AACzC,QAAM,UAAU,aAAa,IAAI;AAEjC,MAAI;AACJ,WAAS,UAAU,GAAG,UAAU,YAAY,WAAW;AACrD,UAAM,aAAa,IAAI,gBAAgB;AACvC,UAAM,gBAAgB,WAAW,MAAM,WAAW,MAAM,GAAG,SAAS;AACpE,UAAM,UAAU,KAAK,IAAI;AAEzB,QAAI;AACF,YAAM,MAAM,MAAM,QAAQ,KAAK;AAAA,QAC7B,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,UAAU,KAAK,KAAK,CAAC;AAAA,QAC1C,QAAQ,WAAW;AAAA,MACrB,CAAC;AACD,mBAAa,aAAa;AAE1B,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,cAAM,MAAM,IAAI;AAAA,UACd,YAAY,IAAI,MAAM,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,UAC7C,IAAI;AAAA,UACJ;AAAA,UACA,IAAI;AAAA,QACN;AACA,YAAI,iBAAiB,IAAI,IAAI,MAAM,KAAK,UAAU,aAAa,GAAG;AAChE,oBAAU;AACV,gBAAM,aAAa,gBAAgB,IAAI,OAAO;AAC9C,gBAAM,MAAM,cAAc,UAAU,OAAO,CAAC;AAC5C;AAAA,QACF;AACA,cAAM;AAAA,MACR;AAEA,YAAM,OAAQ,MAAM,IAAI,KAAK;AAC7B,YAAM,SAAU,KAAK,UAAoE,CAAC;AAC1F,YAAM,WAAY
,KAAK,SAAiD,CAAC;AACzE,YAAM,gBAAiB,KAAK,kBAAkB,KAAK;AAEnD,aAAO;AAAA,QACL,SAAS,QAAQ,SAAS,WAAW;AAAA,QACrC,OAAO;AAAA,UACL,cAAc,OAAO,SAAS,iBAAiB,CAAC;AAAA,UAChD,kBAAkB,OAAO,SAAS,qBAAqB,CAAC;AAAA,UACxD,aAAa,OAAO,SAAS,gBAAgB,CAAC;AAAA,UAC9C,oBACE,SAAS,yBACT,OAAO,SAAS,0BAA0B,WACtC;AAAA,YACG,SAAS,sBAAkD,iBAAiB;AAAA,UAC/E,IACA;AAAA,QACR;AAAA,QACA,SAAS,OAAO,kBAAkB,WAAW,gBAAgB;AAAA,QAC7D,OAAQ,KAAK,SAAoB,IAAI;AAAA,QACrC,YAAY,KAAK,IAAI,IAAI;AAAA,QACzB,KAAK;AAAA,MACP;AAAA,IACF,SAAS,KAAK;AACZ,mBAAa,aAAa;AAC1B,gBAAU;AACV,UAAI,UAAU,aAAa,KAAK,iBAAiB,GAAG,GAAG;AACrD,cAAM,MAAM,UAAU,OAAO,CAAC;AAC9B;AAAA,MACF;AACA,YAAM;AAAA,IACR;AAAA,EACF;AACA,QAAM,mBAAmB,QAAQ,UAAU,IAAI,MAAM,OAAO,OAAO,CAAC;AACtE;AAQA,eAAsB,YACpB,KACA,OAAyB,CAAC,GACoB;AAC9C,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,EAAE,GAAG,KAAK,UAAU,IAAI,YAAY,CAAC,IAAI,WAAW,GAAG,IAAI;AACxF,UAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,WAAO,EAAE,OAAO,OAAO;AAAA,EACzB,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB,kBAAkB,IAAI,QAAQ,IAAI,IAAI,KAAK,IAAI,YAAY;AAE5F,YAAM,cAA8B,EAAE,GAAG,KAAK,UAAU,MAAM,YAAY,OAAU;AACpF,YAAM,SAAS,MAAM,QAAQ,aAAa,IAAI;AAC9C,YAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,aAAO,EAAE,OAAO,OAAO;AAAA,IACzB;AACA,UAAM;AAAA,EACR;AACF;AAEA,SAAS,gBAAmB,SAAiB,OAAkB;AAC7D,QAAM,WAAW,mBAAmB,OAAO;AAC3C,MAAI;AACF,WAAO,KAAK,MAAM,QAAQ;AAAA,EAC5B,SAAS,KAAK;AACZ,UAAM,IAAI;AAAA,MACR,wCAAwC,KAAK,MAC3C,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CACjD;AAAA;AAAA,EAA0B,QAAQ,MAAM,GAAG,GAAG,CAAC;AAAA,IACjD;AAAA,EACF;AACF;AAaA,eAAsB,SACpB,OACA,OAAkD,CAAC,GACgB;AACnE,QAAM,QAAQ,KAAK,IAAI;AACvB,MAAI;AACF,UAAM;AAAA,MACJ;AAAA,QACE;AAAA,QACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,QAC5C,WAAW;AAAA,QACX,WAAW,KAAK,aAAa;AAAA,MAC/B;AAAA,MACA;AAAA,IACF;AACA,WAAO,EAAE,IAAI,MAAM,WAAW,KAAK,IAAI,IAAI,OAAO,OAAO,KAAK;AAAA,EAChE,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,WAAW,KAAK,IAAI,IAAI;AAAA,MACxB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAOO,IAAM,YAAN,MAAgB;AAAA,EACrB,YAA6B,OAAyB,CAAC,GAAG;AAA7B;AAAA,EAA8B;AAAA,EAA9B;AAAA,EAE7B,KAAK,KAAqB,KAAgD;AACxE,WAAO,QAAQ,KAAK,EAAE,GAAG,KAA
K,MAAM,GAAG,IAAI,CAAC;AAAA,EAC9C;AAAA,EAEA,SACE,KACA,KAC8C;AAC9C,WAAO,YAAe,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EACrD;AACF;","names":[]}
|