@tangle-network/agent-eval 0.20.11 → 0.20.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +99 -170
- package/dist/benchmarks/index.d.ts +2 -1
- package/dist/{chunk-JAOLXRIA.js → chunk-75MCTH7P.js} +8 -2
- package/dist/chunk-75MCTH7P.js.map +1 -0
- package/dist/chunk-HKYRWNHV.js +1354 -0
- package/dist/chunk-HKYRWNHV.js.map +1 -0
- package/dist/{chunk-LSR4IAYN.js → chunk-HNJLMAJ2.js} +2 -2
- package/dist/chunk-IKFVX537.js +717 -0
- package/dist/chunk-IKFVX537.js.map +1 -0
- package/dist/chunk-KWUAAIHR.js +1764 -0
- package/dist/chunk-KWUAAIHR.js.map +1 -0
- package/dist/chunk-MCMV7DUL.js +1310 -0
- package/dist/chunk-MCMV7DUL.js.map +1 -0
- package/dist/chunk-ODFINDLQ.js +413 -0
- package/dist/chunk-ODFINDLQ.js.map +1 -0
- package/dist/chunk-PKCVBYTQ.js +200 -0
- package/dist/chunk-PKCVBYTQ.js.map +1 -0
- package/dist/chunk-YUFXO3TU.js +148 -0
- package/dist/chunk-YUFXO3TU.js.map +1 -0
- package/dist/cli.js +2 -2
- package/dist/control-C8NKbF3w.d.ts +258 -0
- package/dist/control.d.ts +5 -0
- package/dist/control.js +30 -0
- package/dist/control.js.map +1 -0
- package/dist/dataset-B9qvlm_o.d.ts +112 -0
- package/dist/emitter-BYO2nSDA.d.ts +387 -0
- package/dist/feedback-trajectory-BGQ_ANCN.d.ts +345 -0
- package/dist/{index-1PZOtZFr.d.ts → index-c5saLbKD.d.ts} +2 -133
- package/dist/index.d.ts +115 -2870
- package/dist/index.js +1049 -6156
- package/dist/index.js.map +1 -1
- package/dist/multi-shot-optimization-Bvtz294B.d.ts +598 -0
- package/dist/openapi.json +1 -1
- package/dist/optimization.d.ts +145 -0
- package/dist/optimization.js +60 -0
- package/dist/optimization.js.map +1 -0
- package/dist/reporting.d.ts +426 -0
- package/dist/reporting.js +32 -0
- package/dist/reporting.js.map +1 -0
- package/dist/run-record-CX_jcAyr.d.ts +134 -0
- package/dist/traces.d.ts +658 -0
- package/dist/traces.js +100 -0
- package/dist/traces.js.map +1 -0
- package/dist/wire/index.js +2 -2
- package/docs/concepts.md +16 -11
- package/docs/feature-guide.md +10 -17
- package/docs/integration-launch-gates.md +77 -0
- package/docs/product-eval-adoption.md +27 -0
- package/docs/trace-analysis.md +75 -0
- package/package.json +21 -1
- package/dist/chunk-JAOLXRIA.js.map +0 -1
- /package/dist/{chunk-LSR4IAYN.js.map → chunk-HNJLMAJ2.js.map} +0 -0
package/dist/traces.js
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_REDACTION_RULES,
|
|
3
|
+
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
4
|
+
FAILURE_CLASSES,
|
|
5
|
+
FileSystemTraceStore,
|
|
6
|
+
InMemoryTraceStore,
|
|
7
|
+
OTEL_AGENT_EVAL_SCOPE,
|
|
8
|
+
OtlpFileTraceStore,
|
|
9
|
+
REDACTION_VERSION,
|
|
10
|
+
SpanNotFoundError,
|
|
11
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
12
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
13
|
+
TRACE_ANALYST_SUBAGENT_DESCRIPTION,
|
|
14
|
+
TRACE_ANALYST_TRUNCATION_MARKER_PREFIX,
|
|
15
|
+
TRACE_SCHEMA_VERSION,
|
|
16
|
+
TraceFileMissingError,
|
|
17
|
+
TraceNotFoundError,
|
|
18
|
+
aggregateLlm,
|
|
19
|
+
analyzeTraces,
|
|
20
|
+
argHash,
|
|
21
|
+
buildTraceAnalystTools,
|
|
22
|
+
buildTraceInsightContext,
|
|
23
|
+
buildTraceInsightPrompt,
|
|
24
|
+
defaultTraceInsightPanel,
|
|
25
|
+
describeTraceInsightScope,
|
|
26
|
+
domainEvidencePattern,
|
|
27
|
+
exportRunAsOtlp,
|
|
28
|
+
groupBy,
|
|
29
|
+
inferDomainKeywords,
|
|
30
|
+
isJudgeSpan,
|
|
31
|
+
isLlmSpan,
|
|
32
|
+
isRetrievalSpan,
|
|
33
|
+
isSandboxSpan,
|
|
34
|
+
isToolSpan,
|
|
35
|
+
judgeSpans,
|
|
36
|
+
llmSpans,
|
|
37
|
+
planTraceInsightQuestions,
|
|
38
|
+
redactString,
|
|
39
|
+
redactValue,
|
|
40
|
+
runFailureClass,
|
|
41
|
+
runsForScenario,
|
|
42
|
+
scoreTraceInsightReadiness,
|
|
43
|
+
tokenizeDomainWords,
|
|
44
|
+
toolSpans,
|
|
45
|
+
traceAnalystFunctionGroup
|
|
46
|
+
} from "./chunk-KWUAAIHR.js";
|
|
47
|
+
import {
|
|
48
|
+
TraceEmitter,
|
|
49
|
+
llmSpanFromProvider
|
|
50
|
+
} from "./chunk-PKCVBYTQ.js";
|
|
51
|
+
import "./chunk-PZ5AY32C.js";
|
|
52
|
+
export {
|
|
53
|
+
DEFAULT_REDACTION_RULES,
|
|
54
|
+
DEFAULT_TRACE_ANALYST_BUDGETS,
|
|
55
|
+
FAILURE_CLASSES,
|
|
56
|
+
FileSystemTraceStore,
|
|
57
|
+
InMemoryTraceStore,
|
|
58
|
+
OTEL_AGENT_EVAL_SCOPE,
|
|
59
|
+
OtlpFileTraceStore,
|
|
60
|
+
REDACTION_VERSION,
|
|
61
|
+
SpanNotFoundError,
|
|
62
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION,
|
|
63
|
+
TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION,
|
|
64
|
+
TRACE_ANALYST_SUBAGENT_DESCRIPTION,
|
|
65
|
+
TRACE_ANALYST_TRUNCATION_MARKER_PREFIX,
|
|
66
|
+
TRACE_SCHEMA_VERSION,
|
|
67
|
+
TraceEmitter,
|
|
68
|
+
TraceFileMissingError,
|
|
69
|
+
TraceNotFoundError,
|
|
70
|
+
aggregateLlm,
|
|
71
|
+
analyzeTraces,
|
|
72
|
+
argHash,
|
|
73
|
+
buildTraceAnalystTools,
|
|
74
|
+
buildTraceInsightContext,
|
|
75
|
+
buildTraceInsightPrompt,
|
|
76
|
+
defaultTraceInsightPanel,
|
|
77
|
+
describeTraceInsightScope,
|
|
78
|
+
domainEvidencePattern,
|
|
79
|
+
exportRunAsOtlp,
|
|
80
|
+
groupBy,
|
|
81
|
+
inferDomainKeywords,
|
|
82
|
+
isJudgeSpan,
|
|
83
|
+
isLlmSpan,
|
|
84
|
+
isRetrievalSpan,
|
|
85
|
+
isSandboxSpan,
|
|
86
|
+
isToolSpan,
|
|
87
|
+
judgeSpans,
|
|
88
|
+
llmSpanFromProvider,
|
|
89
|
+
llmSpans,
|
|
90
|
+
planTraceInsightQuestions,
|
|
91
|
+
redactString,
|
|
92
|
+
redactValue,
|
|
93
|
+
runFailureClass,
|
|
94
|
+
runsForScenario,
|
|
95
|
+
scoreTraceInsightReadiness,
|
|
96
|
+
tokenizeDomainWords,
|
|
97
|
+
toolSpans,
|
|
98
|
+
traceAnalystFunctionGroup
|
|
99
|
+
};
|
|
100
|
+
//# sourceMappingURL=traces.js.map
|
|
package/dist/traces.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/dist/wire/index.js
CHANGED
|
@@ -24,8 +24,8 @@ import {
|
|
|
24
24
|
runRpcBatch,
|
|
25
25
|
runRpcOnce,
|
|
26
26
|
startServer
|
|
27
|
-
} from "../chunk-LSR4IAYN.js";
|
|
28
|
-
import "../chunk-JAOLXRIA.js";
|
|
27
|
+
} from "../chunk-HNJLMAJ2.js";
|
|
28
|
+
import "../chunk-75MCTH7P.js";
|
|
29
29
|
import "../chunk-PZ5AY32C.js";
|
|
30
30
|
export {
|
|
31
31
|
BUILTIN_RUBRICS,
|
package/docs/concepts.md
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
# Concepts
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
`agent-eval` is for deciding whether an agent run should pass, keep working, be
|
|
4
|
+
replayed, be optimized, or be promoted.
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
It exists because agent output is not evidence. A model can say a task is done
|
|
7
|
+
while the build fails, the browser flow is broken, the integration was never
|
|
8
|
+
connected, or the answer lacks required sources. The package gives products a
|
|
9
|
+
shared way to record runs, check outcomes, classify failures, compare variants,
|
|
10
|
+
and make release decisions.
|
|
6
11
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
It exists because LLMs lie about whether they succeeded. A model will say "Done!" and ship code that doesn't compile. agent-eval is the layer between the model's output and your decision to ship.
|
|
10
|
-
|
|
11
|
-
## The three things you'll touch most
|
|
12
|
+
## Main Objects
|
|
12
13
|
|
|
13
14
|
| Thing | What it is | One-line example |
|
|
14
15
|
|---|---|---|
|
|
@@ -17,7 +18,8 @@ It exists because LLMs lie about whether they succeeded. A model will say "Done!
|
|
|
17
18
|
| **Verifier** | A pipeline of judges run in order, with dependencies. | "install → typecheck → build → semantic" |
|
|
18
19
|
| **Feedback trajectory** | A multi-shot record of attempts, approvals, rejections, edits, metrics, and policy outcomes. | "draft → user rejects → revised draft → approved → measured" |
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
Everything else exists to make those objects useful in real product loops:
|
|
22
|
+
traces, datasets, control runtime, optimizers, statistics, and reports.
|
|
21
23
|
|
|
22
24
|
When the thing being evaluated is an agent that should keep working, use
|
|
23
25
|
[`runAgentControlLoop`](./control-runtime.md). It turns validators into a
|
|
@@ -62,7 +64,7 @@ shape stays the same.
|
|
|
62
64
|
Those trajectories can be converted into preference memory, `DatasetScenario`
|
|
63
65
|
rows, optimizer rows, and held-out examples for overfit checks.
|
|
64
66
|
|
|
65
|
-
##
|
|
67
|
+
## Code Generator Eval
|
|
66
68
|
|
|
67
69
|
When the artifact is generated code, agent-eval scores it at three independent layers. Each layer fails differently, and you want to know which one broke:
|
|
68
70
|
|
|
@@ -125,7 +127,7 @@ Two rules that will save you bugs:
|
|
|
125
127
|
|
|
126
128
|
2. **Pair LLM judges with build outcomes.** An LLM judge will rate non-compiling code as "looks right" (0.8). Always short-circuit on `buildOutcome.passed === false` before any LLM judging.
|
|
127
129
|
|
|
128
|
-
##
|
|
130
|
+
## Trace Model
|
|
129
131
|
|
|
130
132
|
Every operation emits structured spans into a `TraceStore`. A run is a tree:
|
|
131
133
|
|
|
@@ -142,7 +144,10 @@ builder-session [span]
|
|
|
142
144
|
|
|
143
145
|
Spans are append-only and have stable ids — replay is reading the same store back. OTLP export ships them out for distributed tracing.
|
|
144
146
|
|
|
145
|
-
You
|
|
147
|
+
You usually should not build this tree by hand. Product runtimes,
|
|
148
|
+
`runAgentControlLoop`, harnesses, and verifiers should emit it while they run.
|
|
149
|
+
Use traces when debugging a flaky run, building replay data, or explaining a
|
|
150
|
+
release decision.
|
|
146
151
|
|
|
147
152
|
## Where to go next
|
|
148
153
|
|
package/docs/feature-guide.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This page explains the main `agent-eval` primitives in plain English first,
|
|
4
4
|
then shows when to use each one.
|
|
5
5
|
|
|
6
|
-
##
|
|
6
|
+
## Overview
|
|
7
7
|
|
|
8
8
|
LLM agents can write code, drafts, research, plans, and actions. The hard part
|
|
9
9
|
is knowing whether they actually did a good job, whether they should keep
|
|
@@ -41,7 +41,7 @@ trying, and whether a change made them better or worse.
|
|
|
41
41
|
|
|
42
42
|
## Integration Patterns
|
|
43
43
|
|
|
44
|
-
### Recommended
|
|
44
|
+
### Recommended Product Shape
|
|
45
45
|
|
|
46
46
|
Use this shape when the product needs to keep pushing work forward instead of
|
|
47
47
|
only answering once:
|
|
@@ -175,21 +175,6 @@ Store as `FeedbackTrajectory`, then derive:
|
|
|
175
175
|
logs, screenshots, or browser state. Use separate sandboxes for parallel
|
|
176
176
|
variants or destructive checks.
|
|
177
177
|
|
|
178
|
-
## Same-Sandbox Example
|
|
179
|
-
|
|
180
|
-
`examples/same-sandbox-harness/` shows the common coding/browser pattern:
|
|
181
|
-
|
|
182
|
-
```text
|
|
183
|
-
one sandbox/workdir -> install/build/test -> inspect evidence -> emit judge span
|
|
184
|
-
```
|
|
185
|
-
|
|
186
|
-
Use this when a judge needs evidence produced by earlier harness phases. Use
|
|
187
|
-
isolated sandboxes when variants run in parallel or a phase can corrupt the
|
|
188
|
-
workspace.
|
|
189
|
-
- Treat telemetry as evidence, not control flow. A trace sink outage should be
|
|
190
|
-
visible in `runtimeErrors`, but it should not stop the worker from completing
|
|
191
|
-
the user task.
|
|
192
|
-
|
|
193
178
|
## Highest-ROI Adoption Order
|
|
194
179
|
|
|
195
180
|
1. Wrap one real product workflow in `runAgentControlLoop`.
|
|
@@ -211,3 +196,11 @@ reusable:
|
|
|
211
196
|
|
|
212
197
|
Core should provide shapes, stores, runners, scoring, traces, and converters.
|
|
213
198
|
Downstream integrations provide domain state, policy, tools, and storage.
|
|
199
|
+
|
|
200
|
+
## Examples
|
|
201
|
+
|
|
202
|
+
- `examples/same-sandbox-harness`: one workdir for install/build/test plus
|
|
203
|
+
evidence inspection.
|
|
204
|
+
- `examples/multi-shot-optimization`: full-trajectory optimization with a
|
|
205
|
+
holdout gate.
|
|
206
|
+
- `examples/benchmarks`: benchmark adapter contracts and reference wrappers.
|
|
package/docs/integration-launch-gates.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Integration Launch Gates
|
|
2
|
+
|
|
3
|
+
Use these gates when a product lets generated apps or agents use user-owned
|
|
4
|
+
connections through an integration hub.
|
|
5
|
+
|
|
6
|
+
The eval should wrap the real product path:
|
|
7
|
+
|
|
8
|
+
```txt
|
|
9
|
+
user prompt
|
|
10
|
+
-> product emits IntegrationManifest
|
|
11
|
+
-> platform resolves connections and grants
|
|
12
|
+
-> sandbox receives capability bundle
|
|
13
|
+
-> generated app invokes integration action
|
|
14
|
+
-> platform enforces policy, approval, idempotency, audit
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Deterministic Gates
|
|
18
|
+
|
|
19
|
+
- The generated app declares an integration manifest before sandbox launch.
|
|
20
|
+
- Manifest validation passes.
|
|
21
|
+
- Required connections and scopes are present before execution.
|
|
22
|
+
- Sandbox environment contains a capability bundle, not raw provider tokens.
|
|
23
|
+
- Reads invoke through the platform bridge.
|
|
24
|
+
- Writes return `approval_required` unless product policy explicitly allows
|
|
25
|
+
them.
|
|
26
|
+
- Approved writes are bound to the same action, input hash, connection, and
|
|
27
|
+
subject.
|
|
28
|
+
- Revoked grants or expired capabilities stop invocation.
|
|
29
|
+
- Resumed or long-running sandboxes receive a refreshed bundle before expiry.
|
|
30
|
+
- Audit includes grant creation, capability issue, invoke success/failure,
|
|
31
|
+
approval resolution, and revoke events.
|
|
32
|
+
|
|
33
|
+
## Failure Classes
|
|
34
|
+
|
|
35
|
+
`agent-eval` classifies integration failures separately from prompt/tool
|
|
36
|
+
failures:
|
|
37
|
+
|
|
38
|
+
- `bad_integration_manifest`
|
|
39
|
+
- `missing_integration_connection`
|
|
40
|
+
- `missing_integration_scope`
|
|
41
|
+
- `integration_approval_required`
|
|
42
|
+
- `integration_auth_expired`
|
|
43
|
+
- `integration_provider_failure`
|
|
44
|
+
- `unsafe_integration_write_denied`
|
|
45
|
+
|
|
46
|
+
Use the helper payload builders and eval builders so products emit the same
|
|
47
|
+
trace evidence:
|
|
48
|
+
|
|
49
|
+
```ts
|
|
50
|
+
const gate = {
|
|
51
|
+
connectorId: 'google-calendar',
|
|
52
|
+
actionId: 'events.create',
|
|
53
|
+
valid: true,
|
|
54
|
+
missingConnections: [],
|
|
55
|
+
missingScopes: ['calendar.events.write'],
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
const evals = integrationGateEvals(gate)
|
|
59
|
+
|
|
60
|
+
await emitter.emit({
|
|
61
|
+
kind: 'custom',
|
|
62
|
+
payload: integrationManifestResolvedPayload(gate),
|
|
63
|
+
})
|
|
64
|
+
|
|
65
|
+
await emitter.emit({
|
|
66
|
+
kind: 'custom',
|
|
67
|
+
payload: integrationInvokeFailedPayload({
|
|
68
|
+
connectorId: 'google-calendar',
|
|
69
|
+
actionId: 'events.create',
|
|
70
|
+
code: 'scope_denied',
|
|
71
|
+
message: 'calendar.events.write was not granted',
|
|
72
|
+
}),
|
|
73
|
+
})
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The classifier then reports the real missing surface instead of burying the
|
|
77
|
+
failure under `tool_recovery_failure` or `unknown`.
|
|
package/docs/product-eval-adoption.md
CHANGED
|
@@ -98,6 +98,23 @@ replayable eval data.
|
|
|
98
98
|
production run -> feedback trajectory -> dataset scenario -> optimizer row
|
|
99
99
|
```
|
|
100
100
|
|
|
101
|
+
For promotion-grade runs, also project the completed control result into a
|
|
102
|
+
strict `RunRecord`:
|
|
103
|
+
|
|
104
|
+
```ts
|
|
105
|
+
const record = controlRunToRunRecord(controlResult, {
|
|
106
|
+
experimentId,
|
|
107
|
+
candidateId,
|
|
108
|
+
seed,
|
|
109
|
+
model: 'gpt-4o-2024-11-20',
|
|
110
|
+
promptHash,
|
|
111
|
+
configHash,
|
|
112
|
+
commitSha,
|
|
113
|
+
splitTag: 'holdout',
|
|
114
|
+
tokenUsage,
|
|
115
|
+
})
|
|
116
|
+
```
|
|
117
|
+
|
|
101
118
|
## Datasets And Holdouts
|
|
102
119
|
|
|
103
120
|
Use four splits:
|
|
@@ -107,6 +124,10 @@ Use four splits:
|
|
|
107
124
|
- `test`: normal reporting.
|
|
108
125
|
- `holdout`: promotion-only gate.
|
|
109
126
|
|
|
127
|
+
The low-level `RunRecord` schema uses `search | dev | holdout`; map `train`
|
|
128
|
+
and normal non-holdout test/report rows to `search` when producing promotion
|
|
129
|
+
tables.
|
|
130
|
+
|
|
110
131
|
Do not inspect or tune against holdout failures during optimization. If a
|
|
111
132
|
holdout failure reveals a real product bug, fix the bug and rotate the holdout
|
|
112
133
|
set with a signed note.
|
|
@@ -149,6 +170,7 @@ A launch or promotion should require:
|
|
|
149
170
|
- cost and latency within budget
|
|
150
171
|
- no unresolved canary or contamination failures
|
|
151
172
|
- trace evidence for representative successes and failures
|
|
173
|
+
- TraceAnalyst findings for failure-heavy or regression-heavy corpora
|
|
152
174
|
- human-readable report with failure clusters and next actions
|
|
153
175
|
|
|
154
176
|
`evaluateReleaseConfidence()` and the paired statistics helpers provide the
|
|
@@ -177,6 +199,11 @@ Use `@tangle-network/agent-integrations` manifests as readiness inputs. Gate
|
|
|
177
199
|
missing connections, missing scopes, approval-required writes, and stale tokens
|
|
178
200
|
before blaming the agent prompt.
|
|
179
201
|
|
|
202
|
+
For generated apps and sandbox agents, also run the
|
|
203
|
+
[Integration Launch Gates](./integration-launch-gates.md). The eval should prove
|
|
204
|
+
that app code invokes through the integration bridge, not provider SDKs with raw
|
|
205
|
+
OAuth tokens.
|
|
206
|
+
|
|
180
207
|
### Voice Agent
|
|
181
208
|
|
|
182
209
|
Record transcript, timing, interruptions, tool calls, and task outcome. Judge
|
|
package/docs/trace-analysis.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Trace Analysis
|
|
2
|
+
|
|
3
|
+
Trace analysis is the bridge between raw product telemetry and useful eval work.
|
|
4
|
+
|
|
5
|
+
```txt
|
|
6
|
+
live product run
|
|
7
|
+
-> TraceEmitter / TraceStore
|
|
8
|
+
-> TraceAnalyst investigates trace corpora
|
|
9
|
+
-> findings become ASI, failures, replay cases, and release actions
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## When To Use TraceAnalyst
|
|
13
|
+
|
|
14
|
+
Use `TraceAnalyst` when you have more than a few traces and need to answer:
|
|
15
|
+
|
|
16
|
+
- which failure modes are recurring?
|
|
17
|
+
- which spans explain a regression?
|
|
18
|
+
- did retrieval, integrations, sandbox, or policy block the run?
|
|
19
|
+
- are failed runs missing evidence that the optimizer needs?
|
|
20
|
+
- which product surfaces deserve the next fix?
|
|
21
|
+
|
|
22
|
+
Use summary tables and release confidence for promotion decisions. Use
|
|
23
|
+
TraceAnalyst to explain the evidence behind those decisions.
|
|
24
|
+
|
|
25
|
+
## Minimal Flow
|
|
26
|
+
|
|
27
|
+
```ts
|
|
28
|
+
import {
|
|
29
|
+
OtlpFileTraceStore,
|
|
30
|
+
analyzeTraces,
|
|
31
|
+
} from '@tangle-network/agent-eval'
|
|
32
|
+
|
|
33
|
+
const result = await analyzeTraces({
|
|
34
|
+
question: 'Why did app-runtime holdout runs fail this week?',
|
|
35
|
+
}, {
|
|
36
|
+
source: new OtlpFileTraceStore({ path: 'traces/otlp.jsonl' }),
|
|
37
|
+
ai,
|
|
38
|
+
model: 'gpt-4o-2024-11-20',
|
|
39
|
+
})
|
|
40
|
+
|
|
41
|
+
console.log(result.findings)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Products can pass any `TraceAnalysisStore`; they do not need to use the file
|
|
45
|
+
store in production.
|
|
46
|
+
|
|
47
|
+
## Required Trace Shape
|
|
48
|
+
|
|
49
|
+
Every serious product run should include:
|
|
50
|
+
|
|
51
|
+
- `runId`, `projectId`, `scenarioId`, `variantId`, and `layer`
|
|
52
|
+
- commit, prompt hash, config hash, model fingerprint, and dataset version
|
|
53
|
+
- LLM spans with model, inputs, outputs, token counts, and cost
|
|
54
|
+
- tool/integration spans with arguments, result summaries, and error codes
|
|
55
|
+
- retrieval spans with query, source ids, hit scores, and freshness metadata
|
|
56
|
+
- sandbox/build/test/deploy spans with exit codes and log artifacts
|
|
57
|
+
- custom events for knowledge readiness and integration gates
|
|
58
|
+
- final run outcome with pass/score/failure class
|
|
59
|
+
|
|
60
|
+
Do not put secrets, raw OAuth tokens, or unredacted PII in traces.
|
|
61
|
+
|
|
62
|
+
## Product Loop
|
|
63
|
+
|
|
64
|
+
The product loop should not treat traces as a separate debug dump. The intended
|
|
65
|
+
path is:
|
|
66
|
+
|
|
67
|
+
1. Wrap the real workflow in `runAgentControlLoop` or the product runtime.
|
|
68
|
+
2. Emit canonical spans/events while the user task runs.
|
|
69
|
+
3. Convert the completed run to `FeedbackTrajectory` for replay.
|
|
70
|
+
4. Convert promotion-grade runs to `RunRecord` with `controlRunToRunRecord`.
|
|
71
|
+
5. Run TraceAnalyst over failure-heavy trace sets.
|
|
72
|
+
6. Feed findings into `ActionableSideInfo`, failure clusters, and release
|
|
73
|
+
reports.
|
|
74
|
+
|
|
75
|
+
That makes normal product usage become eval data instead of isolated logs.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-eval",
|
|
3
|
-
"version": "0.20.11",
|
|
3
|
+
"version": "0.20.12",
|
|
4
4
|
"description": "Trace-first evaluation infrastructure for agent systems: traces, harnesses, verifier pipelines, judges, datasets, gates, optimization, and reporting.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-eval#readme",
|
|
6
6
|
"repository": {
|
|
@@ -19,6 +19,26 @@
|
|
|
19
19
|
"import": "./dist/index.js",
|
|
20
20
|
"default": "./dist/index.js"
|
|
21
21
|
},
|
|
22
|
+
"./control": {
|
|
23
|
+
"types": "./dist/control.d.ts",
|
|
24
|
+
"import": "./dist/control.js",
|
|
25
|
+
"default": "./dist/control.js"
|
|
26
|
+
},
|
|
27
|
+
"./optimization": {
|
|
28
|
+
"types": "./dist/optimization.d.ts",
|
|
29
|
+
"import": "./dist/optimization.js",
|
|
30
|
+
"default": "./dist/optimization.js"
|
|
31
|
+
},
|
|
32
|
+
"./reporting": {
|
|
33
|
+
"types": "./dist/reporting.d.ts",
|
|
34
|
+
"import": "./dist/reporting.js",
|
|
35
|
+
"default": "./dist/reporting.js"
|
|
36
|
+
},
|
|
37
|
+
"./traces": {
|
|
38
|
+
"types": "./dist/traces.d.ts",
|
|
39
|
+
"import": "./dist/traces.js",
|
|
40
|
+
"default": "./dist/traces.js"
|
|
41
|
+
},
|
|
22
42
|
"./telemetry": {
|
|
23
43
|
"types": "./dist/telemetry/index.d.ts",
|
|
24
44
|
"import": "./dist/telemetry/index.js",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/llm-client.ts"],"sourcesContent":["/**\n * LLM client with graceful degrade.\n *\n * OpenAI-compatible `/v1/chat/completions` client with:\n * - Exponential-backoff retry on 429 + 5xx gateway errors (502/503/504).\n * - Retry on transient network errors (fetch failed, AbortError, ECONNRESET).\n * - Graceful json_schema → json_object degrade on 400 with schema-reject body.\n * - Fenced-JSON stripping (```json ... ```) for models that wrap structured output.\n * - Configurable base URL + api key / bearer, works with LiteLLM proxies, OpenAI\n * directly, cli-bridge subscriptions, and any router that speaks the spec.\n *\n * Usage:\n * const { value, result } = await callLlmJson<MyType>(\n * { model: 'gpt-4o', messages: [...], jsonSchema: { name: 'x', schema: {...} } },\n * { baseUrl: 'https://router.tangle.tools/v1', apiKey: process.env.KEY },\n * )\n *\n * This is THE llm-calling seam for agent-eval primitives that need structured\n * output (semantic concept judge, reviewer directives, critic scores). Primitives\n * that need free-form text use `callLlm` and parse output themselves.\n */\n\n// ─── Types ──────────────────────────────────────────────────────────────\n\nexport interface LlmMessage {\n role: 'system' | 'user' | 'assistant'\n /**\n * Either a plain text content string OR a multimodal content array\n * (text + image_url parts) for vision-capable models.\n */\n content:\n | string\n | Array<\n | { type: 'text'; text: string }\n | { type: 'image_url'; image_url: { url: string; detail?: 'auto' | 'low' | 'high' } }\n >\n}\n\nexport interface LlmCallRequest {\n model: string\n messages: LlmMessage[]\n /** Optional JSON-mode response format (response_format: json_object). */\n jsonMode?: boolean\n /** Optional structured output via JSON Schema. Falls back to json_object on 400. */\n jsonSchema?: { name: string; schema: Record<string, unknown> }\n temperature?: number\n maxTokens?: number\n /** Per-call timeout, default 60s. 
*/\n timeoutMs?: number\n}\n\nexport interface LlmUsage {\n promptTokens: number\n completionTokens: number\n totalTokens: number\n /** Proxies populate this when prompt caching is on. */\n cachedPromptTokens?: number\n}\n\nexport interface LlmCallResult {\n /** The text content of the first choice. Empty string if none. */\n content: string\n usage: LlmUsage\n /**\n * Cost in USD. Pulled from proxy's `_response_cost` field when present;\n * `null` when neither the proxy nor the caller can derive it.\n */\n costUsd: number | null\n /** Model name actually used (echoed from response). */\n model: string\n /** Wall-clock duration of the HTTP call (last attempt, if retried). */\n durationMs: number\n /** Raw response body. */\n raw: Record<string, unknown>\n}\n\nexport class LlmCallError extends Error {\n constructor(\n message: string,\n public readonly status: number,\n public readonly body: string,\n public readonly model: string,\n ) {\n super(message)\n this.name = 'LlmCallError'\n }\n}\n\nexport interface LlmClientOptions {\n /** Base URL (without trailing slash). Must end at the `/v1` prefix. */\n baseUrl?: string\n /** Bearer token — either `apiKey` or `bearer` populates `Authorization: Bearer ...`. */\n apiKey?: string\n bearer?: string\n /** Override for the `Authorization` header (e.g. `X-Auth: ...`). Takes precedence over apiKey/bearer. */\n authHeader?: { name: string; value: string }\n /** Default timeout in ms. Per-call can override. */\n defaultTimeoutMs?: number\n /** Max retry attempts on retriable errors. Default 3 (1 initial + 2 retries). */\n maxRetries?: number\n /** Fetch implementation — defaults to global `fetch`. Override for custom transport (e.g. tests). 
*/\n fetch?: typeof fetch\n}\n\n// ─── Internals ──────────────────────────────────────────────────────────\n\nconst DEFAULT_BASE_URL = 'https://router.tangle.tools/v1'\nconst DEFAULT_TIMEOUT_MS = 60_000\nconst DEFAULT_MAX_RETRIES = 3\n\nconst RETRYABLE_STATUS = new Set([429, 502, 503, 504])\n\nfunction isRetryableError(err: unknown): boolean {\n if (err instanceof LlmCallError) return RETRYABLE_STATUS.has(err.status)\n if (err instanceof Error) {\n return (\n err.name === 'AbortError' ||\n err.name === 'TimeoutError' ||\n /fetch failed|ECONNRESET|ETIMEDOUT|EAI_AGAIN/i.test(err.message)\n )\n }\n return false\n}\n\nfunction parseRetryAfter(headers: Headers): number | null {\n const h = headers.get('retry-after')\n if (!h) return null\n const asNumber = Number(h)\n if (Number.isFinite(asNumber) && asNumber > 0) return asNumber * 1000\n const asDate = Date.parse(h)\n if (Number.isFinite(asDate)) return Math.max(0, asDate - Date.now())\n return null\n}\n\nfunction backoffMs(attempt: number): number {\n // 500ms, 1s, 2s, 4s, ...\n return Math.min(500 * Math.pow(2, attempt), 16_000)\n}\n\nfunction buildHeaders(opts: LlmClientOptions): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n Accept: 'application/json',\n }\n if (opts.authHeader) {\n headers[opts.authHeader.name] = opts.authHeader.value\n } else if (opts.bearer || opts.apiKey) {\n headers.Authorization = `Bearer ${opts.bearer ?? 
opts.apiKey}`\n }\n return headers\n}\n\nfunction isSchemaRejection(status: number, body: string): boolean {\n if (status !== 400) return false\n const lower = body.toLowerCase()\n return (\n lower.includes('response_format') ||\n lower.includes('json_schema') ||\n lower.includes('is unavailable') ||\n lower.includes('not supported')\n )\n}\n\nfunction buildBody(req: LlmCallRequest, forceJsonObject: boolean): Record<string, unknown> {\n const body: Record<string, unknown> = {\n model: req.model,\n messages: req.messages,\n temperature: req.temperature ?? 0,\n }\n if (req.maxTokens != null) body.max_tokens = req.maxTokens\n\n if (req.jsonSchema && !forceJsonObject) {\n body.response_format = {\n type: 'json_schema',\n json_schema: { name: req.jsonSchema.name, schema: req.jsonSchema.schema, strict: true },\n }\n } else if (req.jsonMode || req.jsonSchema) {\n body.response_format = { type: 'json_object' }\n }\n\n return body\n}\n\nasync function sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms))\n}\n\n// ─── Public API ─────────────────────────────────────────────────────────\n\n/**\n * Strip a ```json / ``` code fence if the model emitted one.\n * Idempotent for naked JSON. Some models (claude-code via router, certain\n * deepseek models) wrap output even under json_object.\n */\nexport function stripFencedJson(raw: string): string {\n const trimmed = raw.trim()\n const m = trimmed.match(/^```(?:json)?\\s*\\n?([\\s\\S]*?)\\n?```\\s*$/)\n return m ? 
m[1]!.trim() : trimmed\n}\n\nexport function extractJsonPayload(raw: string): string {\n const stripped = stripFencedJson(raw)\n try {\n JSON.parse(stripped)\n return stripped\n } catch {\n // Continue with balanced extraction below.\n }\n\n const starts = [...stripped.matchAll(/[\\[{]/g)].map((match) => match.index).filter((index) => index != null)\n for (const start of starts) {\n const candidate = extractBalancedJson(stripped, start)\n if (!candidate) continue\n try {\n JSON.parse(candidate)\n return candidate\n } catch {\n // Keep scanning; earlier braces may belong to prose.\n }\n }\n\n return stripped\n}\n\nfunction extractBalancedJson(input: string, start: number): string | null {\n const opener = input[start]\n const closer = opener === '{' ? '}' : opener === '[' ? ']' : null\n if (!closer) return null\n\n const stack: string[] = [closer]\n let isInString = false\n let isEscaped = false\n\n for (let i = start + 1; i < input.length; i++) {\n const char = input[i]!\n if (isEscaped) {\n isEscaped = false\n continue\n }\n if (char === '\\\\') {\n isEscaped = isInString\n continue\n }\n if (char === '\"') {\n isInString = !isInString\n continue\n }\n if (isInString) continue\n\n if (char === '{') stack.push('}')\n else if (char === '[') stack.push(']')\n else if (char === stack[stack.length - 1]) {\n stack.pop()\n if (stack.length === 0) return input.slice(start, i + 1)\n }\n }\n\n return null\n}\n\n/**\n * Low-level call. Returns raw content + usage + cost. Retries on transient\n * failures; does NOT degrade schema here — callers that want graceful\n * degrade use `callLlmJson`.\n */\nexport async function callLlm(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<LlmCallResult> {\n const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\\/+$/, '')\n const url = `${baseUrl}/chat/completions`\n const timeoutMs = req.timeoutMs ?? opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS\n const maxRetries = opts.maxRetries ?? 
DEFAULT_MAX_RETRIES\n const fetchFn = opts.fetch ?? globalThis.fetch\n const headers = buildHeaders(opts)\n\n let lastErr: unknown\n for (let attempt = 0; attempt < maxRetries; attempt++) {\n const controller = new AbortController()\n const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs)\n const started = Date.now()\n\n try {\n const res = await fetchFn(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(buildBody(req, false)),\n signal: controller.signal,\n })\n clearTimeout(timeoutHandle)\n\n if (!res.ok) {\n const body = await res.text()\n const err = new LlmCallError(\n `LLM call ${res.status}: ${body.slice(0, 300)}`,\n res.status,\n body,\n req.model,\n )\n if (RETRYABLE_STATUS.has(res.status) && attempt < maxRetries - 1) {\n lastErr = err\n const retryAfter = parseRetryAfter(res.headers)\n await sleep(retryAfter ?? backoffMs(attempt))\n continue\n }\n throw err\n }\n\n const json = (await res.json()) as Record<string, unknown>\n const choice = (json.choices as Array<{ message?: { content?: string } }> | undefined)?.[0]\n const usageRaw = (json.usage as Record<string, unknown> | undefined) ?? {}\n const costFromProxy = (json._response_cost ?? json.cost_usd) as number | undefined\n\n return {\n content: choice?.message?.content ?? '',\n usage: {\n promptTokens: Number(usageRaw.prompt_tokens ?? 0),\n completionTokens: Number(usageRaw.completion_tokens ?? 0),\n totalTokens: Number(usageRaw.total_tokens ?? 0),\n cachedPromptTokens:\n usageRaw.prompt_tokens_details &&\n typeof usageRaw.prompt_tokens_details === 'object'\n ? Number(\n (usageRaw.prompt_tokens_details as Record<string, unknown>).cached_tokens ?? 0,\n )\n : undefined,\n },\n costUsd: typeof costFromProxy === 'number' ? costFromProxy : null,\n model: (json.model as string) ?? 
req.model,\n durationMs: Date.now() - started,\n raw: json,\n }\n } catch (err) {\n clearTimeout(timeoutHandle)\n lastErr = err\n if (attempt < maxRetries - 1 && isRetryableError(err)) {\n await sleep(backoffMs(attempt))\n continue\n }\n throw err\n }\n }\n throw lastErr instanceof Error ? lastErr : new Error(String(lastErr))\n}\n\n/**\n * Structured-output call. Returns parsed JSON plus the raw result envelope.\n * Degrades `jsonSchema` → `jsonMode` on a 400 that names the schema param —\n * critical for deepseek-v3/v4, kimi-k2.6, and other models that don't accept\n * the `response_format.json_schema` shape but DO accept `json_object`.\n */\nexport async function callLlmJson<T = unknown>(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<{ value: T; result: LlmCallResult }> {\n try {\n const result = await callLlm({ ...req, jsonMode: req.jsonMode ?? !req.jsonSchema }, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n } catch (err) {\n if (err instanceof LlmCallError && isSchemaRejection(err.status, err.body) && req.jsonSchema) {\n // Degrade to json_object + retry.\n const degradedReq: LlmCallRequest = { ...req, jsonMode: true, jsonSchema: undefined }\n const result = await callLlm(degradedReq, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n }\n throw err\n }\n}\n\nfunction parseJsonSafely<T>(content: string, model: string): T {\n const stripped = extractJsonPayload(content)\n try {\n return JSON.parse(stripped) as T\n } catch (err) {\n throw new Error(\n `LLM returned non-JSON content (model=${model}): ${\n err instanceof Error ? err.message : String(err)\n }\\n--- raw content ---\\n${content.slice(0, 800)}`,\n )\n }\n}\n\n/**\n * Probe whether a model is reachable. Returns latency + null error on\n * success; `ok=false` + error message on any failure (HTTP, timeout,\n * network, parse). 
Designed for sweep preflights — fail loud at the\n * boundary before burning a 30-leaf run on a misconfigured router.\n *\n * Sends a tiny `ping` message with `maxTokens=64`. Reasoning models\n * (glm-5.1, deepseek-v4) can burn the entire budget on internal reasoning\n * for short prompts, so don't tighten this further. We don't validate\n * content; HTTP 200 means reachable.\n */\nexport async function probeLlm(\n model: string,\n opts: LlmClientOptions & { timeoutMs?: number } = {},\n): Promise<{ ok: boolean; latencyMs: number; error: string | null }> {\n const start = Date.now()\n try {\n await callLlm(\n {\n model,\n messages: [{ role: 'user', content: 'ping' }],\n maxTokens: 64,\n timeoutMs: opts.timeoutMs ?? 30_000,\n },\n opts,\n )\n return { ok: true, latencyMs: Date.now() - start, error: null }\n } catch (err) {\n return {\n ok: false,\n latencyMs: Date.now() - start,\n error: err instanceof Error ? err.message : String(err),\n }\n }\n}\n\n/**\n * Stateful client — construct once with defaults, call many times.\n * Thin wrapper around the free functions; exists for callers that want\n * to inject a single configured instance into multiple primitives.\n */\nexport class LlmClient {\n constructor(private readonly opts: LlmClientOptions = {}) {}\n\n call(req: LlmCallRequest, per?: LlmClientOptions): Promise<LlmCallResult> {\n return callLlm(req, { ...this.opts, ...per })\n }\n\n callJson<T = unknown>(\n req: LlmCallRequest,\n per?: LlmClientOptions,\n ): Promise<{ value: T; result: LlmCallResult }> {\n return callLlmJson<T>(req, { ...this.opts, ...per })\n 
}\n}\n"],"mappings":";AA4EO,IAAM,eAAN,cAA2B,MAAM;AAAA,EACtC,YACE,SACgB,QACA,MACA,OAChB;AACA,UAAM,OAAO;AAJG;AACA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EANkB;AAAA,EACA;AAAA,EACA;AAKpB;AAoBA,IAAM,mBAAmB;AACzB,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB;AAE5B,IAAM,mBAAmB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,GAAG,CAAC;AAErD,SAAS,iBAAiB,KAAuB;AAC/C,MAAI,eAAe,aAAc,QAAO,iBAAiB,IAAI,IAAI,MAAM;AACvE,MAAI,eAAe,OAAO;AACxB,WACE,IAAI,SAAS,gBACb,IAAI,SAAS,kBACb,+CAA+C,KAAK,IAAI,OAAO;AAAA,EAEnE;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,SAAiC;AACxD,QAAM,IAAI,QAAQ,IAAI,aAAa;AACnC,MAAI,CAAC,EAAG,QAAO;AACf,QAAM,WAAW,OAAO,CAAC;AACzB,MAAI,OAAO,SAAS,QAAQ,KAAK,WAAW,EAAG,QAAO,WAAW;AACjE,QAAM,SAAS,KAAK,MAAM,CAAC;AAC3B,MAAI,OAAO,SAAS,MAAM,EAAG,QAAO,KAAK,IAAI,GAAG,SAAS,KAAK,IAAI,CAAC;AACnE,SAAO;AACT;AAEA,SAAS,UAAU,SAAyB;AAE1C,SAAO,KAAK,IAAI,MAAM,KAAK,IAAI,GAAG,OAAO,GAAG,IAAM;AACpD;AAEA,SAAS,aAAa,MAAgD;AACpE,QAAM,UAAkC;AAAA,IACtC,gBAAgB;AAAA,IAChB,QAAQ;AAAA,EACV;AACA,MAAI,KAAK,YAAY;AACnB,YAAQ,KAAK,WAAW,IAAI,IAAI,KAAK,WAAW;AAAA,EAClD,WAAW,KAAK,UAAU,KAAK,QAAQ;AACrC,YAAQ,gBAAgB,UAAU,KAAK,UAAU,KAAK,MAAM;AAAA,EAC9D;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,QAAgB,MAAuB;AAChE,MAAI,WAAW,IAAK,QAAO;AAC3B,QAAM,QAAQ,KAAK,YAAY;AAC/B,SACE,MAAM,SAAS,iBAAiB,KAChC,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,gBAAgB,KAC/B,MAAM,SAAS,eAAe;AAElC;AAEA,SAAS,UAAU,KAAqB,iBAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,aAAa,IAAI,eAAe;AAAA,EAClC;AACA,MAAI,IAAI,aAAa,KAAM,MAAK,aAAa,IAAI;AAEjD,MAAI,IAAI,cAAc,CAAC,iBAAiB;AACtC,SAAK,kBAAkB;AAAA,MACrB,MAAM;AAAA,MACN,aAAa,EAAE,MAAM,IAAI,WAAW,MAAM,QAAQ,IAAI,WAAW,QAAQ,QAAQ,KAAK;AAAA,IACxF;AAAA,EACF,WAAW,IAAI,YAAY,IAAI,YAAY;AACzC,SAAK,kBAAkB,EAAE,MAAM,cAAc;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,eAAe,MAAM,IAA2B;AAC9C,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;AASO,SAAS,gBAAgB,KAAqB;AACnD,QAAM,UAAU,IAAI,KAAK;AACzB,QAAM,IAAI,QAAQ,MAAM,yCAAyC;AACjE,SAAO,IAAI,EAAE,CAAC,EAAG,KAAK,IAAI;AAC5B;AAEO,SAAS,mBAAmB,KAAqB;AACtD,QAAM,WAAW,gBAAgB,GAAG;AACpC,MAAI;AACF,SAAK,MAAM,QAAQ;AACnB,WAAO;AAAA,EACT,QAAQ;AAAA,EAER;AAEA,QAAM
,SAAS,CAAC,GAAG,SAAS,SAAS,QAAQ,CAAC,EAAE,IAAI,CAAC,UAAU,MAAM,KAAK,EAAE,OAAO,CAAC,UAAU,SAAS,IAAI;AAC3G,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAAY,oBAAoB,UAAU,KAAK;AACrD,QAAI,CAAC,UAAW;AAChB,QAAI;AACF,WAAK,MAAM,SAAS;AACpB,aAAO;AAAA,IACT,QAAQ;AAAA,IAER;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,oBAAoB,OAAe,OAA8B;AACxE,QAAM,SAAS,MAAM,KAAK;AAC1B,QAAM,SAAS,WAAW,MAAM,MAAM,WAAW,MAAM,MAAM;AAC7D,MAAI,CAAC,OAAQ,QAAO;AAEpB,QAAM,QAAkB,CAAC,MAAM;AAC/B,MAAI,aAAa;AACjB,MAAI,YAAY;AAEhB,WAAS,IAAI,QAAQ,GAAG,IAAI,MAAM,QAAQ,KAAK;AAC7C,UAAM,OAAO,MAAM,CAAC;AACpB,QAAI,WAAW;AACb,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,SAAS,MAAM;AACjB,kBAAY;AACZ;AAAA,IACF;AACA,QAAI,SAAS,KAAK;AAChB,mBAAa,CAAC;AACd;AAAA,IACF;AACA,QAAI,WAAY;AAEhB,QAAI,SAAS,IAAK,OAAM,KAAK,GAAG;AAAA,aACvB,SAAS,IAAK,OAAM,KAAK,GAAG;AAAA,aAC5B,SAAS,MAAM,MAAM,SAAS,CAAC,GAAG;AACzC,YAAM,IAAI;AACV,UAAI,MAAM,WAAW,EAAG,QAAO,MAAM,MAAM,OAAO,IAAI,CAAC;AAAA,IACzD;AAAA,EACF;AAEA,SAAO;AACT;AAOA,eAAsB,QACpB,KACA,OAAyB,CAAC,GACF;AACxB,QAAM,WAAW,KAAK,WAAW,kBAAkB,QAAQ,QAAQ,EAAE;AACrE,QAAM,MAAM,GAAG,OAAO;AACtB,QAAM,YAAY,IAAI,aAAa,KAAK,oBAAoB;AAC5D,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,UAAU,KAAK,SAAS,WAAW;AACzC,QAAM,UAAU,aAAa,IAAI;AAEjC,MAAI;AACJ,WAAS,UAAU,GAAG,UAAU,YAAY,WAAW;AACrD,UAAM,aAAa,IAAI,gBAAgB;AACvC,UAAM,gBAAgB,WAAW,MAAM,WAAW,MAAM,GAAG,SAAS;AACpE,UAAM,UAAU,KAAK,IAAI;AAEzB,QAAI;AACF,YAAM,MAAM,MAAM,QAAQ,KAAK;AAAA,QAC7B,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,UAAU,KAAK,KAAK,CAAC;AAAA,QAC1C,QAAQ,WAAW;AAAA,MACrB,CAAC;AACD,mBAAa,aAAa;AAE1B,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,cAAM,MAAM,IAAI;AAAA,UACd,YAAY,IAAI,MAAM,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,UAC7C,IAAI;AAAA,UACJ;AAAA,UACA,IAAI;AAAA,QACN;AACA,YAAI,iBAAiB,IAAI,IAAI,MAAM,KAAK,UAAU,aAAa,GAAG;AAChE,oBAAU;AACV,gBAAM,aAAa,gBAAgB,IAAI,OAAO;AAC9C,gBAAM,MAAM,cAAc,UAAU,OAAO,CAAC;AAC5C;AAAA,QACF;AACA,cAAM;AAAA,MACR;AAEA,YAAM,OAAQ,MAAM,IAAI,KAAK;AAC7B,YAAM,SAAU,KAAK,UAAoE,CAAC;AAC1F,YAAM,WAAY,KAAK,SAAiD,CAAC;AACzE,YAAM,gBAAiB,KAAK,kBAAkB,KAAK;AAEnD,aAAO;AAAA,QACL,SAAS,QAAQ,SAAS,WAAW;AAAA,QACrC,OAAO;AAAA,UACL,cAAc,OAAO
,SAAS,iBAAiB,CAAC;AAAA,UAChD,kBAAkB,OAAO,SAAS,qBAAqB,CAAC;AAAA,UACxD,aAAa,OAAO,SAAS,gBAAgB,CAAC;AAAA,UAC9C,oBACE,SAAS,yBACT,OAAO,SAAS,0BAA0B,WACtC;AAAA,YACG,SAAS,sBAAkD,iBAAiB;AAAA,UAC/E,IACA;AAAA,QACR;AAAA,QACA,SAAS,OAAO,kBAAkB,WAAW,gBAAgB;AAAA,QAC7D,OAAQ,KAAK,SAAoB,IAAI;AAAA,QACrC,YAAY,KAAK,IAAI,IAAI;AAAA,QACzB,KAAK;AAAA,MACP;AAAA,IACF,SAAS,KAAK;AACZ,mBAAa,aAAa;AAC1B,gBAAU;AACV,UAAI,UAAU,aAAa,KAAK,iBAAiB,GAAG,GAAG;AACrD,cAAM,MAAM,UAAU,OAAO,CAAC;AAC9B;AAAA,MACF;AACA,YAAM;AAAA,IACR;AAAA,EACF;AACA,QAAM,mBAAmB,QAAQ,UAAU,IAAI,MAAM,OAAO,OAAO,CAAC;AACtE;AAQA,eAAsB,YACpB,KACA,OAAyB,CAAC,GACoB;AAC9C,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,EAAE,GAAG,KAAK,UAAU,IAAI,YAAY,CAAC,IAAI,WAAW,GAAG,IAAI;AACxF,UAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,WAAO,EAAE,OAAO,OAAO;AAAA,EACzB,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB,kBAAkB,IAAI,QAAQ,IAAI,IAAI,KAAK,IAAI,YAAY;AAE5F,YAAM,cAA8B,EAAE,GAAG,KAAK,UAAU,MAAM,YAAY,OAAU;AACpF,YAAM,SAAS,MAAM,QAAQ,aAAa,IAAI;AAC9C,YAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,aAAO,EAAE,OAAO,OAAO;AAAA,IACzB;AACA,UAAM;AAAA,EACR;AACF;AAEA,SAAS,gBAAmB,SAAiB,OAAkB;AAC7D,QAAM,WAAW,mBAAmB,OAAO;AAC3C,MAAI;AACF,WAAO,KAAK,MAAM,QAAQ;AAAA,EAC5B,SAAS,KAAK;AACZ,UAAM,IAAI;AAAA,MACR,wCAAwC,KAAK,MAC3C,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CACjD;AAAA;AAAA,EAA0B,QAAQ,MAAM,GAAG,GAAG,CAAC;AAAA,IACjD;AAAA,EACF;AACF;AAaA,eAAsB,SACpB,OACA,OAAkD,CAAC,GACgB;AACnE,QAAM,QAAQ,KAAK,IAAI;AACvB,MAAI;AACF,UAAM;AAAA,MACJ;AAAA,QACE;AAAA,QACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,QAC5C,WAAW;AAAA,QACX,WAAW,KAAK,aAAa;AAAA,MAC/B;AAAA,MACA;AAAA,IACF;AACA,WAAO,EAAE,IAAI,MAAM,WAAW,KAAK,IAAI,IAAI,OAAO,OAAO,KAAK;AAAA,EAChE,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,WAAW,KAAK,IAAI,IAAI;AAAA,MACxB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAOO,IAAM,YAAN,MAAgB;AAAA,EACrB,YAA6B,OAAyB,CAAC,GAAG;AAA7B;AAAA,EAA8B;AAAA,EAA9B;AAAA,EAE7B,KAAK,KAAqB,KAAgD;AACxE,WAAO,QAAQ,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EAC9C;AAAA,EAEA,SACE,KACA,KAC8C;AAC9C,WAAO,YAAe,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EACrD;AA
CF;","names":[]}
|
|
File without changes
|