@tangle-network/agent-eval 0.20.0 → 0.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,8 +24,8 @@ import {
24
24
  runRpcBatch,
25
25
  runRpcOnce,
26
26
  startServer
27
- } from "../chunk-OZPRSK4A.js";
28
- import "../chunk-ITN4YOZY.js";
27
+ } from "../chunk-CJJSB6ZQ.js";
28
+ import "../chunk-JAOLXRIA.js";
29
29
  import "../chunk-PZ5AY32C.js";
30
30
  export {
31
31
  BUILTIN_RUBRICS,
@@ -0,0 +1,44 @@
1
+ # Example benchmark wrappers
2
+
3
+ Reference implementations of `BenchmarkAdapter` for two public benchmarks. They are NOT bundled — they're intentionally shipped as source you read, copy, and adapt.
4
+
5
+ | Wrapper | What it does | Why it's an example, not core |
6
+ |---|---|---|
7
+ | [`gsm8k/`](./gsm8k) | Exact-match grading on the final numeric answer of GSM8K (Cobbe et al.) | The dataset isn't ours and isn't bundled. The wrapper points to a local JSONL via `AGENT_EVAL_GSM8K_PATH`. |
8
+ | [`swebench-lite/`](./swebench-lite) | Pass/fail grading via an external SWE-Bench grader command | The grader is a separate binary; the wrapper stubs the integration via `AGENT_EVAL_SWEBENCH_GRADER_CMD`. |
9
+
10
+ The novel benchmark we ship and own — the synthetic routing task — lives in `src/benchmarks/routing/` and IS in the bundle.
11
+
12
+ ## Using these wrappers
13
+
14
+ Two paths.
15
+
16
+ **Option A — read and inline.** Copy the wrapper file into your project. Replace the import paths from `../../../src/benchmarks/types` and `../../../src/run-record` with `@tangle-network/agent-eval`. Done.
17
+
18
+ **Option B — import from agent-eval source.** If your project sits in this monorepo (or you've cloned the repo), import directly:
19
+
20
+ ```ts
21
+ import * as gsm8k from '@tangle-network/agent-eval/examples/benchmarks/gsm8k'
22
+ ```
23
+
24
+ This requires adding `examples/**/*.ts` to your TypeScript paths. Easier to just copy.
25
+
26
+ ## What every BenchmarkAdapter exports
27
+
28
+ ```ts
29
+ loadDataset(split: 'search' | 'dev' | 'holdout'): Promise<DatasetItem[]>
30
+ evaluate(item, response): Promise<{ score: number, raw: Record<string, unknown> }>
31
+ assignSplit(itemId: string): 'search' | 'dev' | 'holdout'
32
+ ```
33
+
34
+ `assignSplit` uses `deterministicSplit` on a benchmark-prefixed item id (for example `gsm8k::<itemId>`) together with `BENCHMARK_SPLIT_SEED` — the same item gets the same split everywhere. Don't change the seed or the prefix; both are load-bearing for reproducibility.
35
+
36
+ ## Adding a new benchmark
37
+
38
+ 1. Create `examples/benchmarks/<your-benchmark>/index.ts`.
39
+ 2. Export `loadDataset`, `evaluate`, `assignSplit`. Optionally a typed `Adapter` class.
40
+ 3. Use `deterministicSplit` from `@tangle-network/agent-eval` for split assignment.
41
+ 4. Fail loud on missing config (env vars, paths). Never default to silent-pass.
42
+ 5. Document config requirements in a per-benchmark README.
43
+
44
+ If your benchmark is novel and broadly useful, propose moving it into `src/benchmarks/` as core surface (PR welcome). The bar is: novel rubric, reusable across projects, low maintenance burden.
@@ -0,0 +1,126 @@
1
+ /**
2
+ * GSM8K wrapper — exact-match grading on the final numeric answer.
3
+ *
4
+ * The dataset itself is NOT bundled. `loadDataset` will:
5
+ * 1. read from `process.env.AGENT_EVAL_GSM8K_PATH` if set (a JSONL
6
+ * file with `{ id, question, answer }` records — the standard
7
+ * HF mirror layout converted to JSONL);
8
+ * 2. otherwise throw a clearly-marked error pointing to the loader.
9
+ *
10
+ * `evaluate` parses the final number out of the response (last
11
+ * occurrence of a signed-decimal-or-integer literal, optionally after
12
+ * `####`, the GSM8K answer convention) and compares to the ground-
13
+ * truth integer. Floating-point comparisons use a 1e-6 tolerance.
14
+ */
15
+
16
+ import { existsSync, readFileSync } from 'node:fs'
17
+
18
+ import type {
19
+ BenchmarkAdapter,
20
+ BenchmarkDatasetItem,
21
+ BenchmarkEvaluation,
22
+ } from '../../../src/benchmarks/types'
23
+ import { deterministicSplit } from '../../../src/benchmarks/types'
24
+ import type { RunSplitTag } from '../../../src/run-record'
25
+
26
+ export interface Gsm8kPayload {
27
+ question: string
28
+ /** Reference answer, post-#### normalization. The JSONL value may be a
29
+ * number or a numeric string ("72", "1.5"); it is stored here as a string. */
30
+ answer: string
31
+ }
32
+
33
+ export type Gsm8kItem = BenchmarkDatasetItem<Gsm8kPayload>
34
+
35
/**
 * Benchmark adapter for GSM8K.
 *
 * Items come from the JSONL file named by `AGENT_EVAL_GSM8K_PATH`; grading
 * compares the number parsed from the response with the number parsed from
 * the reference answer.
 */
class Gsm8kAdapter implements BenchmarkAdapter<Gsm8kItem, Gsm8kPayload> {
  /**
   * Load every dataset item assigned to `split`.
   *
   * @throws Error when `AGENT_EVAL_GSM8K_PATH` is unset/empty or points at
   *   a file that does not exist.
   */
  async loadDataset(split: RunSplitTag): Promise<Gsm8kItem[]> {
    const datasetPath = process.env.AGENT_EVAL_GSM8K_PATH
    if (!datasetPath) {
      throw new Error(
        'GSM8K dataset not provided. Set AGENT_EVAL_GSM8K_PATH to a JSONL file ' +
          'with {id, question, answer} records (the HF GSM8K mirror converted to JSONL).',
      )
    }
    if (!existsSync(datasetPath)) {
      throw new Error(`AGENT_EVAL_GSM8K_PATH=${datasetPath} does not exist`)
    }

    // Keep only the rows whose deterministic split matches the request.
    const selected: Gsm8kItem[] = []
    for (const candidate of parseJsonl(datasetPath)) {
      if (assignSplitImpl(candidate.id) === split) {
        selected.push(candidate)
      }
    }
    return selected
  }

  /**
   * Grade one response: score 1 when the parsed response number is within
   * 1e-6 of the parsed reference number, otherwise 0. `raw.reason` explains
   * a zero that is caused by an unparseable reference or response.
   */
  async evaluate(item: Gsm8kItem, response: string): Promise<BenchmarkEvaluation> {
    const reference = parseGsm8kAnswer(item.payload.answer)
    const predicted = parseGsm8kAnswer(response)

    if (reference === null) {
      // Defensive: the dataset should never ship a non-numeric ref.
      return {
        score: 0,
        raw: { reason: 'reference_not_numeric', expected: item.payload.answer },
      }
    }
    if (predicted === null) {
      return {
        score: 0,
        raw: { reason: 'no_numeric_in_response', expected: reference, observed: null },
      }
    }

    const exactMatch = Math.abs(reference - predicted) < 1e-6
    const score = exactMatch ? 1 : 0
    return { score, raw: { expected: reference, observed: predicted, exactMatch } }
  }

  /** Split tag for `itemId`; delegates to the module-level helper. */
  assignSplit(itemId: string): RunSplitTag {
    return assignSplitImpl(itemId)
  }
}
69
+
70
/**
 * Split tag for a GSM8K item. The id is prefixed with `gsm8k::` before it
 * is handed to `deterministicSplit`.
 */
function assignSplitImpl(itemId: string): RunSplitTag {
  const namespacedId = `gsm8k::${itemId}`
  return deterministicSplit(namespacedId)
}
73
+
74
/**
 * Read a GSM8K JSONL file into dataset items.
 *
 * Blank lines are skipped. Each remaining line must be a JSON object with a
 * non-empty `question` and `answer` (string or number; both are stored as
 * strings). A missing `id` falls back to `gsm8k_<lineNo>`.
 *
 * @param path Path of the JSONL file to read.
 * @returns One item per non-blank line, in file order.
 * @throws Error naming the 1-based line number when a line is not valid
 *   JSON, is not a JSON object, or lacks a usable question/answer.
 */
function parseJsonl(path: string): Gsm8kItem[] {
  // Only strings and numbers are meaningful text; anything else (objects,
  // booleans, null) is treated as missing so it fails loudly below instead
  // of being stringified to "[object Object]" or "true".
  const asText = (value: unknown): string =>
    typeof value === 'string' || typeof value === 'number' ? String(value) : ''

  const raw = readFileSync(path, 'utf8')
  const out: Gsm8kItem[] = []
  let lineNo = 0
  for (const line of raw.split('\n')) {
    lineNo++
    // trim() also drops a trailing '\r' from CRLF files.
    const trimmed = line.trim()
    if (!trimmed) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(trimmed)
    } catch (e) {
      throw new Error(`GSM8K JSONL parse error at line ${lineNo}: ${(e as Error).message}`)
    }
    // `null`, arrays and scalars are valid JSON but not valid records.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new Error(`GSM8K JSONL line ${lineNo} is not a JSON object`)
    }
    const row = parsed as Record<string, unknown>

    const id = String(row.id ?? `gsm8k_${lineNo}`)
    const question = asText(row.question)
    const answer = asText(row.answer)
    if (!question || !answer) {
      throw new Error(`GSM8K JSONL line ${lineNo} missing question/answer`)
    }
    out.push({ id, payload: { question, answer } })
  }
  return out
}
98
+
99
/**
 * Parse a GSM8K-style answer. Honors the dataset's `#### N`
 * convention (the canonical answer comes after `####`); otherwise
 * returns the LAST signed numeric literal in the string.
 *
 * Accepted literals: optional `-`, digits with optional thousands commas
 * and an optional fraction (`72`, `1,234.5`, `72.`), or a leading-decimal
 * form (`.5`, `-.25`). The leading-decimal form is only recognised when the
 * dot is not glued to a preceding word character or dot, so `v1.2.3` still
 * ends in `3` and `Hmm...5` still yields `5`.
 *
 * NOTE(review): a hyphen directly before a number is always read as a minus
 * sign, so a range such as `5-10` yields `-10`.
 *
 * @param text Reference answer or model response.
 * @returns The parsed number, or `null` when `text` is empty or contains no
 *   numeric literal.
 */
export function parseGsm8kAnswer(text: string): number | null {
  if (!text) return null

  // Strip thousands separators, then convert; null when not a finite number.
  const toNumber = (literal: string): number | null => {
    const value = Number(literal.replace(/,/g, ''))
    return Number.isFinite(value) ? value : null
  }

  const afterMarker = /####\s*(-?(?:\d[\d,]*\.?\d*|(?<![\w.])\.\d+))/.exec(text)
  if (afterMarker) {
    const value = toNumber(afterMarker[1]!)
    if (value !== null) return value
  }

  // Last numeric literal anywhere in the string.
  const matches = text.match(/-?(?:\d[\d,]*\.?\d*|(?<![\w.])\.\d+)/g)
  if (!matches || matches.length === 0) return null
  return toNumber(matches[matches.length - 1]!)
}
120
+
121
+ const adapter = new Gsm8kAdapter()
122
+
123
+ export const loadDataset = adapter.loadDataset.bind(adapter)
124
+ export const evaluate = adapter.evaluate.bind(adapter)
125
+ export const assignSplit = adapter.assignSplit.bind(adapter)
126
+ export { Gsm8kAdapter }
@@ -0,0 +1,178 @@
1
+ /**
2
+ * SWE-Bench Lite wrapper — 30-instance subset.
3
+ *
4
+ * Status: STUB. The actual SWE-Bench harness needs a Docker host and
5
+ * is too heavy to ship inside this package. We expose the contract
6
+ * (loadDataset, evaluate, assignSplit) so consumers can plug in their
7
+ * own grader without touching call sites.
8
+ *
9
+ * Wire-up paths in priority order:
10
+ *
11
+ * 1. `process.env.AGENT_EVAL_SWEBENCH_PATH` → JSONL with the 30
12
+ * lite instances + per-instance metadata (instance_id,
13
+ * problem_statement, base_commit, repo, FAIL_TO_PASS,
14
+ * PASS_TO_PASS).
15
+ * 2. `process.env.AGENT_EVAL_SWEBENCH_GRADER_CMD` → executable
16
+ * that reads `{instance_id, patch}` JSON on stdin and writes
17
+ * `{passed, fail_to_pass_passed, pass_to_pass_passed, log}`
18
+ * JSON on stdout. Implementations can shell out to the
19
+ * official `swebench` runner here.
20
+ *
21
+ * Each setting is required independently: `loadDataset` throws a
22
+ * clearly-marked error without the dataset path, `evaluate` without the
23
+ * grader command. The stub fails LOUD; it never silently scores zero.
24
+ */
25
+
26
+ import { existsSync, readFileSync } from 'node:fs'
27
+ import { spawn } from 'node:child_process'
28
+
29
+ import type {
30
+ BenchmarkAdapter,
31
+ BenchmarkDatasetItem,
32
+ BenchmarkEvaluation,
33
+ } from '../../../src/benchmarks/types'
34
+ import { deterministicSplit } from '../../../src/benchmarks/types'
35
+ import type { RunSplitTag } from '../../../src/run-record'
36
+
37
+ export interface SweBenchLitePayload {
38
+ instanceId: string
39
+ problemStatement: string
40
+ baseCommit: string
41
+ repo: string
42
+ failToPass: string[]
43
+ passToPass: string[]
44
+ }
45
+
46
+ export type SweBenchLiteItem = BenchmarkDatasetItem<SweBenchLitePayload>
47
+
48
/**
 * Benchmark adapter for SWE-Bench Lite.
 *
 * The dataset is read from the JSONL file named by
 * `AGENT_EVAL_SWEBENCH_PATH`; grading is delegated to the external command
 * named by `AGENT_EVAL_SWEBENCH_GRADER_CMD`.
 */
class SweBenchLiteAdapter
  implements BenchmarkAdapter<SweBenchLiteItem, SweBenchLitePayload>
{
  /**
   * Load every dataset item assigned to `split`.
   *
   * @throws Error when `AGENT_EVAL_SWEBENCH_PATH` is unset/empty or points
   *   at a file that does not exist.
   */
  async loadDataset(split: RunSplitTag): Promise<SweBenchLiteItem[]> {
    const path = process.env.AGENT_EVAL_SWEBENCH_PATH
    if (!path) {
      throw new Error(
        'SWE-Bench Lite dataset not provided. Set AGENT_EVAL_SWEBENCH_PATH to a JSONL file ' +
          'with the 30 lite instances. STUB: this wrapper does not bundle the dataset; ' +
          'see https://www.swebench.com/lite.html for the canonical source.',
      )
    }
    if (!existsSync(path)) {
      throw new Error(`AGENT_EVAL_SWEBENCH_PATH=${path} does not exist`)
    }
    const all = parseJsonl(path)
    return all.filter((it) => assignSplitImpl(it.id) === split)
  }

  /**
   * Grade one patch by piping `{instance_id, patch}` JSON to the grader
   * command and reading its JSON verdict from stdout.
   *
   * @param item Dataset item being graded.
   * @param response The candidate patch, forwarded verbatim as `patch`.
   * @returns Score 1 when the grader reports `passed: true`, else 0; `raw`
   *   carries the grader's flags and at most 4000 characters of its log.
   * @throws Error when the grader command is not configured, when its stdout
   *   is not a JSON object, or when `passed` is not a boolean.
   */
  async evaluate(item: SweBenchLiteItem, response: string): Promise<BenchmarkEvaluation> {
    const cmd = process.env.AGENT_EVAL_SWEBENCH_GRADER_CMD
    if (!cmd) {
      throw new Error(
        'SWE-Bench Lite grader not configured. Set AGENT_EVAL_SWEBENCH_GRADER_CMD to an ' +
          'executable that reads {instance_id, patch} JSON on stdin and writes ' +
          '{passed, fail_to_pass_passed, pass_to_pass_passed, log} JSON on stdout. ' +
          'TODO(swebench-lite): bundle a default Docker-based runner once the SDK ' +
          'stabilises (https://github.com/swe-bench/SWE-bench).',
      )
    }
    const stdinPayload = JSON.stringify({ instance_id: item.payload.instanceId, patch: response })
    const result = await runGrader(cmd, stdinPayload)

    let decoded: unknown
    try {
      decoded = JSON.parse(result.stdout)
    } catch (e) {
      throw new Error(
        `SWE-Bench grader emitted non-JSON stdout: ${(e as Error).message}\n` +
          `stdout=${result.stdout.slice(0, 400)}\nstderr=${result.stderr.slice(0, 400)}`,
      )
    }
    // `null`, arrays and scalars parse as JSON but cannot carry a verdict.
    if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) {
      throw new Error(
        'SWE-Bench grader stdout is not a JSON object\n' +
          `stdout=${result.stdout.slice(0, 400)}\nstderr=${result.stderr.slice(0, 400)}`,
      )
    }
    const parsed = decoded as Record<string, unknown>

    // Fail loud on a missing or non-boolean verdict: truthiness coercion
    // would score the string "false" as a pass and a missing field as a
    // silent zero.
    if (typeof parsed.passed !== 'boolean') {
      throw new Error(
        'SWE-Bench grader output has no boolean `passed` field\n' +
          `stdout=${result.stdout.slice(0, 400)}\nstderr=${result.stderr.slice(0, 400)}`,
      )
    }
    const passed = parsed.passed

    return {
      score: passed ? 1 : 0,
      raw: {
        passed,
        failToPassPassed: Boolean(parsed.fail_to_pass_passed),
        passToPassPassed: Boolean(parsed.pass_to_pass_passed),
        graderLog: typeof parsed.log === 'string' ? parsed.log.slice(0, 4000) : '',
      },
    }
  }

  /** Split tag for `itemId`; delegates to the module-level helper. */
  assignSplit(itemId: string): RunSplitTag {
    return assignSplitImpl(itemId)
  }
}
105
+
106
/**
 * Split tag for a SWE-Bench Lite item. The id is prefixed with
 * `swebench-lite::` before it is handed to `deterministicSplit`.
 */
function assignSplitImpl(itemId: string): RunSplitTag {
  const namespacedId = `swebench-lite::${itemId}`
  return deterministicSplit(namespacedId)
}
109
+
110
/**
 * Read a SWE-Bench Lite JSONL file into dataset items.
 *
 * Blank lines are skipped. Each remaining line must be a JSON object with an
 * `instance_id` (or `instanceId`). The other fields accept either the
 * snake_case / upper-case dataset spelling or the camelCase spelling and
 * default to an empty string / empty list when absent.
 *
 * @param path Path of the JSONL file to read.
 * @returns One item per non-blank line, in file order.
 * @throws Error naming the 1-based line number when a line is not valid
 *   JSON, is not a JSON object, or has no instance id.
 */
function parseJsonl(path: string): SweBenchLiteItem[] {
  const raw = readFileSync(path, 'utf8')
  const out: SweBenchLiteItem[] = []
  let lineNo = 0
  for (const line of raw.split('\n')) {
    lineNo++
    // trim() also drops a trailing '\r' from CRLF files.
    const trimmed = line.trim()
    if (!trimmed) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(trimmed)
    } catch (e) {
      // Report which line is broken instead of surfacing a bare SyntaxError.
      throw new Error(`swebench-lite parse error at line ${lineNo}: ${(e as Error).message}`)
    }
    // `null`, arrays and scalars are valid JSON but not valid records.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new Error(`swebench-lite line ${lineNo} is not a JSON object`)
    }
    const row = parsed as Record<string, unknown>

    const instanceId = String(row.instance_id ?? row.instanceId ?? '')
    if (!instanceId) {
      throw new Error(`swebench-lite line ${lineNo} missing instance_id`)
    }
    out.push({
      id: instanceId,
      payload: {
        instanceId,
        problemStatement: String(row.problem_statement ?? row.problemStatement ?? ''),
        baseCommit: String(row.base_commit ?? row.baseCommit ?? ''),
        repo: String(row.repo ?? ''),
        failToPass: asStringArray(row.FAIL_TO_PASS ?? row.failToPass),
        passToPass: asStringArray(row.PASS_TO_PASS ?? row.passToPass),
      },
    })
  }
  return out
}
137
+
138
/**
 * Coerce a loosely-typed field into a list of strings.
 *
 * - An array keeps only its string elements.
 * - A string holding a JSON array is decoded and filtered the same way.
 * - Any other non-blank string — including one that happens to be valid
 *   non-array JSON such as `"123"` — is treated as a single-element list.
 * - A blank string and every other value yield an empty list.
 *
 * @param v Raw value taken from a dataset row.
 * @returns The string entries found in `v`.
 */
function asStringArray(v: unknown): string[] {
  const onlyStrings = (xs: unknown[]): string[] =>
    xs.filter((x): x is string => typeof x === 'string')

  if (Array.isArray(v)) return onlyStrings(v)
  if (typeof v !== 'string' || v.trim() === '') return []

  try {
    const parsed: unknown = JSON.parse(v)
    if (Array.isArray(parsed)) return onlyStrings(parsed)
  } catch {
    // Not JSON at all; handled by the plain-string fallback below.
  }
  // Plain string; treat as a single-element list.
  return [v]
}
151
+
152
/**
 * Run the grader command, feed it `stdin`, and collect its output.
 *
 * The command string is split on whitespace: the first token is the
 * executable and the rest are its arguments. No shell is involved, so
 * quoting inside `cmd` is not interpreted.
 *
 * @param cmd Grader command line.
 * @param stdin Text written to the grader's standard input.
 * @returns The grader's complete stdout and stderr, decoded as UTF-8.
 * @throws Error (as a rejection) when `cmd` is blank, when the process
 *   cannot be spawned, or when it exits with a non-zero code.
 */
function runGrader(cmd: string, stdin: string): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    // Trim first so leading whitespace does not produce an empty executable.
    const [file, ...args] = cmd.trim().split(/\s+/)
    if (!file) {
      reject(new Error('grader command is empty'))
      return
    }

    const child = spawn(file, args, { stdio: ['pipe', 'pipe', 'pipe'] })

    // Collect raw chunks and decode once at the end: decoding chunk by chunk
    // corrupts multi-byte UTF-8 sequences that straddle a chunk boundary.
    const outChunks: Buffer[] = []
    const errChunks: Buffer[] = []
    child.stdout.on('data', (b: Buffer) => {
      outChunks.push(b)
    })
    child.stderr.on('data', (b: Buffer) => {
      errChunks.push(b)
    })

    child.on('error', reject)
    // A grader that exits before reading its input raises EPIPE on stdin.
    // Without a listener that becomes an uncaught exception; the outcome is
    // reported through 'close' / 'error' instead.
    child.stdin.on('error', () => {})

    child.on('close', (code, signal) => {
      const stdout = Buffer.concat(outChunks).toString('utf8')
      const stderr = Buffer.concat(errChunks).toString('utf8')
      if (code !== 0) {
        const viaSignal = signal ? ` (signal ${signal})` : ''
        reject(new Error(`grader exited with code ${code}${viaSignal}: ${stderr.slice(0, 400)}`))
        return
      }
      resolve({ stdout, stderr })
    })

    child.stdin.end(stdin)
  })
}
172
+
173
+ const adapter = new SweBenchLiteAdapter()
174
+
175
+ export const loadDataset = adapter.loadDataset.bind(adapter)
176
+ export const evaluate = adapter.evaluate.bind(adapter)
177
+ export const assignSplit = adapter.assignSplit.bind(adapter)
178
+ export { SweBenchLiteAdapter }
@@ -0,0 +1,114 @@
1
+ import {
2
+ runMultiShotOptimization,
3
+ trialTraceFromMultiShotTrial,
4
+ type MultiShotVariant,
5
+ type RunRecord,
6
+ } from '@tangle-network/agent-eval'
7
+
8
/** Payload carried by every demo variant: the prompt text and a synthetic quality score. */
type Payload = {
  instruction: string
  quality: number
}

// Generation-0 seed variant; its id matches the `baselineKey` given to the gate below.
const seedVariant: MultiShotVariant<Payload> = {
  id: 'baseline',
  label: 'baseline',
  generation: 0,
  payload: { instruction: 'Complete the user task.', quality: 0.45 },
}

// Run the optimization with stubbed runner / scorer / mutator so the example
// needs no model access: the "score" is simply the variant's quality field.
const optimization = await runMultiShotOptimization<Payload>({
  runId: 'demo-multi-shot',
  target: 'demo-agent-system-prompt',
  seedVariants: [seedVariant],
  searchScenarioIds: ['search-brief', 'search-code-review', 'search-research'],
  reps: 1,
  generations: 2,
  populationSize: 2,
  scoreConcurrency: 2,

  runner: {
    // Fabricates a two-turn trace that echoes the variant's instruction and quality.
    async run({ variant, scenarioId }) {
      const { instruction, quality } = variant.payload
      const qualityTag = `quality=${quality}`
      return {
        trace: {
          scenarioId,
          turns: [
            { role: 'user', content: `Run ${scenarioId}` },
            { role: 'assistant', content: `${instruction} ${qualityTag}` },
          ],
          output: qualityTag,
        },
        costUsd: 0.01,
        durationMs: 50,
      }
    },
  },

  scorer: {
    // Scores by the quality field; below 0.8 it attaches one error-level finding.
    async score({ variant }) {
      const { quality } = variant.payload
      return {
        score: quality,
        ok: true,
        asi: quality >= 0.8
          ? []
          : [
              {
                expectationId: 'complete-task',
                message: 'The agent did not fully complete the task.',
                severity: 'error',
                responsibleSurface: 'system-prompt',
                suggestion: 'Make completion criteria explicit before final response.',
              },
            ],
      }
    },
  },

  mutateAdapter: {
    // Emits `childCount` children that extend the parent's instruction and use a fixed 0.9 quality.
    async mutate({ parent, bottomTrials, childCount, generation }) {
      const rationale = bottomTrials
        .map((trial) => trialTraceFromMultiShotTrial(trial))
        .flatMap((trace) => (trace.expectations ?? []).map((expectation) => expectation.phrase))
        .join('\n')
      const childInstruction = `${parent.payload.instruction} Verify every requested step before final answer.`
      return Array.from({ length: childCount }, (_, childIndex) => ({
        id: `${parent.id}.g${generation}.${childIndex}`,
        label: 'completion-focused',
        generation,
        payload: {
          instruction: childInstruction,
          quality: 0.9,
        },
        rationale,
      }))
    },
  },

  gate: {
    holdoutScenarioIds: ['holdout-brief', 'holdout-code-review', 'holdout-research'],
    gate: {
      baselineKey: 'baseline',
      minProductiveRuns: 3,
      pairedDeltaThreshold: 0,
      seed: 7,
    },
    // Builds the run record for one trial; the score is filed under the
    // key that matches the split it was measured on.
    toRunRecord: ({ variant, scenarioId, rep, split, seed, trial }): RunRecord => {
      const scoreKey = split === 'holdout' ? 'holdoutScore' : 'searchScore'
      return {
        runId: `demo-${variant.id}-${scenarioId}-${rep}-${split}`,
        experimentId: scenarioId,
        candidateId: variant.id,
        seed,
        model: 'demo-model@2026-01-01',
        promptHash: 'p'.repeat(64),
        configHash: 'c'.repeat(64),
        commitSha: 'deadbeef',
        wallMs: trial.durationMs ?? 0,
        costUsd: trial.cost ?? 0,
        tokenUsage: { input: 1, output: 1 },
        outcome: {
          [scoreKey]: trial.score,
          raw: { score: trial.score },
        },
        splitTag: split,
      }
    },
  },
})

// Print which variant won the search, which one was promoted, and the gate decision (if any).
console.log({
  searchBest: optimization.searchBestVariant.id,
  promoted: optimization.promotedVariant.id,
  gate: optimization.gate?.decision ?? null,
})
@@ -0,0 +1,63 @@
1
+ import {
2
+ InMemoryTraceStore,
3
+ SandboxHarness,
4
+ SubprocessSandboxDriver,
5
+ TraceEmitter,
6
+ } from '@tangle-network/agent-eval'
7
+
8
/**
 * Same-sandbox pattern:
 * - one driver owns one workdir
 * - the harness runs setup/build/test there
 * - later checks can inspect files/logs/screenshots produced by those phases
 *
 * Replace `workdir` with a generated app, browser automation checkout, or
 * remote computer-use workspace.
 *
 * The judge record scores 1 only when the harness reports `passed` AND the
 * test phase exited with code 0; the rationale is derived from that same
 * condition so score and rationale cannot disagree.
 *
 * @param workdir Directory handed to the subprocess driver as its `cwd`.
 * @returns The harness result, the judge verdict, and the runs listed by the
 *   in-memory trace store.
 */
export async function runSameSandboxExample(workdir: string) {
  const store = new InMemoryTraceStore()
  const driver = new SubprocessSandboxDriver({ cwd: workdir })
  const harness = new SandboxHarness(driver)
  const emitter = new TraceEmitter(store)
  await emitter.startRun({
    scenarioId: 'same-sandbox-example',
    layer: 'app-build',
  })

  const result = await harness.run({
    setupCommand: 'pnpm install --frozen-lockfile',
    runCommand: 'pnpm build',
    testCommand: 'pnpm test',
    timeoutMs: 180_000,
  }, emitter)

  // Human-readable evidence: headline flags plus the last 2000 characters of
  // test stdout.
  const summary = [
    `passed=${result.passed}`,
    `score=${result.score}`,
    `build=${result.run?.exitCode ?? 'not-run'}`,
    `test=${result.test?.exitCode ?? 'not-run'}`,
    result.test?.stdout?.slice(-2000) ?? '',
  ].join('\n')

  // Check the exit code itself rather than searching `summary` for 'test=0':
  // the summary embeds test stdout, which may contain that text by accident.
  const evidenceOk = result.passed && result.test?.exitCode === 0
  const judged = {
    score: evidenceOk ? 1 : 0,
    rationale: evidenceOk
      ? 'Shared sandbox produced passing build/test evidence.'
      : 'Shared sandbox did not produce passing build/test evidence.',
  }
  await emitter.recordJudge({
    judgeId: 'same-sandbox-evidence',
    name: 'same-sandbox-evidence',
    dimension: 'evidence',
    score: judged.score,
    rationale: judged.rationale,
    evidence: summary,
  })
  await emitter.endRun({
    pass: result.passed,
    score: result.score,
    notes: judged.rationale,
  })

  return { result, judged, traces: await store.listRuns() }
}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-eval",
3
- "version": "0.20.0",
4
- "description": "Trace-first evaluation framework for Tangle agents. Core (spans, pipelines, sandbox harness, OTLP export), trust (dataset, red-team, calibration, behavior DSL), builder-of-builders (three-layer eval, resumable sessions, meta-runtime correlation), and frontier (meta-eval correlation study, Process Reward Modeling, bisector).",
3
+ "version": "0.20.3",
4
+ "description": "Trace-first evaluation infrastructure for agent systems: traces, harnesses, verifier pipelines, judges, datasets, gates, optimization, and reporting.",
5
5
  "homepage": "https://github.com/tangle-network/agent-eval#readme",
6
6
  "repository": {
7
7
  "type": "git",
@@ -40,7 +40,8 @@
40
40
  },
41
41
  "files": [
42
42
  "dist",
43
- "docs"
43
+ "docs",
44
+ "examples"
44
45
  ],
45
46
  "publishConfig": {
46
47
  "access": "public"
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/llm-client.ts"],"sourcesContent":["/**\n * LLM client with graceful degrade.\n *\n * OpenAI-compatible `/v1/chat/completions` client with:\n * - Exponential-backoff retry on 429 + 5xx gateway errors (502/503/504).\n * - Retry on transient network errors (fetch failed, AbortError, ECONNRESET).\n * - Graceful json_schema → json_object degrade on 400 with schema-reject body.\n * - Fenced-JSON stripping (```json ... ```) for models that wrap structured output.\n * - Configurable base URL + api key / bearer, works with LiteLLM proxies, OpenAI\n * directly, cli-bridge subscriptions, and any router that speaks the spec.\n *\n * Usage:\n * const { value, result } = await callLlmJson<MyType>(\n * { model: 'gpt-4o', messages: [...], jsonSchema: { name: 'x', schema: {...} } },\n * { baseUrl: 'https://router.tangle.tools/v1', apiKey: process.env.KEY },\n * )\n *\n * This is THE llm-calling seam for agent-eval primitives that need structured\n * output (semantic concept judge, reviewer directives, critic scores). Primitives\n * that need free-form text use `callLlm` and parse output themselves.\n */\n\n// ─── Types ──────────────────────────────────────────────────────────────\n\nexport interface LlmMessage {\n role: 'system' | 'user' | 'assistant'\n /**\n * Either a plain text content string OR a multimodal content array\n * (text + image_url parts) for vision-capable models.\n */\n content:\n | string\n | Array<\n | { type: 'text'; text: string }\n | { type: 'image_url'; image_url: { url: string; detail?: 'auto' | 'low' | 'high' } }\n >\n}\n\nexport interface LlmCallRequest {\n model: string\n messages: LlmMessage[]\n /** Optional JSON-mode response format (response_format: json_object). */\n jsonMode?: boolean\n /** Optional structured output via JSON Schema. Falls back to json_object on 400. */\n jsonSchema?: { name: string; schema: Record<string, unknown> }\n temperature?: number\n maxTokens?: number\n /** Per-call timeout, default 60s. 
*/\n timeoutMs?: number\n}\n\nexport interface LlmUsage {\n promptTokens: number\n completionTokens: number\n totalTokens: number\n /** Proxies populate this when prompt caching is on. */\n cachedPromptTokens?: number\n}\n\nexport interface LlmCallResult {\n /** The text content of the first choice. Empty string if none. */\n content: string\n usage: LlmUsage\n /**\n * Cost in USD. Pulled from proxy's `_response_cost` field when present;\n * `null` when neither the proxy nor the caller can derive it.\n */\n costUsd: number | null\n /** Model name actually used (echoed from response). */\n model: string\n /** Wall-clock duration of the HTTP call (last attempt, if retried). */\n durationMs: number\n /** Raw response body. */\n raw: Record<string, unknown>\n}\n\nexport class LlmCallError extends Error {\n constructor(\n message: string,\n public readonly status: number,\n public readonly body: string,\n public readonly model: string,\n ) {\n super(message)\n this.name = 'LlmCallError'\n }\n}\n\nexport interface LlmClientOptions {\n /** Base URL (without trailing slash). Must end at the `/v1` prefix. */\n baseUrl?: string\n /** Bearer token — either `apiKey` or `bearer` populates `Authorization: Bearer ...`. */\n apiKey?: string\n bearer?: string\n /** Override for the `Authorization` header (e.g. `X-Auth: ...`). Takes precedence over apiKey/bearer. */\n authHeader?: { name: string; value: string }\n /** Default timeout in ms. Per-call can override. */\n defaultTimeoutMs?: number\n /** Max retry attempts on retriable errors. Default 3 (1 initial + 2 retries). */\n maxRetries?: number\n /** Fetch implementation — defaults to global `fetch`. Override for custom transport (e.g. tests). 
*/\n fetch?: typeof fetch\n}\n\n// ─── Internals ──────────────────────────────────────────────────────────\n\nconst DEFAULT_BASE_URL = 'https://router.tangle.tools/v1'\nconst DEFAULT_TIMEOUT_MS = 60_000\nconst DEFAULT_MAX_RETRIES = 3\n\nconst RETRYABLE_STATUS = new Set([429, 502, 503, 504])\n\nfunction isRetryableError(err: unknown): boolean {\n if (err instanceof LlmCallError) return RETRYABLE_STATUS.has(err.status)\n if (err instanceof Error) {\n return (\n err.name === 'AbortError' ||\n err.name === 'TimeoutError' ||\n /fetch failed|ECONNRESET|ETIMEDOUT|EAI_AGAIN/i.test(err.message)\n )\n }\n return false\n}\n\nfunction parseRetryAfter(headers: Headers): number | null {\n const h = headers.get('retry-after')\n if (!h) return null\n const asNumber = Number(h)\n if (Number.isFinite(asNumber) && asNumber > 0) return asNumber * 1000\n const asDate = Date.parse(h)\n if (Number.isFinite(asDate)) return Math.max(0, asDate - Date.now())\n return null\n}\n\nfunction backoffMs(attempt: number): number {\n // 500ms, 1s, 2s, 4s, ...\n return Math.min(500 * Math.pow(2, attempt), 16_000)\n}\n\nfunction buildHeaders(opts: LlmClientOptions): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n Accept: 'application/json',\n }\n if (opts.authHeader) {\n headers[opts.authHeader.name] = opts.authHeader.value\n } else if (opts.bearer || opts.apiKey) {\n headers.Authorization = `Bearer ${opts.bearer ?? 
opts.apiKey}`\n }\n return headers\n}\n\nfunction isSchemaRejection(status: number, body: string): boolean {\n if (status !== 400) return false\n const lower = body.toLowerCase()\n return (\n lower.includes('response_format') ||\n lower.includes('json_schema') ||\n lower.includes('is unavailable') ||\n lower.includes('not supported')\n )\n}\n\nfunction buildBody(req: LlmCallRequest, forceJsonObject: boolean): Record<string, unknown> {\n const body: Record<string, unknown> = {\n model: req.model,\n messages: req.messages,\n temperature: req.temperature ?? 0,\n }\n if (req.maxTokens != null) body.max_tokens = req.maxTokens\n\n if (req.jsonSchema && !forceJsonObject) {\n body.response_format = {\n type: 'json_schema',\n json_schema: { name: req.jsonSchema.name, schema: req.jsonSchema.schema, strict: true },\n }\n } else if (req.jsonMode || req.jsonSchema) {\n body.response_format = { type: 'json_object' }\n }\n\n return body\n}\n\nasync function sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms))\n}\n\n// ─── Public API ─────────────────────────────────────────────────────────\n\n/**\n * Strip a ```json / ``` code fence if the model emitted one.\n * Idempotent for naked JSON. Some models (claude-code via router, certain\n * deepseek models) wrap output even under json_object.\n */\nexport function stripFencedJson(raw: string): string {\n const trimmed = raw.trim()\n const m = trimmed.match(/^```(?:json)?\\s*\\n?([\\s\\S]*?)\\n?```\\s*$/)\n return m ? m[1]!.trim() : trimmed\n}\n\n/**\n * Low-level call. Returns raw content + usage + cost. Retries on transient\n * failures; does NOT degrade schema here — callers that want graceful\n * degrade use `callLlmJson`.\n */\nexport async function callLlm(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<LlmCallResult> {\n const baseUrl = (opts.baseUrl ?? 
DEFAULT_BASE_URL).replace(/\\/+$/, '')\n const url = `${baseUrl}/chat/completions`\n const timeoutMs = req.timeoutMs ?? opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS\n const maxRetries = opts.maxRetries ?? DEFAULT_MAX_RETRIES\n const fetchFn = opts.fetch ?? globalThis.fetch\n const headers = buildHeaders(opts)\n\n let lastErr: unknown\n for (let attempt = 0; attempt < maxRetries; attempt++) {\n const controller = new AbortController()\n const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs)\n const started = Date.now()\n\n try {\n const res = await fetchFn(url, {\n method: 'POST',\n headers,\n body: JSON.stringify(buildBody(req, false)),\n signal: controller.signal,\n })\n clearTimeout(timeoutHandle)\n\n if (!res.ok) {\n const body = await res.text()\n const err = new LlmCallError(\n `LLM call ${res.status}: ${body.slice(0, 300)}`,\n res.status,\n body,\n req.model,\n )\n if (RETRYABLE_STATUS.has(res.status) && attempt < maxRetries - 1) {\n lastErr = err\n const retryAfter = parseRetryAfter(res.headers)\n await sleep(retryAfter ?? backoffMs(attempt))\n continue\n }\n throw err\n }\n\n const json = (await res.json()) as Record<string, unknown>\n const choice = (json.choices as Array<{ message?: { content?: string } }> | undefined)?.[0]\n const usageRaw = (json.usage as Record<string, unknown> | undefined) ?? {}\n const costFromProxy = (json._response_cost ?? json.cost_usd) as number | undefined\n\n return {\n content: choice?.message?.content ?? '',\n usage: {\n promptTokens: Number(usageRaw.prompt_tokens ?? 0),\n completionTokens: Number(usageRaw.completion_tokens ?? 0),\n totalTokens: Number(usageRaw.total_tokens ?? 0),\n cachedPromptTokens:\n usageRaw.prompt_tokens_details &&\n typeof usageRaw.prompt_tokens_details === 'object'\n ? Number(\n (usageRaw.prompt_tokens_details as Record<string, unknown>).cached_tokens ?? 0,\n )\n : undefined,\n },\n costUsd: typeof costFromProxy === 'number' ? costFromProxy : null,\n model: (json.model as string) ?? 
req.model,\n durationMs: Date.now() - started,\n raw: json,\n }\n } catch (err) {\n clearTimeout(timeoutHandle)\n lastErr = err\n if (attempt < maxRetries - 1 && isRetryableError(err)) {\n await sleep(backoffMs(attempt))\n continue\n }\n throw err\n }\n }\n throw lastErr instanceof Error ? lastErr : new Error(String(lastErr))\n}\n\n/**\n * Structured-output call. Returns parsed JSON plus the raw result envelope.\n * Degrades `jsonSchema` → `jsonMode` on a 400 that names the schema param —\n * critical for deepseek-v3/v4, kimi-k2.6, and other models that don't accept\n * the `response_format.json_schema` shape but DO accept `json_object`.\n */\nexport async function callLlmJson<T = unknown>(\n req: LlmCallRequest,\n opts: LlmClientOptions = {},\n): Promise<{ value: T; result: LlmCallResult }> {\n try {\n const result = await callLlm({ ...req, jsonMode: req.jsonMode ?? !req.jsonSchema }, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n } catch (err) {\n if (err instanceof LlmCallError && isSchemaRejection(err.status, err.body) && req.jsonSchema) {\n // Degrade to json_object + retry.\n const degradedReq: LlmCallRequest = { ...req, jsonMode: true, jsonSchema: undefined }\n const result = await callLlm(degradedReq, opts)\n const value = parseJsonSafely<T>(result.content, result.model)\n return { value, result }\n }\n throw err\n }\n}\n\nfunction parseJsonSafely<T>(content: string, model: string): T {\n const stripped = stripFencedJson(content)\n try {\n return JSON.parse(stripped) as T\n } catch (err) {\n throw new Error(\n `LLM returned non-JSON content (model=${model}): ${\n err instanceof Error ? err.message : String(err)\n }\\n--- raw content ---\\n${content.slice(0, 800)}`,\n )\n }\n}\n\n/**\n * Probe whether a model is reachable. Returns latency + null error on\n * success; `ok=false` + error message on any failure (HTTP, timeout,\n * network, parse). 
Designed for sweep preflights — fail loud at the\n * boundary before burning a 30-leaf run on a misconfigured router.\n *\n * Sends a tiny `ping` message with `maxTokens=64`. Reasoning models\n * (glm-5.1, deepseek-v4) can burn the entire budget on internal reasoning\n * for short prompts, so don't tighten this further. We don't validate\n * content; HTTP 200 means reachable.\n */\nexport async function probeLlm(\n model: string,\n opts: LlmClientOptions & { timeoutMs?: number } = {},\n): Promise<{ ok: boolean; latencyMs: number; error: string | null }> {\n const start = Date.now()\n try {\n await callLlm(\n {\n model,\n messages: [{ role: 'user', content: 'ping' }],\n maxTokens: 64,\n timeoutMs: opts.timeoutMs ?? 30_000,\n },\n opts,\n )\n return { ok: true, latencyMs: Date.now() - start, error: null }\n } catch (err) {\n return {\n ok: false,\n latencyMs: Date.now() - start,\n error: err instanceof Error ? err.message : String(err),\n }\n }\n}\n\n/**\n * Stateful client — construct once with defaults, call many times.\n * Thin wrapper around the free functions; exists for callers that want\n * to inject a single configured instance into multiple primitives.\n */\nexport class LlmClient {\n constructor(private readonly opts: LlmClientOptions = {}) {}\n\n call(req: LlmCallRequest, per?: LlmClientOptions): Promise<LlmCallResult> {\n return callLlm(req, { ...this.opts, ...per })\n }\n\n callJson<T = unknown>(\n req: LlmCallRequest,\n per?: LlmClientOptions,\n ): Promise<{ value: T; result: LlmCallResult }> {\n return callLlmJson<T>(req, { ...this.opts, ...per })\n 
}\n}\n"],"mappings":";AA4EO,IAAM,eAAN,cAA2B,MAAM;AAAA,EACtC,YACE,SACgB,QACA,MACA,OAChB;AACA,UAAM,OAAO;AAJG;AACA;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EANkB;AAAA,EACA;AAAA,EACA;AAKpB;AAoBA,IAAM,mBAAmB;AACzB,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB;AAE5B,IAAM,mBAAmB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,GAAG,CAAC;AAErD,SAAS,iBAAiB,KAAuB;AAC/C,MAAI,eAAe,aAAc,QAAO,iBAAiB,IAAI,IAAI,MAAM;AACvE,MAAI,eAAe,OAAO;AACxB,WACE,IAAI,SAAS,gBACb,IAAI,SAAS,kBACb,+CAA+C,KAAK,IAAI,OAAO;AAAA,EAEnE;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,SAAiC;AACxD,QAAM,IAAI,QAAQ,IAAI,aAAa;AACnC,MAAI,CAAC,EAAG,QAAO;AACf,QAAM,WAAW,OAAO,CAAC;AACzB,MAAI,OAAO,SAAS,QAAQ,KAAK,WAAW,EAAG,QAAO,WAAW;AACjE,QAAM,SAAS,KAAK,MAAM,CAAC;AAC3B,MAAI,OAAO,SAAS,MAAM,EAAG,QAAO,KAAK,IAAI,GAAG,SAAS,KAAK,IAAI,CAAC;AACnE,SAAO;AACT;AAEA,SAAS,UAAU,SAAyB;AAE1C,SAAO,KAAK,IAAI,MAAM,KAAK,IAAI,GAAG,OAAO,GAAG,IAAM;AACpD;AAEA,SAAS,aAAa,MAAgD;AACpE,QAAM,UAAkC;AAAA,IACtC,gBAAgB;AAAA,IAChB,QAAQ;AAAA,EACV;AACA,MAAI,KAAK,YAAY;AACnB,YAAQ,KAAK,WAAW,IAAI,IAAI,KAAK,WAAW;AAAA,EAClD,WAAW,KAAK,UAAU,KAAK,QAAQ;AACrC,YAAQ,gBAAgB,UAAU,KAAK,UAAU,KAAK,MAAM;AAAA,EAC9D;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,QAAgB,MAAuB;AAChE,MAAI,WAAW,IAAK,QAAO;AAC3B,QAAM,QAAQ,KAAK,YAAY;AAC/B,SACE,MAAM,SAAS,iBAAiB,KAChC,MAAM,SAAS,aAAa,KAC5B,MAAM,SAAS,gBAAgB,KAC/B,MAAM,SAAS,eAAe;AAElC;AAEA,SAAS,UAAU,KAAqB,iBAAmD;AACzF,QAAM,OAAgC;AAAA,IACpC,OAAO,IAAI;AAAA,IACX,UAAU,IAAI;AAAA,IACd,aAAa,IAAI,eAAe;AAAA,EAClC;AACA,MAAI,IAAI,aAAa,KAAM,MAAK,aAAa,IAAI;AAEjD,MAAI,IAAI,cAAc,CAAC,iBAAiB;AACtC,SAAK,kBAAkB;AAAA,MACrB,MAAM;AAAA,MACN,aAAa,EAAE,MAAM,IAAI,WAAW,MAAM,QAAQ,IAAI,WAAW,QAAQ,QAAQ,KAAK;AAAA,IACxF;AAAA,EACF,WAAW,IAAI,YAAY,IAAI,YAAY;AACzC,SAAK,kBAAkB,EAAE,MAAM,cAAc;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,eAAe,MAAM,IAA2B;AAC9C,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;AASO,SAAS,gBAAgB,KAAqB;AACnD,QAAM,UAAU,IAAI,KAAK;AACzB,QAAM,IAAI,QAAQ,MAAM,yCAAyC;AACjE,SAAO,IAAI,EAAE,CAAC,EAAG,KAAK,IAAI;AAC5B;AAOA,eAAsB,QACpB,KACA,OAAyB,CAAC,GACF;AACxB,QAAM,WAAW,KAAK,WAAW,kBAAkB,QAAQ,QAAQ,EAAE;AACrE,QAAM,MAAM,GAAG,OAAO;AACtB,QAAM,YAAY
,IAAI,aAAa,KAAK,oBAAoB;AAC5D,QAAM,aAAa,KAAK,cAAc;AACtC,QAAM,UAAU,KAAK,SAAS,WAAW;AACzC,QAAM,UAAU,aAAa,IAAI;AAEjC,MAAI;AACJ,WAAS,UAAU,GAAG,UAAU,YAAY,WAAW;AACrD,UAAM,aAAa,IAAI,gBAAgB;AACvC,UAAM,gBAAgB,WAAW,MAAM,WAAW,MAAM,GAAG,SAAS;AACpE,UAAM,UAAU,KAAK,IAAI;AAEzB,QAAI;AACF,YAAM,MAAM,MAAM,QAAQ,KAAK;AAAA,QAC7B,QAAQ;AAAA,QACR;AAAA,QACA,MAAM,KAAK,UAAU,UAAU,KAAK,KAAK,CAAC;AAAA,QAC1C,QAAQ,WAAW;AAAA,MACrB,CAAC;AACD,mBAAa,aAAa;AAE1B,UAAI,CAAC,IAAI,IAAI;AACX,cAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,cAAM,MAAM,IAAI;AAAA,UACd,YAAY,IAAI,MAAM,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,UAC7C,IAAI;AAAA,UACJ;AAAA,UACA,IAAI;AAAA,QACN;AACA,YAAI,iBAAiB,IAAI,IAAI,MAAM,KAAK,UAAU,aAAa,GAAG;AAChE,oBAAU;AACV,gBAAM,aAAa,gBAAgB,IAAI,OAAO;AAC9C,gBAAM,MAAM,cAAc,UAAU,OAAO,CAAC;AAC5C;AAAA,QACF;AACA,cAAM;AAAA,MACR;AAEA,YAAM,OAAQ,MAAM,IAAI,KAAK;AAC7B,YAAM,SAAU,KAAK,UAAoE,CAAC;AAC1F,YAAM,WAAY,KAAK,SAAiD,CAAC;AACzE,YAAM,gBAAiB,KAAK,kBAAkB,KAAK;AAEnD,aAAO;AAAA,QACL,SAAS,QAAQ,SAAS,WAAW;AAAA,QACrC,OAAO;AAAA,UACL,cAAc,OAAO,SAAS,iBAAiB,CAAC;AAAA,UAChD,kBAAkB,OAAO,SAAS,qBAAqB,CAAC;AAAA,UACxD,aAAa,OAAO,SAAS,gBAAgB,CAAC;AAAA,UAC9C,oBACE,SAAS,yBACT,OAAO,SAAS,0BAA0B,WACtC;AAAA,YACG,SAAS,sBAAkD,iBAAiB;AAAA,UAC/E,IACA;AAAA,QACR;AAAA,QACA,SAAS,OAAO,kBAAkB,WAAW,gBAAgB;AAAA,QAC7D,OAAQ,KAAK,SAAoB,IAAI;AAAA,QACrC,YAAY,KAAK,IAAI,IAAI;AAAA,QACzB,KAAK;AAAA,MACP;AAAA,IACF,SAAS,KAAK;AACZ,mBAAa,aAAa;AAC1B,gBAAU;AACV,UAAI,UAAU,aAAa,KAAK,iBAAiB,GAAG,GAAG;AACrD,cAAM,MAAM,UAAU,OAAO,CAAC;AAC9B;AAAA,MACF;AACA,YAAM;AAAA,IACR;AAAA,EACF;AACA,QAAM,mBAAmB,QAAQ,UAAU,IAAI,MAAM,OAAO,OAAO,CAAC;AACtE;AAQA,eAAsB,YACpB,KACA,OAAyB,CAAC,GACoB;AAC9C,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,EAAE,GAAG,KAAK,UAAU,IAAI,YAAY,CAAC,IAAI,WAAW,GAAG,IAAI;AACxF,UAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,WAAO,EAAE,OAAO,OAAO;AAAA,EACzB,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB,kBAAkB,IAAI,QAAQ,IAAI,IAAI,KAAK,IAAI,YAAY;AAE5F,YAAM,cAA8B,EAAE,GAAG,KAAK,UAAU,MAAM,YAAY,OAAU;AACpF,YAAM,SAAS,MAAM,QAAQ,aAAa,IAAI;AAC9C,YAAM,QAAQ,gBAAmB,OAAO,SAAS,OAAO,KAAK;AAC7D,aAAO,EAAE,OAAO,OAAO;AAAA,IACzB;AACA,UAAM;AAAA,E
ACR;AACF;AAEA,SAAS,gBAAmB,SAAiB,OAAkB;AAC7D,QAAM,WAAW,gBAAgB,OAAO;AACxC,MAAI;AACF,WAAO,KAAK,MAAM,QAAQ;AAAA,EAC5B,SAAS,KAAK;AACZ,UAAM,IAAI;AAAA,MACR,wCAAwC,KAAK,MAC3C,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CACjD;AAAA;AAAA,EAA0B,QAAQ,MAAM,GAAG,GAAG,CAAC;AAAA,IACjD;AAAA,EACF;AACF;AAaA,eAAsB,SACpB,OACA,OAAkD,CAAC,GACgB;AACnE,QAAM,QAAQ,KAAK,IAAI;AACvB,MAAI;AACF,UAAM;AAAA,MACJ;AAAA,QACE;AAAA,QACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,OAAO,CAAC;AAAA,QAC5C,WAAW;AAAA,QACX,WAAW,KAAK,aAAa;AAAA,MAC/B;AAAA,MACA;AAAA,IACF;AACA,WAAO,EAAE,IAAI,MAAM,WAAW,KAAK,IAAI,IAAI,OAAO,OAAO,KAAK;AAAA,EAChE,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,IAAI;AAAA,MACJ,WAAW,KAAK,IAAI,IAAI;AAAA,MACxB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAOO,IAAM,YAAN,MAAgB;AAAA,EACrB,YAA6B,OAAyB,CAAC,GAAG;AAA7B;AAAA,EAA8B;AAAA,EAA9B;AAAA,EAE7B,KAAK,KAAqB,KAAgD;AACxE,WAAO,QAAQ,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EAC9C;AAAA,EAEA,SACE,KACA,KAC8C;AAC9C,WAAO,YAAe,KAAK,EAAE,GAAG,KAAK,MAAM,GAAG,IAAI,CAAC;AAAA,EACrD;AACF;","names":[]}