@tangle-network/agent-eval 0.23.1 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +212 -79
- package/dist/baseline-4R5deP0N.d.ts +108 -0
- package/dist/benchmarks/index.d.ts +3 -2
- package/dist/benchmarks/index.js +1 -1
- package/dist/builder-eval/index.d.ts +249 -0
- package/dist/builder-eval/index.js +391 -0
- package/dist/builder-eval/index.js.map +1 -0
- package/dist/{chunk-IOXMGMHQ.js → chunk-2A5XJB43.js} +142 -318
- package/dist/chunk-2A5XJB43.js.map +1 -0
- package/dist/chunk-47X6LRCE.js +76 -0
- package/dist/chunk-47X6LRCE.js.map +1 -0
- package/dist/{chunk-6M774GY6.js → chunk-4F5DQN55.js} +1 -1
- package/dist/chunk-4F5DQN55.js.map +1 -0
- package/dist/{chunk-KAO3Q65R.js → chunk-4S4BM3QQ.js} +15 -13
- package/dist/chunk-4S4BM3QQ.js.map +1 -0
- package/dist/chunk-5BKGXME7.js +65 -0
- package/dist/chunk-5BKGXME7.js.map +1 -0
- package/dist/{chunk-6KQG5HAH.js → chunk-5LBB5B3Z.js} +376 -72
- package/dist/chunk-5LBB5B3Z.js.map +1 -0
- package/dist/{chunk-42I2QC2L.js → chunk-6QDKWHLS.js} +18 -14
- package/dist/chunk-6QDKWHLS.js.map +1 -0
- package/dist/{chunk-VQQSPGSM.js → chunk-EDUKQ5AM.js} +247 -189
- package/dist/chunk-EDUKQ5AM.js.map +1 -0
- package/dist/chunk-I4MBDTY5.js +272 -0
- package/dist/chunk-I4MBDTY5.js.map +1 -0
- package/dist/chunk-JLZQWFV3.js +618 -0
- package/dist/chunk-JLZQWFV3.js.map +1 -0
- package/dist/chunk-K2TPS5LB.js +569 -0
- package/dist/chunk-K2TPS5LB.js.map +1 -0
- package/dist/chunk-KKHDIONI.js +414 -0
- package/dist/chunk-KKHDIONI.js.map +1 -0
- package/dist/chunk-KMPRBJK4.js +74 -0
- package/dist/chunk-KMPRBJK4.js.map +1 -0
- package/dist/{chunk-QUKKGHTZ.js → chunk-KTGTIOFD.js} +6 -3
- package/dist/chunk-KTGTIOFD.js.map +1 -0
- package/dist/chunk-LSH4MMOZ.js +838 -0
- package/dist/chunk-LSH4MMOZ.js.map +1 -0
- package/dist/chunk-NG236HPC.js +57 -0
- package/dist/chunk-NG236HPC.js.map +1 -0
- package/dist/{chunk-QBW3YBTR.js → chunk-NLMNWKVM.js} +14 -6
- package/dist/chunk-NLMNWKVM.js.map +1 -0
- package/dist/chunk-NU65VQ7M.js +99 -0
- package/dist/chunk-NU65VQ7M.js.map +1 -0
- package/dist/chunk-OWLAAMME.js +250 -0
- package/dist/chunk-OWLAAMME.js.map +1 -0
- package/dist/{chunk-SQQLHODJ.js → chunk-PC4UYEBM.js} +7 -4
- package/dist/chunk-PC4UYEBM.js.map +1 -0
- package/dist/{chunk-7EAUOUQS.js → chunk-RAF443UI.js} +213 -115
- package/dist/chunk-RAF443UI.js.map +1 -0
- package/dist/chunk-RZTMDUO7.js +49 -0
- package/dist/chunk-RZTMDUO7.js.map +1 -0
- package/dist/{chunk-EXGR4XEM.js → chunk-SESZDQPX.js} +23 -19
- package/dist/chunk-SESZDQPX.js.map +1 -0
- package/dist/{chunk-5IIQKMD5.js → chunk-TVVP3ZZQ.js} +14 -4
- package/dist/chunk-TVVP3ZZQ.js.map +1 -0
- package/dist/chunk-WWYCWKUM.js +196 -0
- package/dist/chunk-WWYCWKUM.js.map +1 -0
- package/dist/{chunk-AXHNWLIX.js → chunk-YRZ4M5GS.js} +2 -90
- package/dist/chunk-YRZ4M5GS.js.map +1 -0
- package/dist/chunk-ZN274SWR.js +613 -0
- package/dist/chunk-ZN274SWR.js.map +1 -0
- package/dist/cli.js +10 -6
- package/dist/cli.js.map +1 -1
- package/dist/{control-DvkH87qJ.d.ts → control-CBShYYA6.d.ts} +32 -33
- package/dist/control-runtime-BuJHoLg0.d.ts +180 -0
- package/dist/control.d.ts +8 -6
- package/dist/control.js +10 -7
- package/dist/{dataset-B9qvlm_o.d.ts → dataset-CiK_3LDr.d.ts} +5 -2
- package/dist/{emitter-B2XqDKFU.d.ts → emitter-DP_cSSiw.d.ts} +1 -1
- package/dist/errors-BZ9sTdz7.d.ts +70 -0
- package/dist/failure-cluster-C2EGSDiT.d.ts +76 -0
- package/dist/feedback-trajectory-DfFdrraJ.d.ts +169 -0
- package/dist/governance/index.d.ts +5 -0
- package/dist/governance/index.js +18 -0
- package/dist/governance/index.js.map +1 -0
- package/dist/{index-DDTlbHEK.d.ts → index--fVrWDiR.d.ts} +1 -1
- package/dist/index-Oj9fAPPN.d.ts +270 -0
- package/dist/index.d.ts +2018 -3003
- package/dist/index.js +7443 -9102
- package/dist/index.js.map +1 -1
- package/dist/{integrity-Cr5YodSY.d.ts → integrity-DK2EBVZC.d.ts} +4 -3
- package/dist/knowledge/index.d.ts +102 -0
- package/dist/knowledge/index.js +18 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/meta-eval/index.d.ts +99 -0
- package/dist/meta-eval/index.js +324 -0
- package/dist/meta-eval/index.js.map +1 -0
- package/dist/multi-layer-verifier-LkP3LVKj.d.ts +141 -0
- package/dist/openapi.json +491 -1
- package/dist/optimization.d.ts +11 -8
- package/dist/optimization.js +11 -9
- package/dist/outcome-store-D6KWmYvj.d.ts +63 -0
- package/dist/pipelines/index.d.ts +172 -0
- package/dist/pipelines/index.js +345 -0
- package/dist/pipelines/index.js.map +1 -0
- package/dist/prm/index.d.ts +99 -0
- package/dist/prm/index.js +222 -0
- package/dist/prm/index.js.map +1 -0
- package/dist/query-DODUYdPg.d.ts +30 -0
- package/dist/release-report-BNgMdqPF.d.ts +292 -0
- package/dist/replay-BL96gCEP.d.ts +226 -0
- package/dist/reporting.d.ts +10 -295
- package/dist/reporting.js +10 -6
- package/dist/{eval-campaign-Ds5QljIh.d.ts → researcher-BPT8x_NT.d.ts} +148 -146
- package/dist/rl.d.ts +1762 -8
- package/dist/rl.js +2035 -58
- package/dist/rl.js.map +1 -1
- package/dist/rubric-D5tjHNJQ.d.ts +72 -0
- package/dist/rubric-predictive-validity-C0uDYwG6.d.ts +105 -0
- package/dist/{run-record-DNiOMBrZ.d.ts → run-record-CqzahIbx.d.ts} +4 -1
- package/dist/sequential-Dgz1n51-.d.ts +139 -0
- package/dist/{store-u47QaJ9G.d.ts → store-Db2Bv8Cf.d.ts} +1 -1
- package/dist/{summary-report-Ce1r4EYo.d.ts → summary-report-C7VPYEj2.d.ts} +3 -76
- package/dist/telemetry/file.js +4 -1
- package/dist/telemetry/file.js.map +1 -1
- package/dist/telemetry/index.js +57 -57
- package/dist/telemetry/index.js.map +1 -1
- package/dist/test-graded-scenario-B2kWEdh9.d.ts +146 -0
- package/dist/traces.d.ts +142 -387
- package/dist/traces.js +1302 -40
- package/dist/traces.js.map +1 -1
- package/dist/trajectory-CnoBo-JY.d.ts +32 -0
- package/dist/wire/index.d.ts +369 -25
- package/dist/wire/index.js +22 -3
- package/package.json +44 -18
- package/dist/chunk-42I2QC2L.js.map +0 -1
- package/dist/chunk-5IIQKMD5.js.map +0 -1
- package/dist/chunk-6KQG5HAH.js.map +0 -1
- package/dist/chunk-6M774GY6.js.map +0 -1
- package/dist/chunk-7EAUOUQS.js.map +0 -1
- package/dist/chunk-AXHNWLIX.js.map +0 -1
- package/dist/chunk-EXGR4XEM.js.map +0 -1
- package/dist/chunk-IOXMGMHQ.js.map +0 -1
- package/dist/chunk-KAO3Q65R.js.map +0 -1
- package/dist/chunk-LZKIOBG2.js +0 -2026
- package/dist/chunk-LZKIOBG2.js.map +0 -1
- package/dist/chunk-QBW3YBTR.js.map +0 -1
- package/dist/chunk-QUKKGHTZ.js.map +0 -1
- package/dist/chunk-SQQLHODJ.js.map +0 -1
- package/dist/chunk-V5QSWN7L.js +0 -1310
- package/dist/chunk-V5QSWN7L.js.map +0 -1
- package/dist/chunk-VQQSPGSM.js.map +0 -1
- package/dist/chunk-XPHOZPOM.js +0 -1947
- package/dist/chunk-XPHOZPOM.js.map +0 -1
- package/dist/feedback-trajectory-c43WGtTX.d.ts +0 -346
- package/dist/index-ekBXweiQ.d.ts +0 -1894
- package/dist/sequential-DgU2mFsE.d.ts +0 -304
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/failure-taxonomy.ts","../src/pipelines/failure-cluster.ts","../src/tool-use-metrics.ts","../src/baseline.ts"],"sourcesContent":["/**\n * Failure taxonomy — canonical classes + a default classifier.\n *\n * Every failed run should end up in a named class. The classifier here\n * is rule-based (fast, deterministic); an LLM fallback can be added by\n * the consumer for novel cases and trained into the rule base over time.\n *\n * Consumers call `classifyFailure(run, spans, events)` and persist the\n * returned class as `Run.outcome.failureClass`.\n */\n\nimport type { FailureClass, Run, Span, TraceEvent } from './trace/schema'\nimport { FAILURE_CLASSES } from './trace/schema'\n\nexport { FAILURE_CLASSES, type FailureClass }\n\nexport interface FailureContext {\n run: Run\n spans: Span[]\n events: TraceEvent[]\n}\n\nexport interface FailureClassification {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n}\n\n/** Ordered rules — first match wins. */\nexport interface FailureRule {\n id: string\n match: (ctx: FailureContext) => {\n failureClass: FailureClass\n reason: string\n triggerSpanId?: string\n triggerEventId?: string\n } | null\n}\n\nexport const DEFAULT_RULES: FailureRule[] = [\n // Outcome already named? Respect it.\n {\n id: 'explicit-outcome',\n match: ({ run }) => {\n const fc = run.outcome?.failureClass\n if (fc && fc !== 'unknown')\n return { failureClass: fc, reason: 'outcome.failureClass set explicitly' }\n return null\n },\n },\n {\n id: 'knowledge-readiness-blocked',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'readiness_scored' &&\n e.payload.passed === false,\n )\n return event\n ? 
{\n failureClass: 'knowledge_readiness_blocked',\n reason: 'knowledge readiness report blocked execution',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-integration-manifest',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_validated' && e.payload.valid === false) ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'manifest_invalid')),\n )\n return event\n ? {\n failureClass: 'bad_integration_manifest',\n reason: 'integration manifest validation failed before launch',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-connection',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_manifest_resolved' &&\n hasResolutionStatus(e.payload, 'missing_connection'),\n )\n return event\n ? {\n failureClass: 'missing_integration_connection',\n reason: 'required integration connection was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-integration-scope',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_manifest_resolved' && hasMissingScopes(e.payload)) ||\n (e.payload.kind === 'integration_invoke_failed' && e.payload.code === 'scope_denied')),\n )\n return event\n ? 
{\n failureClass: 'missing_integration_scope',\n reason: 'integration grant or connection lacks required scopes',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-approval-required',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n ((e.payload.kind === 'integration_invoke' && e.payload.status === 'approval_required') ||\n (e.payload.kind === 'integration_invoke_failed' &&\n e.payload.code === 'approval_required') ||\n e.payload.kind === 'integration_approval_required'),\n )\n return event\n ? {\n failureClass: 'integration_approval_required',\n reason: 'integration write paused for user approval',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-auth-expired',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'auth_expired' ||\n e.payload.code === 'connection_not_active' ||\n e.payload.code === 'capability_expired' ||\n e.payload.status === 'expired'),\n )\n return event\n ? {\n failureClass: 'integration_auth_expired',\n reason: 'integration connection or capability expired',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'unsafe-integration-write-denied',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n (e.payload.code === 'unsafe_write_denied' ||\n e.payload.code === 'policy_denied' ||\n e.payload.code === 'action_denied'),\n )\n return event\n ? 
{\n failureClass: 'unsafe_integration_write_denied',\n reason: 'integration write was denied by policy or capability scope',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'integration-provider-failure',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'integration_invoke_failed' &&\n ![\n 'scope_denied',\n 'approval_required',\n 'auth_expired',\n 'connection_not_active',\n 'capability_expired',\n 'unsafe_write_denied',\n 'policy_denied',\n 'action_denied',\n 'manifest_invalid',\n ].includes(String(e.payload.code)),\n )\n return event\n ? {\n failureClass: 'integration_provider_failure',\n reason: 'integration provider invocation failed',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'missing-credentials',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.category === 'credential_or_secret',\n )\n return event\n ? {\n failureClass: 'missing_credentials',\n reason: 'required credential or secret was missing',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'bad-retrieval',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const retrieval = spans.find(\n (s) =>\n s.kind === 'retrieval' && (s.hits.length === 0 || s.hits.every((hit) => hit.score <= 0)),\n )\n return retrieval\n ? {\n failureClass: 'bad_retrieval',\n reason: 'retrieval returned no useful hits for a failed run',\n triggerSpanId: retrieval.spanId,\n }\n : null\n },\n },\n {\n id: 'insufficient-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'insufficient_evidence',\n )\n return event\n ? 
{\n failureClass: 'insufficient_evidence',\n reason: 'task proceeded with insufficient supporting evidence',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n {\n id: 'contradictory-evidence',\n match: ({ events }) => {\n const event = events.find(\n (e) =>\n e.kind === 'custom' &&\n e.payload.kind === 'knowledge_gap' &&\n e.payload.reason === 'contradictory_evidence',\n )\n return event\n ? {\n failureClass: 'contradictory_evidence',\n reason: 'supporting evidence contradicted itself',\n triggerEventId: event.eventId,\n }\n : null\n },\n },\n // Budget breach events\n {\n id: 'budget-breach',\n match: ({ events }) => {\n const breach = events.find((e) => e.kind === 'budget_breach')\n return breach\n ? {\n failureClass: 'budget_exceeded',\n reason: `budget breached on ${breach.payload.dimension ?? 'unknown dimension'}`,\n triggerEventId: breach.eventId,\n }\n : null\n },\n },\n // Policy violations\n {\n id: 'policy-violation',\n match: ({ events }) => {\n const e = events.find((x) => x.kind === 'policy_violation')\n return e\n ? {\n failureClass: 'policy_violation',\n reason: 'policy_violation event emitted',\n triggerEventId: e.eventId,\n }\n : null\n },\n },\n // Sandbox non-zero exit code\n {\n id: 'sandbox-failure',\n match: ({ spans }) => {\n const s = spans.find(\n (x) => x.kind === 'sandbox' && typeof x.exitCode === 'number' && x.exitCode !== 0,\n )\n if (!s) return null\n return {\n failureClass: 'sandbox_failure',\n reason: `sandbox exited ${(s as Extract<Span, { kind: 'sandbox' }>).exitCode}`,\n triggerSpanId: s.spanId,\n }\n },\n },\n // Timeout: run aborted by external signal\n {\n id: 'timeout',\n match: ({ run, events }) => {\n if (run.status !== 'aborted') return null\n const hasTimeout = events.some(\n (e) =>\n e.kind === 'error' &&\n String(e.payload.reason ?? '')\n .toLowerCase()\n .includes('timeout'),\n )\n const note = (run.outcome?.notes ?? 
'').toLowerCase()\n if (hasTimeout || note.includes('timeout') || note.includes('deadline')) {\n return { failureClass: 'timeout', reason: 'timeout signal observed' }\n }\n return null\n },\n },\n // Tool recovery failure: many consecutive tool errors on the same tool\n {\n id: 'tool-recovery-failure',\n match: ({ spans }) => {\n const tools = spans.filter((s) => s.kind === 'tool')\n const byTool = new Map<string, Span[]>()\n for (const t of tools) {\n const name = (t as Extract<Span, { kind: 'tool' }>).toolName\n const arr = byTool.get(name) ?? []\n arr.push(t)\n byTool.set(name, arr)\n }\n for (const [name, arr] of byTool) {\n const errs = arr.filter((s) => s.status === 'error')\n if (errs.length >= 3 && errs.length === arr.length) {\n return {\n failureClass: 'tool_recovery_failure',\n reason: `${errs.length} consecutive errors on tool \"${name}\"`,\n triggerSpanId: errs[errs.length - 1]!.spanId,\n }\n }\n }\n return null\n },\n },\n // Tool selection error: the run failed and agent called zero tools despite having them\n {\n id: 'tool-selection-error',\n match: ({ run, spans }) => {\n if (run.outcome?.pass !== false) return null\n const hasToolsAvailable = spans.some(\n (s) =>\n s.kind === 'agent' &&\n (s.attributes?.toolsAvailable as number | undefined) !== undefined &&\n (s.attributes?.toolsAvailable as number) > 0,\n )\n const tools = spans.filter((s) => s.kind === 'tool')\n if (hasToolsAvailable && tools.length === 0) {\n return {\n failureClass: 'tool_selection_error',\n reason: 'tools were available but none were called',\n }\n }\n return null\n },\n },\n // Format drift: scored by a judge with dimension='format' below threshold\n {\n id: 'format-drift',\n match: ({ spans }) => {\n const judge = spans.find(\n (s) =>\n s.kind === 'judge' &&\n (s as Extract<Span, { kind: 'judge' }>).dimension === 'format' &&\n (s as Extract<Span, { kind: 'judge' }>).score < 0.5,\n )\n return judge\n ? 
{\n failureClass: 'format_drift',\n reason: 'format judge scored below 0.5',\n triggerSpanId: judge.spanId,\n }\n : null\n },\n },\n]\n\nfunction hasResolutionStatus(payload: Record<string, unknown>, status: string): boolean {\n if (status === 'missing_connection' && stringArray(payload.missingConnections).length > 0)\n return true\n return resolutionItems(payload).some((item) => item.status === status)\n}\n\nfunction hasMissingScopes(payload: Record<string, unknown>): boolean {\n if (stringArray(payload.missingScopes).length > 0) return true\n return resolutionItems(payload).some(\n (item) => Array.isArray(item.missingScopes) && item.missingScopes.length > 0,\n )\n}\n\nfunction resolutionItems(payload: Record<string, unknown>): Array<Record<string, unknown>> {\n return [\n ...records(payload.missing),\n ...records(payload.optionalMissing),\n ...records(payload.ready),\n ]\n}\n\nfunction records(value: unknown): Array<Record<string, unknown>> {\n if (!Array.isArray(value)) return []\n return value.filter(\n (item): item is Record<string, unknown> =>\n Boolean(item) && typeof item === 'object' && !Array.isArray(item),\n )\n}\n\nfunction stringArray(value: unknown): string[] {\n return Array.isArray(value)\n ? value.filter((item): item is string => typeof item === 'string')\n : []\n}\n\n/** Classify the failure mode of a run using an ordered rule list. 
*/\nexport function classifyFailure(\n ctx: FailureContext,\n rules: FailureRule[] = DEFAULT_RULES,\n): FailureClassification {\n if (ctx.run.outcome?.pass !== false && ctx.run.status === 'completed') {\n return { failureClass: 'success', reason: 'run completed with pass=true (or no explicit fail)' }\n }\n for (const rule of rules) {\n const hit = rule.match(ctx)\n if (hit) return hit\n }\n return { failureClass: 'unknown', reason: 'no rule matched; run failed for unclassified reason' }\n}\n","/**\n * FailureClusterView — groups failed runs by (failureClass, triggerTool,\n * argHash-prefix) so weekly reviews can prioritize the top-N clusters.\n *\n * Each cluster includes: N runs, scenarios affected, representative\n * error message, a proposed mitigation hint (rule → action table).\n */\n\nimport { classifyFailure, DEFAULT_RULES, type FailureRule } from '../failure-taxonomy'\nimport { argHash, toolSpans } from '../trace/query'\nimport type { FailureClass, Span } from '../trace/schema'\nimport type { TraceStore } from '../trace/store'\n\nexport interface FailureCluster {\n failureClass: FailureClass\n /** Tool name when the trigger was a tool span, else undefined. */\n toolName?: string\n /** First 16 chars of argHash — clusters similar args. */\n argPrefix?: string\n /**\n * Source dimension when the trigger was a judge span (e.g. `'format'`,\n * `'safety'`, `'correctness'`). Lets cross-template aggregators\n * group failures by the dimension that fired without overloading\n * `argPrefix`. 
Optional — legacy clusters without this field\n * deserialize cleanly.\n */\n dimension?: string\n runCount: number\n scenarioIds: string[]\n exampleError?: string\n exampleRunId: string\n}\n\nexport interface FailureClusterReport {\n clusters: FailureCluster[]\n totalFailures: number\n totalRuns: number\n}\n\nexport async function failureClusterView(\n store: TraceStore,\n options: { rules?: FailureRule[]; minClusterSize?: number } = {},\n): Promise<FailureClusterReport> {\n const rules = options.rules ?? DEFAULT_RULES\n const minSize = options.minClusterSize ?? 1\n const runs = await store.listRuns()\n\n type Key = string\n const clusters = new Map<Key, FailureCluster>()\n let totalFailures = 0\n\n for (const run of runs) {\n if (run.status === 'completed' && run.outcome?.pass !== false) continue\n totalFailures++\n const spans = await store.spans({ runId: run.runId })\n const events = await store.events({ runId: run.runId })\n const cls = classifyFailure({ run, spans, events }, rules)\n\n let toolName: string | undefined\n let argPrefix: string | undefined\n let dimension: string | undefined\n if (cls.triggerSpanId) {\n const trig = spans.find((s) => s.spanId === cls.triggerSpanId)\n if (trig?.kind === 'tool') {\n toolName = trig.toolName\n argPrefix = argHash(trig.args).slice(0, 16)\n } else if (trig?.kind === 'judge') {\n dimension = trig.dimension\n }\n }\n // Fallback: look at the last errored tool span\n if (!toolName) {\n const ts = await toolSpans(store, run.runId)\n const errored = ts.filter((t) => t.status === 'error').pop()\n if (errored) {\n toolName = errored.toolName\n argPrefix = argHash(errored.args).slice(0, 16)\n }\n }\n // Secondary signal: any judge span on the failed run carries a\n // dimension. 
Useful when the rule classified by judge score but\n // didn't surface the trigger span (or surfaced a non-judge span).\n if (!dimension) {\n const judge = spans.find((s) => s.kind === 'judge' && typeof s.dimension === 'string')\n if (judge?.kind === 'judge') dimension = judge.dimension\n }\n\n const key = `${cls.failureClass}|${toolName ?? ''}|${argPrefix ?? ''}|${dimension ?? ''}`\n let cluster = clusters.get(key)\n if (!cluster) {\n cluster = {\n failureClass: cls.failureClass,\n toolName,\n argPrefix,\n dimension,\n runCount: 0,\n scenarioIds: [],\n exampleRunId: run.runId,\n exampleError: firstErrorMessage(spans) ?? cls.reason,\n }\n clusters.set(key, cluster)\n }\n cluster.runCount++\n if (!cluster.scenarioIds.includes(run.scenarioId)) cluster.scenarioIds.push(run.scenarioId)\n }\n\n const arr = [...clusters.values()]\n .filter((c) => c.runCount >= minSize)\n .sort((a, b) => b.runCount - a.runCount)\n\n return { clusters: arr, totalFailures, totalRuns: runs.length }\n}\n\nfunction firstErrorMessage(spans: Span[]): string | undefined {\n const errored = spans.find((s) => s.status === 'error')\n return errored?.error\n}\n","/**\n * Tool-use metrics — derived purely from trace data.\n *\n * No scoring assumptions: consumers supply optional ground-truth tool\n * selections per turn + optional \"information used downstream\" signals.\n * Without those, we still compute descriptive metrics (error rate,\n * retry rate, duplicate-call rate) that are useful on their own.\n */\n\nimport { argHash, groupBy, toolSpans } from './trace/query'\nimport type { Span } from './trace/schema'\nimport type { TraceStore } from './trace/store'\n\nexport interface ToolUseMetrics {\n runId: string\n totalCalls: number\n byTool: Record<string, ToolStats>\n errorRate: number\n /** Ratio of calls with identical (toolName, argHash) already seen earlier in the same run. */\n duplicateRate: number\n /** Ratio of error calls followed by ≥1 retry on same tool. 
*/\n retryRate: number\n /** Optional: of the calls agent made, fraction the evaluator marked as \"correct selection\". */\n selectionAccuracy?: number\n}\n\nexport interface ToolStats {\n calls: number\n errors: number\n avgLatencyMs: number\n duplicates: number\n}\n\nexport interface ToolUseOptions {\n /** Map of spanId → whether the evaluator judged the tool selection correct. Optional. */\n selectionLabels?: Record<string, boolean>\n}\n\nexport async function computeToolUseMetrics(\n store: TraceStore,\n runId: string,\n options: ToolUseOptions = {},\n): Promise<ToolUseMetrics> {\n const tools = await toolSpans(store, runId)\n if (tools.length === 0) {\n return { runId, totalCalls: 0, byTool: {}, errorRate: 0, duplicateRate: 0, retryRate: 0 }\n }\n\n const byTool: Record<string, ToolStats> = {}\n let totalErrors = 0\n let totalDuplicates = 0\n const sortedTools = [...tools].sort((a, b) => a.startedAt - b.startedAt)\n const seenSignatures = new Set<string>()\n\n // duplicate detection + per-tool aggregation\n for (const t of sortedTools) {\n const stat = (byTool[t.toolName] ??= { calls: 0, errors: 0, avgLatencyMs: 0, duplicates: 0 })\n stat.calls += 1\n if (t.status === 'error') {\n stat.errors += 1\n totalErrors += 1\n }\n if (typeof t.latencyMs === 'number') stat.avgLatencyMs += t.latencyMs\n const sig = `${t.toolName}|${argHash(t.args)}`\n if (seenSignatures.has(sig)) {\n stat.duplicates += 1\n totalDuplicates += 1\n }\n seenSignatures.add(sig)\n }\n\n for (const stat of Object.values(byTool)) {\n stat.avgLatencyMs = stat.calls > 0 ? 
stat.avgLatencyMs / stat.calls : 0\n }\n\n // retry detection: per-tool chronological adjacency where error → next same-tool call\n let retryOpportunities = 0\n let retriesFollowed = 0\n for (const [, arr] of groupBy(sortedTools, (t) => t.toolName)) {\n for (let i = 0; i < arr.length; i++) {\n if (arr[i]!.status !== 'error') continue\n retryOpportunities += 1\n if (arr[i + 1]) retriesFollowed += 1\n }\n }\n const retryRate = retryOpportunities > 0 ? retriesFollowed / retryOpportunities : 0\n\n let selectionAccuracy: number | undefined\n if (options.selectionLabels) {\n const labeled = sortedTools.filter((t) => t.spanId in options.selectionLabels!)\n if (labeled.length > 0) {\n selectionAccuracy =\n labeled.filter((t) => options.selectionLabels![t.spanId]).length / labeled.length\n }\n }\n\n return {\n runId,\n totalCalls: sortedTools.length,\n byTool,\n errorRate: totalErrors / sortedTools.length,\n duplicateRate: totalDuplicates / sortedTools.length,\n retryRate,\n selectionAccuracy,\n }\n}\n\nexport type { Span }\n","/**\n * Baseline regression detection.\n *\n * Lifted from ADC baseline.ts. Every promotion-blocking signal boils down\n * to: \"is this run measurably worse than baseline?\" — with enough\n * statistical rigor to distinguish noise from drift.\n *\n * Uses:\n * - Welch's t-test (unequal variance) for per-metric mean comparison\n * - Cohen's d for effect size magnitude\n * - IQR for stability flag (unstable samples can't be trusted for comparisons)\n *\n * Returns a structured verdict: improved | regressed | stable | unstable.\n */\n\nimport { cohensD } from './statistics'\n\nexport interface MetricSamples {\n /** Stable metric key (e.g. \"overallScore\", \"firstTokenMs\"). */\n metric: string\n /** Whether higher values are better. 
*/\n higherIsBetter: boolean\n baseline: number[]\n candidate: number[]\n}\n\nexport interface MetricVerdict {\n metric: string\n baselineMean: number\n candidateMean: number\n delta: number\n cohensD: number\n welchT: number\n welchDf: number\n welchP: number\n stable: boolean\n /** IQR of the combined samples — used as a rough stability indicator. */\n iqr: number\n verdict: 'improved' | 'regressed' | 'stable' | 'unstable'\n}\n\nexport interface BaselineReport {\n metrics: MetricVerdict[]\n /** True if any critical metric regressed. */\n hasRegression: boolean\n /** True if any metric is unstable (too noisy to judge). */\n hasUnstable: boolean\n}\n\nexport interface BaselineOptions {\n /** Effect size threshold for meaningful delta (default 0.5 — medium effect). */\n effectThreshold?: number\n /** p-value threshold for statistical significance (default 0.05). */\n alpha?: number\n /** IQR/mean ratio above which samples are flagged unstable (default 0.30). */\n unstableCvThreshold?: number\n}\n\n/**\n * Compare candidate samples against baseline per metric. Verdict logic:\n * - unstable: IQR/|mean| > threshold on either set — not enough signal\n * - improved: meaningful effect in the \"better\" direction AND p < alpha\n * - regressed: meaningful effect in the \"worse\" direction AND p < alpha\n * - stable: otherwise (no significant change)\n */\nexport function compareToBaseline(\n samples: MetricSamples[],\n options: BaselineOptions = {},\n): BaselineReport {\n const effectThreshold = options.effectThreshold ?? 0.5\n const alpha = options.alpha ?? 0.05\n const cvThreshold = options.unstableCvThreshold ?? 
0.3\n\n const metrics: MetricVerdict[] = samples.map((s) => {\n if (s.baseline.length < 2 || s.candidate.length < 2) {\n throw new Error(`compareToBaseline: need ≥2 samples per side for \"${s.metric}\"`)\n }\n const bMean = mean(s.baseline)\n const cMean = mean(s.candidate)\n const delta = cMean - bMean\n const d = cohensD(s.baseline, s.candidate) // positive = candidate higher\n const { t, df, p } = welchsTTest(s.baseline, s.candidate)\n // Stability is per-side: a comparison is trustworthy only when BOTH\n // samples are internally consistent. Combining the sides would flag\n // large-but-real deltas as \"unstable\" which is exactly what we want\n // to detect.\n const baselineIqr = iqr(s.baseline)\n const candidateIqr = iqr(s.candidate)\n const baselineStable = baselineIqr / Math.max(Math.abs(bMean), 1e-9) <= cvThreshold\n const candidateStable = candidateIqr / Math.max(Math.abs(cMean), 1e-9) <= cvThreshold\n const stable = baselineStable && candidateStable\n const reportedIqr = Math.max(baselineIqr, candidateIqr)\n\n let verdict: MetricVerdict['verdict']\n if (!stable) {\n verdict = 'unstable'\n } else if (p < alpha && Math.abs(d) >= effectThreshold) {\n const candidateIsBetter = s.higherIsBetter ? delta > 0 : delta < 0\n verdict = candidateIsBetter ? 'improved' : 'regressed'\n } else {\n verdict = 'stable'\n }\n\n return {\n metric: s.metric,\n baselineMean: bMean,\n candidateMean: cMean,\n delta,\n cohensD: d,\n welchT: t,\n welchDf: df,\n welchP: p,\n stable,\n iqr: reportedIqr,\n verdict,\n }\n })\n\n return {\n metrics,\n hasRegression: metrics.some((m) => m.verdict === 'regressed'),\n hasUnstable: metrics.some((m) => m.verdict === 'unstable'),\n }\n}\n\nfunction mean(xs: number[]): number {\n return xs.reduce((a, b) => a + b, 0) / xs.length\n}\n\n/** Inter-quartile range; 0 when the sample has no spread. 
*/\nexport function iqr(xs: number[]): number {\n if (xs.length === 0) return 0\n const sorted = [...xs].sort((a, b) => a - b)\n const q = (p: number) => {\n const idx = p * (sorted.length - 1)\n const lo = Math.floor(idx)\n const hi = Math.ceil(idx)\n return sorted[lo]! + (sorted[hi]! - sorted[lo]!) * (idx - lo)\n }\n return q(0.75) - q(0.25)\n}\n\n/**\n * Welch's t-test — unequal-variance two-sample t. Uses the same Student-t\n * CDF as `pairedTTest` (via incomplete beta); falls back to normal tail\n * when df is large.\n */\nexport function welchsTTest(a: number[], b: number[]): { t: number; df: number; p: number } {\n if (a.length < 2 || b.length < 2) return { t: 0, df: 0, p: 1 }\n const mA = mean(a)\n const mB = mean(b)\n const vA = variance(a, mA)\n const vB = variance(b, mB)\n const seSquared = vA / a.length + vB / b.length\n if (seSquared === 0) return { t: mA === mB ? 0 : Infinity, df: 0, p: mA === mB ? 1 : 0 }\n const t = (mB - mA) / Math.sqrt(seSquared)\n const df =\n (seSquared * seSquared) /\n ((vA / a.length) ** 2 / (a.length - 1) + (vB / b.length) ** 2 / (b.length - 1))\n const p = 2 * (1 - studentTCdf(Math.abs(t), df))\n return { t, df, p }\n}\n\nfunction variance(xs: number[], m: number): number {\n return xs.reduce((acc, x) => acc + (x - m) ** 2, 0) / (xs.length - 1)\n}\n\n// Re-used from statistics.ts via small local copy to avoid exporting internals.\nfunction studentTCdf(t: number, df: number): number {\n if (df <= 0) return 0.5\n if (df > 100) return normalCdf(t)\n const x = df / (df + t * t)\n const ib = incompleteBeta(x, df / 2, 0.5)\n return t >= 0 ? 
1 - 0.5 * ib : 0.5 * ib\n}\n\nfunction incompleteBeta(x: number, a: number, b: number): number {\n if (x <= 0) return 0\n if (x >= 1) return 1\n const lnBeta = lnGamma(a) + lnGamma(b) - lnGamma(a + b)\n const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a\n let c = 1\n let d = 1 - ((a + b) * x) / (a + 1)\n if (Math.abs(d) < 1e-30) d = 1e-30\n d = 1 / d\n let f = d\n for (let m = 1; m <= 200; m++) {\n const m2 = 2 * m\n let num = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n f *= d * c\n num = -((a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1))\n d = 1 + num * d\n if (Math.abs(d) < 1e-30) d = 1e-30\n c = 1 + num / c\n if (Math.abs(c) < 1e-30) c = 1e-30\n d = 1 / d\n const delta = d * c\n f *= delta\n if (Math.abs(delta - 1) < 3e-7) break\n }\n return front * f\n}\n\nfunction lnGamma(z: number): number {\n const coefs = [\n 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,\n -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,\n 1.5056327351493116e-7,\n ]\n if (z < 0.5) return Math.log(Math.PI / Math.sin(Math.PI * z)) - lnGamma(1 - z)\n z -= 1\n let x = coefs[0]!\n for (let i = 1; i < 9; i++) x += coefs[i]! / (z + i)\n const t = z + 7.5\n return 0.5 * Math.log(2 * Math.PI) + (z + 0.5) * Math.log(t) - t + Math.log(x)\n}\n\nfunction normalCdf(x: number): number {\n const a1 = 0.254829592\n const a2 = -0.284496736\n const a3 = 1.421413741\n const a4 = -1.453152027\n const a5 = 1.061405429\n const p = 0.3275911\n const sign = x < 0 ? 
-1 : 1\n const absX = Math.abs(x)\n const t = 1 / (1 + p * absX)\n const y = 1 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp((-absX * absX) / 2)\n return 0.5 * (1 + sign * y)\n}\n"],"mappings":";;;;;;;;;;AAwCO,IAAM,gBAA+B;AAAA;AAAA,EAE1C;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,IAAI,MAAM;AAClB,YAAM,KAAK,IAAI,SAAS;AACxB,UAAI,MAAM,OAAO;AACf,eAAO,EAAE,cAAc,IAAI,QAAQ,sCAAsC;AAC3E,aAAO;AAAA,IACT;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,sBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,oCAAoC,EAAE,QAAQ,UAAU,SAC1E,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mCACnB,oBAAoB,EAAE,SAAS,oBAAoB;AAAA,MACvD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,mCAAmC,iBAAiB,EAAE,OAAO,KAC/E,EAAE,QAAQ,SAAS,+BAA+B,EAAE,QAAQ,SAAS;AAAA,MAC5E;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,aACT,EAAE,QAAQ,SAAS,wBAAwB,EAAE,QAAQ,WAAW,uBAC/D,EAAE,QAAQ,SAAS,+BAClB,EAAE,QAAQ,SAAS,uBACrB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,
OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,kBAClB,EAAE,QAAQ,SAAS,2BACnB,EAAE,QAAQ,SAAS,wBACnB,EAAE,QAAQ,WAAW;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,gCAClB,EAAE,QAAQ,SAAS,yBAClB,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,SAAS;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,+BACnB,CAAC;AAAA,UACC;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF,EAAE,SAAS,OAAO,EAAE,QAAQ,IAAI,CAAC;AAAA,MACrC;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,aAAa;AAAA,MAC3B;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,YAAY,MAAM;AAAA,QACtB,CAAC,MACC,EAAE,SAAS,gBAAgB,EAAE,KAAK,WAAW,KAAK,EAAE,KAAK,MAAM,CAAC,QAAQ,IAAI,SAAS,CAAC;AAAA,MAC1F;AACA,aAAO,YACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,UAAU;AAAA,MAC3B,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,QAAQ,OAAO;AAAA,QACnB,CAAC,MACC,EAAE,SAAS,YACX,EAAE,QAAQ,SAAS,mBACnB,EAAE,QAAQ,WAAW;AAAA,MACzB;AACA
,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,MAAM;AAAA,MACxB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,SAAS,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,eAAe;AAC5D,aAAO,SACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ,sBAAsB,OAAO,QAAQ,aAAa,mBAAmB;AAAA,QAC7E,gBAAgB,OAAO;AAAA,MACzB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,OAAO,MAAM;AACrB,YAAM,IAAI,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,kBAAkB;AAC1D,aAAO,IACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,gBAAgB,EAAE;AAAA,MACpB,IACA;AAAA,IACN;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,IAAI,MAAM;AAAA,QACd,CAAC,MAAM,EAAE,SAAS,aAAa,OAAO,EAAE,aAAa,YAAY,EAAE,aAAa;AAAA,MAClF;AACA,UAAI,CAAC,EAAG,QAAO;AACf,aAAO;AAAA,QACL,cAAc;AAAA,QACd,QAAQ,kBAAmB,EAAyC,QAAQ;AAAA,QAC5E,eAAe,EAAE;AAAA,MACnB;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,OAAO,MAAM;AAC1B,UAAI,IAAI,WAAW,UAAW,QAAO;AACrC,YAAM,aAAa,OAAO;AAAA,QACxB,CAAC,MACC,EAAE,SAAS,WACX,OAAO,EAAE,QAAQ,UAAU,EAAE,EAC1B,YAAY,EACZ,SAAS,SAAS;AAAA,MACzB;AACA,YAAM,QAAQ,IAAI,SAAS,SAAS,IAAI,YAAY;AACpD,UAAI,cAAc,KAAK,SAAS,SAAS,KAAK,KAAK,SAAS,UAAU,GAAG;AACvE,eAAO,EAAE,cAAc,WAAW,QAAQ,0BAA0B;AAAA,MACtE;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,YAAM,SAAS,oBAAI,IAAoB;AACvC,iBAAW,KAAK,OAAO;AACrB,cAAM,OAAQ,EAAsC;AACpD,cAAM,MAAM,OAAO,IAAI,IAAI,KAAK,CAAC;AACjC,YAAI,KAAK,CAAC;AACV,eAAO,IAAI,MAAM,GAAG;AAAA,MACtB;AACA,iBAAW,CAAC,MAAM,GAAG,KAAK,QAAQ;AAChC,cAAM,OAAO,IAAI,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO;AACnD,YAAI,KAAK,UAAU,KAAK,KAAK,WAAW,IAAI,QAAQ;AAClD,iBAAO;AAAA,YACL,cAAc;AAAA,YACd,QAAQ,GAAG,KAAK,MAAM,gCAAgC,IAAI;AAAA,YAC1D,eAAe,KAAK,KAAK,SAAS,CAAC,EAAG;AAAA,UACxC;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,KAAK,MAAM,MAAM;AACzB,UAAI,IAAI,SAAS,SAAS,MAAO,QAAO;AACxC,YAAM,oBAAoB,MAAM;AAAA,QAC9B,CAAC,MA
CC,EAAE,SAAS,WACV,EAAE,YAAY,mBAA0C,UACxD,EAAE,YAAY,iBAA4B;AAAA,MAC/C;AACA,YAAM,QAAQ,MAAM,OAAO,CAAC,MAAM,EAAE,SAAS,MAAM;AACnD,UAAI,qBAAqB,MAAM,WAAW,GAAG;AAC3C,eAAO;AAAA,UACL,cAAc;AAAA,UACd,QAAQ;AAAA,QACV;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAEA;AAAA,IACE,IAAI;AAAA,IACJ,OAAO,CAAC,EAAE,MAAM,MAAM;AACpB,YAAM,QAAQ,MAAM;AAAA,QAClB,CAAC,MACC,EAAE,SAAS,WACV,EAAuC,cAAc,YACrD,EAAuC,QAAQ;AAAA,MACpD;AACA,aAAO,QACH;AAAA,QACE,cAAc;AAAA,QACd,QAAQ;AAAA,QACR,eAAe,MAAM;AAAA,MACvB,IACA;AAAA,IACN;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,SAAkC,QAAyB;AACtF,MAAI,WAAW,wBAAwB,YAAY,QAAQ,kBAAkB,EAAE,SAAS;AACtF,WAAO;AACT,SAAO,gBAAgB,OAAO,EAAE,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM;AACvE;AAEA,SAAS,iBAAiB,SAA2C;AACnE,MAAI,YAAY,QAAQ,aAAa,EAAE,SAAS,EAAG,QAAO;AAC1D,SAAO,gBAAgB,OAAO,EAAE;AAAA,IAC9B,CAAC,SAAS,MAAM,QAAQ,KAAK,aAAa,KAAK,KAAK,cAAc,SAAS;AAAA,EAC7E;AACF;AAEA,SAAS,gBAAgB,SAAkE;AACzF,SAAO;AAAA,IACL,GAAG,QAAQ,QAAQ,OAAO;AAAA,IAC1B,GAAG,QAAQ,QAAQ,eAAe;AAAA,IAClC,GAAG,QAAQ,QAAQ,KAAK;AAAA,EAC1B;AACF;AAEA,SAAS,QAAQ,OAAgD;AAC/D,MAAI,CAAC,MAAM,QAAQ,KAAK,EAAG,QAAO,CAAC;AACnC,SAAO,MAAM;AAAA,IACX,CAAC,SACC,QAAQ,IAAI,KAAK,OAAO,SAAS,YAAY,CAAC,MAAM,QAAQ,IAAI;AAAA,EACpE;AACF;AAEA,SAAS,YAAY,OAA0B;AAC7C,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,SAAyB,OAAO,SAAS,QAAQ,IAC/D,CAAC;AACP;AAGO,SAAS,gBACd,KACA,QAAuB,eACA;AACvB,MAAI,IAAI,IAAI,SAAS,SAAS,SAAS,IAAI,IAAI,WAAW,aAAa;AACrE,WAAO,EAAE,cAAc,WAAW,QAAQ,qDAAqD;AAAA,EACjG;AACA,aAAW,QAAQ,OAAO;AACxB,UAAM,MAAM,KAAK,MAAM,GAAG;AAC1B,QAAI,IAAK,QAAO;AAAA,EAClB;AACA,SAAO,EAAE,cAAc,WAAW,QAAQ,sDAAsD;AAClG;;;ACtaA,eAAsB,mBACpB,OACA,UAA8D,CAAC,GAChC;AAC/B,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,UAAU,QAAQ,kBAAkB;AAC1C,QAAM,OAAO,MAAM,MAAM,SAAS;AAGlC,QAAM,WAAW,oBAAI,IAAyB;AAC9C,MAAI,gBAAgB;AAEpB,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,WAAW,eAAe,IAAI,SAAS,SAAS,MAAO;AAC/D;AACA,UAAM,QAAQ,MAAM,MAAM,MAAM,EAAE,OAAO,IAAI,MAAM,CAAC;AACpD,UAAM,SAAS,MAAM,MAAM,OAAO,EAAE,OAAO,IAAI,MAAM,CAAC;AACtD,UAAM,MAAM,gBAAgB,EAAE,KAAK,OAAO,OAAO,GAAG,KAAK;AAEzD,QAAI;AACJ,QAAI;AACJ,QAAI;AACJ,QAAI,IAAI,eAAe;AACrB,YAAM,OAAO,MAAM,K
AAK,CAAC,MAAM,EAAE,WAAW,IAAI,aAAa;AAC7D,UAAI,MAAM,SAAS,QAAQ;AACzB,mBAAW,KAAK;AAChB,oBAAY,QAAQ,KAAK,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC5C,WAAW,MAAM,SAAS,SAAS;AACjC,oBAAY,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,CAAC,UAAU;AACb,YAAM,KAAK,MAAM,UAAU,OAAO,IAAI,KAAK;AAC3C,YAAM,UAAU,GAAG,OAAO,CAAC,MAAM,EAAE,WAAW,OAAO,EAAE,IAAI;AAC3D,UAAI,SAAS;AACX,mBAAW,QAAQ;AACnB,oBAAY,QAAQ,QAAQ,IAAI,EAAE,MAAM,GAAG,EAAE;AAAA,MAC/C;AAAA,IACF;AAIA,QAAI,CAAC,WAAW;AACd,YAAM,QAAQ,MAAM,KAAK,CAAC,MAAM,EAAE,SAAS,WAAW,OAAO,EAAE,cAAc,QAAQ;AACrF,UAAI,OAAO,SAAS,QAAS,aAAY,MAAM;AAAA,IACjD;AAEA,UAAM,MAAM,GAAG,IAAI,YAAY,IAAI,YAAY,EAAE,IAAI,aAAa,EAAE,IAAI,aAAa,EAAE;AACvF,QAAI,UAAU,SAAS,IAAI,GAAG;AAC9B,QAAI,CAAC,SAAS;AACZ,gBAAU;AAAA,QACR,cAAc,IAAI;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,QACA,UAAU;AAAA,QACV,aAAa,CAAC;AAAA,QACd,cAAc,IAAI;AAAA,QAClB,cAAc,kBAAkB,KAAK,KAAK,IAAI;AAAA,MAChD;AACA,eAAS,IAAI,KAAK,OAAO;AAAA,IAC3B;AACA,YAAQ;AACR,QAAI,CAAC,QAAQ,YAAY,SAAS,IAAI,UAAU,EAAG,SAAQ,YAAY,KAAK,IAAI,UAAU;AAAA,EAC5F;AAEA,QAAM,MAAM,CAAC,GAAG,SAAS,OAAO,CAAC,EAC9B,OAAO,CAAC,MAAM,EAAE,YAAY,OAAO,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ;AAEzC,SAAO,EAAE,UAAU,KAAK,eAAe,WAAW,KAAK,OAAO;AAChE;AAEA,SAAS,kBAAkB,OAAmC;AAC5D,QAAM,UAAU,MAAM,KAAK,CAAC,MAAM,EAAE,WAAW,OAAO;AACtD,SAAO,SAAS;AAClB;;;AC9EA,eAAsB,sBACpB,OACA,OACA,UAA0B,CAAC,GACF;AACzB,QAAM,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC1C,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO,EAAE,OAAO,YAAY,GAAG,QAAQ,CAAC,GAAG,WAAW,GAAG,eAAe,GAAG,WAAW,EAAE;AAAA,EAC1F;AAEA,QAAM,SAAoC,CAAC;AAC3C,MAAI,cAAc;AAClB,MAAI,kBAAkB;AACtB,QAAM,cAAc,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,YAAY,EAAE,SAAS;AACvE,QAAM,iBAAiB,oBAAI,IAAY;AAGvC,aAAW,KAAK,aAAa;AAC3B,UAAM,OAAQ,OAAO,EAAE,QAAQ,MAAM,EAAE,OAAO,GAAG,QAAQ,GAAG,cAAc,GAAG,YAAY,EAAE;AAC3F,SAAK,SAAS;AACd,QAAI,EAAE,WAAW,SAAS;AACxB,WAAK,UAAU;AACf,qBAAe;AAAA,IACjB;AACA,QAAI,OAAO,EAAE,cAAc,SAAU,MAAK,gBAAgB,EAAE;AAC5D,UAAM,MAAM,GAAG,EAAE,QAAQ,IAAI,QAAQ,EAAE,IAAI,CAAC;AAC5C,QAAI,eAAe,IAAI,GAAG,GAAG;AAC3B,WAAK,cAAc;AACnB,yBAAmB;AAAA,IACrB;AACA,mBAAe,IAAI,GAAG;AAAA,EACxB;AAEA,aAAW,QAAQ,O
AAO,OAAO,MAAM,GAAG;AACxC,SAAK,eAAe,KAAK,QAAQ,IAAI,KAAK,eAAe,KAAK,QAAQ;AAAA,EACxE;AAGA,MAAI,qBAAqB;AACzB,MAAI,kBAAkB;AACtB,aAAW,CAAC,EAAE,GAAG,KAAK,QAAQ,aAAa,CAAC,MAAM,EAAE,QAAQ,GAAG;AAC7D,aAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACnC,UAAI,IAAI,CAAC,EAAG,WAAW,QAAS;AAChC,4BAAsB;AACtB,UAAI,IAAI,IAAI,CAAC,EAAG,oBAAmB;AAAA,IACrC;AAAA,EACF;AACA,QAAM,YAAY,qBAAqB,IAAI,kBAAkB,qBAAqB;AAElF,MAAI;AACJ,MAAI,QAAQ,iBAAiB;AAC3B,UAAM,UAAU,YAAY,OAAO,CAAC,MAAM,EAAE,UAAU,QAAQ,eAAgB;AAC9E,QAAI,QAAQ,SAAS,GAAG;AACtB,0BACE,QAAQ,OAAO,CAAC,MAAM,QAAQ,gBAAiB,EAAE,MAAM,CAAC,EAAE,SAAS,QAAQ;AAAA,IAC/E;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,YAAY;AAAA,IACxB;AAAA,IACA,WAAW,cAAc,YAAY;AAAA,IACrC,eAAe,kBAAkB,YAAY;AAAA,IAC7C;AAAA,IACA;AAAA,EACF;AACF;;;ACxCO,SAAS,kBACd,SACA,UAA2B,CAAC,GACZ;AAChB,QAAM,kBAAkB,QAAQ,mBAAmB;AACnD,QAAM,QAAQ,QAAQ,SAAS;AAC/B,QAAM,cAAc,QAAQ,uBAAuB;AAEnD,QAAM,UAA2B,QAAQ,IAAI,CAAC,MAAM;AAClD,QAAI,EAAE,SAAS,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACnD,YAAM,IAAI,MAAM,yDAAoD,EAAE,MAAM,GAAG;AAAA,IACjF;AACA,UAAM,QAAQ,KAAK,EAAE,QAAQ;AAC7B,UAAM,QAAQ,KAAK,EAAE,SAAS;AAC9B,UAAM,QAAQ,QAAQ;AACtB,UAAM,IAAI,QAAQ,EAAE,UAAU,EAAE,SAAS;AACzC,UAAM,EAAE,GAAG,IAAI,EAAE,IAAI,YAAY,EAAE,UAAU,EAAE,SAAS;AAKxD,UAAM,cAAc,IAAI,EAAE,QAAQ;AAClC,UAAM,eAAe,IAAI,EAAE,SAAS;AACpC,UAAM,iBAAiB,cAAc,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AACxE,UAAM,kBAAkB,eAAe,KAAK,IAAI,KAAK,IAAI,KAAK,GAAG,IAAI,KAAK;AAC1E,UAAM,SAAS,kBAAkB;AACjC,UAAM,cAAc,KAAK,IAAI,aAAa,YAAY;AAEtD,QAAI;AACJ,QAAI,CAAC,QAAQ;AACX,gBAAU;AAAA,IACZ,WAAW,IAAI,SAAS,KAAK,IAAI,CAAC,KAAK,iBAAiB;AACtD,YAAM,oBAAoB,EAAE,iBAAiB,QAAQ,IAAI,QAAQ;AACjE,gBAAU,oBAAoB,aAAa;AAAA,IAC7C,OAAO;AACL,gBAAU;AAAA,IACZ;AAEA,WAAO;AAAA,MACL,QAAQ,EAAE;AAAA,MACV,cAAc;AAAA,MACd,eAAe;AAAA,MACf;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,QAAQ;AAAA,MACR;AAAA,MACA,KAAK;AAAA,MACL;AAAA,IACF;AAAA,EACF,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,eAAe,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,WAAW;AAAA,IAC5D,aAAa,QAAQ,KAAK,CAAC,MAAM,EAAE,YAAY,UAAU;AAAA,EAC3D;AACF;AAEA,SAAS,KAAK,IAAsB;AAClC,SAAO,GAAG,OAAO,CAAC,GAAG,M
AAM,IAAI,GAAG,CAAC,IAAI,GAAG;AAC5C;AAGO,SAAS,IAAI,IAAsB;AACxC,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,QAAM,SAAS,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC3C,QAAM,IAAI,CAAC,MAAc;AACvB,UAAM,MAAM,KAAK,OAAO,SAAS;AACjC,UAAM,KAAK,KAAK,MAAM,GAAG;AACzB,UAAM,KAAK,KAAK,KAAK,GAAG;AACxB,WAAO,OAAO,EAAE,KAAM,OAAO,EAAE,IAAK,OAAO,EAAE,MAAO,MAAM;AAAA,EAC5D;AACA,SAAO,EAAE,IAAI,IAAI,EAAE,IAAI;AACzB;AAOO,SAAS,YAAY,GAAa,GAAmD;AAC1F,MAAI,EAAE,SAAS,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,EAAE;AAC7D,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,KAAK,CAAC;AACjB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,KAAK,SAAS,GAAG,EAAE;AACzB,QAAM,YAAY,KAAK,EAAE,SAAS,KAAK,EAAE;AACzC,MAAI,cAAc,EAAG,QAAO,EAAE,GAAG,OAAO,KAAK,IAAI,UAAU,IAAI,GAAG,GAAG,OAAO,KAAK,IAAI,EAAE;AACvF,QAAM,KAAK,KAAK,MAAM,KAAK,KAAK,SAAS;AACzC,QAAM,KACH,YAAY,cACX,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS,MAAM,KAAK,EAAE,WAAW,KAAK,EAAE,SAAS;AAC9E,QAAM,IAAI,KAAK,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE;AAC9C,SAAO,EAAE,GAAG,IAAI,EAAE;AACpB;AAEA,SAAS,SAAS,IAAc,GAAmB;AACjD,SAAO,GAAG,OAAO,CAAC,KAAK,MAAM,OAAO,IAAI,MAAM,GAAG,CAAC,KAAK,GAAG,SAAS;AACrE;AAGA,SAAS,YAAY,GAAW,IAAoB;AAClD,MAAI,MAAM,EAAG,QAAO;AACpB,MAAI,KAAK,IAAK,QAAO,UAAU,CAAC;AAChC,QAAM,IAAI,MAAM,KAAK,IAAI;AACzB,QAAM,KAAK,eAAe,GAAG,KAAK,GAAG,GAAG;AACxC,SAAO,KAAK,IAAI,IAAI,MAAM,KAAK,MAAM;AACvC;AAEA,SAAS,eAAe,GAAW,GAAW,GAAmB;AAC/D,MAAI,KAAK,EAAG,QAAO;AACnB,MAAI,KAAK,EAAG,QAAO;AACnB,QAAM,SAAS,QAAQ,CAAC,IAAI,QAAQ,CAAC,IAAI,QAAQ,IAAI,CAAC;AACtD,QAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,IAAI,CAAC,IAAI,IAAI,MAAM,IAAI;AACzE,MAAI,IAAI;AACR,MAAI,IAAI,KAAM,IAAI,KAAK,KAAM,IAAI;AACjC,MAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,MAAI,IAAI;AACR,MAAI,IAAI;AACR,WAAS,IAAI,GAAG,KAAK,KAAK,KAAK;AAC7B,UAAM,KAAK,IAAI;AACf,QAAI,MAAO,KAAK,IAAI,KAAK,MAAO,IAAI,KAAK,MAAM,IAAI;AACnD,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,SAAK,IAAI;AACT,UAAM,GAAG,IAAI,MAAM,IAAI,IAAI,KAAK,OAAO,IAAI,OAAO,IAAI,KAAK;AAC3D,QAAI,IAAI,MAAM;AACd,QAAI
,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI,MAAM;AACd,QAAI,KAAK,IAAI,CAAC,IAAI,MAAO,KAAI;AAC7B,QAAI,IAAI;AACR,UAAM,QAAQ,IAAI;AAClB,SAAK;AACL,QAAI,KAAK,IAAI,QAAQ,CAAC,IAAI,KAAM;AAAA,EAClC;AACA,SAAO,QAAQ;AACjB;AAEA,SAAS,QAAQ,GAAmB;AAClC,QAAM,QAAQ;AAAA,IACZ;AAAA,IAAqB;AAAA,IAAmB;AAAA,IAAqB;AAAA,IAC7D;AAAA,IAAqB;AAAA,IAAoB;AAAA,IAAsB;AAAA,IAC/D;AAAA,EACF;AACA,MAAI,IAAI,IAAK,QAAO,KAAK,IAAI,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,QAAQ,IAAI,CAAC;AAC7E,OAAK;AACL,MAAI,IAAI,MAAM,CAAC;AACf,WAAS,IAAI,GAAG,IAAI,GAAG,IAAK,MAAK,MAAM,CAAC,KAAM,IAAI;AAClD,QAAM,IAAI,IAAI;AACd,SAAO,MAAM,KAAK,IAAI,IAAI,KAAK,EAAE,KAAK,IAAI,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC;AAC/E;AAEA,SAAS,UAAU,GAAmB;AACpC,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,KAAK;AACX,QAAM,IAAI;AACV,QAAM,OAAO,IAAI,IAAI,KAAK;AAC1B,QAAM,OAAO,KAAK,IAAI,CAAC;AACvB,QAAM,IAAI,KAAK,IAAI,IAAI;AACvB,QAAM,IAAI,QAAQ,KAAK,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,MAAM,IAAI,KAAK,IAAK,CAAC,OAAO,OAAQ,CAAC;AAC9F,SAAO,OAAO,IAAI,OAAO;AAC3B;","names":[]}
|
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
import {
|
|
2
|
+
canonicalize,
|
|
3
|
+
hashJson
|
|
4
|
+
} from "./chunk-4F5DQN55.js";
|
|
5
|
+
import {
|
|
6
|
+
ReplayError
|
|
7
|
+
} from "./chunk-NG236HPC.js";
|
|
8
|
+
|
|
9
|
+
// src/trace/store.ts
|
|
10
|
+
/**
 * Trace store that keeps every run, span, event, artifact and budget entry
 * in process memory.
 *
 * Records are shallow-copied on the way in and on the way out, so callers
 * never hold a reference to the store's own objects (nested values are still
 * shared, because the copies are shallow).
 */
var InMemoryTraceStore = class {
  /** Runs keyed by `runId`. */
  runs = /* @__PURE__ */ new Map();
  allSpans = [];
  allEvents = [];
  allArtifacts = [];
  allBudget = [];

  /**
   * Register a new run.
   * @throws {Error} when a run with the same `runId` is already stored.
   */
  async appendRun(run) {
    if (this.runs.has(run.runId)) throw new Error(`run ${run.runId} already exists`);
    this.runs.set(run.runId, { ...run });
  }

  /**
   * Shallow-merge `patch` into an existing run.
   * @throws {Error} when no run with `runId` is stored.
   */
  async updateRun(runId, patch) {
    const existing = this.runs.get(runId);
    if (!existing) throw new Error(`run ${runId} not found`);
    this.runs.set(runId, { ...existing, ...patch });
  }

  /** Store a copy of `span`. Span ids are not checked for uniqueness. */
  async appendSpan(span) {
    this.allSpans.push({ ...span });
  }

  /**
   * Shallow-merge `patch` into the first stored span whose `spanId` matches.
   * @throws {Error} when no such span is stored.
   */
  async updateSpan(spanId, patch) {
    const idx = this.allSpans.findIndex((s) => s.spanId === spanId);
    if (idx < 0) throw new Error(`span ${spanId} not found`);
    this.allSpans[idx] = { ...this.allSpans[idx], ...patch };
  }

  /** Store a copy of `event`. */
  async appendEvent(event) {
    this.allEvents.push({ ...event });
  }

  /** Store a copy of `artifact`. */
  async appendArtifact(artifact) {
    this.allArtifacts.push({ ...artifact });
  }

  /** Store a copy of the budget `entry`. */
  async appendBudgetEntry(entry) {
    this.allBudget.push({ ...entry });
  }

  /** @returns a copy of the run, or `undefined` when it is unknown. */
  async getRun(runId) {
    const r = this.runs.get(runId);
    return r ? { ...r } : void 0;
  }

  /**
   * @returns copies of every run accepted by `matchesRun(run, filter)`.
   * Copies are returned (as in `getRun`, `spans` and `events`) so that a
   * caller mutating a listed run cannot alter the stored record.
   */
  async listRuns(filter = {}) {
    return [...this.runs.values()]
      .filter((r) => matchesRun(r, filter))
      .map((r) => ({ ...r }));
  }

  /** @returns copies of every span accepted by `matchesSpan(span, filter)`. */
  async spans(filter = {}) {
    return this.allSpans.filter((s) => matchesSpan(s, filter)).map((s) => ({ ...s }));
  }

  /** @returns copies of every event accepted by `matchesEvent(event, filter)`. */
  async events(filter = {}) {
    return this.allEvents.filter((e) => matchesEvent(e, filter)).map((e) => ({ ...e }));
  }

  /** @returns copies of the budget entries recorded for `runId`. */
  async budget(runId) {
    return this.allBudget.filter((b) => b.runId === runId).map((b) => ({ ...b }));
  }

  /** @returns copies of the artifacts recorded for `runId`. */
  async artifacts(runId) {
    return this.allArtifacts.filter((a) => a.runId === runId).map((a) => ({ ...a }));
  }
};
|
|
62
|
+
/**
 * Decide whether run `r` satisfies the run filter `f`.
 *
 * Every criterion present on the filter must hold; absent (falsy) criteria
 * are ignored. `since` / `until` bound `r.startedAt` inclusively and are
 * applied whenever they are not `undefined`.
 *
 * @returns {boolean} true when the run passes every requested criterion.
 */
function matchesRun(r, f) {
  // Criteria that are plain equality checks between same-named properties.
  const equalityFields = [
    "scenarioId",
    "variantId",
    "status",
    "parentRunId",
    "projectId",
    "chatId",
    "layer"
  ];
  for (const field of equalityFields) {
    if (f[field] && r[field] !== f[field]) {
      return false;
    }
  }
  if (f.since !== void 0 && r.startedAt < f.since) {
    return false;
  }
  if (f.until !== void 0 && r.startedAt > f.until) {
    return false;
  }
  // A tag criterion requires the run to carry exactly that key/value pair.
  if (f.tag && r.tags?.[f.tag.key] !== f.tag.value) {
    return false;
  }
  return true;
}
|
|
75
|
+
/**
 * Decide whether span `s` satisfies the span filter `f`.
 *
 * Falsy criteria are ignored. `toolName` only matches spans of kind "tool"
 * and `judgeId` only matches spans of kind "judge". `since` / `until` bound
 * `s.startedAt` inclusively.
 *
 * @returns {boolean} true when the span passes every requested criterion.
 */
function matchesSpan(s, f) {
  const sameWhenRequested = (wanted, actual) => !wanted || actual === wanted;
  return (
    sameWhenRequested(f.runId, s.runId) &&
    sameWhenRequested(f.parentSpanId, s.parentSpanId) &&
    sameWhenRequested(f.kind, s.kind) &&
    sameWhenRequested(f.name, s.name) &&
    (!f.toolName || (s.kind === "tool" && s.toolName === f.toolName)) &&
    (!f.judgeId || (s.kind === "judge" && s.judgeId === f.judgeId)) &&
    (f.since === void 0 || !(s.startedAt < f.since)) &&
    (f.until === void 0 || !(s.startedAt > f.until))
  );
}
|
|
86
|
+
/**
 * Decide whether event `e` satisfies the event filter `f`.
 *
 * Falsy `runId` / `spanId` / `kind` criteria are ignored; `since` / `until`
 * bound `e.timestamp` inclusively whenever they are not `undefined`.
 *
 * @returns {boolean} true when no requested criterion is violated.
 */
function matchesEvent(e, f) {
  const violated =
    (f.runId && e.runId !== f.runId) ||
    (f.spanId && e.spanId !== f.spanId) ||
    (f.kind && e.kind !== f.kind) ||
    (f.since !== void 0 && e.timestamp < f.since) ||
    (f.until !== void 0 && e.timestamp > f.until);
  return !violated;
}
|
|
94
|
+
/**
 * Trace store persisted as newline-delimited JSON files in one directory.
 *
 * Each record kind is appended to `<dir>/<name>.ndjson` (`runs`, `spans`,
 * `events`, `artifacts`, `budget`). When the active file reaches `maxBytes`
 * it is renamed to `<name>.<Date.now()>.ndjson` and a fresh file is started.
 * Updates are written as extra lines carrying an `_update: true` marker.
 * Queries are answered from an `InMemoryTraceStore` built by replaying the
 * files on first read and kept in sync by later writes.
 */
var FileSystemTraceStore = class {
  /** Directory that holds the NDJSON files. */
  dir;
  /** Size in bytes at which the active file is rolled (default 32 MiB). */
  maxBytes;
  /** Lazy in-memory index for queries — populated on first read. */
  index;
  loaded = false;

  /** @param options `{ dir, maxBytes? }` */
  constructor(options) {
    this.dir = options.dir;
    this.maxBytes = options.maxBytes ?? 32 * 1024 * 1024;
  }

  /** Create the storage directory (and parents) when missing. */
  async ensureDir() {
    const fs = await import("fs/promises");
    await fs.mkdir(this.dir, { recursive: true });
  }

  /**
   * Append one record as a JSON line to the active file for `name`, rolling
   * the file first when it has reached `maxBytes`. Non-update records are
   * also mirrored into the in-memory index once it has been loaded.
   */
  async append(name, record) {
    await this.ensureDir();
    const fs = await import("fs/promises");
    const path = await import("path");
    const active = path.join(this.dir, `${name}.ndjson`);
    try {
      const stat = await fs.stat(active);
      if (stat.size >= this.maxBytes) {
        const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`);
        await fs.rename(active, rolled);
      }
    } catch {
      // Rotation is best-effort: the active file may simply not exist yet,
      // and a failed stat/rename must not prevent the record being written.
    }
    await fs.appendFile(active, `${JSON.stringify(record)}\n`, "utf8");
    if (this.index && !record?._update) {
      // Awaited (not fire-and-forget) so the index is current when the write
      // resolves and no rejection is left unhandled.
      await this.insertInto(name, record);
    }
  }

  /** Mirror a freshly written record into the in-memory index, if loaded. */
  async insertInto(name, record) {
    if (!this.index) return;
    await this.applyRecord(this.index, name, record);
  }

  /**
   * Apply one persisted record to `store`. Used both when replaying files
   * and when mirroring live writes, so the index always equals what a fresh
   * replay of the files would produce.
   *
   * The `_update` marker is bookkeeping for the file format only and is
   * stripped before the record reaches the store.
   */
  async applyRecord(store, name, record) {
    if (record === null || typeof record !== "object") return;
    const { _update: isUpdate, ...data } = record;
    switch (name) {
      case "runs":
        // A second line for the same run (an update, or a duplicate append)
        // is merged into the existing run.
        try {
          await store.appendRun(data);
        } catch {
          await store.updateRun(data.runId, data);
        }
        break;
      case "spans":
        if (isUpdate) {
          try {
            await store.updateSpan(data.spanId, data);
          } catch {
            // Update for a span that was never recorded: keep it as a span.
            await store.appendSpan(data);
          }
        } else {
          await store.appendSpan(data);
        }
        break;
      case "events":
        await store.appendEvent(data);
        break;
      case "artifacts":
        await store.appendArtifact(data);
        break;
      case "budget":
        await store.appendBudgetEntry(data);
        break;
    }
  }

  /**
   * Build (once) and return the in-memory index by replaying every
   * `*.ndjson` file in the directory.
   *
   * A missing directory yields an empty index. Unreadable files and
   * malformed lines (e.g. a line truncated by a crash mid-write) are skipped
   * individually so they cannot hide the records that follow them.
   */
  async load() {
    if (this.loaded && this.index) return this.index;
    const fs = await import("fs/promises");
    const path = await import("path");
    const store = new InMemoryTraceStore();
    let entries = [];
    try {
      entries = await fs.readdir(this.dir);
    } catch {
      // Nothing has been written yet.
    }
    // readdir gives no ordering guarantee. Sorting puts rolled files
    // (`name.<timestamp>.ndjson`, digits sort before "n") ahead of the active
    // `name.ndjson` and in timestamp order, so records replay in the order
    // they were written and updates follow the records they patch.
    const files = entries.filter((file) => file.endsWith(".ndjson")).sort();
    for (const file of files) {
      let content;
      try {
        content = await fs.readFile(path.join(this.dir, file), "utf8");
      } catch {
        continue;
      }
      const base = file.split(".")[0];
      for (const line of content.split("\n")) {
        if (!line.trim()) continue;
        let record;
        try {
          record = JSON.parse(line);
        } catch {
          continue;
        }
        await this.applyRecord(store, base, record);
      }
    }
    this.index = store;
    this.loaded = true;
    return store;
  }

  async appendRun(run) {
    await this.append("runs", run);
  }

  /** Persist a run patch and apply it to the index when loaded. */
  async updateRun(runId, patch) {
    await this.append("runs", { runId, ...patch, _update: true });
    if (this.index) await this.index.updateRun(runId, patch);
  }

  async appendSpan(span) {
    await this.append("spans", span);
  }

  /** Persist a span patch and apply it to the index when loaded. */
  async updateSpan(spanId, patch) {
    await this.append("spans", { spanId, ...patch, _update: true });
    if (this.index) await this.index.updateSpan(spanId, patch);
  }

  async appendEvent(event) {
    await this.append("events", event);
  }

  async appendArtifact(artifact) {
    await this.append("artifacts", artifact);
  }

  async appendBudgetEntry(entry) {
    await this.append("budget", entry);
  }

  // Read methods delegate to the lazily loaded in-memory index.
  async getRun(runId) {
    return (await this.load()).getRun(runId);
  }

  async listRuns(filter) {
    return (await this.load()).listRuns(filter);
  }

  async spans(filter) {
    return (await this.load()).spans(filter);
  }

  async events(filter) {
    return (await this.load()).events(filter);
  }

  async budget(runId) {
    return (await this.load()).budget(runId);
  }

  async artifacts(runId) {
    return (await this.load()).artifacts(runId);
  }
};
|
|
235
|
+
|
|
236
|
+
// src/trace/otel.ts
|
|
237
|
+
// Instrumentation scope object placed in the `scope` field of exported OTLP span batches.
// NOTE(review): the version is a hard-coded literal — confirm whether it is meant to track the package version.
var OTEL_AGENT_EVAL_SCOPE = { name: "@tangle-network/agent-eval", version: "0.3.0" };
|
|
238
|
+
/**
 * Export one run from `store` as an OTLP-shaped `{ resourceSpans: [...] }`
 * object.
 *
 * The run's spans and events are read from the store; events that carry a
 * `spanId` are attached to their span, events without one are left out.
 * Resource attributes describe the run, with `resourceAttrs` merged last so
 * caller-supplied keys win.
 *
 * @param store trace store offering `getRun`, `spans` and `events`
 * @param runId id of the run to export
 * @param resourceAttrs extra resource attributes
 * @throws {Error} when the run does not exist
 */
async function exportRunAsOtlp(store, runId, resourceAttrs = {}) {
  const run = await store.getRun(runId);
  if (!run) {
    throw new Error(`run ${runId} not found`);
  }
  const runSpans = await store.spans({ runId });
  const runEvents = await store.events({ runId });

  // Group span-bound events by the span they belong to.
  const grouped = /* @__PURE__ */ new Map();
  runEvents
    .filter((evt) => evt.spanId)
    .forEach((evt) => {
      if (!grouped.has(evt.spanId)) grouped.set(evt.spanId, []);
      grouped.get(evt.spanId).push(evt);
    });

  const traceId = runToTraceId(run);
  const convertedSpans = [];
  for (const s of runSpans) {
    convertedSpans.push(spanToOtlp(s, traceId, grouped.get(s.spanId) ?? []));
  }

  const resourceRecord = {
    "service.name": "agent-eval",
    "run.id": run.runId,
    "run.scenario_id": run.scenarioId,
    "run.variant_id": run.variantId ?? "",
    "run.dataset_version": run.datasetVersion ?? "",
    "run.code_sha": run.codeSha ?? "",
    "run.model_fingerprint": run.modelFingerprint ?? "",
    ...resourceAttrs
  };
  const resourceSpan = {
    resource: { attributes: toAttributes(resourceRecord) },
    scopeSpans: [{ scope: OTEL_AGENT_EVAL_SCOPE, spans: convertedSpans }]
  };
  return { resourceSpans: [resourceSpan] };
}
|
|
274
|
+
/**
 * Convert one stored span, plus the events attached to it, into an
 * OTLP-shaped span object.
 *
 * A span without `endedAt` is exported with its start time as end time.
 * Spans whose status is "error" get status code 2 with the span's `error`
 * as message; every other status is exported as code 1.
 *
 * @param span stored span
 * @param traceId trace id shared by all spans of the run
 * @param events events whose `spanId` points at this span
 */
function spanToOtlp(span, traceId, events) {
  const finishedAt = span.endedAt ?? span.startedAt;
  const parent = span.parentSpanId ? padSpanId(span.parentSpanId) : void 0;

  const otlpEvents = events.map((evt) => {
    return {
      timeUnixNano: msToNs(evt.timestamp),
      name: evt.kind,
      attributes: toAttributes(flattenPayload(evt.payload))
    };
  });

  let status;
  if (span.status === "error") {
    status = { code: 2, message: span.error };
  } else {
    status = { code: 1 };
  }

  return {
    traceId,
    spanId: padSpanId(span.spanId),
    parentSpanId: parent,
    name: span.name,
    // SPAN_KIND_INTERNAL
    kind: 1,
    startTimeUnixNano: msToNs(span.startedAt),
    endTimeUnixNano: msToNs(finishedAt),
    attributes: toAttributes(flattenSpanAttributes(span)),
    events: otlpEvents,
    status
  };
}
|
|
294
|
+
/**
 * Build the flat attribute record exported for a span.
 *
 * Always contains `span.kind`; further keys depend on the span kind (llm,
 * tool, retrieval, judge, sandbox). Finally, entries of `span.attributes`
 * whose value is a string, number or boolean are copied over and may
 * overwrite the keys set before them.
 *
 * @returns {Object} flat record of attribute name to value
 */
function flattenSpanAttributes(span) {
  const attrs = { "span.kind": span.kind };
  const setIfDefined = (key, value) => {
    if (value !== void 0) attrs[key] = value;
  };

  switch (span.kind) {
    case "llm":
      attrs["llm.model"] = span.model;
      setIfDefined("llm.input_tokens", span.inputTokens);
      setIfDefined("llm.output_tokens", span.outputTokens);
      setIfDefined("llm.cost_usd", span.costUsd);
      if (span.finishReason) attrs["llm.finish_reason"] = span.finishReason;
      break;
    case "tool":
      attrs["tool.name"] = span.toolName;
      setIfDefined("tool.latency_ms", span.latencyMs);
      break;
    case "retrieval":
      attrs["retrieval.query"] = span.query;
      attrs["retrieval.hits"] = span.hits.length;
      break;
    case "judge":
      attrs["judge.id"] = span.judgeId;
      attrs["judge.dimension"] = span.dimension;
      attrs["judge.score"] = span.score;
      attrs["judge.target_span_id"] = span.targetSpanId;
      break;
    case "sandbox":
      if (span.image) attrs["sandbox.image"] = span.image;
      setIfDefined("sandbox.exit_code", span.exitCode);
      setIfDefined("sandbox.tests_passed", span.testsPassed);
      setIfDefined("sandbox.tests_total", span.testsTotal);
      break;
  }

  if (!span.attributes) return attrs;

  // Only primitive custom attributes are exported; nested values are dropped.
  const primitiveTypes = new Set(["string", "number", "boolean"]);
  Object.entries(span.attributes).forEach(([key, value]) => {
    if (primitiveTypes.has(typeof value)) attrs[key] = value;
  });
  return attrs;
}
|
|
328
|
+
/**
 * Flatten an event payload into a record of primitive attribute values.
 *
 * Strings, numbers and booleans are kept as they are; every other value is
 * JSON-encoded. A missing payload (`null` / `undefined`) yields an empty
 * record instead of throwing, and a value that JSON cannot encode (a BigInt
 * or a cyclic structure) is replaced by a readable placeholder so one bad
 * value cannot abort the whole export.
 *
 * @param payload event payload object, may be absent
 * @returns {Object} flat record of attribute name to value
 */
function flattenPayload(payload) {
  const out = {};
  if (payload === null || payload === void 0) return out;
  for (const [k, v] of Object.entries(payload)) {
    if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
      out[k] = v;
      continue;
    }
    try {
      out[k] = JSON.stringify(v);
    } catch {
      // JSON.stringify throws on BigInt values and on circular references.
      out[k] = typeof v === "bigint" ? v.toString() : "[unserializable]";
    }
  }
  return out;
}
|
|
336
|
+
/**
 * Convert a flat record into a list of OTLP `{ key, value }` attributes.
 *
 * Numbers become `intValue` (as a decimal string) when they are safe
 * integers and `doubleValue` otherwise; booleans become `boolValue`;
 * everything else is passed through as `stringValue`.
 *
 * Only safe integers are emitted as `intValue`: for large magnitudes
 * `Number.prototype.toString` switches to exponent notation (for example
 * "1e+21"), which is not a valid decimal integer string.
 *
 * @param record flat record of attribute name to value
 * @returns {Array} one `{ key, value }` entry per own enumerable property
 */
function toAttributes(record) {
  const toAnyValue = (value) => {
    if (typeof value === "number") {
      return Number.isSafeInteger(value)
        ? { intValue: value.toString() }
        : { doubleValue: value };
    }
    if (typeof value === "boolean") return { boolValue: value };
    return { stringValue: value };
  };
  return Object.entries(record).map(([key, value]) => ({ key, value: toAnyValue(value) }));
}
|
|
342
|
+
/**
 * Convert a millisecond timestamp into a nanosecond count, returned as a
 * decimal string. Fractions of a millisecond are discarded (rounded down)
 * before the conversion.
 *
 * @param {number} ms timestamp in milliseconds
 * @returns {string} the same instant in nanoseconds
 */
function msToNs(ms) {
  const NS_PER_MS = 1000000n;
  const wholeMs = BigInt(Math.floor(ms));
  const nanos = wholeMs * NS_PER_MS;
  return nanos.toString();
}
|
|
345
|
+
/**
 * Normalise a span id to exactly 16 characters: dashes are removed, the
 * result is cut to 16 characters and right-padded with "0" when shorter.
 *
 * @param {string} id span id
 * @returns {string} 16-character id
 */
function padSpanId(id) {
  const WIDTH = 16;
  const withoutDashes = id.split("-").join("");
  return (withoutDashes + "0".repeat(WIDTH)).slice(0, WIDTH);
}
|
|
349
|
+
/**
 * Derive the 32-character trace id for a run from its `runId`: dashes are
 * removed, the result is cut to 32 characters and right-padded with "0"
 * when shorter.
 *
 * @param run run record carrying a string `runId`
 * @returns {string} 32-character trace id
 */
function runToTraceId(run) {
  const WIDTH = 32;
  const withoutDashes = run.runId.split("-").join("");
  return (withoutDashes + "0".repeat(WIDTH)).slice(0, WIDTH);
}
|
|
353
|
+
|
|
354
|
+
// src/trace/redact.ts
|
|
355
|
+
// Built-in redaction rules. Each rule has an `id` and a global regex
// `pattern`. Rules are applied in array order by redactString, so earlier
// rules see the text before later ones do.
var DEFAULT_REDACTION_RULES = [
  // Email addresses (case-insensitive).
  { id: "email", pattern: /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi },
  // Digit groups shaped like 3-2-4 (US social security number layout).
  { id: "ssn", pattern: /\b\d{3}-\d{2}-\d{4}\b/g },
  // Any run of 13-16 digits, optionally separated by spaces or hyphens.
  // NOTE(review): there is no checksum, so other 13-16 digit numbers
  // (e.g. millisecond epoch timestamps) match too - confirm this is intended.
  { id: "credit-card", pattern: /\b(?:\d[ -]*?){13,16}\b/g },
  // 10-digit phone numbers with optional +1 prefix, parentheses and separators.
  { id: "phone-us", pattern: /\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g },
  // Dotted quads of 1-3 digit groups (octet range is not validated).
  { id: "ipv4", pattern: /\b(?:\d{1,3}\.){3}\d{1,3}\b/g },
  // "AKIA" followed by 16 upper-case alphanumerics.
  { id: "aws-access-key", pattern: /\bAKIA[0-9A-Z]{16}\b/g },
  // "Bearer <token>" with a token of at least 10 characters (case-insensitive).
  { id: "bearer", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{10,}/gi },
  // "sk-" prefixed keys with at least 10 following characters.
  { id: "sk-key", pattern: /\bsk-[A-Za-z0-9_-]{10,}\b/g },
  {
    // PEM private key blocks, from the BEGIN line through the END line.
    id: "private-key-block",
    pattern: /-----BEGIN (?:RSA |EC |OPENSSH |DSA )?PRIVATE KEY-----[\s\S]*?-----END[^-]*-----/g
  }
];
// Version string exported alongside the rule set.
var REDACTION_VERSION = "1.0.0";
|
|
370
|
+
/**
 * Apply each redaction rule, in order, to `input` and report what was replaced.
 *
 * Every match of `rule.pattern` is replaced with `rule.replacement`, or with
 * `[redacted:<rule.id>]` when the rule has no replacement. The replacement is
 * inserted literally (no `$1`-style substitution).
 *
 * @param {string} input - Text to redact.
 * @param {Iterable<{ id: string, pattern: RegExp, replacement?: string }>} [rules]
 *   Rules to apply; defaults to DEFAULT_REDACTION_RULES.
 * @returns {{ output: string, report: { redactionCount: number, byRule: Record<string, number> } }}
 *   The redacted text plus total and per-rule hit counts (rules with zero
 *   hits are absent from `byRule`).
 */
function redactString(input, rules = DEFAULT_REDACTION_RULES) {
  const perRule = {};
  let total = 0;
  let text = input;
  for (const rule of rules) {
    let matched = 0;
    const substitute = () => {
      matched += 1;
      return rule.replacement ?? `[redacted:${rule.id}]`;
    };
    text = text.replace(rule.pattern, substitute);
    if (matched === 0) continue;
    perRule[rule.id] = matched;
    total += matched;
  }
  return { output: text, report: { redactionCount: total, byRule: perRule } };
}
|
|
387
|
+
/**
 * Recursively redact every string inside `value`, accumulating hit counts
 * into a single shared `report`.
 *
 *   - strings are passed through redactString;
 *   - arrays are mapped element by element into a new array;
 *   - Date instances are copied as Dates (they have no enumerable entries, so
 *     treating them as generic objects would collapse them to `{}`);
 *   - other non-null objects are rebuilt as plain objects from their own
 *     enumerable entries, each value redacted recursively;
 *   - everything else (numbers, booleans, null, undefined, ...) is returned
 *     unchanged.
 *
 * @param {*} value - Value to redact.
 * @param {Iterable<object>} [rules] - Redaction rules; defaults to
 *   DEFAULT_REDACTION_RULES.
 * @param {{ redactionCount: number, byRule: Record<string, number> }} [report]
 *   Accumulator, mutated in place and shared across the recursion.
 * @returns {{ value: *, report: object }} Redacted copy plus the accumulator.
 */
function redactValue(value, rules = DEFAULT_REDACTION_RULES, report = { redactionCount: 0, byRule: {} }) {
  if (typeof value === "string") {
    const { output, report: r } = redactString(value, rules);
    report.redactionCount += r.redactionCount;
    for (const [k, v] of Object.entries(r.byRule)) {
      report.byRule[k] = (report.byRule[k] ?? 0) + v;
    }
    return { value: output, report };
  }
  if (Array.isArray(value)) {
    return {
      value: value.map((v) => redactValue(v, rules, report).value),
      report
    };
  }
  if (value instanceof Date) {
    // Object.entries(date) is empty; copy the timestamp instead of losing it.
    return { value: new Date(value.getTime()), report };
  }
  if (value !== null && typeof value === "object") {
    const next = {};
    for (const [k, v] of Object.entries(value)) {
      next[k] = redactValue(v, rules, report).value;
    }
    return { value: next, report };
  }
  return { value, report };
}
|
|
411
|
+
|
|
412
|
+
// src/replay.ts
|
|
413
|
+
/**
 * Error raised when a replayed request has no cached response.
 * Carries the request `url` and the computed `requestKey` for diagnostics.
 */
var ReplayCacheMissError = class extends ReplayError {
  url;
  requestKey;
  /**
   * @param {string} url - URL of the request that missed.
   * @param {string} requestKey2 - Cache key computed for the request.
   * @param {string} [message] - Overrides the default message when provided.
   */
  constructor(url, requestKey2, message) {
    const text = message ?? `replay cache miss for ${url} (key=${requestKey2})`;
    super(text);
    Object.assign(this, { url, requestKey: requestKey2 });
  }
};
|
|
422
|
+
/**
 * In-memory index of recorded (request, response) pairs, keyed by a hash of
 * the request body, together with per-provider / per-model counters and a
 * count of requests that never received a response.
 */
var ReplayCache = class _ReplayCache {
  byKey = /* @__PURE__ */ new Map();
  orphans = 0;
  byProvider = {};
  byModel = {};
  /**
   * Build a cache from the events returned by `sink.list(filter)`.
   * Throws ReplayError when the sink has no `list` method.
   */
  static async fromSink(sink, filter = {}) {
    if (sink.list) {
      const recorded = await sink.list(filter);
      return _ReplayCache.fromEvents(recorded);
    }
    throw new ReplayError("ReplayCache.fromSink: sink must implement list() to be replayable.");
  }
  /**
   * Build a cache from an in-memory event list.
   *
   * Events are paired by (runId, spanId, attemptIndex): the event whose
   * direction is "request" becomes the request, any other direction the
   * response. Pairs without a request are ignored; pairs without a response
   * are counted as orphans.
   */
  static async fromEvents(events) {
    const pairs = /* @__PURE__ */ new Map();
    for (const event of events) {
      const groupId = `${event.runId ?? ""}::${event.spanId ?? ""}::${event.attemptIndex}`;
      let pair = pairs.get(groupId);
      if (!pair) {
        pair = {};
        pairs.set(groupId, pair);
      }
      const slot = event.direction === "request" ? "req" : "res";
      pair[slot] = event;
    }
    const cache = new _ReplayCache();
    for (const { req, res } of pairs.values()) {
      if (!req) continue;
      if (!res) {
        cache.orphans += 1;
        continue;
      }
      const key = await requestKey(req);
      cache.byKey.set(key, { request: req, response: res });
      const { provider, model } = req;
      cache.byProvider[provider] = (cache.byProvider[provider] ?? 0) + 1;
      cache.byModel[model] = (cache.byModel[model] ?? 0) + 1;
    }
    return cache;
  }
  /** Number of (request, response) pairs held in the cache. */
  size() {
    return this.byKey.size;
  }
  /** Snapshot of the counters; the returned maps are copies. */
  stats() {
    const { byKey, byProvider, byModel, orphans } = this;
    return {
      total: byKey.size,
      byProvider: { ...byProvider },
      byModel: { ...byModel },
      orphanRequests: orphans
    };
  }
  /** Yield every cached `(request, response)` pair in insertion order. */
  *entries() {
    yield* this.byKey.values();
  }
  /**
   * Look up the cached pair for a request body by hashing it with
   * keyFromBody. Resolves to `undefined` on a miss; the caller decides
   * whether to throw, fall back to the network, or skip.
   */
  async lookup(requestBody) {
    return this.byKey.get(await keyFromBody(requestBody));
  }
};
|
|
488
|
+
/**
 * Create a `fetch`-compatible function that answers chat-completions requests
 * from `cache` instead of the network.
 *
 * Behaviour:
 *   - URLs that do not end in `/chat/completions` (optionally followed by a
 *     query or fragment) are forwarded to the fallback fetch.
 *   - The JSON request body is read from `init.body` when it is a string, or
 *     from the `Request` object itself when `fetch(request)` is called without
 *     an `init` body. Bodies that are missing or not valid JSON count as a miss.
 *   - On a hit, `opts.onHit` is notified and a `Response` is built from the
 *     recorded status (default 200), headers (default JSON content type) and
 *     body.
 *   - On a miss, `opts.onMissNotify` is notified, then `opts.onMiss` decides:
 *     "throw" (default) raises ReplayCacheMissError, "fail-closed" returns a
 *     599 response, anything else forwards to the fallback fetch.
 *
 * @param {{ lookup(body: unknown): Promise<object | undefined> }} cache
 * @param {object} [opts]
 * @param {string} [opts.onMiss] - "throw" | "fail-closed" | fallback mode.
 * @param {Function} [opts.fallbackFetch] - Defaults to the global fetch.
 * @param {Function} [opts.onHit] - Called with `{ url, provider, model }`.
 * @param {Function} [opts.onMissNotify] - Called with `{ url, requestBody }`.
 * @returns {Function} A function with the `fetch(input, init)` signature.
 * @throws {ReplayError} When a fallback is required but none is available.
 * @throws {ReplayCacheMissError} On a miss when `onMiss` is "throw".
 */
function createReplayFetch(cache, opts = {}) {
  const onMiss = opts.onMiss ?? "throw";
  const fallback = opts.fallbackFetch ?? globalThis.fetch?.bind(globalThis);
  // Locate the request body text: prefer init.body; otherwise read it from a
  // Request input via clone() so the original stays consumable by a fallback.
  const readBodyText = async (input, init) => {
    if (typeof init?.body === "string") return init.body;
    const isRequest = typeof Request !== "undefined" && input instanceof Request;
    if (init?.body == null && isRequest && !input.bodyUsed) {
      return input.clone().text();
    }
    return void 0;
  };
  return (async (input, init) => {
    const url = typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
    if (!/\/chat\/completions(?:[?#].*)?$/.test(url)) {
      if (!fallback)
        throw new ReplayError(
          `replay fetch: non-completions URL ${url} but no fallbackFetch configured`
        );
      return fallback(input, init);
    }
    let bodyParsed;
    const requestText = await readBodyText(input, init);
    if (requestText) {
      try {
        bodyParsed = JSON.parse(requestText);
      } catch {
        // Deliberate best-effort: a non-JSON body is handled as a cache miss.
      }
    }
    const hit = bodyParsed === void 0 ? void 0 : await cache.lookup(bodyParsed);
    if (hit) {
      opts.onHit?.({ url, provider: hit.request.provider, model: hit.request.model });
      const status = hit.response.statusCode ?? 200;
      const headers = new Headers(
        Object.entries(hit.response.responseHeaders ?? { "Content-Type": "application/json" })
      );
      const bodyText = typeof hit.response.responseBody === "string" ? hit.response.responseBody : JSON.stringify(hit.response.responseBody ?? {});
      return new Response(bodyText, { status, headers });
    }
    opts.onMissNotify?.({ url, requestBody: bodyParsed });
    if (onMiss === "throw") {
      const key = bodyParsed === void 0 ? "<unparseable>" : await keyFromBody(bodyParsed);
      throw new ReplayCacheMissError(url, key);
    }
    if (onMiss === "fail-closed") {
      return new Response(JSON.stringify({ error: "replay_cache_miss" }), { status: 599 });
    }
    if (!fallback)
      throw new ReplayError("replay fetch: onMiss=fallback but no fallbackFetch configured");
    return fallback(input, init);
  });
}
|
|
530
|
+
/**
 * Asynchronously yield every complete (request, response) pair recorded in a
 * sink, in the cache's insertion order.
 *
 * @param {{ list?: Function }} sink - Event sink; must implement `list()`.
 * @param {object} [filter] - Passed through to `sink.list`.
 * @throws {ReplayError} When the sink does not implement `list()`.
 */
async function* iterateRawCalls(sink, filter = {}) {
  if (!sink.list) {
    throw new ReplayError("iterateRawCalls: sink must implement list().");
  }
  const recorded = await sink.list(filter);
  const pairs = await ReplayCache.fromEvents(recorded);
  yield* pairs.entries();
}
|
|
538
|
+
/**
 * Compute the cache key for a recorded request event by hashing its
 * `requestBody` with keyFromBody.
 *
 * @param {{ requestBody: unknown }} event - Recorded request event.
 * @returns {Promise<string>} Whatever keyFromBody resolves to for the body.
 */
async function requestKey(event) {
  const { requestBody } = event;
  return keyFromBody(requestBody);
}
|
|
541
|
+
/**
 * Derive a cache key from a request body.
 *
 * Non-object bodies (including null/undefined) are hashed as
 * `{ raw: String(body) }`. Object bodies are reduced to the fields that
 * identify a request - model, messages, temperature, max_tokens,
 * max_completion_tokens and response_format, each defaulting to null when
 * absent - then canonicalized and hashed. All other body fields are ignored.
 *
 * @param {unknown} body - Parsed request body.
 * @returns {Promise<string>} Result of hashJson for the reduced body.
 */
async function keyFromBody(body) {
  const isObjectBody = body != null && typeof body === "object";
  if (!isObjectBody) return hashJson({ raw: String(body) });
  const KEY_FIELDS = [
    "model",
    "messages",
    "temperature",
    "max_tokens",
    "max_completion_tokens",
    "response_format"
  ];
  const identity = {};
  for (const field of KEY_FIELDS) {
    identity[field] = body[field] ?? null;
  }
  return hashJson(canonicalize(identity));
}
|
|
554
|
+
|
|
555
|
+
export {
|
|
556
|
+
InMemoryTraceStore,
|
|
557
|
+
FileSystemTraceStore,
|
|
558
|
+
OTEL_AGENT_EVAL_SCOPE,
|
|
559
|
+
exportRunAsOtlp,
|
|
560
|
+
DEFAULT_REDACTION_RULES,
|
|
561
|
+
REDACTION_VERSION,
|
|
562
|
+
redactString,
|
|
563
|
+
redactValue,
|
|
564
|
+
ReplayCacheMissError,
|
|
565
|
+
ReplayCache,
|
|
566
|
+
createReplayFetch,
|
|
567
|
+
iterateRawCalls
|
|
568
|
+
};
|
|
569
|
+
//# sourceMappingURL=chunk-K2TPS5LB.js.map
|