@tangle-network/agent-eval 0.76.0 → 0.79.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/http.d.ts +2 -2
- package/dist/adapters/langchain.d.ts +2 -2
- package/dist/adapters/otel.d.ts +4 -4
- package/dist/{agent-profile-DYRboYWu.d.ts → agent-profile-aSEaJ9Pl.d.ts} +1 -1
- package/dist/analyst/index.d.ts +42 -8
- package/dist/analyst/index.js +32 -2
- package/dist/analyst/index.js.map +1 -1
- package/dist/authenticity/index.d.ts +161 -0
- package/dist/authenticity/index.js +215 -0
- package/dist/authenticity/index.js.map +1 -0
- package/dist/benchmarks/index.d.ts +2 -2
- package/dist/campaign/index.d.ts +11 -11
- package/dist/campaign/index.js +4 -4
- package/dist/{chunk-7W4SM7FD.js → chunk-5LVWPNS5.js} +91 -91
- package/dist/chunk-5LVWPNS5.js.map +1 -0
- package/dist/{chunk-WYIHD6EB.js → chunk-CF67I6QY.js} +1 -1
- package/dist/chunk-CF67I6QY.js.map +1 -0
- package/dist/{chunk-XPILG2CA.js → chunk-GXHLRXDI.js} +2 -2
- package/dist/{chunk-F3SRAAZO.js → chunk-KWRRMR3J.js} +15 -1
- package/dist/chunk-KWRRMR3J.js.map +1 -0
- package/dist/{chunk-JYE3WOTE.js → chunk-RPLZ4OIB.js} +10 -1
- package/dist/chunk-RPLZ4OIB.js.map +1 -0
- package/dist/{chunk-6EKXFFGQ.js → chunk-RTWFUK6A.js} +2 -2
- package/dist/{chunk-XGNCBAVZ.js → chunk-XQL22JDG.js} +2 -2
- package/dist/{chunk-GJJNJVIR.js → chunk-XXNIODOM.js} +2 -2
- package/dist/contract/index.d.ts +12 -12
- package/dist/contract/index.js +2 -2
- package/dist/{control-BgA6BYTm.d.ts → control-CehLtoET.d.ts} +1 -1
- package/dist/control.d.ts +2 -2
- package/dist/control.js +2 -2
- package/dist/hosted/index.d.ts +4 -4
- package/dist/{index-DsnOpCO6.d.ts → index-B1RKber3.d.ts} +1 -1
- package/dist/index.d.ts +126 -25
- package/dist/index.js +32 -7
- package/dist/index.js.map +1 -1
- package/dist/{insight-report-Df3lxYXM.d.ts → insight-report-dlpEzQDi.d.ts} +1 -1
- package/dist/{kind-factory-DW9XWPvM.d.ts → kind-factory-DqV2t1Xk.d.ts} +1 -1
- package/dist/meta-eval/index.d.ts +2 -2
- package/dist/openapi.json +1 -1
- package/dist/{provenance-B-TFszPW.d.ts → provenance-CEAJI9rm.d.ts} +3 -3
- package/dist/{registry-DuVYiTvw.d.ts → registry-BmEuU94S.d.ts} +2 -2
- package/dist/{release-report-CN8hJlhk.d.ts → release-report-CXXZlR8g.d.ts} +2 -2
- package/dist/reporting.d.ts +4 -4
- package/dist/{researcher-C_KJyIGg.d.ts → researcher-rInLj9De.d.ts} +2 -2
- package/dist/rl.d.ts +6 -6
- package/dist/rl.js +2 -2
- package/dist/{rubric-predictive-validity-D_4BSXGV.d.ts → rubric-predictive-validity-CWyWWLBg.d.ts} +1 -1
- package/dist/{run-improvement-loop-BqYH2vCR.d.ts → run-improvement-loop-Bgu4C59E.d.ts} +2 -4
- package/dist/{run-record-BgTFzO2r.d.ts → run-record-sItO5ftF.d.ts} +11 -0
- package/dist/{semantic-concept-judge-CV9Wlx4t.d.ts → semantic-concept-judge-Du4ZVyef.d.ts} +3 -3
- package/dist/{summary-report-ByiOUrHj.d.ts → summary-report-BTaXq1TS.d.ts} +1 -1
- package/dist/traces.d.ts +1 -1
- package/dist/traces.js +2 -2
- package/dist/{types-CRD68aH7.d.ts → types-DRvV0zRo.d.ts} +10 -1
- package/dist/{types-Bba0vl1V.d.ts → types-QHG0KnkF.d.ts} +11 -3
- package/dist/workflow/index.d.ts +4 -4
- package/dist/workflow/index.js +1 -1
- package/docs/auto-research-loop-end-to-end.md +1 -1
- package/docs/feature-guide.md +4 -4
- package/docs/multi-shot-optimization.md +61 -115
- package/docs/product-eval-adoption.md +1 -1
- package/docs/three-package-architecture.md +1 -1
- package/docs/trace-analysis.md +19 -0
- package/package.json +6 -1
- package/dist/chunk-7W4SM7FD.js.map +0 -1
- package/dist/chunk-F3SRAAZO.js.map +0 -1
- package/dist/chunk-JYE3WOTE.js.map +0 -1
- package/dist/chunk-WYIHD6EB.js.map +0 -1
- /package/dist/{chunk-XPILG2CA.js.map → chunk-GXHLRXDI.js.map} +0 -0
- /package/dist/{chunk-6EKXFFGQ.js.map → chunk-RTWFUK6A.js.map} +0 -0
- /package/dist/{chunk-XGNCBAVZ.js.map → chunk-XQL22JDG.js.map} +0 -0
- /package/dist/{chunk-GJJNJVIR.js.map → chunk-XXNIODOM.js.map} +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/analyst/ax-service.ts","../src/analyst/chat-client.ts","../src/trace-analyst/behavioral-metrics.ts","../src/analyst/behavioral-analyst.ts","../src/analyst/default-registry.ts","../src/concurrency.ts","../src/locked-jsonl-appender.ts","../src/analyst/findings-store.ts","../src/analyst/kinds/skill-usage.ts","../src/run-score.ts","../src/run-critic.ts","../src/semantic-concept-judge.ts"],"sourcesContent":["import type { AxAIService } from '@ax-llm/ax'\nimport { ai } from '@ax-llm/ax'\n\nexport interface CreateAnalystAiConfig {\n /** OpenAI-compatible API key forwarded as `Authorization: Bearer`.\n * cli-bridge ignores the value on loopback but Ax requires a non-empty string. */\n apiKey: string\n /** OpenAI-compatible base URL — e.g. `https://router.tangle.tools/v1` or a\n * cli-bridge loopback. */\n baseUrl: string\n /** Model id forwarded to the analyst actor + responder. */\n model: string\n /** Ax provider name. Defaults to the OpenAI-compatible client. */\n provider?: 'openai' | 'anthropic'\n}\n\n/**\n * Construct the `AxAIService` an analyst kind calls through\n * (`createTraceAnalystKind({ ai })`).\n *\n * Ax's `ai()` pins `config.model` to the OpenAI catalog enum, but every\n * OpenAI-compatible router an analyst points at (router.tangle.tools,\n * cli-bridge) accepts arbitrary model ids (claude-code/sonnet, openai/gpt-5.4,\n * …). Consumers were each re-rolling `ai({ name, apiKey, apiURL, config })`\n * behind an `as (a: any) => any` cast to dodge the enum; this is the one\n * canonical constructor so they don't have to — and don't take a direct\n * `@ax-llm/ax` dependency for it.\n */\nexport function createAnalystAi(config: CreateAnalystAiConfig): AxAIService {\n return ai({\n name: config.provider ?? 
'openai',\n apiKey: config.apiKey,\n apiURL: config.baseUrl,\n config: { model: config.model },\n })\n}\n","/**\n * ChatClient — the single LLM abstraction analysts call.\n *\n * agent-eval already ships an `LlmClient` (OpenAI-compatible, retry,\n * graceful JSON-schema degrade) and judges that talk to `TCloud`. Two\n * mixed patterns force every analyst author to pick a transport, which\n * couples analyst code to runtime concerns (cli-bridge vs router vs\n * sandbox-sdk) it shouldn't know about.\n *\n * `ChatClient` is one interface every analyst takes via `AnalystContext.chat`.\n * The operator decides at the registry boundary which transport binds\n * to it. Analyst code stays transport-agnostic; swapping production\n * (sandbox-sdk) for local dev (cli-bridge) or tests (mock) is a one-\n * line factory call.\n *\n * Designed to coexist: existing `LlmClient` callers and existing\n * `TCloud`-based judges keep working untouched. New analyst code uses\n * `ChatClient`. When old call sites migrate, they pick up budgeting,\n * cancellation, and unified telemetry for free.\n */\n\nimport {\n type LlmCallRequest,\n type LlmCallResult,\n LlmClient,\n type LlmClientOptions,\n} from '../llm-client'\n\n/**\n * Unified chat interface. Mirrors LlmCallRequest/Result so the OpenAI-\n * compatible mental model stays. Two methods: a one-shot `chat()` and\n * an `streamChat()` for future agentic loops (not yet exposed).\n */\nexport interface ChatClient {\n /** Display name of the bound transport — included in telemetry. */\n readonly transport: ChatTransport\n /** Default model when caller omits — operators bind this per environment. 
*/\n readonly defaultModel?: string\n\n chat(req: ChatRequest, opts?: ChatCallOpts): Promise<ChatResponse>\n}\n\nexport type ChatTransport =\n | 'router' // router.tangle.tools — production paid models\n | 'sandbox-sdk' // box.streamPrompt() — chat completion via sandbox SDK\n | 'cli-bridge' // local cli-bridge for dev / local-only runs\n | 'direct-provider' // direct OpenAI / Anthropic / etc. — bypass router\n | 'mock' // test-time injection\n\nexport interface ChatRequest extends Omit<LlmCallRequest, 'model'> {\n /** Optional — falls back to ChatClient.defaultModel. */\n model?: string\n}\n\nexport type ChatResponse = LlmCallResult\n\nexport interface ChatCallOpts {\n /** Cancel the in-flight request. */\n signal?: AbortSignal\n /** Hard USD ceiling for this single call (informational; the underlying transport may not enforce). */\n maxCostUsd?: number\n /** Correlation tag carried into request headers when the transport allows. */\n correlationId?: string\n}\n\n// ── Factory ─────────────────────────────────────────────────────────\n\nexport type CreateChatClientOpts =\n | RouterTransportOpts\n | CliBridgeTransportOpts\n | DirectProviderTransportOpts\n | SandboxSdkTransportOpts\n | MockTransportOpts\n\ninterface BaseTransportOpts {\n defaultModel?: string\n}\n\nexport interface RouterTransportOpts extends BaseTransportOpts {\n transport: 'router'\n baseUrl?: string\n apiKey: string\n}\n\nexport interface CliBridgeTransportOpts extends BaseTransportOpts {\n transport: 'cli-bridge'\n baseUrl?: string\n bearer?: string\n}\n\nexport interface DirectProviderTransportOpts extends BaseTransportOpts {\n transport: 'direct-provider'\n baseUrl: string\n apiKey: string\n}\n\n/**\n * Sandbox-SDK transport. Provided as a thin pass-through: the caller\n * supplies a callable that mimics LlmClient.chat() against an already-\n * configured Sandbox handle. 
We don't import the SDK here to keep\n * agent-eval dep-free of @tangle-network/sandbox.\n */\nexport interface SandboxSdkTransportOpts extends BaseTransportOpts {\n transport: 'sandbox-sdk'\n chat: (req: ChatRequest, opts?: ChatCallOpts) => Promise<ChatResponse>\n}\n\n/**\n * Mock transport for tests. The handler receives the request and returns\n * whatever the test wants. No retries, no JSON-schema degrade.\n */\nexport interface MockTransportOpts extends BaseTransportOpts {\n transport: 'mock'\n handler: (req: ChatRequest, opts?: ChatCallOpts) => Promise<ChatResponse>\n}\n\n/**\n * Build a ChatClient bound to a specific transport. The returned client\n * is safe to share across analysts in a single registry run.\n */\nexport function createChatClient(opts: CreateChatClientOpts): ChatClient {\n switch (opts.transport) {\n case 'router':\n return wrapLlmClient(\n opts.transport,\n opts.defaultModel,\n new LlmClient({\n baseUrl: opts.baseUrl ?? 'https://router.tangle.tools/v1',\n apiKey: opts.apiKey,\n } as LlmClientOptions),\n )\n case 'cli-bridge':\n return wrapLlmClient(\n opts.transport,\n opts.defaultModel,\n new LlmClient({\n baseUrl: opts.baseUrl ?? 'http://127.0.0.1:3344/v1',\n apiKey: opts.bearer ?? 
'',\n } as LlmClientOptions),\n )\n case 'direct-provider':\n return wrapLlmClient(\n opts.transport,\n opts.defaultModel,\n new LlmClient({\n baseUrl: opts.baseUrl,\n apiKey: opts.apiKey,\n } as LlmClientOptions),\n )\n case 'sandbox-sdk':\n return {\n transport: 'sandbox-sdk',\n defaultModel: opts.defaultModel,\n chat: async (req, callOpts) => opts.chat(resolveModel(req, opts.defaultModel), callOpts),\n }\n case 'mock':\n return {\n transport: 'mock',\n defaultModel: opts.defaultModel,\n chat: async (req, callOpts) => opts.handler(resolveModel(req, opts.defaultModel), callOpts),\n }\n }\n}\n\nfunction wrapLlmClient(\n transport: ChatTransport,\n defaultModel: string | undefined,\n inner: LlmClient,\n): ChatClient {\n return {\n transport,\n defaultModel,\n chat: async (req, callOpts) => {\n const resolved = resolveModel(req, defaultModel)\n // LlmClient.call doesn't accept an external AbortSignal today (it\n // owns its own AbortController for the per-attempt timeout). We\n // race the response against the caller's signal so awaiting code\n // unblocks on abort. 
The in-flight HTTP request still runs to its\n // own timeoutMs — when LlmClient grows a signal parameter, wire\n // it directly here and drop the race.\n const call = inner.call({\n model: resolved.model!,\n messages: req.messages,\n jsonMode: req.jsonMode,\n jsonSchema: req.jsonSchema,\n temperature: req.temperature,\n maxTokens: req.maxTokens,\n timeoutMs: req.timeoutMs,\n })\n if (!callOpts?.signal) return await call\n return await Promise.race([call, abortAsRejection(callOpts.signal)])\n },\n }\n}\n\nfunction abortAsRejection(signal: AbortSignal): Promise<never> {\n if (signal.aborted) return Promise.reject(toAbortError(signal))\n return new Promise<never>((_, reject) => {\n signal.addEventListener('abort', () => reject(toAbortError(signal)), { once: true })\n })\n}\n\nfunction toAbortError(signal: AbortSignal): Error {\n const reason = (signal as { reason?: unknown }).reason\n if (reason instanceof Error) return reason\n const e = new Error('ChatClient.chat: aborted')\n e.name = 'AbortError'\n return e\n}\n\nfunction resolveModel(req: ChatRequest, defaultModel: string | undefined): ChatRequest {\n if (req.model) return req\n if (!defaultModel) {\n throw new Error(\n 'ChatClient.chat: no model on request and no defaultModel on the client. ' +\n 'Either pass req.model or bind defaultModel at createChatClient().',\n )\n }\n return { ...req, model: defaultModel }\n}\n","/**\n * Deterministic behavioral metrics over OTLP spans — pure arithmetic, no LLM.\n *\n * These are the model-independent multiplier: the four trace-quality signals a\n * tolerant analyzer (e.g. HALO) re-derives per run inside the model — token\n * growth, output decay, tool monoculture, missing self-verification — computed\n * here once, in TypeScript, with zero model judgment. 
A finding that falls out\n * of arithmetic is trivially model-agnostic and cannot hallucinate the trend.\n *\n * General, not trace-specific: the detectors key off token trajectories and\n * tool usage present in any agentic OTLP trace, not any one benchmark.\n */\n\nimport type { TraceAnalystSpan } from './types'\n\nexport type SuboptimalCode =\n | 'monotonic-input-growth'\n | 'output-length-decay'\n | 'single-tool-dependency'\n | 'no-self-verification'\n\nexport interface SuboptimalSignal {\n code: SuboptimalCode\n severity: 'high' | 'medium' | 'low'\n /** Human-readable claim, with the backing numbers inlined. */\n detail: string\n /** The exact figures the detector fired on — auditable, no model in the loop. */\n evidence: Record<string, number | string | boolean>\n}\n\nexport interface BehavioralMetrics {\n llmCallCount: number\n inputTokenTrajectory: number[]\n outputTokenTrajectory: number[]\n toolHistogram: Record<string, number>\n totalToolCalls: number\n distinctTools: number\n /** distinct/total tool calls; 1.0 when there are no tool calls. */\n toolDiversityRatio: number\n hasSelfVerification: boolean\n signals: SuboptimalSignal[]\n}\n\n/** ≥ this input-token growth ratio across a run, with no compression, fires. */\nconst INPUT_GROWTH_FACTOR = 3\n/** Tool-usage signals need at least this many calls to be meaningful. */\nconst MIN_TOOL_CALLS = 3\n/** Tool names matching this are self-verification, not state mutation. */\nconst VERIFY_RE = /verif|eval|inspect|check|assert|validat|review|confirm/i\n\nfunction num(v: unknown): number | null {\n return typeof v === 'number' && Number.isFinite(v) ? v : null\n}\nfunction inputTokensOf(s: TraceAnalystSpan): number | null {\n return num(s.attributes['llm.input_tokens']) ?? num(s.attributes['llm.usage.input_tokens'])\n}\nfunction outputTokensOf(s: TraceAnalystSpan): number | null {\n return num(s.attributes['llm.output_tokens']) ?? 
num(s.attributes['llm.usage.output_tokens'])\n}\nfunction stepOf(s: TraceAnalystSpan): number | null {\n return num(s.attributes.step)\n}\nfunction toolNameOf(s: TraceAnalystSpan): string | null {\n if (s.tool_name) return s.tool_name\n const t = s.attributes['tool.name']\n return typeof t === 'string' && t.length > 0 ? t : null\n}\n\n/**\n * Reduce a span list to behavioral metrics + fired suboptimality signals.\n * Pure + deterministic: same spans → same output, on any machine, no model.\n */\nexport function computeTraceMetrics(spans: readonly TraceAnalystSpan[]): BehavioralMetrics {\n // Order by step (when present) then start_time so trajectories reflect run order.\n const ordered = [...spans].sort((a, b) => {\n const sa = stepOf(a)\n const sb = stepOf(b)\n if (sa !== null && sb !== null && sa !== sb) return sa - sb\n return a.start_time.localeCompare(b.start_time)\n })\n\n const inputTokenTrajectory: number[] = []\n const outputTokenTrajectory: number[] = []\n const toolHistogram: Record<string, number> = {}\n let hasSelfVerification = false\n\n for (const s of ordered) {\n const inT = inputTokensOf(s)\n if (inT !== null) inputTokenTrajectory.push(inT)\n const outT = outputTokensOf(s)\n if (outT !== null) outputTokenTrajectory.push(outT)\n const tool = toolNameOf(s)\n if (tool) {\n toolHistogram[tool] = (toolHistogram[tool] ?? 0) + 1\n if (VERIFY_RE.test(tool)) hasSelfVerification = true\n }\n }\n\n const totalToolCalls = Object.values(toolHistogram).reduce((a, b) => a + b, 0)\n const distinctTools = Object.keys(toolHistogram).length\n const toolDiversityRatio = totalToolCalls === 0 ? 1 : distinctTools / totalToolCalls\n\n const signals: SuboptimalSignal[] = []\n\n if (inputTokenTrajectory.length >= 3) {\n const first = inputTokenTrajectory[0]!\n const last = inputTokenTrajectory[inputTokenTrajectory.length - 1]!\n const growth = first > 0 ? 
last / first : 0\n if (last > first && growth >= INPUT_GROWTH_FACTOR) {\n signals.push({\n code: 'monotonic-input-growth',\n severity: 'high',\n detail: `LLM input tokens grew ${growth.toFixed(1)}x (${first}→${last}) across ${inputTokenTrajectory.length} calls — full history re-sent each step with no compression.`,\n evidence: {\n first,\n last,\n growth_x: Number(growth.toFixed(2)),\n calls: inputTokenTrajectory.length,\n },\n })\n }\n }\n\n if (outputTokenTrajectory.length >= 3) {\n const first = outputTokenTrajectory[0]!\n const last = outputTokenTrajectory[outputTokenTrajectory.length - 1]!\n if (last < first) {\n signals.push({\n code: 'output-length-decay',\n severity: 'medium',\n detail: `LLM output tokens shrank ${first}→${last} over ${outputTokenTrajectory.length} calls — less planning/reasoning per step as context grows.`,\n evidence: { first, last, calls: outputTokenTrajectory.length },\n })\n }\n }\n\n if (totalToolCalls >= MIN_TOOL_CALLS && distinctTools === 1) {\n const only = Object.keys(toolHistogram)[0]!\n signals.push({\n code: 'single-tool-dependency',\n severity: 'medium',\n detail: `All ${totalToolCalls} tool calls are \\`${only}\\` — no tool diversity and no fallback path.`,\n evidence: { tool: only, calls: totalToolCalls, distinct_tools: 1 },\n })\n }\n\n if (totalToolCalls >= MIN_TOOL_CALLS && !hasSelfVerification) {\n signals.push({\n code: 'no-self-verification',\n severity: 'medium',\n detail: `${totalToolCalls} tool calls and none verify/inspect/check state — the agent never validates its own actions.`,\n evidence: { tool_calls: totalToolCalls, verification_calls: 0 },\n })\n }\n\n return {\n llmCallCount: inputTokenTrajectory.length,\n inputTokenTrajectory,\n outputTokenTrajectory,\n toolHistogram,\n totalToolCalls,\n distinctTools,\n toolDiversityRatio,\n hasSelfVerification,\n signals,\n }\n}\n","/**\n * `behavioralAnalyst` — a DETERMINISTIC analyst (cost.kind = 'deterministic',\n * never calls the LLM). 
It produces the efficiency/behavioral findings a\n * tolerant agentic analyzer (HALO) re-derives per run inside the model —\n * context bloat, output decay, tool monoculture, missing self-verification —\n * directly from arithmetic over spans (`computeTraceMetrics`).\n *\n * Why it matters: these findings are model-agnostic BY CONSTRUCTION (no model\n * in the loop), so they cannot return 0 on a weak model the way the Ax-RLM\n * does — and they are strictly more reliable than HALO, which spends tokens\n * re-deriving the same numbers and can hallucinate the trend. The agentic\n * RLM kinds remain for SEMANTIC findings that genuinely need a model; this\n * analyst owns the behavioral class.\n */\n\nimport {\n type BehavioralMetrics,\n computeTraceMetrics,\n type SuboptimalCode,\n} from '../trace-analyst/behavioral-metrics'\nimport type { TraceAnalysisStore } from '../trace-analyst/store'\nimport type { TraceAnalystSpan } from '../trace-analyst/types'\nimport { type Analyst, type AnalystFinding, makeFinding } from './types'\n\nconst RECOMMENDED_ACTION: Record<SuboptimalCode, string> = {\n 'monotonic-input-growth':\n 'Add a context-budget instruction: once prior context exceeds a threshold, summarize earlier steps into a short status line instead of re-sending full history.',\n 'output-length-decay':\n 'Require a minimum planning/reasoning budget per step so late steps do not degrade into terse, error-prone commands.',\n 'single-tool-dependency':\n 'Direct the agent to use the full toolset (verify / inspect / alternate actions), not a single execute call, and to plan a fallback when a call returns an unexpected result.',\n 'no-self-verification':\n 'After every state-mutating action, verify the result (eval / inspect / assert) before proceeding.',\n}\n\nconst ANALYST_ID = 'efficiency-behavioral'\n\n/**\n * Map computed signals → structured AnalystFindings. 
Pure: no LLM, no clock\n * dependence beyond `produced_at` (overridable for deterministic tests).\n */\nexport function deriveEfficiencyFindings(\n metrics: BehavioralMetrics,\n opts: { analystId?: string; producedAt?: string } = {},\n): AnalystFinding[] {\n const analystId = opts.analystId ?? ANALYST_ID\n return metrics.signals.map((sig) =>\n makeFinding({\n analyst_id: analystId,\n area: 'efficiency',\n subject: sig.code, // kebab — passes the cluster grammar; stable key for diffFindings\n claim: sig.detail,\n severity: sig.severity,\n // Deterministic arithmetic over spans, not a model judgment → certain.\n confidence: 1,\n evidence_refs: [\n {\n kind: 'metric',\n uri: `metric://efficiency/${sig.code}`,\n excerpt: JSON.stringify(sig.evidence),\n },\n ],\n recommended_action: RECOMMENDED_ACTION[sig.code],\n metadata: { deterministic: true, evidence: sig.evidence },\n ...(opts.producedAt ? { produced_at: opts.producedAt } : {}),\n }),\n )\n}\n\n/** The deterministic behavioral/efficiency analyst (no LLM, any-model). */\nexport function behavioralAnalyst(): Analyst<TraceAnalysisStore> {\n return {\n id: ANALYST_ID,\n description:\n 'Deterministic behavioral/efficiency findings over OTLP spans — token-growth, output-decay, tool-monoculture, missing self-verification. 
Zero LLM; model-agnostic by construction.',\n inputKind: 'trace-store',\n cost: { kind: 'deterministic' },\n version: '1.0.0',\n async analyze(store) {\n const overview = await store.getOverview()\n const spans: TraceAnalystSpan[] = []\n for (const traceId of overview.sample_trace_ids) {\n const viewed = await store.viewTrace({ trace_id: traceId })\n if (viewed.spans) spans.push(...viewed.spans)\n }\n return deriveEfficiencyFindings(computeTraceMetrics(spans))\n },\n }\n}\n","/**\n * `buildDefaultAnalystRegistry` — the canonical analyst suite, so consumers\n * stop hand-wiring `new AnalystRegistry()` + per-kind `createTraceAnalystKind`.\n *\n * The deterministic `behavioralAnalyst` is ALWAYS registered (it needs no\n * model and is model-agnostic by construction). The agentic RLM kinds are\n * registered only when an `ai` service is supplied — so a caller with no LLM\n * still gets the full behavioral/efficiency diagnosis, and the substrate's\n * \"any model (including no model)\" guarantee holds at the suite level.\n */\n\nimport type { AxAIService } from '@ax-llm/ax'\nimport { behavioralAnalyst } from './behavioral-analyst'\nimport { createTraceAnalystKind, type TraceAnalystKindSpec } from './kind-factory'\nimport { DEFAULT_TRACE_ANALYST_KINDS } from './kinds'\nimport { AnalystRegistry, type AnalystRegistryOptions } from './registry'\n\nexport interface DefaultAnalystRegistryOptions {\n /** Ax service for the agentic RLM kinds. Omit → only the deterministic analyst. */\n ai?: AxAIService\n /** Model for the agentic kinds (falls back to the ai service default). */\n model?: string\n /** Which agentic kinds to register when `ai` is present. Default = the shipped suite. */\n kinds?: readonly TraceAnalystKindSpec[]\n /** Set false to omit the deterministic behavioral analyst (default: include). */\n includeBehavioral?: boolean\n /** Forwarded to the AnalystRegistry constructor (signal, tags, priorFindings). 
*/\n registry?: AnalystRegistryOptions\n}\n\nexport function buildDefaultAnalystRegistry(\n opts: DefaultAnalystRegistryOptions = {},\n): AnalystRegistry {\n const registry = new AnalystRegistry(opts.registry)\n if (opts.includeBehavioral !== false) {\n registry.register(behavioralAnalyst())\n }\n if (opts.ai) {\n const kinds = opts.kinds ?? DEFAULT_TRACE_ANALYST_KINDS\n for (const spec of kinds) {\n registry.register(createTraceAnalystKind(spec, { ai: opts.ai, model: opts.model }))\n }\n }\n return registry\n}\n","/**\n * concurrency — small primitives the evolution loop needs.\n *\n * `Mutex` is a zero-dep async lock with FIFO fairness. The evolution loop\n * uses it to serialise checkout/build/commit sequences inside a single\n * pool slot, and to gate concurrent JSONL writers (see\n * `lockedJsonlReferenceReplayStore`).\n *\n * Deliberately minimal — no priority queue, no timeouts. If you need\n * those, swap to `async-mutex` at the call site.\n */\n\nexport class Mutex {\n private locked = false\n private readonly waiters: Array<() => void> = []\n\n async acquire(): Promise<() => void> {\n if (!this.locked) {\n this.locked = true\n return () => this.release()\n }\n return new Promise<() => void>((resolve) => {\n this.waiters.push(() => {\n resolve(() => this.release())\n })\n })\n }\n\n private release(): void {\n const next = this.waiters.shift()\n if (next) {\n next()\n } else {\n this.locked = false\n }\n }\n\n async runExclusive<T>(fn: () => Promise<T> | T): Promise<T> {\n const release = await this.acquire()\n try {\n return await fn()\n } finally {\n release()\n }\n }\n\n /** True iff someone holds the lock right now. Diagnostics only. */\n get isLocked(): boolean {\n return this.locked\n }\n\n /** Pending waiter count. Diagnostics only. */\n get pending(): number {\n return this.waiters.length\n }\n}\n","/**\n * LockedJsonlAppender — mutex-serialized JSONL append helper for arbitrary\n * payloads. 
The reference-replay store does the same thing for typed\n * `ReferenceReplayRun` rows; this is the generic version used by\n * `MutationTelemetry`, `TrialTelemetry`, and any other consumer that wants\n * append-only durable telemetry without rolling its own lock.\n *\n * Locks are per absolute file path (process-local). Cross-process\n * concurrency is NOT addressed — that's an fcntl/flock problem.\n */\n\nimport { appendFileSync, existsSync, mkdirSync } from 'node:fs'\nimport { dirname } from 'node:path'\nimport { Mutex } from './concurrency'\n\nconst mutexes = new Map<string, Mutex>()\n\nfunction getMutex(path: string): Mutex {\n let m = mutexes.get(path)\n if (!m) {\n m = new Mutex()\n mutexes.set(path, m)\n }\n return m\n}\n\nexport class LockedJsonlAppender {\n private readonly mutex: Mutex\n constructor(public readonly path: string) {\n this.mutex = getMutex(path)\n if (!existsSync(dirname(path))) {\n mkdirSync(dirname(path), { recursive: true })\n }\n }\n\n async append(entry: unknown): Promise<void> {\n const line = `${JSON.stringify(entry)}\\n`\n await this.mutex.runExclusive(() => {\n appendFileSync(this.path, line)\n })\n }\n}\n\n/** Reset all internal mutex state — tests only. */\nexport function resetLockedAppendersForTesting(): void {\n mutexes.clear()\n}\n","/**\n * FindingsStore — durable persistence for AnalystFinding rows + a diff\n * helper so we can answer \"what changed since the last run?\" without\n * recomputing analysts.\n *\n * On-disk shape is JSONL: one finding per line, append-only, locked via\n * LockedJsonlAppender. Operators get crash-safety (no partial JSON),\n * cheap reads (sequential parse), and trivial backup (rsync the file).\n *\n * Reads are non-locking: a reader sees a consistent snapshot of all\n * fully-written lines and skips an incomplete trailing line if the\n * writer is mid-append. 
Cross-process locking is intentionally out of\n * scope (see locked-jsonl-appender.ts).\n *\n * The store is run-scoped: callers pass `runId` on append and on load,\n * which keeps multi-run files cleanly partitioned. The `diffFindings`\n * helper compares two run-id sets using stable `finding_id` semantics —\n * the diff is the cross-run signal the regression dashboard renders.\n */\n\nimport { existsSync, readFileSync } from 'node:fs'\n\nimport { LockedJsonlAppender } from '../locked-jsonl-appender'\nimport type { AnalystFinding } from './types'\n\n/**\n * One persisted row. We attach `run_id` on disk so a single file can\n * hold multiple runs and the diff helper can query without re-walking\n * separate files.\n */\nexport interface PersistedFinding extends AnalystFinding {\n run_id: string\n}\n\nexport class FindingsStore {\n private readonly appender: LockedJsonlAppender\n\n constructor(public readonly path: string) {\n this.appender = new LockedJsonlAppender(path)\n }\n\n async append(runId: string, findings: AnalystFinding[]): Promise<void> {\n for (const f of findings) {\n const row: PersistedFinding = { ...f, run_id: runId }\n await this.appender.append(row)\n }\n }\n\n /** Load every persisted finding. Discards malformed trailing lines silently. */\n loadAll(): PersistedFinding[] {\n if (!existsSync(this.path)) return []\n const raw = readFileSync(this.path, 'utf8')\n if (!raw) return []\n const out: PersistedFinding[] = []\n for (const line of raw.split('\\n')) {\n if (!line) continue\n try {\n out.push(JSON.parse(line) as PersistedFinding)\n } catch {\n // Skip torn trailing line — the lock guarantees no torn lines\n // mid-file, only at EOF when a writer is in-flight.\n }\n }\n return out\n }\n\n /** Filter to a single run. 
*/\n loadRun(runId: string): PersistedFinding[] {\n return this.loadAll().filter((r) => r.run_id === runId)\n }\n}\n\n// ── Cross-run diff ──────────────────────────────────────────────────\n\nexport interface FindingsDiff {\n /** New finding ids in `current` that weren't in `previous`. */\n appeared: PersistedFinding[]\n /** Finding ids in `previous` that aren't in `current`. */\n disappeared: PersistedFinding[]\n /** Same finding id present in both runs and unchanged per the materiality test. */\n persisted: PersistedFinding[]\n /**\n * Same finding id in both runs but at least one non-identity field\n * shifted per `DiffPolicy.isMaterial`. Reported as [previous, current].\n */\n changed: Array<{ previous: PersistedFinding; current: PersistedFinding }>\n}\n\nexport interface DiffPolicy {\n /**\n * Predicate that decides whether two findings (same finding_id) count\n * as a material change. Defaults to {@link defaultIsMaterial}: severity\n * shift, confidence Δ > 0.05, or evidence count change. Compliance /\n * perf consumers MAY supply a stricter predicate (e.g. rationale text\n * diff, metric Δ thresholds).\n */\n isMaterial?: (previous: AnalystFinding, current: AnalystFinding) => boolean\n}\n\n/**\n * Default materiality test. Deliberately narrow so LLM-reword churn\n * doesn't flood the diff. Stricter tests are opt-in via DiffPolicy.\n */\nexport function defaultIsMaterial(a: AnalystFinding, b: AnalystFinding): boolean {\n if (a.severity !== b.severity) return true\n if (Math.abs((a.confidence ?? 0) - (b.confidence ?? 0)) > 0.05) return true\n if (a.evidence_refs.length !== b.evidence_refs.length) return true\n return false\n}\n\n/**\n * Diff two findings sets by stable finding_id. Callers typically load\n * the two run-id slices from the same store and pass them in.\n */\nexport function diffFindings(\n previous: PersistedFinding[],\n current: PersistedFinding[],\n policy: DiffPolicy = {},\n): FindingsDiff {\n const isMaterial = policy.isMaterial ?? 
defaultIsMaterial\n const prevById = new Map(previous.map((f) => [f.finding_id, f]))\n const curById = new Map(current.map((f) => [f.finding_id, f]))\n\n const appeared: PersistedFinding[] = []\n const disappeared: PersistedFinding[] = []\n const persisted: PersistedFinding[] = []\n const changed: FindingsDiff['changed'] = []\n\n for (const [id, cur] of curById) {\n const prev = prevById.get(id)\n if (!prev) {\n appeared.push(cur)\n continue\n }\n if (isMaterial(prev, cur)) {\n changed.push({ previous: prev, current: cur })\n } else {\n persisted.push(cur)\n }\n }\n for (const [id, prev] of prevById) {\n if (!curById.has(id)) disappeared.push(prev)\n }\n return { appeared, disappeared, persisted, changed }\n}\n","/**\n * Skill-usage analyst — a DETERMINISTIC `Analyst` over a Claude/Codex skill\n * library + its trace corpus. Unlike the trace-store kinds (failure-mode,\n * improvement, ...) this kind calls no LLM: it mines real usage and skill\n * structure and emits findings by rule.\n *\n * It exists because the naive \"Skill-tool invocation count\" lies low — it\n * misses orchestrated sub-dispatch (a leaf skill run BY /pursue or /governor\n * logs under the parent), slash-command entry, local-script bypass, and\n * on-disk artifacts. The 2026-05-30 skill audit found 39/53 skills at zero\n * direct invocations, yet only one was a genuine cut: the rest were\n * measurement-invisible or discovery-limited. 
This analyst encodes that\n * lesson as a multi-signal usage model so a cheap repeatable pass can keep\n * the library honest, and so the expensive audit workflow's verdicts can\n * GEPA-distill it toward agreement (see `gold/skill-verdicts.gold.jsonl`).\n *\n * Report-building (`buildSkillUsageReport`, an fs scan) is separated from\n * finding emission (`SkillUsageAnalyst.analyze`, pure) so the slow scan runs\n * once at the registry boundary and the rule logic stays unit-testable.\n */\n\nimport { type Dirent, existsSync, readdirSync, readFileSync, statSync } from 'node:fs'\nimport { join } from 'node:path'\nimport type { Analyst, AnalystContext, AnalystFinding, AnalystSeverity } from '../types'\nimport { computeFindingId } from '../types'\n\n// ── Input model ──────────────────────────────────────────────────────\n\nexport type SkillKind = 'public' | 'private'\n\n/** One skill's multi-signal usage + structure. All counts are deterministic. */\nexport interface SkillUsageRecord {\n name: string\n kind: SkillKind\n /** Absolute path to the skill's SKILL.md. */\n path: string\n lines: number\n /** `\"skill\":\"<name>\"` Skill-tool invocations across the trace corpus. */\n directInvocations: number\n /** `<command-name>/<name>` slash invocations across the trace corpus. */\n slashInvocations: number\n /** Sibling skills whose SKILL.md dispatches to this one (`/<name>`). Proxy\n * for orchestrated sub-dispatch the per-skill counter cannot see. */\n inboundRefs: number\n /** On-disk artifacts attributable to the skill (e.g. `.evolve/<name>/**`). */\n artifactCount: number\n /** Tangle-private reference count in the body (leak signal for public skills). */\n tanglePrivateRefs: number\n hasReferencesDir: boolean\n hasEvalsDir: boolean\n /** Body mentions `skill-runs.jsonl` (visible to /reflect + /governor). */\n logsRuns: boolean\n /** Description carries an explicit `Triggers:` clause / trigger phrases. 
*/\n hasTriggerPhrases: boolean\n}\n\nexport interface SkillUsageReport {\n generatedFromTraces: number\n records: SkillUsageRecord[]\n}\n\nexport interface SkillUsageScanConfig {\n /** Dirs holding `*.jsonl` transcripts (Claude `~/.claude/projects`, Codex sessions). */\n transcriptDirs: string[]\n /** Skill roots to scan; each dir directly under `root` with a `SKILL.md` is a skill. */\n skillRoots: { root: string; kind: SkillKind }[]\n /** Roots scanned for `<root>/.evolve/<skill>` artifact dirs. */\n artifactRoots?: string[]\n /** Token-prefixed mappings: skill name → extra artifact subpaths under an artifactRoot\n * (e.g. reflect → `.evolve/reflections`). Catches non-eponymous artifact dirs. */\n artifactAliases?: Record<string, string[]>\n /** Cap files read per transcript dir (bounds a huge corpus); 0 = unbounded. */\n maxTranscriptsPerDir?: number\n}\n\n// ── Deterministic thresholds ─────────────────────────────────────────\n\n/** Anthropic's authoring guidance keeps SKILL.md short; past this with no\n * `references/` split the body burns context budget every session. 
*/\nconst BLOAT_LINE_THRESHOLD = 300\n\nconst TANGLE_PRIVATE_RE =\n /\\b(cli-bridge|tangletools|ops-board|drew-gtr-pro|@tangle-network\\/|~\\/company|tangle\\.tools|gtm-agent)\\b|\\bkimi\\b|\\btcloud\\b/gi\nconst TRIGGER_RE = /triggers?\\s*[:-]/i\n\n// ── Report builder (fs scan — slow, runs once at the registry boundary) ──\n\nfunction listSkillDirs(root: string): { name: string; path: string }[] {\n if (!existsSync(root)) return []\n const out: { name: string; path: string }[] = []\n for (const entry of readdirSync(root, { withFileTypes: true })) {\n if (!entry.isDirectory() && !entry.isSymbolicLink()) continue\n const skillMd = join(root, entry.name, 'SKILL.md')\n if (existsSync(skillMd)) out.push({ name: entry.name, path: skillMd })\n }\n return out\n}\n\nfunction walkJsonl(dir: string, cap: number): string[] {\n if (!existsSync(dir)) return []\n const files: string[] = []\n const stack = [dir]\n while (stack.length) {\n const cur = stack.pop()!\n let entries: Dirent[]\n try {\n entries = readdirSync(cur, { withFileTypes: true })\n } catch {\n continue\n }\n for (const e of entries) {\n const full = join(cur, e.name)\n if (e.isDirectory()) stack.push(full)\n else if (e.name.endsWith('.jsonl')) {\n files.push(full)\n if (cap > 0 && files.length >= cap) return files\n }\n }\n }\n return files\n}\n\nfunction frontmatterDescription(body: string): string {\n const fm = /^---\\n([\\s\\S]*?)\\n---/.exec(body)\n const block = fm?.[1] ?? ''\n const m = /description:\\s*(.+)/i.exec(block)\n return m?.[1] ?? 
''\n}\n\nfunction countArtifacts(roots: string[], name: string, aliases: string[]): number {\n let n = 0\n for (const root of roots) {\n const candidates = [join(root, '.evolve', name), ...aliases.map((a) => join(root, a))]\n for (const dir of candidates) {\n if (!existsSync(dir)) continue\n try {\n if (statSync(dir).isDirectory()) n += readdirSync(dir).length\n else n += 1\n } catch {\n /* unreadable — skip */\n }\n }\n }\n return n\n}\n\n/** Scan the corpus + skill roots into a {@link SkillUsageReport}. Deterministic. */\nexport function buildSkillUsageReport(config: SkillUsageScanConfig): SkillUsageReport {\n const skills = config.skillRoots.flatMap(({ root, kind }) =>\n listSkillDirs(root).map((s) => ({ ...s, kind })),\n )\n const names = skills.map((s) => s.name)\n\n // One pass over the corpus accumulating direct + slash counts per skill.\n const direct = new Map<string, number>(names.map((n) => [n, 0]))\n const slash = new Map<string, number>(names.map((n) => [n, 0]))\n const skillRe = /\"skill\"\\s*:\\s*\"([a-z0-9_:-]+)\"/g\n const cmdRe = /<command-name>\\/?([a-z0-9_:-]+)<\\/command-name>/g\n let transcripts = 0\n for (const dir of config.transcriptDirs) {\n for (const file of walkJsonl(dir, config.maxTranscriptsPerDir ?? 0)) {\n transcripts += 1\n let data: string\n try {\n data = readFileSync(file, 'utf8')\n } catch {\n continue\n }\n for (const m of data.matchAll(skillRe)) {\n const g = m[1]\n if (!g) continue\n const n = g.split(':').pop() ?? 
g\n const prev = direct.get(n)\n if (prev !== undefined) direct.set(n, prev + 1)\n }\n for (const m of data.matchAll(cmdRe)) {\n const g = m[1]\n if (g === undefined) continue\n const prev = slash.get(g)\n if (prev !== undefined) slash.set(g, prev + 1)\n }\n }\n }\n\n // Read each skill body once; compute structure + inbound refs across siblings.\n const bodies = new Map<string, string>()\n for (const s of skills) {\n try {\n bodies.set(s.name, readFileSync(s.path, 'utf8'))\n } catch {\n bodies.set(s.name, '')\n }\n }\n const inbound = new Map<string, number>(names.map((n) => [n, 0]))\n for (const target of names) {\n const ref = new RegExp(`/${target}\\\\b|\\\\[\\\\[${target}\\\\]\\\\]`)\n for (const s of skills) {\n if (s.name === target) continue\n if (ref.test(bodies.get(s.name) ?? '')) inbound.set(target, inbound.get(target)! + 1)\n }\n }\n\n const records: SkillUsageRecord[] = skills.map((s) => {\n const body = bodies.get(s.name) ?? ''\n const dir = s.path.replace(/\\/SKILL\\.md$/, '')\n return {\n name: s.name,\n kind: s.kind,\n path: s.path,\n lines: body ? body.split('\\n').length : 0,\n directInvocations: direct.get(s.name) ?? 0,\n slashInvocations: slash.get(s.name) ?? 0,\n inboundRefs: inbound.get(s.name) ?? 0,\n artifactCount: countArtifacts(\n config.artifactRoots ?? [],\n s.name,\n config.artifactAliases?.[s.name] ?? [],\n ),\n tanglePrivateRefs: (body.match(TANGLE_PRIVATE_RE) ?? 
[]).length,\n hasReferencesDir: existsSync(join(dir, 'references')),\n hasEvalsDir: existsSync(join(dir, 'evals')),\n logsRuns: body.includes('skill-runs.jsonl'),\n hasTriggerPhrases: TRIGGER_RE.test(frontmatterDescription(body) || body.slice(0, 600)),\n }\n })\n return { generatedFromTraces: transcripts, records }\n}\n\n// ── Finding emission (pure — unit-testable, no LLM, no fs) ────────────\n\nconst ANALYST_ID = 'skill-usage'\n\nfunction finding(\n area: string,\n subject: string,\n claim: string,\n severity: AnalystSeverity,\n confidence: number,\n producedAt: string,\n recommended: string,\n evidenceUri: string,\n rationale?: string,\n): AnalystFinding {\n return {\n schema_version: '1.0.0',\n finding_id: computeFindingId({ analyst_id: ANALYST_ID, area, subject, claim }),\n analyst_id: ANALYST_ID,\n produced_at: producedAt,\n severity,\n area,\n claim,\n rationale,\n evidence_refs: [{ kind: 'artifact', uri: evidenceUri }],\n recommended_action: recommended,\n confidence,\n subject,\n }\n}\n\n/** Pure rule pass over a report → findings. Exported for direct/unit use. */\nexport function emitSkillUsageFindings(\n report: SkillUsageReport,\n producedAt: string,\n): AnalystFinding[] {\n const out: AnalystFinding[] = []\n for (const r of report.records) {\n const directTotal = r.directInvocations + r.slashInvocations\n const trueUsage = directTotal + r.inboundRefs + r.artifactCount\n\n // 1. Dead: no usage signal of ANY kind. The only real deprecation candidate.\n if (trueUsage === 0) {\n out.push(\n finding(\n 'skill-usage',\n r.name,\n `Skill '${r.name}' has zero usage across all signals (direct, slash, inbound-refs, artifacts)`,\n 'high',\n 0.6,\n producedAt,\n 'Confirm the skill covers a real recurring job; if not, deprecate. 
Zero true usage is the only deterministic deprecation candidate.',\n r.path,\n 'No Skill-tool call, no slash invocation, no sibling dispatches to it, and no on-disk artifacts.',\n ),\n )\n } else if (directTotal === 0 && r.inboundRefs + r.artifactCount > 0) {\n // 2. Measurement-invisible: real use via orchestration/artifacts, never invoked directly.\n out.push(\n finding(\n 'skill-usage',\n r.name,\n `Skill '${r.name}' shows 0 direct invocations but is used via orchestration/artifacts (inbound=${r.inboundRefs}, artifacts=${r.artifactCount})`,\n 'info',\n 0.8,\n producedAt,\n 'Do NOT treat as unused — usage is real but logged under parent skills or on disk. Strengthen direct-invocation discovery only if direct use is desired.',\n r.path,\n 'The Skill-tool counter undercounts orchestrated/chained leaf skills.',\n ),\n )\n }\n\n // 3. Discovery gap: low direct use AND weak trigger surface.\n if (directTotal <= 2 && !r.hasTriggerPhrases) {\n out.push(\n finding(\n 'discoverability',\n r.name,\n `Skill '${r.name}' is rarely invoked directly and its description has no explicit trigger phrases`,\n 'medium',\n 0.7,\n producedAt,\n 'Add a `Triggers:` clause with verbatim user phrases to the frontmatter description so the model auto-invokes it.',\n r.path,\n ),\n )\n }\n\n // 4. Public-repo leak.\n if (r.kind === 'public' && r.tanglePrivateRefs > 0) {\n out.push(\n finding(\n 'safety',\n r.name,\n `Public skill '${r.name}' carries ${r.tanglePrivateRefs} Tangle-private reference(s)`,\n 'high',\n 0.75,\n producedAt,\n 'Sanitize incidental internal refs (cli-bridge/kimi/tcloud/~company/private repos) or relocate to a private repo. Verify @tangle-network/* refs are to PUBLISHED packages before treating as a leak.',\n r.path,\n ),\n )\n }\n\n // 5. 
Bloat / no progressive disclosure.\n if (r.lines > BLOAT_LINE_THRESHOLD && !r.hasReferencesDir) {\n out.push(\n finding(\n 'maintainability',\n r.name,\n `Skill '${r.name}' is ${r.lines} lines with no references/ split (progressive disclosure)`,\n 'medium',\n 0.8,\n producedAt,\n `Split detail into references/ loaded on demand; keep SKILL.md a short overview. ${r.lines} lines load into every session's context budget.`,\n r.path,\n ),\n )\n }\n\n // 6. No evals (Anthropic's \">=3 evals before docs\" rule).\n if (!r.hasEvalsDir) {\n out.push(\n finding(\n 'data-quality',\n r.name,\n `Skill '${r.name}' ships no evals/`,\n 'low',\n 0.6,\n producedAt,\n 'Add evals/evals.json with >=3 scenarios proving the skill beats baseline; gives regression coverage.',\n r.path,\n ),\n )\n }\n\n // 7. No run logging → invisible to /reflect and /governor.\n if (!r.logsRuns) {\n out.push(\n finding(\n 'observability',\n r.name,\n `Skill '${r.name}' never appends to .evolve/skill-runs.jsonl`,\n 'low',\n 0.55,\n producedAt,\n 'Append one run line to .evolve/skill-runs.jsonl on completion, or declare it a non-logging leaf, so the self-improvement loop can see it ran.',\n r.path,\n ),\n )\n }\n }\n return out\n}\n\n// ── The Analyst ──────────────────────────────────────────────────────\n\nexport class SkillUsageAnalyst implements Analyst<SkillUsageReport> {\n readonly id = ANALYST_ID\n readonly description =\n 'Deterministic multi-signal skill-usage analysis: flags dead skills, measurement-invisible (orchestrated) usage, discovery gaps, public-repo leaks, bloat, missing evals, and missing run-logging.'\n readonly inputKind = 'custom' as const\n readonly cost = { kind: 'deterministic' as const, est_usd_per_run: 0 }\n readonly version = '1.0.0'\n\n async analyze(input: SkillUsageReport, ctx: AnalystContext): Promise<AnalystFinding[]> {\n const producedAt = ctx.tags?.producedAt ?? 
new Date().toISOString()\n ctx.log?.(\n `skill-usage: ${input.records.length} skills over ${input.generatedFromTraces} transcripts`,\n )\n return emitSkillUsageFindings(input, producedAt)\n }\n}\n\nexport const SKILL_USAGE_ANALYST = new SkillUsageAnalyst()\n","export interface RunScore {\n success: number\n goalProgress: number\n repoGroundedness: number\n driftPenalty: number\n toolUseQuality: number\n patchQuality: number\n testReality: number\n finalGate: number\n reviewerBlockers: number\n costUsd: number\n wallSeconds: number\n notes?: string[]\n}\n\nexport interface RunScoreWeights {\n success: number\n goalProgress: number\n repoGroundedness: number\n driftPenalty: number\n toolUseQuality: number\n patchQuality: number\n testReality: number\n finalGate: number\n reviewerBlockers: number\n costUsd: number\n wallSeconds: number\n}\n\nexport const DEFAULT_RUN_SCORE_WEIGHTS: RunScoreWeights = {\n success: 4,\n goalProgress: 2,\n repoGroundedness: 1.5,\n driftPenalty: -1.5,\n toolUseQuality: 1,\n patchQuality: 1.25,\n testReality: 1.5,\n finalGate: 3,\n reviewerBlockers: -2,\n costUsd: -0.2,\n wallSeconds: -0.1,\n}\n\nexport function aggregateRunScore(score: RunScore, weights: Partial<RunScoreWeights> = {}): number {\n const w = { ...DEFAULT_RUN_SCORE_WEIGHTS, ...weights }\n return (\n w.success * clamp01(score.success) +\n w.goalProgress * clamp01(score.goalProgress) +\n w.repoGroundedness * clamp01(score.repoGroundedness) +\n w.driftPenalty * clamp01(score.driftPenalty) +\n w.toolUseQuality * clamp01(score.toolUseQuality) +\n w.patchQuality * clamp01(score.patchQuality) +\n w.testReality * clamp01(score.testReality) +\n w.finalGate * clamp01(score.finalGate) +\n w.reviewerBlockers * clamp01(score.reviewerBlockers) +\n w.costUsd * Math.max(0, finiteOrZero(score.costUsd)) +\n w.wallSeconds * Math.max(0, finiteOrZero(score.wallSeconds) / 60)\n )\n}\n\nexport function clamp01(value: number): number {\n if (!Number.isFinite(value)) return 0\n return Math.max(0, 
Math.min(1, value))\n}\n\nfunction finiteOrZero(value: number): number {\n return Number.isFinite(value) ? value : 0\n}\n","import { NotFoundError } from './errors'\nimport { aggregateRunScore, clamp01, type RunScore, type RunScoreWeights } from './run-score'\nimport type { Artifact, BudgetLedgerEntry, Run, Span, TraceEvent, TraceStore } from './trace'\n\nexport interface RunTrace {\n run: Run\n spans: Span[]\n events: TraceEvent[]\n artifacts: Artifact[]\n budget: BudgetLedgerEntry[]\n}\n\nexport interface RunCriticOptions {\n weights?: Partial<RunScoreWeights>\n driftPatterns?: RegExp[]\n}\n\nconst DEFAULT_DRIFT_PATTERNS = [\n /https?:\\/\\//i,\n /\\btitle:\\s/i,\n /\\bsummary:\\s/i,\n /\\burl:\\s/i,\n /\\bnpm package usage\\b/i,\n /\\bnews\\b/i,\n]\n\nexport class RunCritic {\n private readonly weights?: Partial<RunScoreWeights>\n private readonly driftPatterns: RegExp[]\n\n constructor(options: RunCriticOptions = {}) {\n this.weights = options.weights\n this.driftPatterns = options.driftPatterns ?? 
DEFAULT_DRIFT_PATTERNS\n }\n\n async score(store: TraceStore, runId: string): Promise<RunScore> {\n const run = await store.getRun(runId)\n if (!run) throw new NotFoundError(`run ${runId} not found`)\n const [spans, events, artifacts, budget] = await Promise.all([\n store.spans({ runId }),\n store.events({ runId }),\n store.artifacts(runId),\n store.budget(runId),\n ])\n return this.scoreTrace({ run, spans, events, artifacts, budget })\n }\n\n scoreTrace(trace: RunTrace): RunScore {\n const notes: string[] = []\n const llmSpans = trace.spans.filter(\n (s): s is Extract<Span, { kind: 'llm' }> => s.kind === 'llm',\n )\n const toolSpans = trace.spans.filter(\n (s): s is Extract<Span, { kind: 'tool' }> => s.kind === 'tool',\n )\n const judgeSpans = trace.spans.filter(\n (s): s is Extract<Span, { kind: 'judge' }> => s.kind === 'judge',\n )\n const sandboxSpans = trace.spans.filter(\n (s): s is Extract<Span, { kind: 'sandbox' }> => s.kind === 'sandbox',\n )\n const finalGateSpans = judgeSpans.filter(\n (span) => span.dimension === 'final_gate' || span.attributes?.finalGate === true,\n )\n\n const success =\n trace.run.outcome?.pass === true ? 1 : trace.run.status === 'completed' ? 0.5 : 0\n if (!success) notes.push('run did not complete with pass=true')\n\n const judgeAverage = judgeSpans.length\n ? judgeSpans.reduce((sum, span) => sum + normalizeJudgeScore(span.score), 0) /\n judgeSpans.length\n : undefined\n const outcomeScore =\n typeof trace.run.outcome?.score === 'number'\n ? clamp01(\n trace.run.outcome.score > 1 ? trace.run.outcome.score / 100 : trace.run.outcome.score,\n )\n : undefined\n const goalProgress = outcomeScore ?? judgeAverage ?? success\n\n const successfulTools = toolSpans.filter((span) => span.status !== 'error').length\n const toolUseQuality = toolSpans.length === 0 ? 
0 : successfulTools / toolSpans.length\n if (toolSpans.length === 0) notes.push('no tool spans recorded')\n\n const patchEvidence =\n trace.artifacts.length +\n toolSpans.filter((span) => /write|edit|patch|apply/i.test(span.toolName)).length\n const patchQuality = patchEvidence > 0 ? clamp01(patchEvidence / 4) : 0\n if (!patchQuality) notes.push('no artifact or edit evidence recorded')\n\n const sandboxTests = sandboxSpans.filter(\n (span) => typeof span.testsTotal === 'number' && span.testsTotal > 0,\n )\n const testReality = sandboxTests.length\n ? sandboxTests.reduce(\n (sum, span) => sum + (span.testsPassed ?? 0) / Math.max(1, span.testsTotal ?? 1),\n 0,\n ) / sandboxTests.length\n : toolSpans.some((span) =>\n /\\btest|vitest|pytest|jest|build|tsc\\b/i.test(JSON.stringify(span.args)),\n )\n ? 0.4\n : 0\n if (!testReality) notes.push('no real test/build evidence recorded')\n\n const blockerSpans = judgeSpans.filter((span) => isBlockingJudge(span))\n const finalGateBlockers = finalGateSpans.filter((span) => isBlockingJudge(span))\n const finalGate = finalGateSpans.length ? (finalGateBlockers.length ? 0 : 1) : success\n if (finalGateBlockers.length)\n notes.push(`final gate blocked by ${finalGateBlockers.length} reviewer(s)`)\n else if (!finalGateSpans.length) notes.push('no final gate judgment recorded')\n\n const reviewerBlockers = judgeSpans.length ? blockerSpans.length / judgeSpans.length : 0\n if (reviewerBlockers) notes.push(`detected ${blockerSpans.length} blocking reviewer signal(s)`)\n\n const positiveGroundingSignals =\n patchEvidence +\n sandboxSpans.length +\n llmSpans.filter((span) => looksRepoGrounded(span.output ?? '')).length\n const driftSignals =\n llmSpans.filter((span) => this.isDrift(span.output ?? '')).length +\n trace.events.filter((event) => this.isDrift(JSON.stringify(event.payload))).length\n const repoGroundedness =\n positiveGroundingSignals + driftSignals === 0\n ? 
0\n : positiveGroundingSignals / (positiveGroundingSignals + driftSignals)\n const driftPenalty =\n positiveGroundingSignals + driftSignals === 0\n ? 0\n : driftSignals / (positiveGroundingSignals + driftSignals)\n if (driftSignals > 0) notes.push(`detected ${driftSignals} drift signal(s)`)\n\n const costUsd = trace.budget.length\n ? Math.max(\n ...trace.budget\n .filter((entry: BudgetLedgerEntry) => entry.dimension === 'usd')\n .map((entry: BudgetLedgerEntry) => entry.consumed),\n 0,\n )\n : llmSpans.reduce((sum, span) => sum + (span.costUsd ?? 0), 0)\n const wallSeconds =\n trace.run.endedAt && trace.run.startedAt\n ? Math.max(0, (trace.run.endedAt - trace.run.startedAt) / 1000)\n : 0\n\n return {\n success,\n goalProgress,\n repoGroundedness,\n driftPenalty,\n toolUseQuality,\n patchQuality,\n testReality,\n finalGate,\n reviewerBlockers,\n costUsd,\n wallSeconds,\n notes,\n }\n }\n\n rank(score: RunScore): number {\n return aggregateRunScore(score, this.weights)\n }\n\n private isDrift(text: string): boolean {\n return this.driftPatterns.some((pattern) => pattern.test(text))\n }\n}\n\nfunction normalizeJudgeScore(score: number): number {\n return score > 1 ? 
clamp01(score / 10) : clamp01(score)\n}\n\nfunction looksRepoGrounded(text: string): boolean {\n return /(?:src\\/|tests?\\/|package\\.json|tsconfig|\\.ts\\b|\\.tsx\\b|git status|pnpm |npm |vitest|pytest|jest)/i.test(\n text,\n )\n}\n\nfunction isBlockingJudge(span: Extract<Span, { kind: 'judge' }>): boolean {\n return (\n span.attributes?.blocking === true ||\n span.attributes?.verdict === 'BLOCKING' ||\n positiveNumber(span.attributes?.blockingFindings) ||\n positiveNumber(span.attributes?.highFindings) ||\n span.score <= 2\n )\n}\n\nfunction positiveNumber(value: unknown): boolean {\n return typeof value === 'number' && value > 0\n}\n","/**\n * Semantic concept judge — \"does the built artifact actually implement\n * the features the user asked for?\"\n *\n * Distinct from the domain/code/coherence judges in `judges.ts`:\n * - those judges score free-form conversational agent outputs along\n * quality dimensions (accuracy, depth, etc.)\n * - this judge scores a *built artifact* (served HTML + source files)\n * against an explicit list of expected concepts, returning per-concept\n * {present, score 0-10, evidence, severity}.\n *\n * The judge is strict about distinguishing (a) a working implementation\n * from (b) a keyword-present stub. \"// TODO: mint button\" is NOT present.\n * Only real, functional, wired-up code counts.\n *\n * Use via {@link createSemanticConceptJudge} or directly via\n * {@link runSemanticConceptJudge}. 
Soft-fails (available=false) on LLM\n * or JSON-parse errors so the caller can treat that as \"layer skipped\"\n * rather than \"layer failed\" in a multi-layer pipeline.\n */\n\nimport { callLlmJson, type LlmClientOptions } from './llm-client'\nimport type { Severity } from './multi-layer-verifier'\n\n// ─── Types ──────────────────────────────────────────────────────────────\n\n/**\n * Implementation complexity class for weighted scoring.\n *\n * - `render` (default): the concept is a UI surface that displays static\n * data — render a list, show a counter, lay out a button. Single-file\n * work, no external integration.\n * - `integrate`: the concept requires wiring a real external system —\n * wallet connect (wagmi + RainbowKit + chain config), payment provider\n * (Stripe Elements + intent + webhook), an API client with auth.\n * Multi-file, library-knowledge, runtime correctness matters.\n * - `compute`: the concept requires algorithmic work — solver, simulator,\n * constraint propagation, ML inference. Correctness > UI polish.\n *\n * Default weights (when applied via `weightConcepts: 'complexity'`):\n * render=1.0, integrate=2.0, compute=2.5\n *\n * Cross-vertical scoring without complexity weighting silently inflates\n * the rate of UI-heavy verticals (healthcare, fintech dashboards) vs\n * integration-heavy verticals (DeFi, wallets) — all concepts treated\n * equally even though the agent does 2-3x the work for `integrate`.\n */\nexport type ConceptComplexity = 'render' | 'integrate' | 'compute'\n\nexport interface ConceptSpec {\n name: string\n /** Short hints that help the judge; not used for matching. */\n keywords?: string[]\n /** Optional explicit weight; default 1.0. Overrides complexity-derived weight. */\n weight?: number\n /** Implementation complexity class. Default `render`. */\n complexity?: ConceptComplexity\n}\n\nexport interface ConceptFinding {\n concept: string\n present: boolean\n /** 0..10. 
10 = production-ready; 7 = functional thin; 4 = partial; 0 = absent. */\n score: number\n evidence: string\n severity: Severity\n}\n\nexport interface SemanticConceptJudgeInput {\n /** Full natural-language prompt the agent was handed. */\n userRequest: string\n /** Rendered HTML the preview returns (UI artifacts). Optional. */\n servedHtml?: string\n /** Top-level source files from the agent's workdir. */\n sourceFiles: Array<{ path: string; content: string }>\n /** The expected concept list. */\n expectedConcepts: ConceptSpec[]\n /** Free-form metadata (id, difficulty) to inject into the prompt. */\n artifactLabel?: string\n artifactDescription?: string\n}\n\nexport interface SemanticConceptJudgeResult {\n kind: 'semantic-concept'\n version: string\n /** Normalized 0..1 score — mean of per-concept scores / 10. */\n score: number\n presentCount: number\n totalCount: number\n findings: ConceptFinding[]\n summary: string\n durationMs: number\n costUsd: number | null\n /** False on LLM/JSON error — treat as \"skipped / unable to judge\" in pipelines. */\n available: boolean\n error?: string\n}\n\n/**\n * Score-aggregation strategy. `mean` averages 0-10 scores uniformly.\n * `complexity` applies the default weight table (render=1, integrate=2,\n * compute=2.5) unless a concept has an explicit `weight`. `explicit`\n * honors only `weight` (defaulting to 1 for unspecified).\n */\nexport type ConceptWeightStrategy = 'mean' | 'complexity' | 'explicit'\n\nexport const DEFAULT_COMPLEXITY_WEIGHTS: Record<ConceptComplexity, number> = {\n render: 1.0,\n integrate: 2.0,\n compute: 2.5,\n}\n\nexport interface SemanticConceptJudgeOptions {\n /** Model id to call. Default 'claude-sonnet-4-6' via agent-eval defaults. */\n model?: string\n /** Per-call timeout. Default 180s. */\n timeoutMs?: number\n /** Pipeline budget for the prompt (source blob truncation). Default 45000. */\n maxSourceChars?: number\n /** Per-file cap before inclusion. Default 20000. 
*/\n maxPerFileChars?: number\n /** HTML cap. Default 30000. */\n maxHtmlChars?: number\n /** LlmClient config (baseUrl, apiKey, authHeader, …). */\n llm?: LlmClientOptions\n /**\n * Score aggregation strategy. Default `mean` — uniform average across\n * concepts. Cross-vertical comparisons should use `complexity` to\n * neutralize the integrate-vs-render asymmetry.\n */\n weightConcepts?: ConceptWeightStrategy\n /** Override the default complexity → weight table. */\n complexityWeights?: Partial<Record<ConceptComplexity, number>>\n}\n\n// ─── Prompt assembly ────────────────────────────────────────────────────\n\nexport const SEMANTIC_CONCEPT_JUDGE_VERSION = 'semantic-concept-judge-v1-2026-04-24'\n\nconst DEFAULT_MAX_SOURCE = 45_000\nconst DEFAULT_MAX_HTML = 30_000\nconst DEFAULT_MAX_PER_FILE = 20_000\nconst DEFAULT_TIMEOUT = 180_000\nconst DEFAULT_MODEL = 'claude-sonnet-4-6'\n\nconst SEMANTIC_SCHEMA = {\n type: 'object',\n additionalProperties: false,\n required: ['summary', 'concepts'],\n properties: {\n summary: { type: 'string', minLength: 20, maxLength: 600 },\n concepts: {\n type: 'array',\n minItems: 1,\n items: {\n type: 'object',\n additionalProperties: false,\n required: ['concept', 'present', 'score', 'evidence', 'severity'],\n properties: {\n concept: { type: 'string', minLength: 1, maxLength: 120 },\n present: { type: 'boolean' },\n score: { type: 'number', minimum: 0, maximum: 10 },\n evidence: { type: 'string', minLength: 5, maxLength: 400 },\n severity: { type: 'string', enum: ['critical', 'major', 'minor', 'info'] },\n },\n },\n },\n },\n}\n\nfunction truncate(body: string, cap: number, label: string): string {\n if (body.length <= cap) return body\n return `${body.slice(0, cap)}\\n… [truncated ${body.length - cap} chars of ${label}]`\n}\n\nfunction buildPrompt(\n input: SemanticConceptJudgeInput,\n opts: Required<SemanticConceptJudgeOptions>,\n): string {\n const sourceBlob = input.sourceFiles\n .filter((f) => f.content.length <= 
opts.maxPerFileChars)\n .map((f) => `--- FILE: ${f.path} ---\\n${f.content}`)\n .join('\\n\\n')\n\n const html = input.servedHtml ?? ''\n\n return `You are a strict code-review judge evaluating whether an agent's 0-to-1 build actually implements the features the user asked for.\n\nYou MUST distinguish:\n (a) WORKING code that implements the concept (rendered UI, wired handler, real API call),\n (b) KEYWORD-PRESENT stub (comments mentioning the concept, variable names, TODOs),\n (c) ABSENT (concept nowhere).\n\nA comment like \"// TODO: add mint button\" is NOT present — score 2-3. Only count a concept as present if there is real functional code: a rendered component, a call handler wired to state or a network call, a computed value actually used.\n\nUSER REQUEST (what the agent was asked to build):\n${input.userRequest}\n\n${input.artifactLabel ? `ARTIFACT METADATA:\\n name: ${input.artifactLabel}\\n description: ${input.artifactDescription ?? ''}\\n\\n` : ''}EXPECTED CONCEPTS (each must be graded independently):\n${input.expectedConcepts\n .map(\n (c, i) =>\n ` ${i + 1}. \"${c.name}\"${c.keywords?.length ? ` — hints: [${c.keywords.slice(0, 6).join(' | ')}]` : ''}`,\n )\n .join('\\n')}\n\n${html ? `SERVED HTML (what the preview returns when hit):\\n${truncate(html, opts.maxHtmlChars, 'HTML')}\\n\\n` : ''}SOURCE FILES (the agent's workdir):\n${truncate(sourceBlob, opts.maxSourceChars, 'source')}\n\nFor EACH concept, return:\n - concept: the concept name as given (match exactly)\n - present: boolean — does a working implementation exist?\n - score: 0-10 — 10 = production-ready; 7 = functional but thin; 4 = partial/stubbed; 2 = keyword-only comment; 0 = absent\n - evidence: cite \"<file>:<line>\" or \"served-html:<selector>\" pointing at the strongest supporting code. 
If the concept is absent or stubbed, explain what's missing.\n - severity:\n \"info\" when present: true AND score >= 7\n \"minor\" when present: true AND 4 <= score < 7\n \"major\" when present: false OR score < 4\n \"critical\" when the concept is not only absent but a core user flow depends on it\n\nAlso produce a \"summary\" (one sentence, 20-600 chars): overall verdict on whether this is a shippable implementation of the user request vs a keyword-dense placeholder.\n\nBE SKEPTICAL. Keyword matching already passed — your job is to catch what keyword matching misses. If the agent shipped a working build, say so. If it shipped a stub, say so. Don't grade on effort.\n\nReturn STRICT JSON. No prose outside the JSON.`\n}\n\n// ─── Runner ─────────────────────────────────────────────────────────────\n\n/**\n * Run the semantic concept judge. Soft-fails to available=false on\n * LLM/JSON errors — callers in a MultiLayerVerifier pipeline can treat\n * that as \"skip\" rather than \"fail.\"\n */\nexport async function runSemanticConceptJudge(\n input: SemanticConceptJudgeInput,\n options: SemanticConceptJudgeOptions = {},\n): Promise<SemanticConceptJudgeResult> {\n const start = Date.now()\n const totalCount = input.expectedConcepts.length\n\n if (totalCount === 0) {\n return {\n kind: 'semantic-concept',\n version: SEMANTIC_CONCEPT_JUDGE_VERSION,\n score: 0,\n presentCount: 0,\n totalCount: 0,\n findings: [],\n summary: 'no expected concepts declared',\n durationMs: 0,\n costUsd: null,\n available: false,\n error: 'no expected concepts declared',\n }\n }\n\n const opts: Required<SemanticConceptJudgeOptions> = {\n model: options.model ?? DEFAULT_MODEL,\n timeoutMs: options.timeoutMs ?? DEFAULT_TIMEOUT,\n maxSourceChars: options.maxSourceChars ?? DEFAULT_MAX_SOURCE,\n maxPerFileChars: options.maxPerFileChars ?? DEFAULT_MAX_PER_FILE,\n maxHtmlChars: options.maxHtmlChars ?? DEFAULT_MAX_HTML,\n llm: options.llm ?? {},\n weightConcepts: options.weightConcepts ?? 
'mean',\n complexityWeights: { ...DEFAULT_COMPLEXITY_WEIGHTS, ...(options.complexityWeights ?? {}) },\n }\n\n // Build a name → weight map for aggregation. Mean strategy keeps every\n // weight at 1 (uniform average). Complexity strategy reads the table\n // and lets an explicit `weight` override. Explicit strategy uses ONLY\n // the spec's `weight` (defaulting to 1).\n const weightForConcept = (spec: ConceptSpec): number => {\n if (opts.weightConcepts === 'mean') return 1\n if (spec.weight != null) return spec.weight\n if (opts.weightConcepts === 'complexity') {\n return opts.complexityWeights[spec.complexity ?? 'render'] ?? 1\n }\n return 1\n }\n const weightByName = new Map<string, number>(\n input.expectedConcepts.map((c) => [c.name, weightForConcept(c)]),\n )\n\n try {\n const { value, result } = await callLlmJson<{\n summary: string\n concepts: ConceptFinding[]\n }>(\n {\n model: opts.model,\n messages: [\n {\n role: 'system',\n content:\n 'You are a strict code-review judge. Return strict JSON only. No prose outside the JSON. A keyword in a comment is NOT a working implementation.',\n },\n { role: 'user', content: buildPrompt(input, opts) },\n ],\n jsonSchema: { name: 'semantic_concept_judge', schema: SEMANTIC_SCHEMA },\n temperature: 0,\n timeoutMs: opts.timeoutMs,\n },\n opts.llm,\n )\n\n if (!value?.concepts || !Array.isArray(value.concepts)) {\n throw new Error('judge returned malformed response — expected array under \"concepts\"')\n }\n\n const findings: ConceptFinding[] = value.concepts.map((c) => ({\n concept: String(c.concept),\n present: Boolean(c.present),\n score: Math.max(0, Math.min(10, Number(c.score ?? 0))),\n evidence: String(c.evidence ?? ''),\n severity: (['critical', 'major', 'minor', 'info'] as const).includes(c.severity)\n ? 
c.severity\n : 'info',\n }))\n\n const presentCount = findings.filter((f) => f.present && f.score >= 7).length\n let weightSum = 0\n let weightedScoreSum = 0\n for (const f of findings) {\n const w = weightByName.get(f.concept) ?? 1\n weightSum += w\n weightedScoreSum += w * f.score\n }\n const scoreAvg =\n weightSum > 0\n ? weightedScoreSum / weightSum\n : findings.reduce((a, f) => a + f.score, 0) / Math.max(1, findings.length)\n\n return {\n kind: 'semantic-concept',\n version: SEMANTIC_CONCEPT_JUDGE_VERSION,\n score: Number((scoreAvg / 10).toFixed(3)),\n presentCount,\n totalCount,\n findings,\n summary: String(value.summary ?? ''),\n durationMs: Date.now() - start,\n costUsd: result.costUsd ?? null,\n available: true,\n }\n } catch (err) {\n return {\n kind: 'semantic-concept',\n version: SEMANTIC_CONCEPT_JUDGE_VERSION,\n score: 0,\n presentCount: 0,\n totalCount,\n findings: [],\n summary: '',\n durationMs: Date.now() - start,\n costUsd: null,\n available: false,\n error: err instanceof Error ? 
err.message : String(err),\n }\n }\n}\n\n/**\n * Factory: pin LLM options once, return a closure that accepts inputs.\n * Convenient for pipelines that want to share a single LlmClient config.\n */\nexport function createSemanticConceptJudge(\n options: SemanticConceptJudgeOptions = {},\n): (input: SemanticConceptJudgeInput) => Promise<SemanticConceptJudgeResult> {\n return (input) => runSemanticConceptJudge(input, options)\n}\n"],"mappings":";;;;;;;;;;;;;;;;AACA,SAAS,UAAU;AA2BZ,SAAS,gBAAgB,QAA4C;AAC1E,SAAO,GAAG;AAAA,IACR,MAAM,OAAO,YAAY;AAAA,IACzB,QAAQ,OAAO;AAAA,IACf,QAAQ,OAAO;AAAA,IACf,QAAQ,EAAE,OAAO,OAAO,MAAM;AAAA,EAChC,CAAC;AACH;;;ACqFO,SAAS,iBAAiB,MAAwC;AACvE,UAAQ,KAAK,WAAW;AAAA,IACtB,KAAK;AACH,aAAO;AAAA,QACL,KAAK;AAAA,QACL,KAAK;AAAA,QACL,IAAI,UAAU;AAAA,UACZ,SAAS,KAAK,WAAW;AAAA,UACzB,QAAQ,KAAK;AAAA,QACf,CAAqB;AAAA,MACvB;AAAA,IACF,KAAK;AACH,aAAO;AAAA,QACL,KAAK;AAAA,QACL,KAAK;AAAA,QACL,IAAI,UAAU;AAAA,UACZ,SAAS,KAAK,WAAW;AAAA,UACzB,QAAQ,KAAK,UAAU;AAAA,QACzB,CAAqB;AAAA,MACvB;AAAA,IACF,KAAK;AACH,aAAO;AAAA,QACL,KAAK;AAAA,QACL,KAAK;AAAA,QACL,IAAI,UAAU;AAAA,UACZ,SAAS,KAAK;AAAA,UACd,QAAQ,KAAK;AAAA,QACf,CAAqB;AAAA,MACvB;AAAA,IACF,KAAK;AACH,aAAO;AAAA,QACL,WAAW;AAAA,QACX,cAAc,KAAK;AAAA,QACnB,MAAM,OAAO,KAAK,aAAa,KAAK,KAAK,aAAa,KAAK,KAAK,YAAY,GAAG,QAAQ;AAAA,MACzF;AAAA,IACF,KAAK;AACH,aAAO;AAAA,QACL,WAAW;AAAA,QACX,cAAc,KAAK;AAAA,QACnB,MAAM,OAAO,KAAK,aAAa,KAAK,QAAQ,aAAa,KAAK,KAAK,YAAY,GAAG,QAAQ;AAAA,MAC5F;AAAA,EACJ;AACF;AAEA,SAAS,cACP,WACA,cACA,OACY;AACZ,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,MAAM,OAAO,KAAK,aAAa;AAC7B,YAAM,WAAW,aAAa,KAAK,YAAY;AAO/C,YAAM,OAAO,MAAM,KAAK;AAAA,QACtB,OAAO,SAAS;AAAA,QAChB,UAAU,IAAI;AAAA,QACd,UAAU,IAAI;AAAA,QACd,YAAY,IAAI;AAAA,QAChB,aAAa,IAAI;AAAA,QACjB,WAAW,IAAI;AAAA,QACf,WAAW,IAAI;AAAA,MACjB,CAAC;AACD,UAAI,CAAC,UAAU,OAAQ,QAAO,MAAM;AACpC,aAAO,MAAM,QAAQ,KAAK,CAAC,MAAM,iBAAiB,SAAS,MAAM,CAAC,CAAC;AAAA,IACrE;AAAA,EACF;AACF;AAEA,SAAS,iBAAiB,QAAqC;AAC7D,MAAI,OAAO,QAAS,QAAO,QAAQ,OAAO,aAAa,MAAM,CAAC;AAC9D,SAAO,IAAI,QAAe,CAAC,GAAG,WAAW;AACvC,WAAO,iBAAiB,SAAS,MAAM,
OAAO,aAAa,MAAM,CAAC,GAAG,EAAE,MAAM,KAAK,CAAC;AAAA,EACrF,CAAC;AACH;AAEA,SAAS,aAAa,QAA4B;AAChD,QAAM,SAAU,OAAgC;AAChD,MAAI,kBAAkB,MAAO,QAAO;AACpC,QAAM,IAAI,IAAI,MAAM,0BAA0B;AAC9C,IAAE,OAAO;AACT,SAAO;AACT;AAEA,SAAS,aAAa,KAAkB,cAA+C;AACrF,MAAI,IAAI,MAAO,QAAO;AACtB,MAAI,CAAC,cAAc;AACjB,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AAAA,EACF;AACA,SAAO,EAAE,GAAG,KAAK,OAAO,aAAa;AACvC;;;AC/KA,IAAM,sBAAsB;AAE5B,IAAM,iBAAiB;AAEvB,IAAM,YAAY;AAElB,SAAS,IAAI,GAA2B;AACtC,SAAO,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,IAAI,IAAI;AAC3D;AACA,SAAS,cAAc,GAAoC;AACzD,SAAO,IAAI,EAAE,WAAW,kBAAkB,CAAC,KAAK,IAAI,EAAE,WAAW,wBAAwB,CAAC;AAC5F;AACA,SAAS,eAAe,GAAoC;AAC1D,SAAO,IAAI,EAAE,WAAW,mBAAmB,CAAC,KAAK,IAAI,EAAE,WAAW,yBAAyB,CAAC;AAC9F;AACA,SAAS,OAAO,GAAoC;AAClD,SAAO,IAAI,EAAE,WAAW,IAAI;AAC9B;AACA,SAAS,WAAW,GAAoC;AACtD,MAAI,EAAE,UAAW,QAAO,EAAE;AAC1B,QAAM,IAAI,EAAE,WAAW,WAAW;AAClC,SAAO,OAAO,MAAM,YAAY,EAAE,SAAS,IAAI,IAAI;AACrD;AAMO,SAAS,oBAAoB,OAAuD;AAEzF,QAAM,UAAU,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM;AACxC,UAAM,KAAK,OAAO,CAAC;AACnB,UAAM,KAAK,OAAO,CAAC;AACnB,QAAI,OAAO,QAAQ,OAAO,QAAQ,OAAO,GAAI,QAAO,KAAK;AACzD,WAAO,EAAE,WAAW,cAAc,EAAE,UAAU;AAAA,EAChD,CAAC;AAED,QAAM,uBAAiC,CAAC;AACxC,QAAM,wBAAkC,CAAC;AACzC,QAAM,gBAAwC,CAAC;AAC/C,MAAI,sBAAsB;AAE1B,aAAW,KAAK,SAAS;AACvB,UAAM,MAAM,cAAc,CAAC;AAC3B,QAAI,QAAQ,KAAM,sBAAqB,KAAK,GAAG;AAC/C,UAAM,OAAO,eAAe,CAAC;AAC7B,QAAI,SAAS,KAAM,uBAAsB,KAAK,IAAI;AAClD,UAAM,OAAO,WAAW,CAAC;AACzB,QAAI,MAAM;AACR,oBAAc,IAAI,KAAK,cAAc,IAAI,KAAK,KAAK;AACnD,UAAI,UAAU,KAAK,IAAI,EAAG,uBAAsB;AAAA,IAClD;AAAA,EACF;AAEA,QAAM,iBAAiB,OAAO,OAAO,aAAa,EAAE,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC7E,QAAM,gBAAgB,OAAO,KAAK,aAAa,EAAE;AACjD,QAAM,qBAAqB,mBAAmB,IAAI,IAAI,gBAAgB;AAEtE,QAAM,UAA8B,CAAC;AAErC,MAAI,qBAAqB,UAAU,GAAG;AACpC,UAAM,QAAQ,qBAAqB,CAAC;AACpC,UAAM,OAAO,qBAAqB,qBAAqB,SAAS,CAAC;AACjE,UAAM,SAAS,QAAQ,IAAI,OAAO,QAAQ;AAC1C,QAAI,OAAO,SAAS,UAAU,qBAAqB;AACjD,cAAQ,KAAK;AAAA,QACX,MAAM;AAAA,QACN,UAAU;AAAA,QACV,QAAQ,yBAAyB,OAAO,QAAQ,CAAC,CAAC,MAAM,KAAK,SAAI,IAAI,YAAY,qBAAqB,MAAM;AAAA,QAC5G,UAAU;AAAA,UACR;AAAA,UACA;AAAA,UA
CA,UAAU,OAAO,OAAO,QAAQ,CAAC,CAAC;AAAA,UAClC,OAAO,qBAAqB;AAAA,QAC9B;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,MAAI,sBAAsB,UAAU,GAAG;AACrC,UAAM,QAAQ,sBAAsB,CAAC;AACrC,UAAM,OAAO,sBAAsB,sBAAsB,SAAS,CAAC;AACnE,QAAI,OAAO,OAAO;AAChB,cAAQ,KAAK;AAAA,QACX,MAAM;AAAA,QACN,UAAU;AAAA,QACV,QAAQ,4BAA4B,KAAK,SAAI,IAAI,SAAS,sBAAsB,MAAM;AAAA,QACtF,UAAU,EAAE,OAAO,MAAM,OAAO,sBAAsB,OAAO;AAAA,MAC/D,CAAC;AAAA,IACH;AAAA,EACF;AAEA,MAAI,kBAAkB,kBAAkB,kBAAkB,GAAG;AAC3D,UAAM,OAAO,OAAO,KAAK,aAAa,EAAE,CAAC;AACzC,YAAQ,KAAK;AAAA,MACX,MAAM;AAAA,MACN,UAAU;AAAA,MACV,QAAQ,OAAO,cAAc,qBAAqB,IAAI;AAAA,MACtD,UAAU,EAAE,MAAM,MAAM,OAAO,gBAAgB,gBAAgB,EAAE;AAAA,IACnE,CAAC;AAAA,EACH;AAEA,MAAI,kBAAkB,kBAAkB,CAAC,qBAAqB;AAC5D,YAAQ,KAAK;AAAA,MACX,MAAM;AAAA,MACN,UAAU;AAAA,MACV,QAAQ,GAAG,cAAc;AAAA,MACzB,UAAU,EAAE,YAAY,gBAAgB,oBAAoB,EAAE;AAAA,IAChE,CAAC;AAAA,EACH;AAEA,SAAO;AAAA,IACL,cAAc,qBAAqB;AAAA,IACnC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;AC9IA,IAAM,qBAAqD;AAAA,EACzD,0BACE;AAAA,EACF,uBACE;AAAA,EACF,0BACE;AAAA,EACF,wBACE;AACJ;AAEA,IAAM,aAAa;AAMZ,SAAS,yBACd,SACA,OAAoD,CAAC,GACnC;AAClB,QAAM,YAAY,KAAK,aAAa;AACpC,SAAO,QAAQ,QAAQ;AAAA,IAAI,CAAC,QAC1B,YAAY;AAAA,MACV,YAAY;AAAA,MACZ,MAAM;AAAA,MACN,SAAS,IAAI;AAAA;AAAA,MACb,OAAO,IAAI;AAAA,MACX,UAAU,IAAI;AAAA;AAAA,MAEd,YAAY;AAAA,MACZ,eAAe;AAAA,QACb;AAAA,UACE,MAAM;AAAA,UACN,KAAK,uBAAuB,IAAI,IAAI;AAAA,UACpC,SAAS,KAAK,UAAU,IAAI,QAAQ;AAAA,QACtC;AAAA,MACF;AAAA,MACA,oBAAoB,mBAAmB,IAAI,IAAI;AAAA,MAC/C,UAAU,EAAE,eAAe,MAAM,UAAU,IAAI,SAAS;AAAA,MACxD,GAAI,KAAK,aAAa,EAAE,aAAa,KAAK,WAAW,IAAI,CAAC;AAAA,IAC5D,CAAC;AAAA,EACH;AACF;AAGO,SAAS,oBAAiD;AAC/D,SAAO;AAAA,IACL,IAAI;AAAA,IACJ,aACE;AAAA,IACF,WAAW;AAAA,IACX,MAAM,EAAE,MAAM,gBAAgB;AAAA,IAC9B,SAAS;AAAA,IACT,MAAM,QAAQ,OAAO;AACnB,YAAM,WAAW,MAAM,MAAM,YAAY;AACzC,YAAM,QAA4B,CAAC;AACnC,iBAAW,WAAW,SAAS,kBAAkB;AAC/C,cAAM,SAAS,MAAM,MAAM,UAAU,EAAE,UAAU,QAAQ,CAAC;AAC1D,YAAI,OAAO,MAAO,OAAM,KAAK,GAAG,OAAO,KAAK;AAAA,MAC9C;AACA,aAAO,yBAAyB,oBAAoB,KAAK,CAAC;AAAA,IAC5D;AAAA,EACF;AACF;;;AC1DO,SAAS,4BACd,OAAsC,CAA
C,GACtB;AACjB,QAAM,WAAW,IAAI,gBAAgB,KAAK,QAAQ;AAClD,MAAI,KAAK,sBAAsB,OAAO;AACpC,aAAS,SAAS,kBAAkB,CAAC;AAAA,EACvC;AACA,MAAI,KAAK,IAAI;AACX,UAAM,QAAQ,KAAK,SAAS;AAC5B,eAAW,QAAQ,OAAO;AACxB,eAAS,SAAS,uBAAuB,MAAM,EAAE,IAAI,KAAK,IAAI,OAAO,KAAK,MAAM,CAAC,CAAC;AAAA,IACpF;AAAA,EACF;AACA,SAAO;AACT;;;AChCO,IAAM,QAAN,MAAY;AAAA,EACT,SAAS;AAAA,EACA,UAA6B,CAAC;AAAA,EAE/C,MAAM,UAA+B;AACnC,QAAI,CAAC,KAAK,QAAQ;AAChB,WAAK,SAAS;AACd,aAAO,MAAM,KAAK,QAAQ;AAAA,IAC5B;AACA,WAAO,IAAI,QAAoB,CAAC,YAAY;AAC1C,WAAK,QAAQ,KAAK,MAAM;AACtB,gBAAQ,MAAM,KAAK,QAAQ,CAAC;AAAA,MAC9B,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA,EAEQ,UAAgB;AACtB,UAAM,OAAO,KAAK,QAAQ,MAAM;AAChC,QAAI,MAAM;AACR,WAAK;AAAA,IACP,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,aAAgB,IAAsC;AAC1D,UAAM,UAAU,MAAM,KAAK,QAAQ;AACnC,QAAI;AACF,aAAO,MAAM,GAAG;AAAA,IAClB,UAAE;AACA,cAAQ;AAAA,IACV;AAAA,EACF;AAAA;AAAA,EAGA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAkB;AACpB,WAAO,KAAK,QAAQ;AAAA,EACtB;AACF;;;AC5CA,SAAS,gBAAgB,YAAY,iBAAiB;AACtD,SAAS,eAAe;AAGxB,IAAM,UAAU,oBAAI,IAAmB;AAEvC,SAAS,SAAS,MAAqB;AACrC,MAAI,IAAI,QAAQ,IAAI,IAAI;AACxB,MAAI,CAAC,GAAG;AACN,QAAI,IAAI,MAAM;AACd,YAAQ,IAAI,MAAM,CAAC;AAAA,EACrB;AACA,SAAO;AACT;AAEO,IAAM,sBAAN,MAA0B;AAAA,EAE/B,YAA4B,MAAc;AAAd;AAC1B,SAAK,QAAQ,SAAS,IAAI;AAC1B,QAAI,CAAC,WAAW,QAAQ,IAAI,CAAC,GAAG;AAC9B,gBAAU,QAAQ,IAAI,GAAG,EAAE,WAAW,KAAK,CAAC;AAAA,IAC9C;AAAA,EACF;AAAA,EAL4B;AAAA,EADX;AAAA,EAQjB,MAAM,OAAO,OAA+B;AAC1C,UAAM,OAAO,GAAG,KAAK,UAAU,KAAK,CAAC;AAAA;AACrC,UAAM,KAAK,MAAM,aAAa,MAAM;AAClC,qBAAe,KAAK,MAAM,IAAI;AAAA,IAChC,CAAC;AAAA,EACH;AACF;AAGO,SAAS,iCAAuC;AACrD,UAAQ,MAAM;AAChB;;;AC1BA,SAAS,cAAAA,aAAY,oBAAoB;AAclC,IAAM,gBAAN,MAAoB;AAAA,EAGzB,YAA4B,MAAc;AAAd;AAC1B,SAAK,WAAW,IAAI,oBAAoB,IAAI;AAAA,EAC9C;AAAA,EAF4B;AAAA,EAFX;AAAA,EAMjB,MAAM,OAAO,OAAe,UAA2C;AACrE,eAAW,KAAK,UAAU;AACxB,YAAM,MAAwB,EAAE,GAAG,GAAG,QAAQ,MAAM;AACpD,YAAM,KAAK,SAAS,OAAO,GAAG;AAAA,IAChC;AAAA,EACF;AAAA;AAAA,EAGA,UAA8B;AAC5B,QAAI,CAACC,YAAW,KAAK,IAAI,EAAG,QAAO,CAAC;AACpC,UAAM,MAAM,aAAa,KAAK,MAAM,MAAM;AAC1C,QAAI,CAAC,IAAK,QAAO
,CAAC;AAClB,UAAM,MAA0B,CAAC;AACjC,eAAW,QAAQ,IAAI,MAAM,IAAI,GAAG;AAClC,UAAI,CAAC,KAAM;AACX,UAAI;AACF,YAAI,KAAK,KAAK,MAAM,IAAI,CAAqB;AAAA,MAC/C,QAAQ;AAAA,MAGR;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,QAAQ,OAAmC;AACzC,WAAO,KAAK,QAAQ,EAAE,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK;AAAA,EACxD;AACF;AAiCO,SAAS,kBAAkB,GAAmB,GAA4B;AAC/E,MAAI,EAAE,aAAa,EAAE,SAAU,QAAO;AACtC,MAAI,KAAK,KAAK,EAAE,cAAc,MAAM,EAAE,cAAc,EAAE,IAAI,KAAM,QAAO;AACvE,MAAI,EAAE,cAAc,WAAW,EAAE,cAAc,OAAQ,QAAO;AAC9D,SAAO;AACT;AAMO,SAAS,aACd,UACA,SACA,SAAqB,CAAC,GACR;AACd,QAAM,aAAa,OAAO,cAAc;AACxC,QAAM,WAAW,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC;AAC/D,QAAM,UAAU,IAAI,IAAI,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC;AAE7D,QAAM,WAA+B,CAAC;AACtC,QAAM,cAAkC,CAAC;AACzC,QAAM,YAAgC,CAAC;AACvC,QAAM,UAAmC,CAAC;AAE1C,aAAW,CAAC,IAAI,GAAG,KAAK,SAAS;AAC/B,UAAM,OAAO,SAAS,IAAI,EAAE;AAC5B,QAAI,CAAC,MAAM;AACT,eAAS,KAAK,GAAG;AACjB;AAAA,IACF;AACA,QAAI,WAAW,MAAM,GAAG,GAAG;AACzB,cAAQ,KAAK,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,IAC/C,OAAO;AACL,gBAAU,KAAK,GAAG;AAAA,IACpB;AAAA,EACF;AACA,aAAW,CAAC,IAAI,IAAI,KAAK,UAAU;AACjC,QAAI,CAAC,QAAQ,IAAI,EAAE,EAAG,aAAY,KAAK,IAAI;AAAA,EAC7C;AACA,SAAO,EAAE,UAAU,aAAa,WAAW,QAAQ;AACrD;;;AC3HA,SAAsB,cAAAC,aAAY,aAAa,gBAAAC,eAAc,gBAAgB;AAC7E,SAAS,YAAY;AAyDrB,IAAM,uBAAuB;AAE7B,IAAM,oBACJ;AACF,IAAM,aAAa;AAInB,SAAS,cAAc,MAAgD;AACrE,MAAI,CAACC,YAAW,IAAI,EAAG,QAAO,CAAC;AAC/B,QAAM,MAAwC,CAAC;AAC/C,aAAW,SAAS,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC,GAAG;AAC9D,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,eAAe,EAAG;AACrD,UAAM,UAAU,KAAK,MAAM,MAAM,MAAM,UAAU;AACjD,QAAIA,YAAW,OAAO,EAAG,KAAI,KAAK,EAAE,MAAM,MAAM,MAAM,MAAM,QAAQ,CAAC;AAAA,EACvE;AACA,SAAO;AACT;AAEA,SAAS,UAAU,KAAa,KAAuB;AACrD,MAAI,CAACA,YAAW,GAAG,EAAG,QAAO,CAAC;AAC9B,QAAM,QAAkB,CAAC;AACzB,QAAM,QAAQ,CAAC,GAAG;AAClB,SAAO,MAAM,QAAQ;AACnB,UAAM,MAAM,MAAM,IAAI;AACtB,QAAI;AACJ,QAAI;AACF,gBAAU,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IACpD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,KAAK,SAAS;AACvB,YAAM,OAAO,KAAK,KAAK,EAAE,IAAI;AAC7B,UAAI,EAAE,YAAY,EAAG,OAAM,KAAK,IAAI;AAAA,eAC3B,EAAE,KAAK,SA
AS,QAAQ,GAAG;AAClC,cAAM,KAAK,IAAI;AACf,YAAI,MAAM,KAAK,MAAM,UAAU,IAAK,QAAO;AAAA,MAC7C;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,uBAAuB,MAAsB;AACpD,QAAM,KAAK,wBAAwB,KAAK,IAAI;AAC5C,QAAM,QAAQ,KAAK,CAAC,KAAK;AACzB,QAAM,IAAI,uBAAuB,KAAK,KAAK;AAC3C,SAAO,IAAI,CAAC,KAAK;AACnB;AAEA,SAAS,eAAe,OAAiB,MAAc,SAA2B;AAChF,MAAI,IAAI;AACR,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,CAAC,KAAK,MAAM,WAAW,IAAI,GAAG,GAAG,QAAQ,IAAI,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC;AACrF,eAAW,OAAO,YAAY;AAC5B,UAAI,CAACA,YAAW,GAAG,EAAG;AACtB,UAAI;AACF,YAAI,SAAS,GAAG,EAAE,YAAY,EAAG,MAAK,YAAY,GAAG,EAAE;AAAA,YAClD,MAAK;AAAA,MACZ,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAGO,SAAS,sBAAsB,QAAgD;AACpF,QAAM,SAAS,OAAO,WAAW;AAAA,IAAQ,CAAC,EAAE,MAAM,KAAK,MACrD,cAAc,IAAI,EAAE,IAAI,CAAC,OAAO,EAAE,GAAG,GAAG,KAAK,EAAE;AAAA,EACjD;AACA,QAAM,QAAQ,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI;AAGtC,QAAM,SAAS,IAAI,IAAoB,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;AAC/D,QAAM,QAAQ,IAAI,IAAoB,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;AAC9D,QAAM,UAAU;AAChB,QAAM,QAAQ;AACd,MAAI,cAAc;AAClB,aAAW,OAAO,OAAO,gBAAgB;AACvC,eAAW,QAAQ,UAAU,KAAK,OAAO,wBAAwB,CAAC,GAAG;AACnE,qBAAe;AACf,UAAI;AACJ,UAAI;AACF,eAAOC,cAAa,MAAM,MAAM;AAAA,MAClC,QAAQ;AACN;AAAA,MACF;AACA,iBAAW,KAAK,KAAK,SAAS,OAAO,GAAG;AACtC,cAAM,IAAI,EAAE,CAAC;AACb,YAAI,CAAC,EAAG;AACR,cAAM,IAAI,EAAE,MAAM,GAAG,EAAE,IAAI,KAAK;AAChC,cAAM,OAAO,OAAO,IAAI,CAAC;AACzB,YAAI,SAAS,OAAW,QAAO,IAAI,GAAG,OAAO,CAAC;AAAA,MAChD;AACA,iBAAW,KAAK,KAAK,SAAS,KAAK,GAAG;AACpC,cAAM,IAAI,EAAE,CAAC;AACb,YAAI,MAAM,OAAW;AACrB,cAAM,OAAO,MAAM,IAAI,CAAC;AACxB,YAAI,SAAS,OAAW,OAAM,IAAI,GAAG,OAAO,CAAC;AAAA,MAC/C;AAAA,IACF;AAAA,EACF;AAGA,QAAM,SAAS,oBAAI,IAAoB;AACvC,aAAW,KAAK,QAAQ;AACtB,QAAI;AACF,aAAO,IAAI,EAAE,MAAMA,cAAa,EAAE,MAAM,MAAM,CAAC;AAAA,IACjD,QAAQ;AACN,aAAO,IAAI,EAAE,MAAM,EAAE;AAAA,IACvB;AAAA,EACF;AACA,QAAM,UAAU,IAAI,IAAoB,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;AAChE,aAAW,UAAU,OAAO;AAC1B,UAAM,MAAM,IAAI,OAAO,IAAI,MAAM,aAAa,MAAM,QAAQ;AAC5D,eAAW,KAAK,QAAQ;AACtB,UAAI,EAAE,SAAS,OAAQ;AACvB,UAAI,IAAI,KAAK,OAAO,IAAI,EAAE,IAAI,KAAK
,EAAE,EAAG,SAAQ,IAAI,QAAQ,QAAQ,IAAI,MAAM,IAAK,CAAC;AAAA,IACtF;AAAA,EACF;AAEA,QAAM,UAA8B,OAAO,IAAI,CAAC,MAAM;AACpD,UAAM,OAAO,OAAO,IAAI,EAAE,IAAI,KAAK;AACnC,UAAM,MAAM,EAAE,KAAK,QAAQ,gBAAgB,EAAE;AAC7C,WAAO;AAAA,MACL,MAAM,EAAE;AAAA,MACR,MAAM,EAAE;AAAA,MACR,MAAM,EAAE;AAAA,MACR,OAAO,OAAO,KAAK,MAAM,IAAI,EAAE,SAAS;AAAA,MACxC,mBAAmB,OAAO,IAAI,EAAE,IAAI,KAAK;AAAA,MACzC,kBAAkB,MAAM,IAAI,EAAE,IAAI,KAAK;AAAA,MACvC,aAAa,QAAQ,IAAI,EAAE,IAAI,KAAK;AAAA,MACpC,eAAe;AAAA,QACb,OAAO,iBAAiB,CAAC;AAAA,QACzB,EAAE;AAAA,QACF,OAAO,kBAAkB,EAAE,IAAI,KAAK,CAAC;AAAA,MACvC;AAAA,MACA,oBAAoB,KAAK,MAAM,iBAAiB,KAAK,CAAC,GAAG;AAAA,MACzD,kBAAkBD,YAAW,KAAK,KAAK,YAAY,CAAC;AAAA,MACpD,aAAaA,YAAW,KAAK,KAAK,OAAO,CAAC;AAAA,MAC1C,UAAU,KAAK,SAAS,kBAAkB;AAAA,MAC1C,mBAAmB,WAAW,KAAK,uBAAuB,IAAI,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IACvF;AAAA,EACF,CAAC;AACD,SAAO,EAAE,qBAAqB,aAAa,QAAQ;AACrD;AAIA,IAAME,cAAa;AAEnB,SAAS,QACP,MACA,SACA,OACA,UACA,YACA,YACA,aACA,aACA,WACgB;AAChB,SAAO;AAAA,IACL,gBAAgB;AAAA,IAChB,YAAY,iBAAiB,EAAE,YAAYA,aAAY,MAAM,SAAS,MAAM,CAAC;AAAA,IAC7E,YAAYA;AAAA,IACZ,aAAa;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,eAAe,CAAC,EAAE,MAAM,YAAY,KAAK,YAAY,CAAC;AAAA,IACtD,oBAAoB;AAAA,IACpB;AAAA,IACA;AAAA,EACF;AACF;AAGO,SAAS,uBACd,QACA,YACkB;AAClB,QAAM,MAAwB,CAAC;AAC/B,aAAW,KAAK,OAAO,SAAS;AAC9B,UAAM,cAAc,EAAE,oBAAoB,EAAE;AAC5C,UAAM,YAAY,cAAc,EAAE,cAAc,EAAE;AAGlD,QAAI,cAAc,GAAG;AACnB,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,UAAU,EAAE,IAAI;AAAA,UAChB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF,WAAW,gBAAgB,KAAK,EAAE,cAAc,EAAE,gBAAgB,GAAG;AAEnE,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,UAAU,EAAE,IAAI,iFAAiF,EAAE,WAAW,eAAe,EAAE,aAAa;AAAA,UAC5I;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,QAAI,eAAe,KAAK,CAAC,EAAE,mBAAmB;AAC5C,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,UAAU,EAAE,IAAI;AAAA,UAChB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAGA,QAAI,EAAE,SAAS,YAAY,EAAE,oBAAoB,GAAG
;AAClD,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,iBAAiB,EAAE,IAAI,aAAa,EAAE,iBAAiB;AAAA,UACvD;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAGA,QAAI,EAAE,QAAQ,wBAAwB,CAAC,EAAE,kBAAkB;AACzD,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,UAAU,EAAE,IAAI,QAAQ,EAAE,KAAK;AAAA,UAC/B;AAAA,UACA;AAAA,UACA;AAAA,UACA,mFAAmF,EAAE,KAAK;AAAA,UAC1F,EAAE;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAGA,QAAI,CAAC,EAAE,aAAa;AAClB,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,UAAU,EAAE,IAAI;AAAA,UAChB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAGA,QAAI,CAAC,EAAE,UAAU;AACf,UAAI;AAAA,QACF;AAAA,UACE;AAAA,UACA,EAAE;AAAA,UACF,UAAU,EAAE,IAAI;AAAA,UAChB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAIO,IAAM,oBAAN,MAA6D;AAAA,EACzD,KAAKA;AAAA,EACL,cACP;AAAA,EACO,YAAY;AAAA,EACZ,OAAO,EAAE,MAAM,iBAA0B,iBAAiB,EAAE;AAAA,EAC5D,UAAU;AAAA,EAEnB,MAAM,QAAQ,OAAyB,KAAgD;AACrF,UAAM,aAAa,IAAI,MAAM,eAAc,oBAAI,KAAK,GAAE,YAAY;AAClE,QAAI;AAAA,MACF,gBAAgB,MAAM,QAAQ,MAAM,gBAAgB,MAAM,mBAAmB;AAAA,IAC/E;AACA,WAAO,uBAAuB,OAAO,UAAU;AAAA,EACjD;AACF;AAEO,IAAM,sBAAsB,IAAI,kBAAkB;;;ACtXlD,IAAM,4BAA6C;AAAA,EACxD,SAAS;AAAA,EACT,cAAc;AAAA,EACd,kBAAkB;AAAA,EAClB,cAAc;AAAA,EACd,gBAAgB;AAAA,EAChB,cAAc;AAAA,EACd,aAAa;AAAA,EACb,WAAW;AAAA,EACX,kBAAkB;AAAA,EAClB,SAAS;AAAA,EACT,aAAa;AACf;AAEO,SAAS,kBAAkB,OAAiB,UAAoC,CAAC,GAAW;AACjG,QAAM,IAAI,EAAE,GAAG,2BAA2B,GAAG,QAAQ;AACrD,SACE,EAAE,UAAU,QAAQ,MAAM,OAAO,IACjC,EAAE,eAAe,QAAQ,MAAM,YAAY,IAC3C,EAAE,mBAAmB,QAAQ,MAAM,gBAAgB,IACnD,EAAE,eAAe,QAAQ,MAAM,YAAY,IAC3C,EAAE,iBAAiB,QAAQ,MAAM,cAAc,IAC/C,EAAE,eAAe,QAAQ,MAAM,YAAY,IAC3C,EAAE,cAAc,QAAQ,MAAM,WAAW,IACzC,EAAE,YAAY,QAAQ,MAAM,SAAS,IACrC,EAAE,mBAAmB,QAAQ,MAAM,gBAAgB,IACnD,EAAE,UAAU,KAAK,IAAI,GAAG,aAAa,MAAM,OAAO,CAAC,IACnD,EAAE,cAAc,KAAK,IAAI,GAAG,aAAa,MAAM,WAAW,IAAI,EAAE;AAEpE;AAEO,SAAS,QAAQ,OAAuB;AAC7C,MAAI,CAAC,OAAO,SAAS,KAAK,EAAG,QAAO;AACpC,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;AAEA,SAAS,aAAa,OAAuB;AAC3C,SAAO,O
AAO,SAAS,KAAK,IAAI,QAAQ;AAC1C;;;AClDA,IAAM,yBAAyB;AAAA,EAC7B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,IAAM,YAAN,MAAgB;AAAA,EACJ;AAAA,EACA;AAAA,EAEjB,YAAY,UAA4B,CAAC,GAAG;AAC1C,SAAK,UAAU,QAAQ;AACvB,SAAK,gBAAgB,QAAQ,iBAAiB;AAAA,EAChD;AAAA,EAEA,MAAM,MAAM,OAAmB,OAAkC;AAC/D,UAAM,MAAM,MAAM,MAAM,OAAO,KAAK;AACpC,QAAI,CAAC,IAAK,OAAM,IAAI,cAAc,OAAO,KAAK,YAAY;AAC1D,UAAM,CAAC,OAAO,QAAQ,WAAW,MAAM,IAAI,MAAM,QAAQ,IAAI;AAAA,MAC3D,MAAM,MAAM,EAAE,MAAM,CAAC;AAAA,MACrB,MAAM,OAAO,EAAE,MAAM,CAAC;AAAA,MACtB,MAAM,UAAU,KAAK;AAAA,MACrB,MAAM,OAAO,KAAK;AAAA,IACpB,CAAC;AACD,WAAO,KAAK,WAAW,EAAE,KAAK,OAAO,QAAQ,WAAW,OAAO,CAAC;AAAA,EAClE;AAAA,EAEA,WAAW,OAA2B;AACpC,UAAM,QAAkB,CAAC;AACzB,UAAM,WAAW,MAAM,MAAM;AAAA,MAC3B,CAAC,MAA2C,EAAE,SAAS;AAAA,IACzD;AACA,UAAM,YAAY,MAAM,MAAM;AAAA,MAC5B,CAAC,MAA4C,EAAE,SAAS;AAAA,IAC1D;AACA,UAAM,aAAa,MAAM,MAAM;AAAA,MAC7B,CAAC,MAA6C,EAAE,SAAS;AAAA,IAC3D;AACA,UAAM,eAAe,MAAM,MAAM;AAAA,MAC/B,CAAC,MAA+C,EAAE,SAAS;AAAA,IAC7D;AACA,UAAM,iBAAiB,WAAW;AAAA,MAChC,CAAC,SAAS,KAAK,cAAc,gBAAgB,KAAK,YAAY,cAAc;AAAA,IAC9E;AAEA,UAAM,UACJ,MAAM,IAAI,SAAS,SAAS,OAAO,IAAI,MAAM,IAAI,WAAW,cAAc,MAAM;AAClF,QAAI,CAAC,QAAS,OAAM,KAAK,qCAAqC;AAE9D,UAAM,eAAe,WAAW,SAC5B,WAAW,OAAO,CAAC,KAAK,SAAS,MAAM,oBAAoB,KAAK,KAAK,GAAG,CAAC,IACzE,WAAW,SACX;AACJ,UAAM,eACJ,OAAO,MAAM,IAAI,SAAS,UAAU,WAChC;AAAA,MACE,MAAM,IAAI,QAAQ,QAAQ,IAAI,MAAM,IAAI,QAAQ,QAAQ,MAAM,MAAM,IAAI,QAAQ;AAAA,IAClF,IACA;AACN,UAAM,eAAe,gBAAgB,gBAAgB;AAErD,UAAM,kBAAkB,UAAU,OAAO,CAAC,SAAS,KAAK,WAAW,OAAO,EAAE;AAC5E,UAAM,iBAAiB,UAAU,WAAW,IAAI,IAAI,kBAAkB,UAAU;AAChF,QAAI,UAAU,WAAW,EAAG,OAAM,KAAK,wBAAwB;AAE/D,UAAM,gBACJ,MAAM,UAAU,SAChB,UAAU,OAAO,CAAC,SAAS,0BAA0B,KAAK,KAAK,QAAQ,CAAC,EAAE;AAC5E,UAAM,eAAe,gBAAgB,IAAI,QAAQ,gBAAgB,CAAC,IAAI;AACtE,QAAI,CAAC,aAAc,OAAM,KAAK,uCAAuC;AAErE,UAAM,eAAe,aAAa;AAAA,MAChC,CAAC,SAAS,OAAO,KAAK,eAAe,YAAY,KAAK,aAAa;AAAA,IACrE;AACA,UAAM,cAAc,aAAa,SAC7B,aAAa;AAAA,MACX,CAAC,KAAK,SAAS,OAAO,KAAK,eAAe,KAAK,KAAK,IAAI,GAAG,KAAK,cAAc,CAAC;AAAA,MAC/E;AAAA,IACF,IAAI,aAAa,SACjB,UAAU;AAAA,MAAK,CAAC,SACZ,yCAAyC,KAAK,KAAK,
UAAU,KAAK,IAAI,CAAC;AAAA,IACzE,IACA,MACA;AACN,QAAI,CAAC,YAAa,OAAM,KAAK,sCAAsC;AAEnE,UAAM,eAAe,WAAW,OAAO,CAAC,SAAS,gBAAgB,IAAI,CAAC;AACtE,UAAM,oBAAoB,eAAe,OAAO,CAAC,SAAS,gBAAgB,IAAI,CAAC;AAC/E,UAAM,YAAY,eAAe,SAAU,kBAAkB,SAAS,IAAI,IAAK;AAC/E,QAAI,kBAAkB;AACpB,YAAM,KAAK,yBAAyB,kBAAkB,MAAM,cAAc;AAAA,aACnE,CAAC,eAAe,OAAQ,OAAM,KAAK,iCAAiC;AAE7E,UAAM,mBAAmB,WAAW,SAAS,aAAa,SAAS,WAAW,SAAS;AACvF,QAAI,iBAAkB,OAAM,KAAK,YAAY,aAAa,MAAM,8BAA8B;AAE9F,UAAM,2BACJ,gBACA,aAAa,SACb,SAAS,OAAO,CAAC,SAAS,kBAAkB,KAAK,UAAU,EAAE,CAAC,EAAE;AAClE,UAAM,eACJ,SAAS,OAAO,CAAC,SAAS,KAAK,QAAQ,KAAK,UAAU,EAAE,CAAC,EAAE,SAC3D,MAAM,OAAO,OAAO,CAAC,UAAU,KAAK,QAAQ,KAAK,UAAU,MAAM,OAAO,CAAC,CAAC,EAAE;AAC9E,UAAM,mBACJ,2BAA2B,iBAAiB,IACxC,IACA,4BAA4B,2BAA2B;AAC7D,UAAM,eACJ,2BAA2B,iBAAiB,IACxC,IACA,gBAAgB,2BAA2B;AACjD,QAAI,eAAe,EAAG,OAAM,KAAK,YAAY,YAAY,kBAAkB;AAE3E,UAAM,UAAU,MAAM,OAAO,SACzB,KAAK;AAAA,MACH,GAAG,MAAM,OACN,OAAO,CAAC,UAA6B,MAAM,cAAc,KAAK,EAC9D,IAAI,CAAC,UAA6B,MAAM,QAAQ;AAAA,MACnD;AAAA,IACF,IACA,SAAS,OAAO,CAAC,KAAK,SAAS,OAAO,KAAK,WAAW,IAAI,CAAC;AAC/D,UAAM,cACJ,MAAM,IAAI,WAAW,MAAM,IAAI,YAC3B,KAAK,IAAI,IAAI,MAAM,IAAI,UAAU,MAAM,IAAI,aAAa,GAAI,IAC5D;AAEN,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,KAAK,OAAyB;AAC5B,WAAO,kBAAkB,OAAO,KAAK,OAAO;AAAA,EAC9C;AAAA,EAEQ,QAAQ,MAAuB;AACrC,WAAO,KAAK,cAAc,KAAK,CAAC,YAAY,QAAQ,KAAK,IAAI,CAAC;AAAA,EAChE;AACF;AAEA,SAAS,oBAAoB,OAAuB;AAClD,SAAO,QAAQ,IAAI,QAAQ,QAAQ,EAAE,IAAI,QAAQ,KAAK;AACxD;AAEA,SAAS,kBAAkB,MAAuB;AAChD,SAAO,qGAAqG;AAAA,IAC1G;AAAA,EACF;AACF;AAEA,SAAS,gBAAgB,MAAiD;AACxE,SACE,KAAK,YAAY,aAAa,QAC9B,KAAK,YAAY,YAAY,cAC7B,eAAe,KAAK,YAAY,gBAAgB,KAChD,eAAe,KAAK,YAAY,YAAY,KAC5C,KAAK,SAAS;AAElB;AAEA,SAAS,eAAe,OAAyB;AAC/C,SAAO,OAAO,UAAU,YAAY,QAAQ;AAC9C;;;ACvFO,IAAM,6BAAgE;AAAA,EAC3E,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,SAAS;AACX;AA2BO,IAAM,iCAAiC;AAE9C,IAAM,qBAAqB;AAC3B,IAAM,mBAAmB;AACzB,IAAM,uBAAuB;AAC7B,IAAM,kBAAkB;AACxB,IAAM,gBAAgB;AAEtB,IAAM,kBAAkB;AAAA,EACtB,MAAM;
AAAA,EACN,sBAAsB;AAAA,EACtB,UAAU,CAAC,WAAW,UAAU;AAAA,EAChC,YAAY;AAAA,IACV,SAAS,EAAE,MAAM,UAAU,WAAW,IAAI,WAAW,IAAI;AAAA,IACzD,UAAU;AAAA,MACR,MAAM;AAAA,MACN,UAAU;AAAA,MACV,OAAO;AAAA,QACL,MAAM;AAAA,QACN,sBAAsB;AAAA,QACtB,UAAU,CAAC,WAAW,WAAW,SAAS,YAAY,UAAU;AAAA,QAChE,YAAY;AAAA,UACV,SAAS,EAAE,MAAM,UAAU,WAAW,GAAG,WAAW,IAAI;AAAA,UACxD,SAAS,EAAE,MAAM,UAAU;AAAA,UAC3B,OAAO,EAAE,MAAM,UAAU,SAAS,GAAG,SAAS,GAAG;AAAA,UACjD,UAAU,EAAE,MAAM,UAAU,WAAW,GAAG,WAAW,IAAI;AAAA,UACzD,UAAU,EAAE,MAAM,UAAU,MAAM,CAAC,YAAY,SAAS,SAAS,MAAM,EAAE;AAAA,QAC3E;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,SAAS,MAAc,KAAa,OAAuB;AAClE,MAAI,KAAK,UAAU,IAAK,QAAO;AAC/B,SAAO,GAAG,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,oBAAkB,KAAK,SAAS,GAAG,aAAa,KAAK;AACnF;AAEA,SAAS,YACP,OACA,MACQ;AACR,QAAM,aAAa,MAAM,YACtB,OAAO,CAAC,MAAM,EAAE,QAAQ,UAAU,KAAK,eAAe,EACtD,IAAI,CAAC,MAAM,aAAa,EAAE,IAAI;AAAA,EAAS,EAAE,OAAO,EAAE,EAClD,KAAK,MAAM;AAEd,QAAM,OAAO,MAAM,cAAc;AAEjC,SAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUP,MAAM,WAAW;AAAA;AAAA,EAEjB,MAAM,gBAAgB;AAAA,UAA+B,MAAM,aAAa;AAAA,iBAAoB,MAAM,uBAAuB,EAAE;AAAA;AAAA,IAAS,EAAE;AAAA,EACtI,MAAM,iBACL;AAAA,IACC,CAAC,GAAG,MACF,KAAK,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,EAAE,UAAU,SAAS,mBAAc,EAAE,SAAS,MAAM,GAAG,CAAC,EAAE,KAAK,KAAK,CAAC,MAAM,EAAE;AAAA,EAC3G,EACC,KAAK,IAAI,CAAC;AAAA;AAAA,EAEX,OAAO;AAAA,EAAqD,SAAS,MAAM,KAAK,cAAc,MAAM,CAAC;AAAA;AAAA,IAAS,EAAE;AAAA,EAChH,SAAS,YAAY,KAAK,gBAAgB,QAAQ,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBrD;AASA,eAAsB,wBACpB,OACA,UAAuC,CAAC,GACH;AACrC,QAAM,QAAQ,KAAK,IAAI;AACvB,QAAM,aAAa,MAAM,iBAAiB;AAE1C,MAAI,eAAe,GAAG;AACpB,WAAO;AAAA,MACL,MAAM;AAAA,MACN,SAAS;AAAA,MACT,OAAO;AAAA,MACP,cAAc;AAAA,MACd,YAAY;AAAA,MACZ,UAAU,CAAC;AAAA,MACX,SAAS;AAAA,MACT,YAAY;AAAA,MACZ,SAAS;AAAA,MACT,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,OAA8C;AAAA,IAClD,OAAO,QAAQ,SAAS;AAAA,IACxB,WAAW,QAAQ,aAAa;AAAA,IAChC,gBAAgB,QAAQ,kBAAkB;AAAA,IAC1C,iBAAiB,QAAQ,mBAAmB;AAAA,IAC5C,cAAc,QAAQ,gBAAgB;AAAA,IACtC,KAAK,QAAQ,OAAO,CAAC;AAAA,IACrB,gBAAg
B,QAAQ,kBAAkB;AAAA,IAC1C,mBAAmB,EAAE,GAAG,4BAA4B,GAAI,QAAQ,qBAAqB,CAAC,EAAG;AAAA,EAC3F;AAMA,QAAM,mBAAmB,CAAC,SAA8B;AACtD,QAAI,KAAK,mBAAmB,OAAQ,QAAO;AAC3C,QAAI,KAAK,UAAU,KAAM,QAAO,KAAK;AACrC,QAAI,KAAK,mBAAmB,cAAc;AACxC,aAAO,KAAK,kBAAkB,KAAK,cAAc,QAAQ,KAAK;AAAA,IAChE;AACA,WAAO;AAAA,EACT;AACA,QAAM,eAAe,IAAI;AAAA,IACvB,MAAM,iBAAiB,IAAI,CAAC,MAAM,CAAC,EAAE,MAAM,iBAAiB,CAAC,CAAC,CAAC;AAAA,EACjE;AAEA,MAAI;AACF,UAAM,EAAE,OAAO,OAAO,IAAI,MAAM;AAAA,MAI9B;AAAA,QACE,OAAO,KAAK;AAAA,QACZ,UAAU;AAAA,UACR;AAAA,YACE,MAAM;AAAA,YACN,SACE;AAAA,UACJ;AAAA,UACA,EAAE,MAAM,QAAQ,SAAS,YAAY,OAAO,IAAI,EAAE;AAAA,QACpD;AAAA,QACA,YAAY,EAAE,MAAM,0BAA0B,QAAQ,gBAAgB;AAAA,QACtE,aAAa;AAAA,QACb,WAAW,KAAK;AAAA,MAClB;AAAA,MACA,KAAK;AAAA,IACP;AAEA,QAAI,CAAC,OAAO,YAAY,CAAC,MAAM,QAAQ,MAAM,QAAQ,GAAG;AACtD,YAAM,IAAI,MAAM,0EAAqE;AAAA,IACvF;AAEA,UAAM,WAA6B,MAAM,SAAS,IAAI,CAAC,OAAO;AAAA,MAC5D,SAAS,OAAO,EAAE,OAAO;AAAA,MACzB,SAAS,QAAQ,EAAE,OAAO;AAAA,MAC1B,OAAO,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;AAAA,MACrD,UAAU,OAAO,EAAE,YAAY,EAAE;AAAA,MACjC,UAAW,CAAC,YAAY,SAAS,SAAS,MAAM,EAAY,SAAS,EAAE,QAAQ,IAC3E,EAAE,WACF;AAAA,IACN,EAAE;AAEF,UAAM,eAAe,SAAS,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS,CAAC,EAAE;AACvE,QAAI,YAAY;AAChB,QAAI,mBAAmB;AACvB,eAAW,KAAK,UAAU;AACxB,YAAM,IAAI,aAAa,IAAI,EAAE,OAAO,KAAK;AACzC,mBAAa;AACb,0BAAoB,IAAI,EAAE;AAAA,IAC5B;AACA,UAAM,WACJ,YAAY,IACR,mBAAmB,YACnB,SAAS,OAAO,CAAC,GAAG,MAAM,IAAI,EAAE,OAAO,CAAC,IAAI,KAAK,IAAI,GAAG,SAAS,MAAM;AAE7E,WAAO;AAAA,MACL,MAAM;AAAA,MACN,SAAS;AAAA,MACT,OAAO,QAAQ,WAAW,IAAI,QAAQ,CAAC,CAAC;AAAA,MACxC;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS,OAAO,MAAM,WAAW,EAAE;AAAA,MACnC,YAAY,KAAK,IAAI,IAAI;AAAA,MACzB,SAAS,OAAO,WAAW;AAAA,MAC3B,WAAW;AAAA,IACb;AAAA,EACF,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,MAAM;AAAA,MACN,SAAS;AAAA,MACT,OAAO;AAAA,MACP,cAAc;AAAA,MACd;AAAA,MACA,UAAU,CAAC;AAAA,MACX,SAAS;AAAA,MACT,YAAY,KAAK,IAAI,IAAI;AAAA,MACzB,SAAS;AAAA,MACT,WAAW;AAAA,MACX,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IACxD;AAAA,EACF;AACF;AAMO,SAAS,2BACd,UAAuC,CAAC,GACmC;AAC3E,SAAO,CAAC,UAAU,wBAAwB,OAAO,OA
AO;AAC1D;","names":["existsSync","existsSync","existsSync","readFileSync","existsSync","readFileSync","ANALYST_ID"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/analyst/types.ts","../src/analyst/finding-subject.ts","../src/analyst/parse-tolerant.ts","../src/analyst/finding-signature.ts","../src/analyst/structure-findings.ts","../src/analyst/kind-factory.ts","../src/analyst/tool-groups.ts","../src/analyst/kinds/failure-mode.ts","../src/analyst/kinds/improvement.ts","../src/analyst/kinds/knowledge-gap.ts","../src/analyst/kinds/knowledge-poisoning.ts","../src/analyst/kinds/index.ts","../src/analyst/registry.ts"],"sourcesContent":["/**\n * Analyst contract — the missing orchestration layer over agent-eval's\n * existing analyzers (analyzeTraces, MultiLayerVerifier, RunCritic,\n * SemanticConceptJudge, JudgeFn, ...).\n *\n * Each existing primitive returns its own output shape. The Analyst\n * contract is the single envelope every primitive lifts into, so a\n * registry can run N analysts against a run and a single renderer can\n * compose findings without knowing which analyzer produced them.\n *\n * The contract is intentionally domain-agnostic: nothing here knows\n * about code, voice, RAG, or any particular agent stack. Analysts\n * declare what INPUT KIND they need (a trace store, an artifact dir,\n * a RunRecord, a JudgeInput, or `custom`), and the registry routes\n * the matching input from `AnalystRunInputs`.\n */\n\nimport { createHash } from 'node:crypto'\nimport type { RunRecord } from '../run-record'\nimport type { TraceAnalysisStore } from '../trace-analyst/store'\nimport type { JudgeInput } from '../types'\nimport type { ChatClient } from './chat-client'\n\n/**\n * Unified envelope every analyst emits. Schema-versioned so renderers\n * and time-series diffs survive future field additions.\n */\nexport interface AnalystFinding {\n schema_version: '1.0.0'\n /**\n * Stable hash over identity-defining fields (analyst_id + canonical\n * claim + area + optional subject). 
Two findings from two runs that\n * \"are the same finding\" share this id — that's what `diffFindings`\n * uses to compute appeared/disappeared sets across runs.\n */\n finding_id: string\n analyst_id: string\n produced_at: string\n severity: AnalystSeverity\n /**\n * Coarse classification. Renderers group by this. Free-form so\n * domain-specific analysts can introduce categories without a\n * schema change ('agent-reasoning', 'verification', 'cost',\n * 'tool-use', 'safety', 'latency', 'data-quality', ...).\n */\n area: string\n claim: string\n rationale?: string\n evidence_refs: EvidenceRef[]\n recommended_action?: string\n validation_plan?: string\n /** 0..1 — the analyst's own confidence. Not calibrated across analysts. */\n confidence: number\n /**\n * Optional subject the finding is about — leaf id, agent id, request\n * id. Included in finding_id when present so per-subject findings\n * diff cleanly across runs.\n */\n subject?: string\n /** FIREWALL provenance (docs/learning-flywheel.md): true iff this finding was\n * lifted from a JUDGE verdict (an acceptance score), not OBSERVED from the\n * agent's behavior. A judge-derived finding must NEVER be admitted as a\n * steering input — that is the held-out judge leaking into the loop. Set at\n * the lift site (createJudgeAdapter); checked by `assertNoJudgeVerdict`.\n * Provenance, not evidence presence, is the correct discriminator: an\n * evidence-less trace-analyst observation legitimately steers, while a judge\n * verdict that happens to cite an artifact must not. */\n derived_from_judge?: boolean\n /** Analyst-private extras; renderers ignore unless they know the analyst. */\n metadata?: Record<string, unknown>\n}\n\nexport type AnalystSeverity = 'critical' | 'high' | 'medium' | 'low' | 'info'\n\nexport interface EvidenceRef {\n /**\n * Where the evidence lives. 
`span` and `event` refer to OTLP trace\n * elements; `artifact` to a file inside the run's artifact tree;\n * `finding` to another AnalystFinding (cross-analyst chaining);\n * `metric` to a named scalar reading the renderer knows how to read.\n */\n kind: 'span' | 'event' | 'artifact' | 'finding' | 'metric'\n uri: string\n excerpt?: string\n}\n\n// ── Analyst contract ─────────────────────────────────────────────────\n\n/**\n * The discriminator the registry uses to pass the right input.\n * `custom` is the escape hatch — analysts that need something else\n * (e.g. an embedding cache, a partner SDK handle) read it from\n * `AnalystRunInputs.custom[<analyst id>]`.\n */\nexport type AnalystInputKind =\n | 'trace-store'\n | 'artifact-dir'\n | 'run-record'\n | 'judge-input'\n | 'custom'\n\nexport interface AnalystCost {\n /** `deterministic` analysts MUST NOT call the LLM. */\n kind: 'deterministic' | 'llm'\n /** Optional declared upper bound; the registry can enforce a budget. */\n est_usd_per_run?: number\n /** Models the analyst expects to use (informational). */\n models?: string[]\n}\n\nexport interface AnalystRequirements {\n /** Min number of shots / samples the analyst needs to produce signal. */\n min_shots?: number\n /** Capabilities the runtime must supply (e.g. ['network', 'gpu']). */\n capabilities?: string[]\n}\n\n/**\n * What's passed to every analyst call. The registry resolves which\n * field the analyst's `inputKind` selects and asserts it's present.\n */\nexport interface AnalystRunInputs {\n traceStore?: TraceAnalysisStore\n artifactDir?: string\n runRecord?: RunRecord\n judgeInput?: JudgeInput\n /** Keyed by analyst id; populated by callers that registered custom analysts. */\n custom?: Record<string, unknown>\n}\n\nexport interface AnalystContext {\n runId: string\n /** Stable correlation id so logs from a single registry.run() share a tag. */\n correlationId: string\n /** Wall-clock deadline (epoch ms). Analysts SHOULD honor for graceful cancel. 
*/\n deadlineMs?: number\n /** Per-analyst USD budget. Analysts MAY check before issuing LLM calls. */\n budgetUsd?: number\n /**\n * Shared chat client. Analysts that call an LLM go through this so\n * the operator picks transport (sandbox-sdk | router | cli-bridge |\n * direct-provider | mock) at the registry boundary without touching\n * analyst code.\n */\n chat?: ChatClient\n /**\n * Findings from a prior run the operator wants the analyst to see as\n * retrieval context. Kinds that take advantage of cross-run memory\n * (failure-mode \"I saw this cluster last run\", knowledge-gap \"the wiki\n * page I asked for is still missing\") render these into the actor's\n * working set. Filtering is the operator's job: pass the slice that\n * matches the analyst's id, or pass everything and let the kind\n * filter. Empty / absent means no cross-run context.\n */\n priorFindings?: ReadonlyArray<AnalystFinding>\n /** Free-form runtime tags (env, host, op). Findings can echo these into metadata. */\n tags?: Record<string, string>\n /** Logger callback — analysts SHOULD prefer this over console.* for testability. */\n log?: (msg: string, fields?: Record<string, unknown>) => void\n /** Optional abort signal. Analysts SHOULD pass it through to LLM calls. */\n signal?: AbortSignal\n}\n\n/**\n * The minimal contract. Concrete analysts can refine `TInput` so\n * implementations stay type-safe (e.g. a trace analyst's `TInput` is\n * `TraceAnalysisStore`); the registry passes the right field from\n * `AnalystRunInputs` based on `inputKind`.\n */\nexport interface Analyst<TInput = unknown> {\n /** Stable identifier — appears in finding_id, telemetry, and registry exclusion lists. */\n readonly id: string\n /** Human-readable. One sentence. */\n readonly description: string\n readonly inputKind: AnalystInputKind\n readonly cost: AnalystCost\n readonly requires?: AnalystRequirements\n /** Bump on breaking changes to claim wording or area so old finding_ids don't collide. 
*/\n readonly version: string\n analyze(input: TInput, ctx: AnalystContext): Promise<AnalystFinding[]>\n}\n\n// ── finding_id stability ─────────────────────────────────────────────\n\n/**\n * Compute the stable finding_id from the identity-defining fields.\n * Default implementation hashes {analyst_id, area, subject, normalized claim}.\n * Analysts that emit findings whose claim text varies per run (timestamps,\n * counts) SHOULD either: (a) pass an explicit `id_basis` to fix the hash,\n * or (b) move the variable part into `rationale`/`metadata` and keep the\n * `claim` static.\n */\nexport function computeFindingId(input: {\n analyst_id: string\n area: string\n subject?: string\n claim: string\n /** Override the claim for hashing — use when the displayed claim has run-specific bits. */\n id_basis?: string\n}): string {\n const basis = JSON.stringify({\n a: input.analyst_id,\n r: input.area,\n s: input.subject ?? '',\n c: normalizeClaim(input.id_basis ?? input.claim),\n })\n return `f_${createHash('sha256').update(basis).digest('hex').slice(0, 20)}`\n}\n\nfunction normalizeClaim(c: string): string {\n // Lowercase, collapse whitespace, strip trailing punctuation. Goal:\n // \"Leaf X failed install\" and \"Leaf X failed install.\" hash the same.\n return c\n .toLowerCase()\n .replace(/\\s+/g, ' ')\n .replace(/[.!?;:,]+$/g, '')\n .trim()\n}\n\n/**\n * Convenience factory: produce a fully-formed AnalystFinding with the\n * id computed automatically. Analyst code stays terse.\n */\nexport function makeFinding(\n init: Omit<AnalystFinding, 'schema_version' | 'finding_id' | 'produced_at'> & {\n id_basis?: string\n produced_at?: string\n },\n): AnalystFinding {\n const { id_basis, produced_at, ...rest } = init\n return {\n schema_version: '1.0.0',\n finding_id: computeFindingId({\n analyst_id: rest.analyst_id,\n area: rest.area,\n subject: rest.subject,\n claim: rest.claim,\n id_basis,\n }),\n produced_at: produced_at ?? 
new Date().toISOString(),\n ...rest,\n }\n}\n\n// ── Registry result envelope ────────────────────────────────────────\n\nexport interface AnalystRunSummary {\n analyst_id: string\n status: 'ok' | 'skipped' | 'failed'\n /** Why skipped — missing input, budget exceeded, capability unmet. */\n reason?: string\n findings_count: number\n latency_ms: number\n cost_usd: number\n /** When `status='failed'`: the error class + message, never the full stack. */\n error?: { class: string; message: string }\n}\n\nexport interface AnalystRunResult {\n run_id: string\n correlation_id: string\n started_at: string\n ended_at: string\n findings: AnalystFinding[]\n per_analyst: AnalystRunSummary[]\n /** Total LLM cost in USD across all analysts in this registry.run(). */\n total_cost_usd: number\n}\n\n// ── Streaming event envelope ────────────────────────────────────────\n\n/**\n * Events emitted by `AnalystRegistry.runStream(...)` in real time as\n * the registry executes. UIs subscribe via `for await (const ev of\n * registry.runStream(...))`; `registry.run(...)` is a thin collector\n * over the same stream, so the two surfaces share their invariants.\n *\n * Per-finding events are intentionally omitted — analyzers are batch\n * operations (an Ax actor returns the full `findings:json[]` at the\n * end of the responder), so streaming inside one analyst would only\n * emit partial JSON consumers can't render. The kind-completion event\n * is the right granularity; subscribers wanting per-finding rendering\n * iterate `event.findings` themselves.\n */\nexport type AnalystRunEvent =\n | {\n type: 'run-started'\n run_id: string\n correlation_id: string\n started_at: string\n /** The ordered list of analyst ids the registry will run. 
*/\n analyst_ids: ReadonlyArray<string>\n }\n | {\n type: 'analyst-skipped'\n summary: AnalystRunSummary\n }\n | {\n type: 'analyst-started'\n analyst_id: string\n started_at: string\n }\n | {\n type: 'analyst-completed'\n /** `summary.status` is `'ok'` for clean completion or `'failed'` for thrown analysts. */\n summary: AnalystRunSummary\n findings: ReadonlyArray<AnalystFinding>\n }\n | {\n type: 'run-completed'\n result: AnalystRunResult\n }\n","/**\n * Typed `FindingSubject` — the canonical grammar every analyst kind emits.\n *\n * Background: kind actor prompts have always documented a subject grammar\n * (e.g. `system-prompt:<section>`, `agent-knowledge:wiki:<slug>`) but the\n * LLM was unconstrained — it could emit `subject: \"fix the prompt\"`\n * (prose) and downstream adapters routed on `startsWith(...)` would\n * silently skip it. Every per-vertical `ImprovementAdapter` had a\n * routing table that mostly caught nothing.\n *\n * This module fixes that:\n * - `parseFindingSubject(raw)` — returns the typed `FindingSubject`\n * when `raw` matches the grammar, else `null`. Used at the\n * `RawAnalystFindingSchema` boundary so malformed subjects are\n * rejected loudly instead of silently lifted into the registry.\n * - `FindingSubjectKind` — the union of valid locus categories. Each\n * variant carries the typed components downstream adapters resolve\n * against the agent's surface manifest (no string parsing in the\n * adapter).\n * - `FINDING_SUBJECT_GRAMMAR_PROMPT` — single source of truth for the\n * grammar string embedded in kind actor prompts. Drift between\n * prompt and parser is impossible if every kind imports this.\n *\n * The grammar is intentionally NARROW — only loci the substrate's\n * default `ImprovementAdapter` / `KnowledgeAdapter` can act on. 
A\n * finding with a subject outside this set fails the parser; the kind\n * author either extends the grammar here (and adds adapter routing)\n * or rephrases the prompt to map onto an existing variant.\n *\n * `failure-mode` is the one exception — its subjects are free-form\n * cluster labels, not loci. The schema preserves them as\n * `{ kind: 'cluster', label }` and the adapters skip them (cluster\n * findings are evidence, not actionable mutations).\n */\n\nimport { z } from 'zod'\n\n// ── canonical grammar ─────────────────────────────────────────────────\n\n/**\n * Discriminated union of every locus the substrate can route findings to.\n *\n * Adapters narrow on `kind` and use the typed components (no string\n * parsing). Adding a variant here REQUIRES updating the parser, the\n * grammar prompt, and at least one adapter — by design.\n */\nexport type FindingSubject =\n // ── agent-knowledge:* — routed to the KnowledgeAdapter ──\n | { kind: 'knowledge.wiki'; slug: string; heading?: string }\n | { kind: 'knowledge.claim'; topic: string }\n | { kind: 'knowledge.raw'; sourceId: string }\n | { kind: 'knowledge.stale'; slug: string }\n // ── system-prompt / tool / new-tool / rag / memory / scaffolding / output-schema ──\n // routed to the ImprovementAdapter\n | { kind: 'system-prompt'; section: string }\n | { kind: 'tool-doc'; tool: string; aspect?: string }\n | { kind: 'new-tool'; name: string }\n | { kind: 'rag'; corpus: string; docId: string }\n | { kind: 'memory'; key: string }\n | { kind: 'scaffolding'; concern: string }\n | { kind: 'output-schema'; field: string }\n // ── websearch / prior-run-summary — routed to the KnowledgeAdapter as stale signals\n | { kind: 'websearch.outdated'; topic: string }\n | { kind: 'prior-run-summary'; topic: string }\n // ── failure-mode cluster label — preserved verbatim, not routed\n | { kind: 'cluster'; label: string }\n\nexport type FindingSubjectKind = FindingSubject['kind']\n\nexport const FINDING_SUBJECT_KINDS: 
ReadonlyArray<FindingSubjectKind> = [\n 'knowledge.wiki',\n 'knowledge.claim',\n 'knowledge.raw',\n 'knowledge.stale',\n 'system-prompt',\n 'tool-doc',\n 'new-tool',\n 'rag',\n 'memory',\n 'scaffolding',\n 'output-schema',\n 'websearch.outdated',\n 'prior-run-summary',\n 'cluster',\n]\n\n// ── parser ────────────────────────────────────────────────────────────\n\n/**\n * Parse a raw subject string emitted by an analyst kind's actor.\n *\n * Returns the typed `FindingSubject` when `raw` matches the grammar,\n * else `null`. Callers use the `null` return as a signal to either\n * (a) reject the finding at parse time (kinds that emit typed loci —\n * knowledge-gap, improvement, knowledge-poisoning) or (b) lift it as\n * a cluster label (failure-mode).\n *\n * Slugs are constrained to `[a-z0-9-]+` (lowercase kebab) to keep file\n * paths sane downstream. Topics / keys / sections allow any non-empty\n * string (free-form for the LLM's voice) but get trimmed.\n *\n * Empty / whitespace-only inputs return `null`. `undefined` returns\n * `null`. Both are surfaced by the caller as a rejected subject.\n */\nexport function parseFindingSubject(raw: string | null | undefined): FindingSubject | null {\n if (raw === null || raw === undefined) return null\n const trimmed = raw.trim()\n if (trimmed.length === 0) return null\n\n // agent-knowledge:wiki:<slug>[#<heading>]\n const wiki = trimmed.match(\n /^agent-knowledge:wiki:([a-z0-9][a-z0-9-]*)(?:#([a-z0-9][a-z0-9-]*))?$/,\n )\n if (wiki)\n return { kind: 'knowledge.wiki', slug: wiki[1]!, ...(wiki[2] ? 
{ heading: wiki[2] } : {}) }\n\n // agent-knowledge:claim:<topic>\n const claim = trimmed.match(/^agent-knowledge:claim:(.+)$/)\n if (claim && claim[1]!.trim().length > 0)\n return { kind: 'knowledge.claim', topic: claim[1]!.trim() }\n\n // agent-knowledge:raw:<source-id>\n const raw_ = trimmed.match(/^agent-knowledge:raw:(.+)$/)\n if (raw_ && raw_[1]!.trim().length > 0)\n return { kind: 'knowledge.raw', sourceId: raw_[1]!.trim() }\n\n // agent-knowledge:stale:<slug>\n const stale = trimmed.match(/^agent-knowledge:stale:([a-z0-9][a-z0-9-]*)$/)\n if (stale) return { kind: 'knowledge.stale', slug: stale[1]! }\n\n // system-prompt:<section>\n const sp = trimmed.match(/^system-prompt:(.+)$/)\n if (sp && sp[1]!.trim().length > 0) return { kind: 'system-prompt', section: sp[1]!.trim() }\n\n // tool-doc:<tool>[:<aspect>]\n const tdAspect = trimmed.match(/^tool-doc:([a-z0-9][a-z0-9_-]*):(.+)$/)\n if (tdAspect && tdAspect[2]!.trim().length > 0) {\n return { kind: 'tool-doc', tool: tdAspect[1]!, aspect: tdAspect[2]!.trim() }\n }\n const td = trimmed.match(/^tool-doc:([a-z0-9][a-z0-9_-]*)$/)\n if (td) return { kind: 'tool-doc', tool: td[1]! }\n\n // new-tool:<name>\n const nt = trimmed.match(/^new-tool:([a-z0-9][a-z0-9_-]*)$/)\n if (nt) return { kind: 'new-tool', name: nt[1]! 
}\n\n // rag:<corpus>:<doc-id>\n const rag = trimmed.match(/^rag:([a-z0-9][a-z0-9_-]*):(.+)$/)\n if (rag && rag[2]!.trim().length > 0) {\n return { kind: 'rag', corpus: rag[1]!, docId: rag[2]!.trim() }\n }\n\n // memory:<key>\n const mem = trimmed.match(/^memory:(.+)$/)\n if (mem && mem[1]!.trim().length > 0) return { kind: 'memory', key: mem[1]!.trim() }\n\n // scaffolding:<concern>\n const sc = trimmed.match(/^scaffolding:(.+)$/)\n if (sc && sc[1]!.trim().length > 0) return { kind: 'scaffolding', concern: sc[1]!.trim() }\n\n // output-schema:<field>\n const os = trimmed.match(/^output-schema:(.+)$/)\n if (os && os[1]!.trim().length > 0) return { kind: 'output-schema', field: os[1]!.trim() }\n\n // websearch:outdated:<topic>\n const ws = trimmed.match(/^websearch:outdated:(.+)$/)\n if (ws && ws[1]!.trim().length > 0) return { kind: 'websearch.outdated', topic: ws[1]!.trim() }\n\n // prior-run-summary:<topic>\n const prs = trimmed.match(/^prior-run-summary:(.+)$/)\n if (prs && prs[1]!.trim().length > 0) return { kind: 'prior-run-summary', topic: prs[1]!.trim() }\n\n // cluster (no prefix — a free-form evidence label, never a routed locus, so\n // it admits dotted/underscored identifiers like `appworld.task.530b157_1`.\n // ':' stays excluded so it cannot collide with the prefixed grammars above.)\n if (/^[a-z0-9][a-z0-9._-]*$/.test(trimmed) && trimmed.length <= 80) {\n return { kind: 'cluster', label: trimmed }\n }\n\n return null\n}\n\n/**\n * Render the parsed subject back to its canonical string form. Inverse\n * of `parseFindingSubject`; useful when the substrate constructs new\n * findings programmatically (e.g. for tests, replays, or\n * `id_basis` carry-forward).\n */\nexport function renderFindingSubject(s: FindingSubject): string {\n switch (s.kind) {\n case 'knowledge.wiki':\n return s.heading\n ? 
`agent-knowledge:wiki:${s.slug}#${s.heading}`\n : `agent-knowledge:wiki:${s.slug}`\n case 'knowledge.claim':\n return `agent-knowledge:claim:${s.topic}`\n case 'knowledge.raw':\n return `agent-knowledge:raw:${s.sourceId}`\n case 'knowledge.stale':\n return `agent-knowledge:stale:${s.slug}`\n case 'system-prompt':\n return `system-prompt:${s.section}`\n case 'tool-doc':\n return s.aspect ? `tool-doc:${s.tool}:${s.aspect}` : `tool-doc:${s.tool}`\n case 'new-tool':\n return `new-tool:${s.name}`\n case 'rag':\n return `rag:${s.corpus}:${s.docId}`\n case 'memory':\n return `memory:${s.key}`\n case 'scaffolding':\n return `scaffolding:${s.concern}`\n case 'output-schema':\n return `output-schema:${s.field}`\n case 'websearch.outdated':\n return `websearch:outdated:${s.topic}`\n case 'prior-run-summary':\n return `prior-run-summary:${s.topic}`\n case 'cluster':\n return s.label\n }\n}\n\n// ── grammar prompt — single source of truth for actor instructions ──\n\n/**\n * The grammar text embedded into kind actor prompts. Kinds opt into\n * the subset of variants they emit (e.g. 
`improvement` excludes the\n * cluster variant; `failure-mode` includes ONLY the cluster variant).\n *\n * Drift between prompt and parser is impossible: every kind imports\n * this constant + the matching `expects` set, and the unit tests below\n * lock the table to the parser.\n */\nexport const FINDING_SUBJECT_GRAMMAR_PROMPT = [\n 'Subjects MUST match this grammar — anything else is rejected at parse time and your work is wasted:',\n '',\n ' Knowledge loci (write to the agent-knowledge base):',\n ' agent-knowledge:wiki:<slug>[#<heading>] create / update a wiki page',\n ' agent-knowledge:claim:<topic> draft a claim / relation triple',\n ' agent-knowledge:raw:<source-id> lift a raw source into a curated page',\n ' agent-knowledge:stale:<slug> mark a page superseded',\n '',\n ' Runtime mutable surfaces (write to prompts / tools / scaffolding):',\n ' system-prompt:<section> add / replace a system-prompt section',\n ' tool-doc:<tool>[:<aspect>] rewrite a tool description',\n ' new-tool:<name> propose a new tool surface',\n ' rag:<corpus>:<doc-id> ingest / correct a RAG document',\n ' memory:<key> invalidate / set a memory entry',\n ' scaffolding:<concern> change a precondition / retry / verifier',\n ' output-schema:<field> constrain the agent output shape',\n '',\n ' Stale signals (knowledge-poisoning only):',\n ' websearch:outdated:<topic> stale web result',\n ' prior-run-summary:<topic> stale prior-run summary',\n '',\n ' Cluster label (failure-mode only):',\n ' <kebab-case-label> short cluster id, e.g. \"tool-call-loop\"',\n '',\n 'Slugs / tool ids: [a-z0-9-]+ (lowercase kebab). Topics / keys / sections: free-form, trimmed.',\n].join('\\n')\n\n// ── kind expects sets ─────────────────────────────────────────────────\n\n/**\n * The variants each kind is allowed to emit. 
Used at the kind factory\n * boundary so a knowledge-gap finding can't sneak in a `system-prompt:*`\n * subject (the improvement-analyst's job) and vice versa.\n *\n * `failure-mode` is restricted to `cluster` — the only kind that emits\n * a non-locus subject.\n */\nexport const KIND_EXPECTED_SUBJECTS: Record<string, ReadonlyArray<FindingSubjectKind>> = {\n 'failure-mode': ['cluster'],\n 'knowledge-gap': [\n 'knowledge.wiki',\n 'knowledge.claim',\n 'knowledge.raw',\n 'knowledge.stale',\n 'tool-doc',\n 'system-prompt',\n 'memory',\n 'websearch.outdated',\n 'prior-run-summary',\n ],\n 'knowledge-poisoning': [\n 'knowledge.wiki',\n 'knowledge.claim',\n 'knowledge.raw',\n 'tool-doc',\n 'system-prompt',\n 'memory',\n 'websearch.outdated',\n 'prior-run-summary',\n ],\n improvement: [\n 'system-prompt',\n 'tool-doc',\n 'new-tool',\n 'rag',\n 'memory',\n 'scaffolding',\n 'output-schema',\n 'knowledge.wiki',\n 'knowledge.claim',\n ],\n}\n\n// ── Zod schema for boundary validation ───────────────────────────────\n\n/**\n * Zod schema that validates a raw subject string and returns the parsed\n * `FindingSubject`. Embedded in `RawAnalystFindingSchema` via\n * `transform`, so `subject` arrives at the kind factory either as a\n * typed locus or as a parse error attached to a single Zod issue.\n *\n * Optionality is preserved: subjects ARE optional on the wire (some\n * findings are descriptive, not actionable). When present, they MUST\n * parse — emitting a malformed subject is a contract violation, not a\n * soft signal.\n */\nexport const FindingSubjectStringSchema = z\n .string()\n .refine((s) => parseFindingSubject(s) !== null, {\n message: 'subject does not match the finding-subject grammar',\n })\n","/**\n * Forgiving pre-parse for analyst findings. Weak models routinely emit\n * schema-correct content in an unusable wrapper — fenced ```json blocks, a\n * single object where an array is expected, trailing commas. 
Measured: GPT-4o\n * drops to 0% usable output purely from markdown-fence wrapping\n * (arXiv:2605.02363). A five-line de-fence recovers most of it. This module is\n * the de-fence/coerce step that runs BEFORE Zod, so a recoverable finding is\n * repaired, not dropped.\n *\n * Pure + deterministic. No model, no network.\n */\n\n/** Strip a ```lang ... ``` (or bare ``` ... ```) code fence, if the string is one. */\nexport function stripCodeFences(text: string): string {\n const t = text.trim()\n const fence = /^```[a-zA-Z0-9]*\\s*\\n?([\\s\\S]*?)\\n?```$/\n const m = t.match(fence)\n return m ? m[1]!.trim() : t\n}\n\n/** Remove trailing commas before } or ] — the most common near-JSON defect. */\nfunction dropTrailingCommas(s: string): string {\n return s.replace(/,(\\s*[}\\]])/g, '$1')\n}\n\n/**\n * Best-effort parse of a string into JSON. De-fences, drops trailing commas,\n * then `JSON.parse`. Returns `undefined` (never throws) when unrecoverable.\n */\nexport function coerceJson(text: string): unknown {\n const candidate = dropTrailingCommas(stripCodeFences(text))\n try {\n return JSON.parse(candidate)\n } catch {\n return undefined\n }\n}\n\n/**\n * Coerce arbitrary actor/structurer output into an array of candidate finding\n * rows: a JSON string → parse; a single object → 1-element array; an array →\n * as-is; anything else → []. Callers still run each row through Zod\n * (`parseRawFinding`) — this only fixes the SHAPE, never invents fields.\n */\nexport function coerceToFindingRows(raw: unknown): unknown[] {\n let value = raw\n if (typeof value === 'string') {\n const parsed = coerceJson(value)\n if (parsed === undefined) return []\n value = parsed\n }\n if (Array.isArray(value)) return value\n if (value && typeof value === 'object') {\n // Some models wrap the array as { findings: [...] 
} — unwrap that one case.\n const inner = (value as Record<string, unknown>).findings\n if (Array.isArray(inner)) return inner\n return [value]\n }\n return []\n}\n","/**\n * Typed Ax output for analyst findings.\n *\n * Replaces the legacy `findings:string[]` pattern (where every bullet\n * became a flat-severity `AnalystFinding`) with a structured object\n * array. Ax binds the field as `findings:json[]` so the provider emits\n * native structured output; at the kind-factory boundary we Zod-validate\n * each emitted finding so malformed rows fail loud instead of being\n * silently lifted with default severity.\n *\n * Why not `f.object().array()` directly in the signature? The Ax\n * signature string `question:string -> findings:json[]` already lets\n * the provider emit JSON arrays. A Zod boundary is required either\n * way (the provider can return any JSON), and Zod gives us a single\n * validation surface independent of which Ax version is installed.\n */\n\nimport { z } from 'zod'\nimport { parseFindingSubject } from './finding-subject'\nimport { coerceJson } from './parse-tolerant'\n\nexport const ANALYST_SEVERITIES = ['critical', 'high', 'medium', 'low', 'info'] as const\n\nexport const RawAnalystFindingSchema = z\n .object({\n severity: z.enum(ANALYST_SEVERITIES),\n claim: z.string().min(1).max(2000),\n /**\n * Subject locus the finding is about. Validated at parse time\n * against the documented grammar (`finding-subject.ts`). 
Findings\n * with a malformed subject are rejected — they would have been\n * silently skipped by every downstream adapter, so failing loud at\n * parse time turns a hidden no-op into a kind-prompt audit signal.\n *\n * Optional because purely descriptive findings (no actionable\n * locus) are legitimate; they just don't route through the\n * KnowledgeAdapter / ImprovementAdapter.\n */\n subject: z\n .string()\n .max(400)\n .refine((s) => parseFindingSubject(s) !== null, {\n message: 'subject does not match the finding-subject grammar',\n })\n .optional(),\n evidence_uri: z.string().min(1).max(2000),\n evidence_excerpt: z.string().max(2000).optional(),\n confidence: z.number().min(0).max(1),\n rationale: z.string().max(4000).optional(),\n recommended_action: z.string().max(2000).optional(),\n })\n .strict()\n\nexport type RawAnalystFinding = z.infer<typeof RawAnalystFindingSchema>\n\n/**\n * Description embedded into the actor prompt so the LLM knows what\n * shape to emit. Kept here so kinds share one source of truth rather\n * than restating the schema in every prompt.\n */\nexport const RAW_FINDING_SCHEMA_PROMPT = `Each finding MUST be a JSON object with these fields:\n - severity: one of \"critical\" | \"high\" | \"medium\" | \"low\" | \"info\"\n - claim: one-sentence statement (max 2000 chars)\n - subject?: the routing locus this finding is about. It MUST be one of the exact subject forms listed in this kind's instructions above (e.g. \\`system-prompt:<section>\\`, \\`agent-knowledge:wiki:<slug>\\`, \\`tool-doc:<tool>\\`). A free phrase, a bare noun, or any form not in that list is REJECTED at parse time and the finding is discarded — omit subject entirely rather than guess a form.\n - evidence_uri: REQUIRED, never blank. Exactly one of \"span://<trace_id>/<span_id>\" (trace evidence), \"artifact://<relative-path>\" (files), \"metric://<name>\" (named scalars) — ALWAYS cite a real id surfaced by the tools. 
If you have no citable id, do not emit the finding.\n - evidence_excerpt?: short quote (<=2000 chars) from the cited span/artifact\n - confidence: number 0..1 — 0.9+ when backed by exact quotes, 0.6-0.8 for inferred patterns, <0.5 for speculative\n - rationale?: one or two sentences explaining the reasoning\n - recommended_action?: concrete change phrased as an imperative (\"Add ...\", \"Replace ...\", \"Stop ...\") — omit when the finding is purely descriptive\n\nEmit an empty array when the question has no findings to report. Do not fabricate evidence.`\n\n/**\n * Validate one row emitted by the LLM. Returns the typed finding on\n * success; returns `null` and logs the reason on failure so the kind\n * factory can skip-and-count rather than abort the whole analyst run.\n */\nexport function parseRawFinding(\n row: unknown,\n log?: (msg: string, fields?: Record<string, unknown>) => void,\n): RawAnalystFinding | null {\n const result = RawAnalystFindingSchema.safeParse(row)\n if (result.success) return result.data\n // A schema-correct finding in an unusable wrapper (a JSON string, a fenced\n // block) should be repaired, not dropped. Coerce the shape and retry ONCE.\n if (typeof row === 'string') {\n const coerced = coerceJson(row)\n if (coerced !== undefined) {\n const retry = RawAnalystFindingSchema.safeParse(coerced)\n if (retry.success) return retry.data\n }\n }\n log?.('finding rejected: schema failure', {\n issues: result.error.issues.map((i) => ({\n path: i.path.join('.'),\n code: i.code,\n message: i.message,\n })),\n })\n return null\n}\n","/**\n * `structureFindings` — the deferred structuring pass (DSPy TwoStepAdapter /\n * HALO `synthesize_traces` analog). The agentic actor reasons FREE-FORM and\n * emits a prose `report` (which any model does reliably); this separate, cheap\n * call's ONLY job is to turn that report into `AnalystFinding[]`. 
/**
 * `structureFindings` — the deferred structuring pass (DSPy TwoStepAdapter /
 * HALO `synthesize_traces` analog). The agentic actor reasons FREE-FORM and
 * emits a prose `report` (which any model does reliably); this separate, cheap
 * call's ONLY job is to turn that report into `AnalystFinding[]`. Decoupling
 * reasoning from structuring is what makes the SEMANTIC findings model-agnostic
 * — the reasoning model never has to satisfy a strict typed-array contract
 * while it diagnoses.
 *
 * Forgiving: the response runs through `coerceToFindingRows` (de-fence, lift
 * single→array) before Zod, and on a zero-finding extraction from a substantive
 * report it reasks (once by default) with the schema restated. Returns a typed
 * outcome so a legitimate "nothing to report" is distinguishable from a failed
 * extraction (no silent empty).
 */

import { callLlm, type LlmClientOptions } from '../llm-client'
import { parseRawFinding, type RawAnalystFinding } from './finding-signature'
import { coerceToFindingRows } from './parse-tolerant'
import { type AnalystFinding, makeFinding } from './types'

export interface StructureFindingsOptions {
  /** The actor's free-form diagnosis prose. */
  report: string
  analystId: string
  /** Coarse classification stamped on every extracted finding. */
  area: string
  model: string
  baseUrl: string
  apiKey?: string
  /** Max reask attempts after a zero/invalid extraction. Default 1. */
  maxReasks?: number
  /** Test seam: inject a fetch (no network in unit tests). */
  fetchImpl?: LlmClientOptions['fetch']
}

export interface StructureFindingsResult {
  findings: AnalystFinding[]
  outcome: 'ok' | 'extraction_failed'
}

/** Reports shorter than this (after trimming) are treated as "nothing to report". */
const SUBSTANTIVE_REPORT_CHARS = 200

const SYSTEM = [
  'You convert a free-form trace-analysis report into a STRICT JSON array of findings.',
  'Output ONLY the JSON array — no prose, no code fences.',
  'Each element: {"severity":"critical|high|medium|low|info","claim":string,"evidence_uri":string,',
  '"subject"?:string,"rationale"?:string,"recommended_action"?:string,"confidence":number(0..1)}.',
  'evidence_uri cites the trace element the report referenced (e.g. "span://<trace>/<span>") or "report://summary".',
  'If the report asserts NO problems, output exactly [].',
].join(' ')

/**
 * Map an evidence URI to its evidence-ref kind by scheme. Mirrors the mapping
 * the kind factory applies to harvested findings, so a recovered finding that
 * cites `metric://`, `event://` or `finding://` is labelled the same way on
 * both paths. Unknown schemes (including `report://`) are artifacts.
 */
function evidenceKindForUri(uri: string): 'span' | 'artifact' | 'metric' | 'event' | 'finding' {
  for (const kind of ['span', 'artifact', 'metric', 'event', 'finding'] as const) {
    if (uri.startsWith(`${kind}://`)) return kind
  }
  return 'artifact'
}

/**
 * Turn the raw LLM response into validated findings.
 *
 * @param raw - The model's response text (or already-parsed value).
 * @param analystId - Stamped on every finding as `analyst_id`.
 * @param area - Stamped on every finding as `area`.
 * @returns Only the rows that pass `parseRawFinding`; invalid rows are skipped.
 */
function buildRows(raw: unknown, analystId: string, area: string): AnalystFinding[] {
  const findings: AnalystFinding[] = []
  for (const row of coerceToFindingRows(raw)) {
    // Recovery findings are extracted from PROSE — the report itself is the
    // evidence. A weak model often returns a sound claim + severity but omits
    // `evidence_uri`; default it to the report rather than dropping the row
    // (the strict evidence_uri requirement is a recovery yield-killer).
    const isPlainObject = Boolean(row) && typeof row === 'object' && !Array.isArray(row)
    const normalized =
      isPlainObject && !(row as Record<string, unknown>).evidence_uri
        ? { ...(row as Record<string, unknown>), evidence_uri: 'report://summary' }
        : row
    const parsed: RawAnalystFinding | null = parseRawFinding(normalized)
    if (!parsed) continue
    findings.push(
      makeFinding({
        analyst_id: analystId,
        area,
        subject: parsed.subject,
        claim: parsed.claim,
        rationale: parsed.rationale,
        severity: parsed.severity,
        confidence: parsed.confidence,
        evidence_refs: [
          {
            kind: evidenceKindForUri(parsed.evidence_uri),
            uri: parsed.evidence_uri,
            excerpt: parsed.evidence_excerpt,
          },
        ],
        recommended_action: parsed.recommended_action,
      }),
    )
  }
  return findings
}

/**
 * Extract structured findings from a free-form analyst report.
 *
 * Always makes at least one LLM call. When a call yields no valid finding:
 * a short report (< 200 trimmed chars) is accepted as a legitimate empty
 * result, otherwise the request is re-sent with an explicit re-extraction
 * instruction, up to `maxReasks` more times.
 *
 * @param opts - Report text, finding stamps and LLM connection settings.
 * @returns `outcome: 'ok'` with the findings (possibly none for a short
 *   report), or `outcome: 'extraction_failed'` when every attempt on a
 *   substantive report produced nothing usable.
 */
export async function structureFindings(
  opts: StructureFindingsOptions,
): Promise<StructureFindingsResult> {
  // A negative or NaN budget must not skip the first attempt's bookkeeping:
  // clamp so the loop below always runs at least once.
  const requestedReasks = opts.maxReasks ?? 1
  const maxReasks = requestedReasks > 0 ? requestedReasks : 0
  const llm = { baseUrl: opts.baseUrl, apiKey: opts.apiKey, fetch: opts.fetchImpl }

  const firstPrompt = `TRACE-ANALYSIS REPORT:\n${opts.report}\n\nReturn the findings JSON array.`
  // Built once from the first prompt so repeated reasks do not stack the same
  // paragraph over and over.
  const reaskPrompt = `${firstPrompt}\n\nThat produced no valid findings. The report DOES describe issues — re-extract them as the strict JSON array described in the system prompt. Output ONLY the array.`
  const reportIsSubstantive = opts.report.trim().length >= SUBSTANTIVE_REPORT_CHARS

  for (let attempt = 0; attempt <= maxReasks; attempt++) {
    const res = await callLlm(
      {
        model: opts.model,
        messages: [
          { role: 'system', content: SYSTEM },
          { role: 'user', content: attempt === 0 ? firstPrompt : reaskPrompt },
        ],
      },
      llm,
    )
    const findings = buildRows(res.content.trim(), opts.analystId, opts.area)
    if (findings.length > 0) return { findings, outcome: 'ok' }
    // A report that asserts nothing is a legitimate empty — only reask when the
    // report is substantive (the extraction, not the diagnosis, likely failed).
    if (!reportIsSubstantive) return { findings: [], outcome: 'ok' }
  }
  return { findings: [], outcome: 'extraction_failed' }
}
/**
 * Analyst-kind factory — the typed, focused replacement for the
 * legacy `createTraceAnalystAdapter`.
 *
 * A "kind" is a specialized analyst whose actor prompt, tool subset,
 * and Ax recursion config target one failure-mode lens (failure-mode
 * classification, knowledge gap discovery, knowledge poisoning, recursive
 * self-improvement, ...). Kinds emit findings in the typed `RawAnalystFinding`
 * shape via a JSON-array Ax output; the factory validates each row with
 * Zod and lifts it into `AnalystFinding[]` with no shape guessing.
 *
 * Composition rules:
 * - Each kind owns its actor description. No generic "answer this
 *   question" prompt — the prompt names the failure lens.
 * - Each kind picks a narrow tool subset from `ANALYST_TOOL_GROUPS`.
 *   A kind that never needs full-trace dumps can drop `viewTrace` /
 *   `viewSpans` and stay cheap.
 * - Each kind declares its recursion + parallelism budget. Discovery-
 *   heavy kinds (failure-mode) get higher `maxDepth`; lens kinds
 *   (poisoning) usually stay at 0 since they have a tighter brief.
 *
 * Optimizer hook: kinds may declare `goldens` — labeled examples used
 * by `AxMiPRO` / `AxBootstrapFewShot` / `AxGEPA` to fit the actor
 * description programmatically. Stored on the kind, not the registry,
 * because the right metric is kind-specific.
 */

import type { AxAIService, AxFunction } from '@ax-llm/ax'
import { AxJSRuntime, agent } from '@ax-llm/ax'
import type { TraceAnalysisStore } from '../trace-analyst/store'
import { TraceFileMissingError } from '../trace-analyst/store-otlp'
import {
  parseRawFinding,
  RAW_FINDING_SCHEMA_PROMPT,
  type RawAnalystFinding,
} from './finding-signature'
import { KIND_EXPECTED_SUBJECTS, parseFindingSubject } from './finding-subject'
import { structureFindings } from './structure-findings'
import type { Analyst, AnalystContext, AnalystCost, AnalystFinding } from './types'
import { makeFinding } from './types'

/**
 * Per-kind specification. The factory turns this into a regular
 * `Analyst<TraceAnalysisStore>` ready for `AnalystRegistry.register()`.
 */
export interface TraceAnalystKindSpec {
  /**
   * Stable id. Appears in finding_id, telemetry, and registry exclusions.
   * The factory also uses it as the Ax agent name and as the lookup key into
   * `KIND_EXPECTED_SUBJECTS` for the subject-grammar check.
   */
  id: string
  /**
   * One-sentence description shown in `registry.list()`. The factory also
   * appends it to the question sent to the actor.
   */
  description: string
  /** Coarse classification stamped on every emitted finding (`failure-mode`, `knowledge-gap`, ...). */
  area: string
  /** Bump on any breaking change to the actor prompt or output schema. */
  version: string
  /**
   * Actor system prompt. Must instruct the LLM to emit `findings` per the schema.
   * The factory trims it and appends the prior-findings block, the raw-finding
   * schema prompt and the `{ report, findings }` instruction.
   */
  actorDescription: string
  /** Responder system prompt; falls back to a minimal "format the findings" instruction. */
  responderDescription?: string
  /** Tool functions the actor may call. Pick narrow subsets via `ANALYST_TOOL_GROUPS`. */
  buildTools: (store: TraceAnalysisStore) => AxFunction[]
  /**
   * Recursion budget. `maxDepth: 0` disables subagents. When omitted the
   * factory uses `maxDepth: 0` and `maxParallelSubagents: 2`.
   */
  recursion?: { maxDepth: number; maxParallelSubagents?: number }
  /** Actor turn cap. Default 12. */
  maxTurns?: number
  /** Runtime char cap. Default 6000. */
  maxRuntimeChars?: number
  /** Cost classification surfaced in `registry.list()` and budget enforcement. */
  cost: AnalystCost
  /** Per-finding-row hook — kinds may reject / rewrite before lifting. */
  postProcess?: (row: RawAnalystFinding, ctx: AnalystContext) => RawAnalystFinding | null
  /** Optional optimizer hook — populated when a kind wants to fit its prompt against labeled examples. */
  goldens?: TraceAnalystGolden[]
}

/**
 * One labeled example consumed by Ax optimizers (MIPRO / GEPA / Bootstrap).
 * Each input is the same `{question}` an analyst would receive; `expected`
 * is the ground-truth finding set a fitted prompt should produce on this
 * input. Metric: kind-specific (default: F1 on `finding_id` overlap).
 */
export interface TraceAnalystGolden {
  question: string
  /** Expected findings; `confidence` is omitted from the ground truth. */
  expected: ReadonlyArray<Omit<RawAnalystFinding, 'confidence'>>
}
export interface CreateTraceAnalystKindOpts {
  /** AxAIService bound at registration time. */
  ai: AxAIService
  /** Optional model override; falls back to the AI service's default. */
  model?: string
  /**
   * Appended to the spec's `version` as `<version>+<suffix>` (e.g. when an
   * optimizer has fitted a new prompt). Omit to publish the spec version as-is.
   */
  versionSuffix?: string
  /**
   * Optional two-phase recovery: when the agentic harvest is empty but the
   * actor produced a substantive free-form `report`, extract findings from that
   * prose via a tolerant chat-completions pass (`structureFindings`) — no
   * strict-emission contract, so it works on weak models. Omit to leave the
   * actor's harvest as-is (the report is still surfaced fail-loud either way).
   */
  recovery?: { baseUrl: string; apiKey?: string; model?: string; fetchImpl?: typeof fetch }
}

/**
 * Build an `Analyst<TraceAnalysisStore>` from a kind spec.
 *
 * Lifts the Ax pipeline once at registration time so the registry
 * gets a stateless analyst. The Ax agent is freshly constructed per
 * `analyze()` call (the agent carries chat-log + usage state we don't
 * want shared across analyst runs).
 *
 * Each `analyze()` call:
 *  1. builds the actor prompt (kind prompt + prior findings + schema prompt);
 *  2. runs the Ax agent and validates every emitted row (`parseRawFinding`,
 *     subject grammar, then the kind's `postProcess` hook);
 *  3. when nothing survived but the report is substantive (>= 200 chars),
 *     optionally recovers findings from the prose and otherwise emits a single
 *     `info` finding carrying the report, so an empty harvest is never silent.
 *
 * @param spec - The kind definition (prompt, tools, budgets, hooks).
 * @param opts - Registration-time bindings (AI service, model, recovery).
 * @returns An analyst whose `version` is `spec.version`, suffixed with
 *   `+<versionSuffix>` when one is supplied.
 */
export function createTraceAnalystKind(
  spec: TraceAnalystKindSpec,
  opts: CreateTraceAnalystKindOpts,
): Analyst<TraceAnalysisStore> {
  const version = opts.versionSuffix ? `${spec.version}+${opts.versionSuffix}` : spec.version
  return {
    id: spec.id,
    description: spec.description,
    inputKind: 'trace-store',
    cost: spec.cost,
    version,
    async analyze(store, ctx) {
      const tools = spec.buildTools(store)
      const maxDepth = spec.recursion?.maxDepth ?? 0
      const maxParallel = spec.recursion?.maxParallelSubagents ?? 2
      const priorContext = renderPriorFindings(ctx.priorFindings)

      const actorDescription =
        spec.actorDescription.trim() +
        priorContext +
        '\n\n' +
        RAW_FINDING_SCHEMA_PROMPT +
        '\n\nFirst write `report`: a concise free-form prose diagnosis of what ' +
        'the traces show — what succeeded, what was suboptimal or failed — with ' +
        'concrete trace ids and numbers. THEN return the structured `findings` ' +
        'array (it MAY be empty when there is nothing to report). Use `final(...)` ' +
        'with the `{ report, findings }` payload when you are done.'

      const ax = agent<{ question: string }, { report: string; findings: unknown[] }>(
        'question:string -> report:string, findings:json[]',
        {
          agentIdentity: {
            name: spec.id,
            description: spec.description,
          },
          contextFields: ['question'],
          runtime: new AxJSRuntime({
            permissions: [],
            blockDynamicImport: true,
            allowedModules: [],
            freezeIntrinsics: true,
            blockShadowRealm: true,
            preventGlobalThisExtensions: false,
          }),
          // Subagent recursion is only switched on when the kind asked for depth.
          mode: maxDepth > 0 ? 'advanced' : 'simple',
          recursionOptions: maxDepth > 0 ? { maxDepth } : undefined,
          maxTurns: spec.maxTurns ?? 12,
          maxRuntimeChars: spec.maxRuntimeChars ?? 6000,
          maxBatchedLlmQueryConcurrency: maxParallel,
          promptLevel: 'detailed',
          // Trace analysis depends on exact prior tool results and runtime variables.
          contextPolicy: { preset: 'full', budget: 'balanced' },
          functions: { local: tools },
          actorOptions: {
            description: actorDescription,
            ...(opts.model ? { model: opts.model } : {}),
            showThoughts: false,
            thinkingTokenBudget: 'none',
          },
          responderOptions: {
            description:
              spec.responderDescription ??
              "Pass through the actor's `report` prose verbatim, and format the `findings` array exactly as the actor produced it. Do not add, drop, or summarize entries.",
            ...(opts.model ? { model: opts.model } : {}),
            showThoughts: false,
          },
          bubbleErrors: [TraceFileMissingError],
        },
      )

      ctx.log?.(`analyst.kind ${spec.id} forward`, {
        max_depth: maxDepth,
        tool_count: tools.length,
        tags: ctx.tags,
      })

      const result = await ax.forward(opts.ai, { question: deriveQuestion(ctx, spec) })

      const expectedSubjects = KIND_EXPECTED_SUBJECTS[spec.id]
      const out: AnalystFinding[] = []
      const rawRows = Array.isArray(result.findings) ? result.findings : []
      let rejectedWrongKind = 0
      let rejectedByPostProcess = 0
      for (const row of rawRows) {
        const parsed = parseRawFinding(row, ctx.log)
        if (!parsed) continue
        // Subject-grammar check: if the kind has a declared expects-set
        // (every shipped kind does), the finding's subject MUST parse to
        // one of the declared variants. A wrong-kind subject is a
        // contract violation — the actor's prompt drifted from the
        // grammar — and we count it for prompt-audit visibility.
        if (expectedSubjects && parsed.subject !== undefined) {
          const parsedSubject = parseFindingSubject(parsed.subject)
          if (parsedSubject === null) {
            ctx.log?.('finding rejected: subject failed to parse', {
              kind: spec.id,
              subject: parsed.subject,
            })
            rejectedWrongKind += 1
            continue
          }
          if (!expectedSubjects.includes(parsedSubject.kind)) {
            ctx.log?.('finding rejected: subject variant not allowed for this kind', {
              kind: spec.id,
              subject_kind: parsedSubject.kind,
              subject: parsed.subject,
              allowed: expectedSubjects,
            })
            rejectedWrongKind += 1
            continue
          }
        }
        // `postProcess` returns `null` to REJECT a row. Do not combine the call
        // with `?? parsed`: nullish coalescing would turn that deliberate null
        // back into the unprocessed row and make rejection impossible.
        const postProcessed = spec.postProcess ? spec.postProcess(parsed, ctx) : parsed
        if (!postProcessed) {
          rejectedByPostProcess += 1
          continue
        }
        out.push(toAnalystFinding(spec, postProcessed))
      }

      ctx.log?.(`analyst.kind ${spec.id} done`, {
        emitted: rawRows.length,
        accepted: out.length,
        rejected_wrong_subject: rejectedWrongKind,
        rejected_post_process: rejectedByPostProcess,
      })

      // Two-phase recovery / fail-loud. The actor reasons free-form (the
      // `report`); a weak model often produces a sound diagnosis but fails the
      // strict findings emission (or the rows get rejected). If the harvest is
      // empty but the report is substantive, recover findings from the prose
      // via the tolerant structuring pass (opt-in), and — either way — surface
      // the report as a visible info finding so an empty harvest is never a
      // silent zero. A genuinely empty diagnosis (short/no report) stays empty.
      const report = typeof result.report === 'string' ? result.report : ''
      if (out.length === 0 && report.trim().length >= 200) {
        if (opts.recovery) {
          const recovered = await structureFindings({
            report,
            analystId: spec.id,
            area: spec.area,
            model: opts.recovery.model ?? opts.model ?? '',
            baseUrl: opts.recovery.baseUrl,
            apiKey: opts.recovery.apiKey,
            fetchImpl: opts.recovery.fetchImpl,
          })
          out.push(...recovered.findings)
          ctx.log?.(`analyst.kind ${spec.id} recovery`, {
            outcome: recovered.outcome,
            recovered: recovered.findings.length,
          })
        }
        if (out.length === 0) {
          out.push(
            makeFinding({
              analyst_id: spec.id,
              area: spec.area,
              claim: 'Analyst produced a diagnosis but no structured findings — see report.',
              rationale: report.slice(0, 1500),
              severity: 'info',
              confidence: 0.3,
              evidence_refs: [
                { kind: 'artifact', uri: 'report://summary', excerpt: report.slice(0, 2000) },
              ],
              metadata: { outcome: 'extraction_failed' },
            }),
          )
        }
      }
      return out
    },
  }
}
/**
 * Build the user-facing question handed to the actor.
 *
 * @param ctx - Analyst context; only `tags.focus` is read.
 * @param spec - Kind spec; `area` and `description` are interpolated.
 * @returns The task directive, with ` Focus: <focus>.` appended when a
 *   non-empty `tags.focus` is present.
 */
function deriveQuestion(ctx: AnalystContext, spec: TraceAnalystKindSpec): string {
  // The actor's user message must orient it at the task, not echo the kind id.
  // A bare id like "failure-mode" gives the actor nothing to act on, so it
  // spends turns inspecting the input instead of reading traces. Operators can
  // still steer with `tags.focus = "leaf-X"`, appended to the task directive.
  const focus = ctx.tags?.focus?.trim()
  const task = `Analyze this trace dataset with the available tools and report ${spec.area} findings. ${spec.description}`
  return focus ? `${task} Focus: ${focus}.` : task
}

/**
 * Lift a validated raw row into an `AnalystFinding`, stamping the kind's id,
 * area and version and wrapping the cited URI as the single evidence ref.
 */
function toAnalystFinding(spec: TraceAnalystKindSpec, raw: RawAnalystFinding): AnalystFinding {
  return makeFinding({
    analyst_id: spec.id,
    area: spec.area,
    subject: raw.subject,
    claim: raw.claim,
    rationale: raw.rationale,
    severity: raw.severity,
    confidence: raw.confidence,
    evidence_refs: [
      {
        kind: evidenceKindFromUri(raw.evidence_uri),
        uri: raw.evidence_uri,
        excerpt: raw.evidence_excerpt,
      },
    ],
    recommended_action: raw.recommended_action,
    metadata: { kind_version: spec.version },
  })
}

/**
 * Classify an evidence URI by its scheme. Anything that is not a recognised
 * `span://`, `artifact://`, `metric://`, `event://` or `finding://` URI is
 * treated as an artifact.
 */
function evidenceKindFromUri(uri: string): 'span' | 'artifact' | 'metric' | 'event' | 'finding' {
  if (uri.startsWith('span://')) return 'span'
  if (uri.startsWith('artifact://')) return 'artifact'
  if (uri.startsWith('metric://')) return 'metric'
  if (uri.startsWith('event://')) return 'event'
  if (uri.startsWith('finding://')) return 'finding'
  return 'artifact'
}

/**
 * Render a compact prior-findings block the actor reads alongside its
 * brief. Each row is one line so the actor can scan dozens cheaply.
 * The kind's prompt instructs the actor to (a) check whether a new
 * cluster matches a prior `finding_id` (carry the id forward via
 * `id_basis` to keep diffs stable) and (b) raise severity / confidence
 * when a prior finding has reappeared without remediation.
 *
 * Returns the empty string when there are no prior findings — most
 * runs are "first-of-its-kind" and the prompt stays unchanged. Otherwise
 * the block starts with a blank-line separator (`"\n\n"`) so it can be
 * concatenated directly onto the end of an actor prompt.
 *
 * Exported for tests + for consumers that build their own actor
 * prompts (e.g. specialized analysts living outside the default kinds).
 *
 * @param prior - Findings from a previous run; at most the first 40 are listed.
 */
export function renderPriorFindings(prior: AnalystContext['priorFindings']): string {
  if (!prior || prior.length === 0) return ''
  const MAX_ROWS = 40 // keep the block under ~2KB; older history is summarized externally
  const rows = prior.slice(0, MAX_ROWS).map((f) => {
    const subject = f.subject ? ` [${f.subject}]` : ''
    return ` - id=${f.finding_id} ${f.severity}${subject} ${truncateForContext(f.claim, 160)}`
  })
  const lines = [
    // Two empty entries join into the leading "\n\n". They must survive: the
    // caller appends this block straight after its prompt text, so dropping
    // them (as a blanket `.filter(Boolean)` does) glues the header onto the
    // prompt's last sentence.
    '',
    '',
    'PRIOR FINDINGS (from a previous run on related data):',
    'When the work you do now matches a row below, REUSE the `finding_id` (pass it as `id_basis`) so the cross-run diff stays stable.',
    'A finding that reappears with no remediation evidence SHOULD raise its `confidence` and may justify a higher `severity`.',
    ...rows,
  ]
  // Only add the overflow note when rows were actually cut, instead of pushing
  // an empty string and filtering it out afterwards.
  if (prior.length > MAX_ROWS) {
    lines.push(`\n ... +${prior.length - MAX_ROWS} more prior findings (older history truncated)`)
  }
  return lines.join('\n')
}

/** Shorten `s` to at most `max` characters, ending with `…` when it was cut. */
function truncateForContext(s: string, max: number): string {
  if (s.length <= max) return s
  return `${s.slice(0, max - 1).trimEnd()}…`
}
/**
 * Pre-curated tool subsets for analyst kinds.
 *
 * The full trace-analyst tool set is seven functions. Most kinds only
 * need three or four. Picking from named groups instead of importing
 * the whole bundle keeps every kind's actor-context budget tight and
 * makes "what can this analyst see?" obvious at registration time.
 *
 * Each function in the group keeps its full `name`/`description` from
 * `buildTraceAnalystTools` — we filter, we don't re-implement.
 */

import type { AxFunction } from '@ax-llm/ax'
import type { TraceAnalysisStore } from '../trace-analyst/store'
import { buildTraceAnalystTools } from '../trace-analyst/tools'

/** Named tool sets. Kinds pass one of these names to `buildTraceToolsForGroup`. */
export type TraceToolGroupName =
  /** All seven tools. Use for open-ended discovery kinds. */
  | 'all'
  /** Overview + paginated query + count. No deep reads. Cheap. */
  | 'discovery'
  /** Discovery + viewTrace + viewSpans. Deep-read but no regex search. */
  | 'discoveryAndRead'
  /** Discovery + search tools. For pattern-matching across many traces. */
  | 'discoveryAndSearch'
  /** Overview + queryTraces + viewSpans + searchSpan. Targeted-span work after another kind narrows down. */
  | 'targeted'

/**
 * Allow-list of tool names per group. `all` is intentionally empty: it is
 * never consulted because `buildTraceToolsForGroup` returns the unfiltered
 * tool set for that group.
 */
const TOOL_NAMES_BY_GROUP: Record<TraceToolGroupName, ReadonlySet<string>> = {
  all: new Set(),
  discovery: new Set(['getDatasetOverview', 'queryTraces', 'countTraces']),
  discoveryAndRead: new Set([
    'getDatasetOverview',
    'queryTraces',
    'countTraces',
    'viewTrace',
    'viewSpans',
  ]),
  discoveryAndSearch: new Set([
    'getDatasetOverview',
    'queryTraces',
    'countTraces',
    'searchTrace',
    'searchSpan',
  ]),
  targeted: new Set(['getDatasetOverview', 'queryTraces', 'viewSpans', 'searchSpan']),
}

/**
 * Build the tool set for a named group bound to a specific trace store.
 *
 * `all` returns every tool. Other groups filter `buildTraceAnalystTools`
 * by name to the documented subset. An unrecognised group name throws —
 * silently returning all tools would defeat the cost-control point.
 *
 * @param group - One of the `TraceToolGroupName` values.
 * @param store - Trace store the tools are bound to.
 * @returns The tools whose `name` is in the group's allow-list.
 * @throws Error when `group` is not a declared group name.
 */
export function buildTraceToolsForGroup(
  group: TraceToolGroupName,
  store: TraceAnalysisStore,
): AxFunction[] {
  if (group === 'all') return buildTraceAnalystTools({ store })
  // Own-property check: a plain lookup would accept inherited keys such as
  // "constructor" from an untyped caller and fail later with a TypeError
  // instead of the intended "unknown group" error. Validating first also
  // avoids building tools for a request that is about to be rejected.
  if (!Object.prototype.hasOwnProperty.call(TOOL_NAMES_BY_GROUP, group)) {
    throw new Error(`unknown trace tool group: ${group}`)
  }
  const allow = TOOL_NAMES_BY_GROUP[group]
  return buildTraceAnalystTools({ store }).filter((tool) =>
    allow.has((tool as { name: string }).name),
  )
}
/**
 * Failure-mode analyst — classifies what went wrong and why.
 *
 * Brief: read the trace dataset, identify the top failure modes across
 * runs, classify each with severity + evidence, and surface them as
 * findings. The actor's job is *taxonomy + evidence*, not fix-design —
 * that's the improvement-analyst's job.
 *
 * Recursion is deep (`maxDepth: 3`) because real failure-mode
 * discovery is genuinely tree-shaped: the actor splits the dataset
 * into candidate clusters, each cluster spawns a focused investigator
 * that drills into representative traces, and a deeply-recursed
 * investigator may itself split a confounded mode into two sub-modes.
 * Each level fans out 4-way, so the analyst can investigate up to
 * ~16 leaf clusters before hitting the depth ceiling.
 *
 * NOTE(review): a 4-way fan-out over three levels would be 4^3 = 64 leaves;
 * ~16 matches two levels. Confirm which figure is intended.
 */

import type { TraceAnalystKindSpec } from '../kind-factory'
import { buildTraceToolsForGroup } from '../tool-groups'

// Actor system prompt for the failure-mode kind. It is runtime text sent to
// the model, so any wording change is a behavior change.
// NOTE(review): the closing rule asks for `final({ findings: [...] })`, while
// the kind factory appends an instruction to call `final(...)` with a
// `{ report, findings }` payload — confirm the two are meant to coexist.
const ACTOR_PROMPT = `You are a failure-mode classifier for an OTLP trace dataset. Your job is to identify the **distinct ways agents failed** in this dataset, not to grade individual runs.

DISCOVERY → CLUSTER → CITE protocol:

1. Call \`traces.getDatasetOverview({})\` first. Use \`has_errors\`, \`models\`, \`agent_names\`, \`tools\`, and \`sample_trace_ids\` to size the failure surface.
2. Use \`traces.queryTraces({ filters: { has_errors: true }, limit })\` to pull error-bearing traces. Combine with \`traces.countTraces\` to see what fraction of the dataset failed.
3. For each candidate failure cluster, use \`traces.searchTrace\` with regex like \`STATUS_CODE_ERROR\`, \`MaxTurnsExceeded\`, \`assertion\`, \`unauthorized\`, \`timeout\`, \`429\`, \`5\\d\\d\`, the agent's specific error strings, or the names of its tools. Pull one or two representative traces per cluster, **not all** of them.
4. **Cluster, do not enumerate.** Two failures with the same root cause should be ONE finding citing both traces, not two findings. The point of this analyst is to compress N runs into K modes.
5. For each cluster you can defend with evidence, emit ONE finding with:
 - \`area\` = "failure-mode"
 - \`subject\` = a short label for the cluster ("tool-call-loop", "auth-revoked-mid-run", "agent-asked-clarification-too-late", ...)
 - \`claim\` = one sentence stating the mode
 - \`severity\` = "critical" when it blocks the run, "high" when the run finished degraded, "medium" when it slowed convergence
 - \`evidence_uri\` = \`span://<trace_id>/<span_id>\` of the most representative span
 - \`evidence_excerpt\` = the exact quote (e.g. error message, stuck tool call payload, contradictory turn output)
 - \`confidence\` = 0.85+ when multiple traces show the same shape; 0.6-0.8 for a single-trace inference; <0.5 for speculative.
 - \`recommended_action\` = imperative-phrased fix idea (kept short — the improvement-analyst will expand on these)

If the dataset has no failures, return an empty findings array — do NOT pad with low-confidence speculation.

**Delegate aggressively.** The recursion budget is there to be used:
- After your first \`getDatasetOverview\` + \`queryTraces\` calls, you should have 3-6 candidate failure clusters in mind. Spawn one \`llmQuery\` per cluster in a single batch — they investigate in parallel.
- A sub-investigator that finds its cluster is actually two distinct modes should split again at its own level. Recursion is meant to discover sub-modes, not to do trivial drilling that the parent could do in-line.
- Pass narrow context to each subagent: { question: 'investigate the auth-revoked-mid-run cluster', context: { trace_ids: ['abc', 'def'], suspected_root_cause: 'token refresh skipped on idle sessions' } }. Subagents need enough context to skip re-discovery but not the whole conversation.
- Each subagent returns its findings as JSON; the parent merges them. Do NOT have subagents call \`final()\` — they return their findings list to you, and you call \`final()\` once at the top.

OBSERVABILITY rules:
- Each non-final turn must emit at least one \`console.log\` for evidence.
- Reuse runtime variables across turns; don't recompute.
- Call \`final({ findings: [...] })\` exactly once, after you've gathered evidence for every cluster you intend to report.`

/** Kind spec for the failure-mode analyst, ready to be passed to the kind factory. */
export const FAILURE_MODE_KIND_SPEC: TraceAnalystKindSpec = {
  id: 'failure-mode',
  description:
    'Clusters trace-dataset failures into distinct failure modes with cited evidence and a short recommended action.',
  area: 'failure-mode',
  version: '1.0.0',
  actorDescription: ACTOR_PROMPT,
  // 'all' returns the unfiltered trace tool set.
  buildTools: (store) => buildTraceToolsForGroup('all', store),
  // A non-zero depth makes the factory run the agent in 'advanced' mode;
  // maxParallelSubagents is forwarded as the batched llmQuery concurrency cap.
  recursion: { maxDepth: 3, maxParallelSubagents: 4 },
  // Raised from the factory default of 12 turns.
  maxTurns: 24,
  cost: { kind: 'llm' },
}
/**
 * Improvement analyst — actionable, recursive self-improvement findings.
 *
 * Brief: read findings from upstream analysts (failure-mode,
 * knowledge-gap, knowledge-poisoning) AND the trace dataset itself,
 * then propose **concrete edits** to the agent's runtime: prompt
 * additions, RAG documents to ingest, tool descriptions to rewrite,
 * scaffolding changes to make, memory entries to invalidate. Each
 * finding is one proposed edit with the locus, the diff, and the
 * expected effect.
 *
 * This is the recursive-self-improvement loop's last mile: the prior
 * kinds describe *what's wrong*; this kind describes *what to change*.
 *
 * Recursion is deep (`maxDepth: 3`) because real improvement proposals
 * are competitive: for each failure-mode there are usually 2-3 viable
 * fix directions (tighten prompt vs add tool vs adjust scaffolding),
 * and the actor should explore each with a focused subagent before
 * picking the highest-leverage one to recommend.
 */

import type { TraceAnalystKindSpec } from '../kind-factory'
import { buildTraceToolsForGroup } from '../tool-groups'

// Actor system prompt for the improvement kind. It is runtime text sent to
// the model, so any wording change is a behavior change.
// NOTE(review): the closing rule asks for `final({ findings: [...] })`, while
// the kind factory appends an instruction to call `final(...)` with a
// `{ report, findings }` payload — confirm the two are meant to coexist.
const ACTOR_PROMPT = `You are a recursive-self-improvement analyst. Your job is to propose **concrete, locus-named edits** the agent's runtime should adopt to fix the failure modes, knowledge gaps, and poisonings present in this dataset.

Upstream analysts have already classified the problems. Your job is to convert each problem into a *change to make* and grade its expected leverage. Each finding is one proposed edit.

DISCOVERY → CANDIDATE-FIXES → COMPETE → CITE protocol:

1. \`traces.getDatasetOverview({})\` first. Note the agents, tools, and any system-prompt fingerprints (look for the prompt text echoed in early spans).
2. For each high-severity failure pattern, generate 2-3 candidate fixes. Real candidate axes:
 - **System-prompt edit** — add an instruction, remove a misleading one, restructure precedence
 - **Tool description edit** — rewrite a tool's description so the agent picks it correctly / passes valid args
 - **New tool** — add a tool the agent kept emulating in code
 - **RAG ingestion** — add a document or correct a stale one
 - **Memory invalidation** — clear cached prior-run decisions that no longer apply
 - **Scaffolding** — add a precondition check, a retry policy, a turn budget, a verification step
 - **Output schema** — narrow the agent's output to forbid the failure shape
3. **Compete candidate fixes via subagents.** For each failure cluster, spawn one \`llmQuery\` per candidate-fix axis you want to evaluate. Each subagent's job: simulate the fix on the cited traces and report (i) likely effect, (ii) side effects, (iii) implementation cost as small/medium/large. Pass the cluster's failing trace_ids and the candidate axis as context.
4. After subagents return, **pick the winning candidate per cluster** based on (effect / cost) and emit ONE finding. Discard the losing candidates — the output is the recommendation, not the candidate set.
5. **Cross-reference upstream findings.** If a finding cites a prior failure-mode or knowledge-gap finding, use \`evidence_uri = "finding://<prior-finding-id>"\` (the registry supports this kind). This builds the dependency graph that lets the dashboard show "fix #X resolves failure modes A, B, C."

For each winning recommendation, emit ONE finding with:
- \`area\` = "improvement"
- \`subject\` = the locus to edit: \`system-prompt:<section>\`, \`tool-doc:<tool-name>\`, \`new-tool:<proposed-name>\`, \`rag:<corpus>:<doc-id>\`, \`memory:<key>\`, \`scaffolding:<concern>\`, \`output-schema:<field>\`
- \`claim\` = one sentence stating the edit ("Add a precondition check to refuse tool X calls without arg Y")
- \`severity\` = leverage rating: "critical" when fix resolves a critical failure mode; "high" when it resolves a high; "medium" when it's a quality-of-life win; "info" when it's a cleanup with no behavioral effect
- \`evidence_uri\` = the failure-mode finding id this fix targets (\`finding://<id>\`) when it exists; else the most representative span
- \`evidence_excerpt\` = a fragment showing the problem the fix targets
- \`confidence\` = 0.85+ when the fix is mechanical and the failure mode is well-evidenced; 0.6-0.8 when the fix requires judgment; <0.5 for speculative
- \`rationale\` = why this candidate beat its alternatives (2 sentences max)
- \`recommended_action\` = the **literal edit**, phrased as a diff or a quoted replacement: "Replace section X with: '...'" or "Add tool with description: '...'" or "Set retry policy to max_attempts=3 with exponential backoff"

If no upstream failure findings exist in this run, derive your own from the trace dataset using the failure-mode protocol inline (\`searchTrace\` for STATUS_CODE_ERROR / MaxTurnsExceeded / etc.). But prefer to consume upstream findings when present — the kinds are designed to chain.

Do NOT propose a fix you cannot defend with evidence. "Tighten the prompt" is not a finding; "Add 'When the user asks for X, always Y' to the system prompt section "request-classification"" is.

OBSERVABILITY rules:
- Each non-final turn must emit at least one \`console.log\` for evidence.
- Call \`final({ findings: [...] })\` exactly once at the top level.`

/** Kind spec for the improvement analyst, ready to be passed to the kind factory. */
export const IMPROVEMENT_KIND_SPEC: TraceAnalystKindSpec = {
  id: 'improvement',
  description:
    'Converts upstream failure / gap / poisoning findings into concrete locus-named edits (prompt, tool-doc, RAG, scaffolding) with leverage grades.',
  area: 'improvement',
  version: '1.0.0',
  actorDescription: ACTOR_PROMPT,
  // 'all' returns the unfiltered trace tool set.
  buildTools: (store) => buildTraceToolsForGroup('all', store),
  // A non-zero depth makes the factory run the agent in 'advanced' mode;
  // maxParallelSubagents is forwarded as the batched llmQuery concurrency cap.
  recursion: { maxDepth: 3, maxParallelSubagents: 4 },
  // Both raised from the factory defaults (12 turns, 6000 runtime chars).
  maxTurns: 30,
  maxRuntimeChars: 12000,
  cost: { kind: 'llm' },
}
One failure-mode often maps to several gaps.\n *\n * Recursion (`maxDepth: 2`) is enough to fan out one subagent per\n * candidate gap-source layer; each subagent runs a focused detection.\n */\n\nimport type { TraceAnalystKindSpec } from '../kind-factory'\nimport { buildTraceToolsForGroup } from '../tool-groups'\n\nconst ACTOR_PROMPT = `You are a knowledge-gap analyst for an OTLP trace dataset. Your job is to identify the **specific pieces of information the agent lacked, or that were stale**, that caused poor decisions.\n\nThe agent under analysis maintains a curated knowledge base via \\`@tangle-network/agent-knowledge\\` — a wiki of \\`KnowledgePage\\`s with raw source anchors, claims, and relations. The primary expected store of agent-knowable facts IS that wiki. A \"knowledge gap\" is anything the agent had to discover or guess at run-time that the wiki should have held — or an outdated/contradictory fact the agent picked up from a non-wiki source.\n\nDISCOVERY → ATTRIBUTE-TO-LAYER → CITE protocol:\n\n1. \\`traces.getDatasetOverview({})\\` first. Note which agents, tools, and models appear.\n2. Pull traces where the agent shows gap signals. 
The strongest signals are:\n - Self-correction turns (\"I assumed X but…\", \"let me re-check\", \"actually,\")\n - Clarifying-question turns where the agent asked the user something the runtime should have surfaced\n - Repeated retrieval / lookup calls for the same artifact with slightly varied queries\n - Tool errors that name a missing argument or unknown resource\n - Web-search calls returning pages dated before a known cutoff for content that changes (versioned APIs, schemas, policies)\n - Agent quoting a tool's docs / system prompt incorrectly because the actual text was insufficient\n - Fabricated identifiers that don't appear in dataset \\`sample_trace_ids\\`\n Use \\`traces.searchTrace\\` with patterns like \\`I (don.?t|do not) know\\`, \\`assumed\\`, \\`unclear\\`, \\`could you (clarify|tell me|provide)\\`, \\`not found\\`, \\`undefined\\`, \\`unknown\\`, \\`null\\`, dates older than the analysis window, or the agent's specific clarification phrases.\n3. For each gap, identify the **layer of the runtime that should have prevented it**. The locus is the value of \\`subject\\` on the finding. 
Use one of:\n - \\`agent-knowledge:wiki:<page-slug>\\` — the wiki page that should exist but doesn't, or exists but lacks the claim\n - \\`agent-knowledge:wiki:<page-slug>#<heading>\\` — wiki page exists but a specific section is missing\n - \\`agent-knowledge:claim:<topic>\\` — a specific claim/relation triple that should be in the wiki\n - \\`agent-knowledge:raw:<source-id>\\` — raw source captured but never lifted into a curated page\n - \\`agent-knowledge:stale:<page-slug>\\` — wiki page exists but contradicts ground-truth evidence in this trace (the wiki itself drifted)\n - \\`websearch:outdated:<topic>\\` — agent relied on a web result that was stale; wiki should have superseded it\n - \\`tool-doc:<tool-name>:<aspect>\\` — tool description missed a behavior aspect (return shape, failure modes, side effects)\n - \\`system-prompt:<section>\\` — system prompt should have stated the rule directly\n - \\`memory:<key>\\` — prior-run memory should have surfaced an earlier decision\n4. For each gap you can defend with evidence, emit ONE finding with:\n - \\`area\\` = \"knowledge-gap\"\n - \\`subject\\` = the locus string from the list above\n - \\`claim\\` = a sentence naming the missing or stale knowledge (\"wiki has no page on invoice line-item shape, agent had to re-derive it from raw spans\")\n - \\`severity\\` = \"high\" when the gap caused a failure or a clarifying question; \"medium\" when it caused unnecessary turns; \"low\" when it caused minor inefficiency\n - \\`evidence_uri\\` = \\`span://<trace_id>/<span_id>\\` of the moment the gap surfaced (the question, the self-correction, the retrieval miss, the stale web result)\n - \\`evidence_excerpt\\` = exact quote where the agent showed the gap\n - \\`confidence\\` = 0.85+ when the agent itself articulated the gap; 0.6-0.8 when inferred from behavior\n - \\`recommended_action\\` = phrased as a wiki edit when the locus is \\`agent-knowledge:*\\` (\"Create wiki page \\`invoice-line-items\\` with claims: ...\"), 
or as a prompt/tool-doc edit otherwise\n\n**Delegate per layer.** After your first scan, you should have candidates spread across \\`agent-knowledge:*\\`, \\`websearch:outdated\\`, \\`tool-doc:*\\`, \\`system-prompt:*\\`, and \\`memory:*\\`. Spawn one \\`llmQuery\\` per layer in parallel — each subagent runs a focused detection (e.g. the \\`agent-knowledge\\` subagent looks for both missing-pages AND stale-pages; the \\`websearch\\` subagent looks specifically for date staleness signals; the \\`tool-doc\\` subagent looks for tool-call argument errors a fuller description would have prevented). Subagents return findings; you merge and emit one \\`final({ findings })\\` at the top.\n\nDo NOT report a gap that the agent later recovered from cleanly within the same turn — that's resilience, not a gap. Cite the *non-recovery* version when both exist.\n\nOBSERVABILITY rules:\n- Each non-final turn must emit at least one \\`console.log\\` for evidence.\n- Call \\`final({ findings: [...] })\\` exactly once at the top level.`\n\nexport const KNOWLEDGE_GAP_KIND_SPEC: TraceAnalystKindSpec = {\n id: 'knowledge-gap',\n description:\n 'Identifies missing or stale pieces of knowledge — primarily against the agent-knowledge wiki — and attributes each to the runtime layer (wiki page, claim, raw source, websearch, tool-doc, system-prompt, memory) that should have held it.',\n area: 'knowledge-gap',\n version: '1.0.0',\n actorDescription: ACTOR_PROMPT,\n buildTools: (store) => buildTraceToolsForGroup('discoveryAndSearch', store),\n recursion: { maxDepth: 2, maxParallelSubagents: 4 },\n maxTurns: 18,\n cost: { kind: 'llm' },\n}\n","/**\n * Knowledge-poisoning analyst — what FALSE information misled the agent?\n *\n * Brief: find moments where the agent acted on information that was\n * *wrong* — stale memory, RAG documents that contradicted ground truth,\n * tool descriptions that lied about return shapes, system-prompt\n * instructions that no longer matched reality, prior-run 
summaries that\n * cached a wrong decision.\n *\n * Distinct from knowledge-gap: a gap is \"the agent didn't know X\"; a\n * poisoning is \"the agent confidently used X, but X was wrong.\" Gaps\n * surface as questions / self-correction; poisonings surface as\n * confident-but-wrong actions that downstream evidence contradicts.\n *\n * Recursion is moderate (`maxDepth: 2`) because each candidate\n * poisoning typically needs two sub-investigations: one to confirm\n * the agent acted on the false belief, one to confirm the belief\n * itself is actually false in ground truth.\n */\n\nimport type { TraceAnalystKindSpec } from '../kind-factory'\nimport { buildTraceToolsForGroup } from '../tool-groups'\n\nconst ACTOR_PROMPT = `You are a knowledge-poisoning analyst for an OTLP trace dataset. Your job is to identify cases where the agent **confidently used wrong information** — not where it lacked information (that's the knowledge-gap analyst).\n\nDISCOVERY → DUAL-VERIFY → CITE protocol:\n\n1. \\`traces.getDatasetOverview({})\\` first. Identify the agents, models, and tools.\n2. Pull traces where the agent's confident action was later contradicted. Strongest signals:\n - Agent stated a fact in one span; a later span surfaced contradictory evidence; the agent then proceeded anyway or fabricated reconciliation.\n - Tool call with stale arguments (an id that no longer exists, an API shape that changed).\n - Agent cited an \\`agent-knowledge\\` wiki page or claim whose content contradicts the trace's own evidence — the wiki itself drifted.\n - Web-search result the agent cited that returned an outdated page; agent treated it as canonical.\n - System-prompt instruction the agent followed that ground-truth evidence in the trace contradicts (e.g. prompt says \"use endpoint A\"; tool reply says \"endpoint A deprecated, use B\").\n - Repeated wrong-shape parsing despite the tool's actual output proving the shape.\n3. 
Use \\`traces.searchTrace\\` with regex on phrases like \\`actually\\`, \\`turns out\\`, \\`previously assumed\\`, \\`old version\\`, \\`deprecated\\`, \\`updated to\\`, \\`now uses\\`, or specific entity names you suspect have changed.\n4. For each candidate poisoning, **DUAL-VERIFY**:\n - Confirm the agent actually acted on the false belief (cite the span where it did)\n - Confirm the belief is actually false in this trace's own evidence (cite the span that contradicts it)\n Only emit a finding when both halves are nailed down. If you can only nail one, drop it — single-evidence poisoning findings are too speculative to be useful.\n\n**Delegate the dual-verify.** Use the recursion budget so each candidate poisoning gets one subagent investigating \"did the agent act?\" and one investigating \"is the belief false?\". After your first scan, fire off N parallel \\`llmQuery\\` pairs (one cluster per pair). Subagents return their findings; you accept only the ones where BOTH halves of the pair were confirmed.\n\nFor each confirmed poisoning, emit ONE finding with:\n- \\`area\\` = \"knowledge-poisoning\"\n- \\`subject\\` = the source of the false belief, one of: \\`agent-knowledge:wiki:<page-slug>\\` (wiki page contradicts current ground truth), \\`agent-knowledge:claim:<topic>\\` (a specific claim/relation went stale), \\`agent-knowledge:raw:<source-id>\\` (the raw source is outdated and the wiki inherited the drift), \\`websearch:outdated:<url-or-topic>\\`, \\`tool-doc:<tool>\\`, \\`system-prompt:<section>\\`, \\`memory:<key>\\`, \\`prior-run-summary:<topic>\\`\n- \\`claim\\` = one sentence: \"agent believed X (from source S); evidence in trace shows X is false\"\n- \\`severity\\` = \"critical\" when poisoning caused a wrong user-visible action; \"high\" when caught internally but wasted significant work; \"medium\" for inefficiency only\n- \\`evidence_uri\\` = \\`span://<trace_id>/<span_id>\\` of the action span (the moment the agent acted on the false belief)\n- 
\\`evidence_excerpt\\` = exact quote of the confident-but-wrong claim or action\n- \\`confidence\\` = 0.85+ when both halves are exact-quote backed; 0.6-0.8 when one half is inferred\n- \\`recommended_action\\` = where the source should be updated and how (\"Update wiki page \\`X\\` claim \\`Y\\` to '...'\", \"Invalidate raw source \\`Z\\` and re-curate\", \"Replace system-prompt section X with 'tool foo now returns Y'\")\n\nDo NOT report a finding if the agent caught and corrected the false belief in the same turn — that's the system working. Reserve poisoning for cases where the false belief shaped downstream action.\n\nOBSERVABILITY rules:\n- Each non-final turn must emit at least one \\`console.log\\` for evidence.\n- Call \\`final({ findings: [...] })\\` exactly once at the top level.`\n\nexport const KNOWLEDGE_POISONING_KIND_SPEC: TraceAnalystKindSpec = {\n id: 'knowledge-poisoning',\n description:\n 'Identifies confident-but-wrong actions caused by stale memory, contradicting RAG, deprecated tool docs, or outdated system-prompt instructions.',\n area: 'knowledge-poisoning',\n version: '1.0.0',\n actorDescription: ACTOR_PROMPT,\n buildTools: (store) => buildTraceToolsForGroup('all', store),\n recursion: { maxDepth: 2, maxParallelSubagents: 4 },\n maxTurns: 20,\n cost: { kind: 'llm' },\n}\n","/**\n * Default analyst kinds focused on agent failure + recursive\n * self-improvement.\n *\n * The four kinds chain: failure-mode classifies; knowledge-gap and\n * knowledge-poisoning explain *why* in two orthogonal ways; improvement\n * proposes concrete edits. 
Register all four against the same trace\n * store and the registry runs them in dependency order if the operator\n * pipes findings between them.\n */\n\nexport { FAILURE_MODE_KIND_SPEC } from './failure-mode'\nexport { IMPROVEMENT_KIND_SPEC } from './improvement'\nexport { KNOWLEDGE_GAP_KIND_SPEC } from './knowledge-gap'\nexport { KNOWLEDGE_POISONING_KIND_SPEC } from './knowledge-poisoning'\n\nimport type { TraceAnalystKindSpec } from '../kind-factory'\nimport { FAILURE_MODE_KIND_SPEC } from './failure-mode'\nimport { IMPROVEMENT_KIND_SPEC } from './improvement'\nimport { KNOWLEDGE_GAP_KIND_SPEC } from './knowledge-gap'\nimport { KNOWLEDGE_POISONING_KIND_SPEC } from './knowledge-poisoning'\n\n/**\n * The default kind suite. Order is the run order operators should\n * use: failure-mode first (no upstream deps), gap + poisoning next\n * (both depend on failures), improvement last (chains all three).\n */\nexport const DEFAULT_TRACE_ANALYST_KINDS: readonly TraceAnalystKindSpec[] = [\n FAILURE_MODE_KIND_SPEC,\n KNOWLEDGE_GAP_KIND_SPEC,\n KNOWLEDGE_POISONING_KIND_SPEC,\n IMPROVEMENT_KIND_SPEC,\n] as const\n","/**\n * AnalystRegistry — orchestrate N analysts against one run.\n *\n * Owns three responsibilities and only three:\n * 1. Registration — ids must be unique; bad registrations fail loudly\n * at register-time, not run-time.\n * 2. Routing — each analyst declares its `inputKind`; the registry\n * picks the matching field from AnalystRunInputs and skips the\n * analyst with a logged reason if it's missing.\n * 3. Isolation — one analyst's exception MUST NOT stop other analysts.\n * Failed analysts produce zero findings + a 'failed' summary row.\n *\n * Cross-cutting concerns (telemetry, error → finding conversion, cost\n * ingestion, storage rotation) live in `AnalystHooks`. Budget shaping\n * (equal split vs weighted vs custom) lives in `BudgetPolicy`. 
Both\n * have sensible defaults; consumers override only what they need.\n */\n\nimport { randomUUID } from 'node:crypto'\nimport type { ChatClient } from './chat-client'\nimport type {\n Analyst,\n AnalystContext,\n AnalystFinding,\n AnalystRunEvent,\n AnalystRunInputs,\n AnalystRunResult,\n AnalystRunSummary,\n} from './types'\n\n// ── Hook + policy surfaces ─────────────────────────────────────────\n\nexport interface AnalystHooks {\n /** Before analyze() — last chance to mutate ctx (e.g. inject tags, override budget). */\n onBeforeAnalyze?(args: {\n analyst: Analyst\n ctx: AnalystContext\n runId: string\n }): void | Promise<void>\n /** After every analyst (ok | failed | skipped). Use for telemetry, ingestion, rotation. */\n onAfterAnalyze?(args: {\n analyst: Analyst\n summary: AnalystRunSummary\n findings: AnalystFinding[]\n runId: string\n }): void | Promise<void>\n /**\n * On analyst exception. Hook MAY return findings to convert the\n * error into structured findings; the summary still reports 'failed'.\n * Return void to keep the default empty-findings behavior.\n */\n onError?(args: {\n analyst: Analyst\n error: Error\n runId: string\n }): AnalystFinding[] | undefined | Promise<AnalystFinding[] | undefined>\n /** Once after registry.run() completes. Use for final aggregation, persistence. */\n onComplete?(args: { result: AnalystRunResult }): void | Promise<void>\n}\n\nexport interface BudgetPolicy {\n /** Overall USD cap across the registry.run(). */\n totalUsd?: number\n /** Per-analyst weight for the default allocator. Missing ids get weight 1. */\n weights?: Record<string, number>\n /**\n * Custom allocator — receives the analyst, remaining/total budget, and\n * the count of analysts that will run. Returns the per-analyst budget\n * (or undefined to leave it uncapped). 
Overrides weights when set.\n */\n allocate?: (args: {\n analyst: Analyst\n totalUsd: number | undefined\n remainingUsd: number | undefined\n runningCount: number\n }) => number | undefined\n}\n\nexport interface AnalystRegistryOptions {\n /** Shared chat client passed to every LLM analyst via AnalystContext. */\n chat?: ChatClient\n /** Logger callback. Defaults to a no-op. */\n log?: (msg: string, fields?: Record<string, unknown>) => void\n /** Hooks invoked around analyze() — observability + customization seam. */\n hooks?: AnalystHooks\n /** Default budget when run() doesn't override. */\n defaultBudget?: BudgetPolicy\n}\n\nexport interface RegistryRunOpts {\n /** Restrict to a subset of registered analysts by id. */\n only?: string[]\n /** Skip these analysts even if registered. Useful for cheap iteration. */\n skip?: string[]\n /** Budget policy — totalUsd + optional weights/allocator. Falls back to options.defaultBudget. */\n budget?: BudgetPolicy\n /** Wall-clock cap. Analysts SHOULD honor `ctx.deadlineMs`. */\n timeoutMs?: number\n /** Abort signal — forwarded into every analyst's context. */\n signal?: AbortSignal\n /** Tags echoed into AnalystContext.tags — useful for tracking environment/version in findings. */\n tags?: Record<string, string>\n /**\n * Prior-run findings made available as retrieval context to every\n * analyst via `ctx.priorFindings`. The registry forwards the slice\n * whose `analyst_id` matches each registered analyst so a kind sees\n * only its own history. 
Pass `{ '*': findings }` to broadcast to\n * every analyst (useful for cross-kind chaining where the improvement\n * analyst consumes upstream failure findings).\n */\n priorFindings?: ReadonlyArray<AnalystFinding> | Record<string, ReadonlyArray<AnalystFinding>>\n}\n\nexport class AnalystRegistry {\n private readonly analysts = new Map<string, Analyst>()\n private readonly options: AnalystRegistryOptions\n\n constructor(options: AnalystRegistryOptions = {}) {\n this.options = options\n }\n\n register(analyst: Analyst): void {\n if (!analyst.id) throw new Error('AnalystRegistry.register: analyst.id is required')\n if (this.analysts.has(analyst.id)) {\n throw new Error(`AnalystRegistry.register: duplicate analyst id \"${analyst.id}\"`)\n }\n if (!analyst.version) {\n throw new Error(`AnalystRegistry.register: analyst \"${analyst.id}\" must declare a version`)\n }\n this.analysts.set(analyst.id, analyst)\n }\n\n list(): ReadonlyArray<{\n id: string\n description: string\n version: string\n cost: Analyst['cost']\n }> {\n return Array.from(this.analysts.values()).map((a) => ({\n id: a.id,\n description: a.description,\n version: a.version,\n cost: a.cost,\n }))\n }\n\n async run(\n runId: string,\n inputs: AnalystRunInputs,\n runOpts: RegistryRunOpts = {},\n ): Promise<AnalystRunResult> {\n // Thin collector over `runStream`. Both surfaces share the same\n // loop body so they cannot drift on isolation / hook order / cost.\n for await (const ev of this.runStream(runId, inputs, runOpts)) {\n if (ev.type === 'run-completed') return ev.result\n }\n throw new Error('AnalystRegistry.run: stream completed without run-completed event')\n }\n\n /**\n * Streaming counterpart to `run()`. Emits `AnalystRunEvent` values\n * in real time — `run-started`, then per-analyst `skipped` /\n * `started` / `completed`, then a terminal `run-completed` whose\n * payload is the full `AnalystRunResult`. 
UIs use this to render\n * progress; persistence consumers use `run()` and read the result.\n *\n * Hooks (`onBeforeAnalyze` / `onAfterAnalyze` / `onError` /\n * `onComplete`) fire as before — streaming is additive, not a hook\n * replacement.\n */\n async *runStream(\n runId: string,\n inputs: AnalystRunInputs,\n runOpts: RegistryRunOpts = {},\n ): AsyncGenerator<AnalystRunEvent, void, void> {\n const correlationId = `ar_${randomUUID().slice(0, 12)}`\n const log = this.options.log ?? (() => {})\n const hooks = this.options.hooks ?? {}\n const startedAt = new Date().toISOString()\n const started = Date.now()\n const deadlineMs = runOpts.timeoutMs ? started + runOpts.timeoutMs : undefined\n\n const selected = this.selectAnalysts(runOpts)\n const budget = runOpts.budget ?? this.options.defaultBudget\n\n yield {\n type: 'run-started',\n run_id: runId,\n correlation_id: correlationId,\n started_at: startedAt,\n analyst_ids: selected.map((a) => a.id),\n }\n\n const summaries: AnalystRunSummary[] = []\n const allFindings: AnalystFinding[] = []\n let totalCost = 0\n let remainingUsd = budget?.totalUsd\n\n for (const analyst of selected) {\n const t0 = Date.now()\n const input = this.routeInput(analyst, inputs)\n if (input.kind === 'missing') {\n const summary: AnalystRunSummary = {\n analyst_id: analyst.id,\n status: 'skipped',\n reason: `missing input of kind '${analyst.inputKind}'`,\n findings_count: 0,\n latency_ms: 0,\n cost_usd: 0,\n }\n summaries.push(summary)\n log(`[analyst] skip ${analyst.id} — missing input`, { runId, kind: analyst.inputKind })\n await hooks.onAfterAnalyze?.({ analyst, summary, findings: [], runId })\n yield { type: 'analyst-skipped', summary }\n continue\n }\n\n const perBudget = allocateBudget(budget, {\n analyst,\n remainingUsd,\n runningCount: selected.length,\n })\n\n const ctx: AnalystContext = {\n runId,\n correlationId,\n deadlineMs,\n budgetUsd: perBudget,\n chat: this.options.chat,\n tags: runOpts.tags,\n log: (msg, fields) => 
log(`[${analyst.id}] ${msg}`, { runId, correlationId, ...fields }),\n signal: runOpts.signal,\n priorFindings: selectPriorFindings(runOpts.priorFindings, analyst.id),\n }\n\n await hooks.onBeforeAnalyze?.({ analyst, ctx, runId })\n yield {\n type: 'analyst-started',\n analyst_id: analyst.id,\n started_at: new Date(t0).toISOString(),\n }\n\n try {\n const findings = await (analyst as Analyst<unknown>).analyze(input.value, ctx)\n const latency = Date.now() - t0\n const cost = sumFindingCost(findings)\n totalCost += cost\n if (typeof remainingUsd === 'number') remainingUsd = Math.max(0, remainingUsd - cost)\n allFindings.push(...findings)\n const summary: AnalystRunSummary = {\n analyst_id: analyst.id,\n status: 'ok',\n findings_count: findings.length,\n latency_ms: latency,\n cost_usd: cost,\n }\n summaries.push(summary)\n log(`[analyst] ok ${analyst.id}`, {\n runId,\n findings: findings.length,\n latency_ms: latency,\n cost_usd: cost,\n })\n await hooks.onAfterAnalyze?.({ analyst, summary, findings, runId })\n yield { type: 'analyst-completed', summary, findings }\n } catch (err) {\n const latency = Date.now() - t0\n const e = err instanceof Error ? err : new Error(String(err))\n // Hook gets first chance to convert the error into findings.\n const hookFindings = (await hooks.onError?.({ analyst, error: e, runId })) ?? 
[]\n if (hookFindings.length) allFindings.push(...hookFindings)\n const summary: AnalystRunSummary = {\n analyst_id: analyst.id,\n status: 'failed',\n findings_count: hookFindings.length,\n latency_ms: latency,\n cost_usd: 0,\n error: { class: e.constructor.name, message: e.message },\n }\n summaries.push(summary)\n log(`[analyst] FAIL ${analyst.id}`, {\n runId,\n error_class: e.constructor.name,\n error: e.message,\n })\n await hooks.onAfterAnalyze?.({ analyst, summary, findings: hookFindings, runId })\n yield { type: 'analyst-completed', summary, findings: hookFindings }\n // Continue — isolation invariant.\n }\n }\n\n const result: AnalystRunResult = {\n run_id: runId,\n correlation_id: correlationId,\n started_at: startedAt,\n ended_at: new Date().toISOString(),\n findings: allFindings,\n per_analyst: summaries,\n total_cost_usd: totalCost,\n }\n await hooks.onComplete?.({ result })\n yield { type: 'run-completed', result }\n }\n\n private selectAnalysts(opts: RegistryRunOpts): Analyst[] {\n let candidates = Array.from(this.analysts.values())\n if (opts.only?.length) {\n const only = new Set(opts.only)\n candidates = candidates.filter((a) => only.has(a.id))\n }\n if (opts.skip?.length) {\n const skip = new Set(opts.skip)\n candidates = candidates.filter((a) => !skip.has(a.id))\n }\n return candidates\n }\n\n private routeInput(\n analyst: Analyst,\n inputs: AnalystRunInputs,\n ): { kind: 'present'; value: unknown } | { kind: 'missing' } {\n switch (analyst.inputKind) {\n case 'trace-store':\n return inputs.traceStore\n ? { kind: 'present', value: inputs.traceStore }\n : { kind: 'missing' }\n case 'artifact-dir':\n return inputs.artifactDir\n ? { kind: 'present', value: inputs.artifactDir }\n : { kind: 'missing' }\n case 'run-record':\n return inputs.runRecord ? { kind: 'present', value: inputs.runRecord } : { kind: 'missing' }\n case 'judge-input':\n return inputs.judgeInput\n ? 
{ kind: 'present', value: inputs.judgeInput }\n : { kind: 'missing' }\n case 'custom': {\n const v = inputs.custom?.[analyst.id]\n return v !== undefined ? { kind: 'present', value: v } : { kind: 'missing' }\n }\n }\n }\n}\n\n/**\n * Default budget allocator: prefer the custom `allocate` callback if\n * provided; else weighted split when weights are set; else equal split\n * across `runningCount`. Returns undefined when no totalUsd is known.\n */\nfunction allocateBudget(\n policy: BudgetPolicy | undefined,\n args: { analyst: Analyst; remainingUsd: number | undefined; runningCount: number },\n): number | undefined {\n if (!policy) return undefined\n if (policy.allocate) {\n return policy.allocate({\n analyst: args.analyst,\n totalUsd: policy.totalUsd,\n remainingUsd: args.remainingUsd,\n runningCount: args.runningCount,\n })\n }\n if (policy.totalUsd == null) return undefined\n if (policy.weights) {\n // Weighted split: caller-supplied weights, default 1 for missing ids.\n // We can only normalize against the analysts in this run, but the\n // registry doesn't know all ids at allocator-time without passing\n // them. We approximate by treating `runningCount` as the count of\n // weight=1 analysts when the weight map omits ids. The exact split\n // is left to consumers that need precision via `allocate`.\n const w = policy.weights[args.analyst.id] ?? 1\n const totalWeight = Math.max(1, args.runningCount) // see note above\n return (policy.totalUsd * w) / totalWeight\n }\n return policy.totalUsd / Math.max(1, args.runningCount)\n}\n\n/**\n * Findings may carry their cost in `metadata.cost_usd` when the analyst\n * tracks it (the LLM-driven adapters do this — they sum chat-client\n * responses). 
Deterministic findings have no cost field.\n */\nfunction sumFindingCost(findings: AnalystFinding[]): number {\n let sum = 0\n for (const f of findings) {\n const c = f.metadata?.cost_usd\n if (typeof c === 'number' && Number.isFinite(c)) sum += c\n }\n return sum\n}\n\n/**\n * Resolve the `priorFindings` slice an analyst sees.\n *\n * - Array form → the analyst sees only findings whose `analyst_id`\n * matches its own id, so a kind never reads\n * another kind's history by accident.\n * - Record form → the analyst gets the entry keyed by its id, with\n * the `'*'` wildcard appended (in that order). Use\n * the wildcard for cross-kind chaining, e.g. when\n * `improvement` should see all upstream failure /\n * gap / poisoning findings.\n */\nfunction selectPriorFindings(\n source: RegistryRunOpts['priorFindings'],\n analystId: string,\n): ReadonlyArray<AnalystFinding> | undefined {\n if (!source) return undefined\n if (Array.isArray(source)) {\n const own = source.filter((f) => f.analyst_id === analystId)\n return own.length > 0 ? own : undefined\n }\n const record = source as Record<string, ReadonlyArray<AnalystFinding>>\n const own = record[analystId] ?? []\n const wildcard = record['*'] ?? []\n const merged = [...own, ...wildcard]\n return merged.length > 0 ? 
merged : undefined\n}\n"],"mappings":";;;;;;;;;AAiBA,SAAS,kBAAkB;AA+KpB,SAAS,iBAAiB,OAOtB;AACT,QAAM,QAAQ,KAAK,UAAU;AAAA,IAC3B,GAAG,MAAM;AAAA,IACT,GAAG,MAAM;AAAA,IACT,GAAG,MAAM,WAAW;AAAA,IACpB,GAAG,eAAe,MAAM,YAAY,MAAM,KAAK;AAAA,EACjD,CAAC;AACD,SAAO,KAAK,WAAW,QAAQ,EAAE,OAAO,KAAK,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE,CAAC;AAC3E;AAEA,SAAS,eAAe,GAAmB;AAGzC,SAAO,EACJ,YAAY,EACZ,QAAQ,QAAQ,GAAG,EACnB,QAAQ,eAAe,EAAE,EACzB,KAAK;AACV;AAMO,SAAS,YACd,MAIgB;AAChB,QAAM,EAAE,UAAU,aAAa,GAAG,KAAK,IAAI;AAC3C,SAAO;AAAA,IACL,gBAAgB;AAAA,IAChB,YAAY,iBAAiB;AAAA,MAC3B,YAAY,KAAK;AAAA,MACjB,MAAM,KAAK;AAAA,MACX,SAAS,KAAK;AAAA,MACd,OAAO,KAAK;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,IACD,aAAa,gBAAe,oBAAI,KAAK,GAAE,YAAY;AAAA,IACnD,GAAG;AAAA,EACL;AACF;;;AC/MA,SAAS,SAAS;AAkCX,IAAM,wBAA2D;AAAA,EACtE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAoBO,SAAS,oBAAoB,KAAuD;AACzF,MAAI,QAAQ,QAAQ,QAAQ,OAAW,QAAO;AAC9C,QAAM,UAAU,IAAI,KAAK;AACzB,MAAI,QAAQ,WAAW,EAAG,QAAO;AAGjC,QAAM,OAAO,QAAQ;AAAA,IACnB;AAAA,EACF;AACA,MAAI;AACF,WAAO,EAAE,MAAM,kBAAkB,MAAM,KAAK,CAAC,GAAI,GAAI,KAAK,CAAC,IAAI,EAAE,SAAS,KAAK,CAAC,EAAE,IAAI,CAAC,EAAG;AAG5F,QAAM,QAAQ,QAAQ,MAAM,8BAA8B;AAC1D,MAAI,SAAS,MAAM,CAAC,EAAG,KAAK,EAAE,SAAS;AACrC,WAAO,EAAE,MAAM,mBAAmB,OAAO,MAAM,CAAC,EAAG,KAAK,EAAE;AAG5D,QAAM,OAAO,QAAQ,MAAM,4BAA4B;AACvD,MAAI,QAAQ,KAAK,CAAC,EAAG,KAAK,EAAE,SAAS;AACnC,WAAO,EAAE,MAAM,iBAAiB,UAAU,KAAK,CAAC,EAAG,KAAK,EAAE;AAG5D,QAAM,QAAQ,QAAQ,MAAM,8CAA8C;AAC1E,MAAI,MAAO,QAAO,EAAE,MAAM,mBAAmB,MAAM,MAAM,CAAC,EAAG;AAG7D,QAAM,KAAK,QAAQ,MAAM,sBAAsB;AAC/C,MAAI,MAAM,GAAG,CAAC,EAAG,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,MAAM,iBAAiB,SAAS,GAAG,CAAC,EAAG,KAAK,EAAE;AAG3F,QAAM,WAAW,QAAQ,MAAM,uCAAuC;AACtE,MAAI,YAAY,SAAS,CAAC,EAAG,KAAK,EAAE,SAAS,GAAG;AAC9C,WAAO,EAAE,MAAM,YAAY,MAAM,SAAS,CAAC,GAAI,QAAQ,SAAS,CAAC,EAAG,KAAK,EAAE;AAAA,EAC7E;AACA,QAAM,KAAK,QAAQ,MAAM,kCAAkC;AAC3D,MAAI,GAAI,QAAO,EAAE,MAAM,YAAY,MAAM,GAAG,CAAC,EAAG;AAGhD,QAAM,KAAK,QAAQ,MAAM,kCAAkC;AAC3D,MAAI,GAAI,QAAO,EAAE,MAAM,YA
AY,MAAM,GAAG,CAAC,EAAG;AAGhD,QAAM,MAAM,QAAQ,MAAM,kCAAkC;AAC5D,MAAI,OAAO,IAAI,CAAC,EAAG,KAAK,EAAE,SAAS,GAAG;AACpC,WAAO,EAAE,MAAM,OAAO,QAAQ,IAAI,CAAC,GAAI,OAAO,IAAI,CAAC,EAAG,KAAK,EAAE;AAAA,EAC/D;AAGA,QAAM,MAAM,QAAQ,MAAM,eAAe;AACzC,MAAI,OAAO,IAAI,CAAC,EAAG,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,MAAM,UAAU,KAAK,IAAI,CAAC,EAAG,KAAK,EAAE;AAGnF,QAAM,KAAK,QAAQ,MAAM,oBAAoB;AAC7C,MAAI,MAAM,GAAG,CAAC,EAAG,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,MAAM,eAAe,SAAS,GAAG,CAAC,EAAG,KAAK,EAAE;AAGzF,QAAM,KAAK,QAAQ,MAAM,sBAAsB;AAC/C,MAAI,MAAM,GAAG,CAAC,EAAG,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,MAAM,iBAAiB,OAAO,GAAG,CAAC,EAAG,KAAK,EAAE;AAGzF,QAAM,KAAK,QAAQ,MAAM,2BAA2B;AACpD,MAAI,MAAM,GAAG,CAAC,EAAG,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,MAAM,sBAAsB,OAAO,GAAG,CAAC,EAAG,KAAK,EAAE;AAG9F,QAAM,MAAM,QAAQ,MAAM,0BAA0B;AACpD,MAAI,OAAO,IAAI,CAAC,EAAG,KAAK,EAAE,SAAS,EAAG,QAAO,EAAE,MAAM,qBAAqB,OAAO,IAAI,CAAC,EAAG,KAAK,EAAE;AAKhG,MAAI,yBAAyB,KAAK,OAAO,KAAK,QAAQ,UAAU,IAAI;AAClE,WAAO,EAAE,MAAM,WAAW,OAAO,QAAQ;AAAA,EAC3C;AAEA,SAAO;AACT;AAQO,SAAS,qBAAqB,GAA2B;AAC9D,UAAQ,EAAE,MAAM;AAAA,IACd,KAAK;AACH,aAAO,EAAE,UACL,wBAAwB,EAAE,IAAI,IAAI,EAAE,OAAO,KAC3C,wBAAwB,EAAE,IAAI;AAAA,IACpC,KAAK;AACH,aAAO,yBAAyB,EAAE,KAAK;AAAA,IACzC,KAAK;AACH,aAAO,uBAAuB,EAAE,QAAQ;AAAA,IAC1C,KAAK;AACH,aAAO,yBAAyB,EAAE,IAAI;AAAA,IACxC,KAAK;AACH,aAAO,iBAAiB,EAAE,OAAO;AAAA,IACnC,KAAK;AACH,aAAO,EAAE,SAAS,YAAY,EAAE,IAAI,IAAI,EAAE,MAAM,KAAK,YAAY,EAAE,IAAI;AAAA,IACzE,KAAK;AACH,aAAO,YAAY,EAAE,IAAI;AAAA,IAC3B,KAAK;AACH,aAAO,OAAO,EAAE,MAAM,IAAI,EAAE,KAAK;AAAA,IACnC,KAAK;AACH,aAAO,UAAU,EAAE,GAAG;AAAA,IACxB,KAAK;AACH,aAAO,eAAe,EAAE,OAAO;AAAA,IACjC,KAAK;AACH,aAAO,iBAAiB,EAAE,KAAK;AAAA,IACjC,KAAK;AACH,aAAO,sBAAsB,EAAE,KAAK;AAAA,IACtC,KAAK;AACH,aAAO,qBAAqB,EAAE,KAAK;AAAA,IACrC,KAAK;AACH,aAAO,EAAE;AAAA,EACb;AACF;AAaO,IAAM,iCAAiC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,EAAE
,KAAK,IAAI;AAYJ,IAAM,yBAA4E;AAAA,EACvF,gBAAgB,CAAC,SAAS;AAAA,EAC1B,iBAAiB;AAAA,IACf;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,uBAAuB;AAAA,IACrB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAAA,EACA,aAAa;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAeO,IAAM,6BAA6B,EACvC,OAAO,EACP,OAAO,CAAC,MAAM,oBAAoB,CAAC,MAAM,MAAM;AAAA,EAC9C,SAAS;AACX,CAAC;;;ACxTI,SAAS,gBAAgB,MAAsB;AACpD,QAAM,IAAI,KAAK,KAAK;AACpB,QAAM,QAAQ;AACd,QAAM,IAAI,EAAE,MAAM,KAAK;AACvB,SAAO,IAAI,EAAE,CAAC,EAAG,KAAK,IAAI;AAC5B;AAGA,SAAS,mBAAmB,GAAmB;AAC7C,SAAO,EAAE,QAAQ,gBAAgB,IAAI;AACvC;AAMO,SAAS,WAAW,MAAuB;AAChD,QAAM,YAAY,mBAAmB,gBAAgB,IAAI,CAAC;AAC1D,MAAI;AACF,WAAO,KAAK,MAAM,SAAS;AAAA,EAC7B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAQO,SAAS,oBAAoB,KAAyB;AAC3D,MAAI,QAAQ;AACZ,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,SAAS,WAAW,KAAK;AAC/B,QAAI,WAAW,OAAW,QAAO,CAAC;AAClC,YAAQ;AAAA,EACV;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO;AACjC,MAAI,SAAS,OAAO,UAAU,UAAU;AAEtC,UAAM,QAAS,MAAkC;AACjD,QAAI,MAAM,QAAQ,KAAK,EAAG,QAAO;AACjC,WAAO,CAAC,KAAK;AAAA,EACf;AACA,SAAO,CAAC;AACV;;;AC1CA,SAAS,KAAAA,UAAS;AAIX,IAAM,qBAAqB,CAAC,YAAY,QAAQ,UAAU,OAAO,MAAM;AAEvE,IAAM,0BAA0BC,GACpC,OAAO;AAAA,EACN,UAAUA,GAAE,KAAK,kBAAkB;AAAA,EACnC,OAAOA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,GAAI;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYjC,SAASA,GACN,OAAO,EACP,IAAI,GAAG,EACP,OAAO,CAAC,MAAM,oBAAoB,CAAC,MAAM,MAAM;AAAA,IAC9C,SAAS;AAAA,EACX,CAAC,EACA,SAAS;AAAA,EACZ,cAAcA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,GAAI;AAAA,EACxC,kBAAkBA,GAAE,OAAO,EAAE,IAAI,GAAI,EAAE,SAAS;AAAA,EAChD,YAAYA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EACnC,WAAWA,GAAE,OAAO,EAAE,IAAI,GAAI,EAAE,SAAS;AAAA,EACzC,oBAAoBA,GAAE,OAAO,EAAE,IAAI,GAAI,EAAE,SAAS;AACpD,CAAC,EACA,OAAO;AASH,IAAM,4BAA4B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAiBlC,SAAS,gBACd,KACA,KAC0B;AAC1B,QAAM,SAAS,wBAAwB,UAAU,GAAG;AACpD,MAAI,OAAO,QAAS,QAAO,OAAO;AAGlC,MAAI,OAAO
,QAAQ,UAAU;AAC3B,UAAM,UAAU,WAAW,GAAG;AAC9B,QAAI,YAAY,QAAW;AACzB,YAAM,QAAQ,wBAAwB,UAAU,OAAO;AACvD,UAAI,MAAM,QAAS,QAAO,MAAM;AAAA,IAClC;AAAA,EACF;AACA,QAAM,oCAAoC;AAAA,IACxC,QAAQ,OAAO,MAAM,OAAO,IAAI,CAAC,OAAO;AAAA,MACtC,MAAM,EAAE,KAAK,KAAK,GAAG;AAAA,MACrB,MAAM,EAAE;AAAA,MACR,SAAS,EAAE;AAAA,IACb,EAAE;AAAA,EACJ,CAAC;AACD,SAAO;AACT;;;AC3DA,IAAM,SAAS;AAAA,EACb;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,EAAE,KAAK,GAAG;AAEV,SAAS,UAAU,KAAc,WAAmB,MAAgC;AAClF,QAAM,OAAO,oBAAoB,GAAG;AACpC,QAAM,MAAwB,CAAC;AAC/B,aAAW,OAAO,MAAM;AAKtB,UAAM,aACJ,OACA,OAAO,QAAQ,YACf,CAAC,MAAM,QAAQ,GAAG,KAClB,CAAE,IAAgC,eAC9B,EAAE,GAAI,KAAiC,cAAc,mBAAmB,IACxE;AACN,UAAM,SAAmC,gBAAgB,UAAU;AACnE,QAAI,CAAC,OAAQ;AACb,QAAI;AAAA,MACF,YAAY;AAAA,QACV,YAAY;AAAA,QACZ;AAAA,QACA,SAAS,OAAO;AAAA,QAChB,OAAO,OAAO;AAAA,QACd,WAAW,OAAO;AAAA,QAClB,UAAU,OAAO;AAAA,QACjB,YAAY,OAAO;AAAA,QACnB,eAAe;AAAA,UACb;AAAA,YACE,MAAM,OAAO,aAAa,WAAW,SAAS,IAAI,SAAS;AAAA,YAC3D,KAAK,OAAO;AAAA,YACZ,SAAS,OAAO;AAAA,UAClB;AAAA,QACF;AAAA,QACA,oBAAoB,OAAO;AAAA,MAC7B,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO;AACT;AAEA,eAAsB,kBACpB,MACkC;AAClC,QAAM,YAAY,KAAK,aAAa;AACpC,QAAM,MAAM,EAAE,SAAS,KAAK,SAAS,QAAQ,KAAK,QAAQ,OAAO,KAAK,UAAU;AAChF,MAAI,OAAO;AAAA,EAA2B,KAAK,MAAM;AAAA;AAAA;AAEjD,WAAS,UAAU,GAAG,WAAW,WAAW,WAAW;AACrD,UAAM,MAAM,MAAM;AAAA,MAChB;AAAA,QACE,OAAO,KAAK;AAAA,QACZ,UAAU;AAAA,UACR,EAAE,MAAM,UAAU,SAAS,OAAO;AAAA,UAClC,EAAE,MAAM,QAAQ,SAAS,KAAK;AAAA,QAChC;AAAA,MACF;AAAA,MACA;AAAA,IACF;AACA,UAAM,OAAO,IAAI,QAAQ,KAAK;AAC9B,UAAM,WAAW,UAAU,MAAM,KAAK,WAAW,KAAK,IAAI;AAC1D,QAAI,SAAS,SAAS,EAAG,QAAO,EAAE,UAAU,SAAS,KAAK;AAG1D,QAAI,KAAK,OAAO,KAAK,EAAE,SAAS,IAAK,QAAO,EAAE,UAAU,CAAC,GAAG,SAAS,KAAK;AAC1E,WAAO,GAAG,IAAI;AAAA;AAAA;AAAA,EAChB;AACA,SAAO,EAAE,UAAU,CAAC,GAAG,SAAS,oBAAoB;AACtD;;;ACzFA,SAAS,aAAa,aAAa;AAkF5B,SAAS,uBACd,MACA,MAC6B;AAC7B,QAAM,UAAU,KAAK,gBAAgB,GAAG,KAAK,OAAO,IAAI,KAAK,aAAa,KAAK,KAAK;AACpF,SAAO;AAAA,IACL,IAAI,KAAK;AAAA,IACT,aAAa,KAAK;AAAA,IAClB,WAAW;AAAA,IACX,MAAM,KAAK;AAAA,IACX;AAAA,IACA,MAAM,QAAQ,OAAO,KAAK;AACxB,YAAM,QAAQ,KAAK,WAAW,KAAK;A
ACnC,YAAM,WAAW,KAAK,WAAW,YAAY;AAC7C,YAAM,cAAc,KAAK,WAAW,wBAAwB;AAC5D,YAAM,eAAe,oBAAoB,IAAI,aAAa;AAE1D,YAAM,mBACJ,KAAK,iBAAiB,KAAK,IAC3B,eACA,SACA,4BACA;AAMF,YAAM,KAAK;AAAA,QACT;AAAA,QACA;AAAA,UACE,eAAe;AAAA,YACb,MAAM,KAAK;AAAA,YACX,aAAa,KAAK;AAAA,UACpB;AAAA,UACA,eAAe,CAAC,UAAU;AAAA,UAC1B,SAAS,IAAI,YAAY;AAAA,YACvB,aAAa,CAAC;AAAA,YACd,oBAAoB;AAAA,YACpB,gBAAgB,CAAC;AAAA,YACjB,kBAAkB;AAAA,YAClB,kBAAkB;AAAA,YAClB,6BAA6B;AAAA,UAC/B,CAAC;AAAA,UACD,MAAM,WAAW,IAAI,aAAa;AAAA,UAClC,kBAAkB,WAAW,IAAI,EAAE,SAAS,IAAI;AAAA,UAChD,UAAU,KAAK,YAAY;AAAA,UAC3B,iBAAiB,KAAK,mBAAmB;AAAA,UACzC,+BAA+B;AAAA,UAC/B,aAAa;AAAA;AAAA,UAEb,eAAe,EAAE,QAAQ,QAAQ,QAAQ,WAAW;AAAA,UACpD,WAAW,EAAE,OAAO,MAAM;AAAA,UAC1B,cAAc;AAAA,YACZ,aAAa;AAAA,YACb,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,YAC1C,cAAc;AAAA,YACd,qBAAqB;AAAA,UACvB;AAAA,UACA,kBAAkB;AAAA,YAChB,aACE,KAAK,wBACL;AAAA,YACF,GAAI,KAAK,QAAQ,EAAE,OAAO,KAAK,MAAM,IAAI,CAAC;AAAA,YAC1C,cAAc;AAAA,UAChB;AAAA,UACA,cAAc,CAAC,qBAAqB;AAAA,QACtC;AAAA,MACF;AAEA,UAAI,MAAM,gBAAgB,KAAK,EAAE,YAAY;AAAA,QAC3C,WAAW;AAAA,QACX,YAAY,MAAM;AAAA,QAClB,MAAM,IAAI;AAAA,MACZ,CAAC;AAED,YAAM,SAAS,MAAM,GAAG,QAAQ,KAAK,IAAI,EAAE,UAAU,eAAe,KAAK,IAAI,EAAE,CAAC;AAEhF,YAAM,mBAAmB,uBAAuB,KAAK,EAAE;AACvD,YAAM,MAAwB,CAAC;AAC/B,YAAM,UAAU,MAAM,QAAQ,OAAO,QAAQ,IAAI,OAAO,WAAW,CAAC;AACpE,UAAI,oBAAoB;AACxB,iBAAW,OAAO,SAAS;AACzB,cAAM,SAAS,gBAAgB,KAAK,IAAI,GAAG;AAC3C,YAAI,CAAC,OAAQ;AAMb,YAAI,oBAAoB,OAAO,YAAY,QAAW;AACpD,gBAAM,gBAAgB,oBAAoB,OAAO,OAAO;AACxD,cAAI,kBAAkB,MAAM;AAC1B,gBAAI,MAAM,6CAA6C;AAAA,cACrD,MAAM,KAAK;AAAA,cACX,SAAS,OAAO;AAAA,YAClB,CAAC;AACD,iCAAqB;AACrB;AAAA,UACF;AACA,cAAI,CAAC,iBAAiB,SAAS,cAAc,IAAI,GAAG;AAClD,gBAAI,MAAM,+DAA+D;AAAA,cACvE,MAAM,KAAK;AAAA,cACX,cAAc,cAAc;AAAA,cAC5B,SAAS,OAAO;AAAA,cAChB,SAAS;AAAA,YACX,CAAC;AACD,iCAAqB;AACrB;AAAA,UACF;AAAA,QACF;AACA,cAAM,gBAAgB,KAAK,cAAc,QAAQ,GAAG,KAAK;AACzD,YAAI,CAAC,cAAe;AACpB,YAAI,KAAK,iBAAiB,MAAM,aAAa,CAAC;AAAA,MAChD;AAEA,UAAI,MAAM,gBAAgB,KAAK,EAAE,SAAS;AAAA,QACxC,SAAS,QAAQ;AAAA,QACjB,UAAU,IAAI;AAAA,QACd,wBAAwB;AAAA,MAC1B,CAAC;AASD,
YAAM,SAAS,OAAO,OAAO,WAAW,WAAW,OAAO,SAAS;AACnE,UAAI,IAAI,WAAW,KAAK,OAAO,KAAK,EAAE,UAAU,KAAK;AACnD,YAAI,KAAK,UAAU;AACjB,gBAAM,YAAY,MAAM,kBAAkB;AAAA,YACxC;AAAA,YACA,WAAW,KAAK;AAAA,YAChB,MAAM,KAAK;AAAA,YACX,OAAO,KAAK,SAAS,SAAS,KAAK,SAAS;AAAA,YAC5C,SAAS,KAAK,SAAS;AAAA,YACvB,QAAQ,KAAK,SAAS;AAAA,YACtB,WAAW,KAAK,SAAS;AAAA,UAC3B,CAAC;AACD,cAAI,KAAK,GAAG,UAAU,QAAQ;AAC9B,cAAI,MAAM,gBAAgB,KAAK,EAAE,aAAa;AAAA,YAC5C,SAAS,UAAU;AAAA,YACnB,WAAW,UAAU,SAAS;AAAA,UAChC,CAAC;AAAA,QACH;AACA,YAAI,IAAI,WAAW,GAAG;AACpB,cAAI;AAAA,YACF,YAAY;AAAA,cACV,YAAY,KAAK;AAAA,cACjB,MAAM,KAAK;AAAA,cACX,OAAO;AAAA,cACP,WAAW,OAAO,MAAM,GAAG,IAAI;AAAA,cAC/B,UAAU;AAAA,cACV,YAAY;AAAA,cACZ,eAAe;AAAA,gBACb,EAAE,MAAM,YAAY,KAAK,oBAAoB,SAAS,OAAO,MAAM,GAAG,GAAI,EAAE;AAAA,cAC9E;AAAA,cACA,UAAU,EAAE,SAAS,oBAAoB;AAAA,YAC3C,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,eAAe,KAAqB,MAAoC;AAK/E,QAAM,QAAQ,IAAI,MAAM,OAAO,KAAK;AACpC,QAAM,OAAO,kEAAkE,KAAK,IAAI,cAAc,KAAK,WAAW;AACtH,SAAO,QAAQ,GAAG,IAAI,WAAW,KAAK,MAAM;AAC9C;AAEA,SAAS,iBAAiB,MAA4B,KAAwC;AAC5F,SAAO,YAAY;AAAA,IACjB,YAAY,KAAK;AAAA,IACjB,MAAM,KAAK;AAAA,IACX,SAAS,IAAI;AAAA,IACb,OAAO,IAAI;AAAA,IACX,WAAW,IAAI;AAAA,IACf,UAAU,IAAI;AAAA,IACd,YAAY,IAAI;AAAA,IAChB,eAAe;AAAA,MACb;AAAA,QACE,MAAM,oBAAoB,IAAI,YAAY;AAAA,QAC1C,KAAK,IAAI;AAAA,QACT,SAAS,IAAI;AAAA,MACf;AAAA,IACF;AAAA,IACA,oBAAoB,IAAI;AAAA,IACxB,UAAU,EAAE,cAAc,KAAK,QAAQ;AAAA,EACzC,CAAC;AACH;AAEA,SAAS,oBAAoB,KAAmE;AAC9F,MAAI,IAAI,WAAW,SAAS,EAAG,QAAO;AACtC,MAAI,IAAI,WAAW,aAAa,EAAG,QAAO;AAC1C,MAAI,IAAI,WAAW,WAAW,EAAG,QAAO;AACxC,MAAI,IAAI,WAAW,UAAU,EAAG,QAAO;AACvC,MAAI,IAAI,WAAW,YAAY,EAAG,QAAO;AACzC,SAAO;AACT;AAgBO,SAAS,oBAAoB,OAAgD;AAClF,MAAI,CAAC,SAAS,MAAM,WAAW,EAAG,QAAO;AACzC,QAAM,WAAW;AACjB,QAAM,OAAO,MAAM,MAAM,GAAG,QAAQ,EAAE,IAAI,CAAC,MAAM;AAC/C,UAAM,UAAU,EAAE,UAAU,KAAK,EAAE,OAAO,MAAM;AAChD,WAAO,UAAU,EAAE,UAAU,IAAI,EAAE,QAAQ,GAAG,OAAO,IAAI,mBAAmB,EAAE,OAAO,GAAG,CAAC;AAAA,EAC3F,CAAC;AACD,QAAM,WACJ,MAAM,SAAS,WACX;AAAA,SAAY,MAAM,SAAS,QAAQ,mDACnC;AACN,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,I
ACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,IACH;AAAA,EACF,EACG,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEA,SAAS,mBAAmB,GAAW,KAAqB;AAC1D,MAAI,EAAE,UAAU,IAAK,QAAO;AAC5B,SAAO,GAAG,EAAE,MAAM,GAAG,MAAM,CAAC,EAAE,QAAQ,CAAC;AACzC;;;AC3UA,IAAM,sBAAuE;AAAA,EAC3E,KAAK,oBAAI,IAAI;AAAA,EACb,WAAW,oBAAI,IAAI,CAAC,sBAAsB,eAAe,aAAa,CAAC;AAAA,EACvE,kBAAkB,oBAAI,IAAI;AAAA,IACxB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,oBAAoB,oBAAI,IAAI;AAAA,IAC1B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAAA,EACD,UAAU,oBAAI,IAAI,CAAC,sBAAsB,eAAe,aAAa,YAAY,CAAC;AACpF;AASO,SAAS,wBACd,OACA,OACc;AACd,QAAM,MAAM,uBAAuB,EAAE,MAAM,CAAC;AAC5C,MAAI,UAAU,MAAO,QAAO;AAC5B,QAAM,QAAQ,oBAAoB,KAAK;AACvC,MAAI,CAAC,MAAO,OAAM,IAAI,MAAM,6BAA6B,KAAK,EAAE;AAChE,SAAO,IAAI,OAAO,CAAC,SAAS,MAAM,IAAK,KAA0B,IAAI,CAAC;AACxE;;;AC7CA,IAAM,eAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA+Bd,IAAM,yBAA+C;AAAA,EAC1D,IAAI;AAAA,EACJ,aACE;AAAA,EACF,MAAM;AAAA,EACN,SAAS;AAAA,EACT,kBAAkB;AAAA,EAClB,YAAY,CAAC,UAAU,wBAAwB,OAAO,KAAK;AAAA,EAC3D,WAAW,EAAE,UAAU,GAAG,sBAAsB,EAAE;AAAA,EAClD,UAAU;AAAA,EACV,MAAM,EAAE,MAAM,MAAM;AACtB;;;ACtCA,IAAMC,gBAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAsCd,IAAM,wBAA8C;AAAA,EACzD,IAAI;AAAA,EACJ,aACE;AAAA,EACF,MAAM;AAAA,EACN,SAAS;AAAA,EACT,kBAAkBA;AAAA,EAClB,YAAY,CAAC,UAAU,wBAAwB,OAAO,KAAK;AAAA,EAC3D,WAAW,EAAE,UAAU,GAAG,sBAAsB,EAAE;AAAA,EAClD,UAAU;AAAA,EACV,iBAAiB;AAAA,EACjB,MAAM,EAAE,MAAM,MAAM;AACtB;;;AC7CA,IAAMC,gBAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA4Cd,IAAM,0BAAgD;AAAA,EAC3D,IAAI;AAAA,EACJ,aACE;AAAA,EACF,MAAM;AAAA,EACN,SAAS;AAAA,EACT,kBAAkBA;AAAA,EAClB,YAAY,CAAC,UAAU,wBAAwB
,sBAAsB,KAAK;AAAA,EAC1E,WAAW,EAAE,UAAU,GAAG,sBAAsB,EAAE;AAAA,EAClD,UAAU;AAAA,EACV,MAAM,EAAE,MAAM,MAAM;AACtB;;;AC7DA,IAAMC,gBAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoCd,IAAM,gCAAsD;AAAA,EACjE,IAAI;AAAA,EACJ,aACE;AAAA,EACF,MAAM;AAAA,EACN,SAAS;AAAA,EACT,kBAAkBA;AAAA,EAClB,YAAY,CAAC,UAAU,wBAAwB,OAAO,KAAK;AAAA,EAC3D,WAAW,EAAE,UAAU,GAAG,sBAAsB,EAAE;AAAA,EAClD,UAAU;AAAA,EACV,MAAM,EAAE,MAAM,MAAM;AACtB;;;AC3CO,IAAM,8BAA+D;AAAA,EAC1E;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;;;ACdA,SAAS,kBAAkB;AA+FpB,IAAM,kBAAN,MAAsB;AAAA,EACV,WAAW,oBAAI,IAAqB;AAAA,EACpC;AAAA,EAEjB,YAAY,UAAkC,CAAC,GAAG;AAChD,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,SAAS,SAAwB;AAC/B,QAAI,CAAC,QAAQ,GAAI,OAAM,IAAI,MAAM,kDAAkD;AACnF,QAAI,KAAK,SAAS,IAAI,QAAQ,EAAE,GAAG;AACjC,YAAM,IAAI,MAAM,mDAAmD,QAAQ,EAAE,GAAG;AAAA,IAClF;AACA,QAAI,CAAC,QAAQ,SAAS;AACpB,YAAM,IAAI,MAAM,sCAAsC,QAAQ,EAAE,0BAA0B;AAAA,IAC5F;AACA,SAAK,SAAS,IAAI,QAAQ,IAAI,OAAO;AAAA,EACvC;AAAA,EAEA,OAKG;AACD,WAAO,MAAM,KAAK,KAAK,SAAS,OAAO,CAAC,EAAE,IAAI,CAAC,OAAO;AAAA,MACpD,IAAI,EAAE;AAAA,MACN,aAAa,EAAE;AAAA,MACf,SAAS,EAAE;AAAA,MACX,MAAM,EAAE;AAAA,IACV,EAAE;AAAA,EACJ;AAAA,EAEA,MAAM,IACJ,OACA,QACA,UAA2B,CAAC,GACD;AAG3B,qBAAiB,MAAM,KAAK,UAAU,OAAO,QAAQ,OAAO,GAAG;AAC7D,UAAI,GAAG,SAAS,gBAAiB,QAAO,GAAG;AAAA,IAC7C;AACA,UAAM,IAAI,MAAM,mEAAmE;AAAA,EACrF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,OAAO,UACL,OACA,QACA,UAA2B,CAAC,GACiB;AAC7C,UAAM,gBAAgB,MAAM,WAAW,EAAE,MAAM,GAAG,EAAE,CAAC;AACrD,UAAM,MAAM,KAAK,QAAQ,QAAQ,MAAM;AAAA,IAAC;AACxC,UAAM,QAAQ,KAAK,QAAQ,SAAS,CAAC;AACrC,UAAM,aAAY,oBAAI,KAAK,GAAE,YAAY;AACzC,UAAM,UAAU,KAAK,IAAI;AACzB,UAAM,aAAa,QAAQ,YAAY,UAAU,QAAQ,YAAY;AAErE,UAAM,WAAW,KAAK,eAAe,OAAO;AAC5C,UAAM,SAAS,QAAQ,UAAU,KAAK,QAAQ;AAE9C,UAAM;AAAA,MACJ,MAAM;AAAA,MACN,QAAQ;AAAA,MACR,gBAAgB;AAAA,MAChB,YAAY;AAAA,MACZ,aAAa,SAAS,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IACvC;AAEA,UAAM,YAAiC,CAAC;AACxC,UAAM,cAAgC,CAAC;AACvC,QAAI,YAAY;AAChB,QAAI,
eAAe,QAAQ;AAE3B,eAAW,WAAW,UAAU;AAC9B,YAAM,KAAK,KAAK,IAAI;AACpB,YAAM,QAAQ,KAAK,WAAW,SAAS,MAAM;AAC7C,UAAI,MAAM,SAAS,WAAW;AAC5B,cAAM,UAA6B;AAAA,UACjC,YAAY,QAAQ;AAAA,UACpB,QAAQ;AAAA,UACR,QAAQ,0BAA0B,QAAQ,SAAS;AAAA,UACnD,gBAAgB;AAAA,UAChB,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ;AACA,kBAAU,KAAK,OAAO;AACtB,YAAI,kBAAkB,QAAQ,EAAE,yBAAoB,EAAE,OAAO,MAAM,QAAQ,UAAU,CAAC;AACtF,cAAM,MAAM,iBAAiB,EAAE,SAAS,SAAS,UAAU,CAAC,GAAG,MAAM,CAAC;AACtE,cAAM,EAAE,MAAM,mBAAmB,QAAQ;AACzC;AAAA,MACF;AAEA,YAAM,YAAY,eAAe,QAAQ;AAAA,QACvC;AAAA,QACA;AAAA,QACA,cAAc,SAAS;AAAA,MACzB,CAAC;AAED,YAAM,MAAsB;AAAA,QAC1B;AAAA,QACA;AAAA,QACA;AAAA,QACA,WAAW;AAAA,QACX,MAAM,KAAK,QAAQ;AAAA,QACnB,MAAM,QAAQ;AAAA,QACd,KAAK,CAAC,KAAK,WAAW,IAAI,IAAI,QAAQ,EAAE,KAAK,GAAG,IAAI,EAAE,OAAO,eAAe,GAAG,OAAO,CAAC;AAAA,QACvF,QAAQ,QAAQ;AAAA,QAChB,eAAe,oBAAoB,QAAQ,eAAe,QAAQ,EAAE;AAAA,MACtE;AAEA,YAAM,MAAM,kBAAkB,EAAE,SAAS,KAAK,MAAM,CAAC;AACrD,YAAM;AAAA,QACJ,MAAM;AAAA,QACN,YAAY,QAAQ;AAAA,QACpB,YAAY,IAAI,KAAK,EAAE,EAAE,YAAY;AAAA,MACvC;AAEA,UAAI;AACF,cAAM,WAAW,MAAO,QAA6B,QAAQ,MAAM,OAAO,GAAG;AAC7E,cAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,cAAM,OAAO,eAAe,QAAQ;AACpC,qBAAa;AACb,YAAI,OAAO,iBAAiB,SAAU,gBAAe,KAAK,IAAI,GAAG,eAAe,IAAI;AACpF,oBAAY,KAAK,GAAG,QAAQ;AAC5B,cAAM,UAA6B;AAAA,UACjC,YAAY,QAAQ;AAAA,UACpB,QAAQ;AAAA,UACR,gBAAgB,SAAS;AAAA,UACzB,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ;AACA,kBAAU,KAAK,OAAO;AACtB,YAAI,gBAAgB,QAAQ,EAAE,IAAI;AAAA,UAChC;AAAA,UACA,UAAU,SAAS;AAAA,UACnB,YAAY;AAAA,UACZ,UAAU;AAAA,QACZ,CAAC;AACD,cAAM,MAAM,iBAAiB,EAAE,SAAS,SAAS,UAAU,MAAM,CAAC;AAClE,cAAM,EAAE,MAAM,qBAAqB,SAAS,SAAS;AAAA,MACvD,SAAS,KAAK;AACZ,cAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,cAAM,IAAI,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAE5D,cAAM,eAAgB,MAAM,MAAM,UAAU,EAAE,SAAS,OAAO,GAAG,MAAM,CAAC,KAAM,CAAC;AAC/E,YAAI,aAAa,OAAQ,aAAY,KAAK,GAAG,YAAY;AACzD,cAAM,UAA6B;AAAA,UACjC,YAAY,QAAQ;AAAA,UACpB,QAAQ;AAAA,UACR,gBAAgB,aAAa;AAAA,UAC7B,YAAY;AAAA,UACZ,UAAU;AAAA,UACV,OAAO,EAAE,OAAO,EAAE,YAAY,MAAM,SAAS,EAAE,QAAQ;AAAA,QACzD;AACA,kBAAU,KAAK,OAAO;AACtB,YAAI,kBAAkB,QAAQ,EAAE,IAAI;AAAA,UAClC;AAAA,UACA,aAAa,EAAE,YAAY;AA
AA,UAC3B,OAAO,EAAE;AAAA,QACX,CAAC;AACD,cAAM,MAAM,iBAAiB,EAAE,SAAS,SAAS,UAAU,cAAc,MAAM,CAAC;AAChF,cAAM,EAAE,MAAM,qBAAqB,SAAS,UAAU,aAAa;AAAA,MAErE;AAAA,IACF;AAEA,UAAM,SAA2B;AAAA,MAC/B,QAAQ;AAAA,MACR,gBAAgB;AAAA,MAChB,YAAY;AAAA,MACZ,WAAU,oBAAI,KAAK,GAAE,YAAY;AAAA,MACjC,UAAU;AAAA,MACV,aAAa;AAAA,MACb,gBAAgB;AAAA,IAClB;AACA,UAAM,MAAM,aAAa,EAAE,OAAO,CAAC;AACnC,UAAM,EAAE,MAAM,iBAAiB,OAAO;AAAA,EACxC;AAAA,EAEQ,eAAe,MAAkC;AACvD,QAAI,aAAa,MAAM,KAAK,KAAK,SAAS,OAAO,CAAC;AAClD,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,OAAO,IAAI,IAAI,KAAK,IAAI;AAC9B,mBAAa,WAAW,OAAO,CAAC,MAAM,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,IACtD;AACA,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,OAAO,IAAI,IAAI,KAAK,IAAI;AAC9B,mBAAa,WAAW,OAAO,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,IACvD;AACA,WAAO;AAAA,EACT;AAAA,EAEQ,WACN,SACA,QAC2D;AAC3D,YAAQ,QAAQ,WAAW;AAAA,MACzB,KAAK;AACH,eAAO,OAAO,aACV,EAAE,MAAM,WAAW,OAAO,OAAO,WAAW,IAC5C,EAAE,MAAM,UAAU;AAAA,MACxB,KAAK;AACH,eAAO,OAAO,cACV,EAAE,MAAM,WAAW,OAAO,OAAO,YAAY,IAC7C,EAAE,MAAM,UAAU;AAAA,MACxB,KAAK;AACH,eAAO,OAAO,YAAY,EAAE,MAAM,WAAW,OAAO,OAAO,UAAU,IAAI,EAAE,MAAM,UAAU;AAAA,MAC7F,KAAK;AACH,eAAO,OAAO,aACV,EAAE,MAAM,WAAW,OAAO,OAAO,WAAW,IAC5C,EAAE,MAAM,UAAU;AAAA,MACxB,KAAK,UAAU;AACb,cAAM,IAAI,OAAO,SAAS,QAAQ,EAAE;AACpC,eAAO,MAAM,SAAY,EAAE,MAAM,WAAW,OAAO,EAAE,IAAI,EAAE,MAAM,UAAU;AAAA,MAC7E;AAAA,IACF;AAAA,EACF;AACF;AAOA,SAAS,eACP,QACA,MACoB;AACpB,MAAI,CAAC,OAAQ,QAAO;AACpB,MAAI,OAAO,UAAU;AACnB,WAAO,OAAO,SAAS;AAAA,MACrB,SAAS,KAAK;AAAA,MACd,UAAU,OAAO;AAAA,MACjB,cAAc,KAAK;AAAA,MACnB,cAAc,KAAK;AAAA,IACrB,CAAC;AAAA,EACH;AACA,MAAI,OAAO,YAAY,KAAM,QAAO;AACpC,MAAI,OAAO,SAAS;AAOlB,UAAM,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,KAAK;AAC7C,UAAM,cAAc,KAAK,IAAI,GAAG,KAAK,YAAY;AACjD,WAAQ,OAAO,WAAW,IAAK;AAAA,EACjC;AACA,SAAO,OAAO,WAAW,KAAK,IAAI,GAAG,KAAK,YAAY;AACxD;AAOA,SAAS,eAAe,UAAoC;AAC1D,MAAI,MAAM;AACV,aAAW,KAAK,UAAU;AACxB,UAAM,IAAI,EAAE,UAAU;AACtB,QAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,EAAG,QAAO;AAAA,EAC1D;AACA,SAAO;AACT;AAcA,SAAS,oBACP,QACA,WAC2C;AAC3C,MAAI,CAAC,OAAQ,QAAO;AACpB,MAAI,MAAM,QAAQ,MAAM,GAAG;AACzB,UAAMC,OAAM,OAAO,OAAO,CAA
C,MAAM,EAAE,eAAe,SAAS;AAC3D,WAAOA,KAAI,SAAS,IAAIA,OAAM;AAAA,EAChC;AACA,QAAM,SAAS;AACf,QAAM,MAAM,OAAO,SAAS,KAAK,CAAC;AAClC,QAAM,WAAW,OAAO,GAAG,KAAK,CAAC;AACjC,QAAM,SAAS,CAAC,GAAG,KAAK,GAAG,QAAQ;AACnC,SAAO,OAAO,SAAS,IAAI,SAAS;AACtC;","names":["z","z","ACTOR_PROMPT","ACTOR_PROMPT","ACTOR_PROMPT","own"]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
runCanaries,
|
|
3
3
|
scoreRedTeamOutput
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-RPLZ4OIB.js";
|
|
5
5
|
import {
|
|
6
6
|
runCampaign
|
|
7
7
|
} from "./chunk-ZPSKPT3V.js";
|
|
@@ -432,4 +432,4 @@ export {
|
|
|
432
432
|
paretoSignificanceGate,
|
|
433
433
|
runEval
|
|
434
434
|
};
|
|
435
|
-
//# sourceMappingURL=chunk-
|
|
435
|
+
//# sourceMappingURL=chunk-GXHLRXDI.js.map
|
|
@@ -397,6 +397,20 @@ function validateRunRecord(input) {
|
|
|
397
397
|
for (const [k, v] of Object.entries(raw)) {
|
|
398
398
|
expectFiniteNumber(v, `outcome.raw.${k}`);
|
|
399
399
|
}
|
|
400
|
+
if (outRec.realness !== void 0) {
|
|
401
|
+
const r = outRec.realness;
|
|
402
|
+
if (r === null || typeof r !== "object") {
|
|
403
|
+
throw new RunRecordValidationError("outcome.realness must be an object", "outcome.realness");
|
|
404
|
+
}
|
|
405
|
+
const rr = r;
|
|
406
|
+
expectFiniteNumber(rr.score, "outcome.realness.score");
|
|
407
|
+
if (typeof rr.gated !== "boolean") {
|
|
408
|
+
throw new RunRecordValidationError(
|
|
409
|
+
"outcome.realness.gated must be a boolean",
|
|
410
|
+
"outcome.realness.gated"
|
|
411
|
+
);
|
|
412
|
+
}
|
|
413
|
+
}
|
|
400
414
|
if (outRec.judgeScores !== void 0) {
|
|
401
415
|
validateJudgeScores(outRec.judgeScores, "outcome.judgeScores");
|
|
402
416
|
}
|
|
@@ -538,4 +552,4 @@ export {
|
|
|
538
552
|
parseRunRecordSafe,
|
|
539
553
|
roundTripRunRecord
|
|
540
554
|
};
|
|
541
|
-
//# sourceMappingURL=chunk-
|
|
555
|
+
//# sourceMappingURL=chunk-KWRRMR3J.js.map
|