@tangle-network/agent-runtime 0.45.0 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/README.md +3 -3
  2. package/dist/agent.d.ts +5 -5
  3. package/dist/agent.js +2 -2
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +5 -40
  6. package/dist/analyst-loop.js +2 -4
  7. package/dist/{chunk-IJ6FGOPO.js → chunk-5YDS7BLC.js} +12 -7
  8. package/dist/chunk-5YDS7BLC.js.map +1 -0
  9. package/dist/{chunk-KEWO4KI6.js → chunk-72JQCHOZ.js} +850 -131
  10. package/dist/chunk-72JQCHOZ.js.map +1 -0
  11. package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
  12. package/dist/chunk-GSUO5QS6.js.map +1 -0
  13. package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
  14. package/dist/chunk-HNUXAZIJ.js.map +1 -0
  15. package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
  16. package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
  17. package/dist/chunk-KADIJAD4.js.map +1 -0
  18. package/dist/{chunk-NYN5RTLP.js → chunk-MGFEUYOH.js} +7 -7
  19. package/dist/chunk-MGFEUYOH.js.map +1 -0
  20. package/dist/{chunk-Z2QXVBA6.js → chunk-T4OQQEE3.js} +4 -4
  21. package/dist/chunk-T4OQQEE3.js.map +1 -0
  22. package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
  23. package/dist/{coder-CczgMqFx.d.ts → coder-CVZNGbyg.d.ts} +1 -1
  24. package/dist/{dynamic-BvllHV6M.d.ts → driver-DYU2sgHr.d.ts} +6 -6
  25. package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
  26. package/dist/improvement.d.ts +2 -2
  27. package/dist/index.d.ts +8 -8
  28. package/dist/index.js +8 -8
  29. package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-51BlLlVM.d.ts} +13 -7
  30. package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-DEm4roYF.d.ts} +11 -11
  31. package/dist/loop-runner-bin.d.ts +6 -6
  32. package/dist/loop-runner-bin.js +6 -6
  33. package/dist/loops.d.ts +5 -5
  34. package/dist/loops.js +18 -10
  35. package/dist/mcp/bin.js +6 -6
  36. package/dist/mcp/bin.js.map +1 -1
  37. package/dist/mcp/index.d.ts +75 -74
  38. package/dist/mcp/index.js +203 -31
  39. package/dist/mcp/index.js.map +1 -1
  40. package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
  41. package/dist/profiles.d.ts +8 -8
  42. package/dist/profiles.js +1 -1
  43. package/dist/profiles.js.map +1 -1
  44. package/dist/{run-loop--hSoIknW.d.ts → run-loop-DvD4aGiE.d.ts} +2 -2
  45. package/dist/runtime.d.ts +244 -57
  46. package/dist/runtime.js +18 -10
  47. package/dist/{types-1HbsFa7H.d.ts → types-Cbx3dNK5.d.ts} +23 -23
  48. package/dist/{types-DdzkffAm.d.ts → types-nBMuollC.d.ts} +34 -5
  49. package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
  50. package/dist/workflow.d.ts +3 -3
  51. package/dist/workflow.js +2 -2
  52. package/dist/workflow.js.map +1 -1
  53. package/package.json +1 -1
  54. package/skills/agent-runtime-adoption/SKILL.md +3 -3
  55. package/skills/generate-eval/SKILL.md +60 -0
  56. package/skills/loop-writer/SKILL.md +163 -0
  57. package/dist/chunk-FK53TXOP.js.map +0 -1
  58. package/dist/chunk-IJ6FGOPO.js.map +0 -1
  59. package/dist/chunk-KEWO4KI6.js.map +0 -1
  60. package/dist/chunk-NYN5RTLP.js.map +0 -1
  61. package/dist/chunk-PRX45WE2.js.map +0 -1
  62. package/dist/chunk-QR4UUC5P.js.map +0 -1
  63. package/dist/chunk-Z2QXVBA6.js.map +0 -1
  64. package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
  65. package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0
package/README.md CHANGED
@@ -54,7 +54,7 @@ That is the common case. Everything below is for when one chat turn is not enoug
54
54
  | Declare an agent (profile, surfaces, adapters) | `defineAgent` | `/agent` |
55
55
  | Run a one-shot task with verification and eval | `runAgentTask` | root |
56
56
  | Run a multi-attempt loop (refine or fanout-vote) | `runLoop` plus a driver | `/loops` |
57
- | Let the agent choose the loop shape per round | `createDynamicDriver` plus `createSandboxPlanner` | `/loops` |
57
+ | Let the agent choose the loop shape per round | `createDriver` plus `createSandboxPlanner` | `/loops` |
58
58
  | Delegate a disciplined loop by mode (code, research, ...) | `runDelegatedLoop` or `agent-runtime-loop` | root |
59
59
  | Build code reliably (reviewed, gated) | `createDefaultCoderDelegate` | `/mcp` |
60
60
  | Grow a knowledge base with only grounded facts | `createKbGate` | `/mcp` |
@@ -82,7 +82,7 @@ const result = await runLoop({
82
82
  result.winner // highest-scoring valid attempt
83
83
  ```
84
84
 
85
- Shipped drivers (`/loops/drivers`): `createRefineDriver` (single task, iterate until valid), `createFanoutVoteDriver` (N parallel, vote), and `createDynamicDriver` (the agent authors the topology at runtime). The dynamic driver emits one `TopologyMove` per round (`refine`, `fanout`, or `stop`) from an injected planner; a malformed move throws `PlannerError`, so the loop never runs a topology nobody chose. Topology is orthogonal to harness: the planner never names a backend, and the kernel's `agentRuns` decide which harness runs each branch.
85
+ Shipped drivers (`/loops/drivers`): `createRefineDriver` (single task, iterate until valid), `createFanoutVoteDriver` (N parallel, vote), and `createDriver` (the agent authors the topology at runtime). The dynamic driver emits one `TopologyMove` per round (`refine`, `fanout`, or `stop`) from an injected planner; a malformed move throws `PlannerError`, so the loop never runs a topology nobody chose. Topology is orthogonal to harness: the planner never names a backend, and the kernel's `agentRuns` decide which harness runs each branch.
86
86
 
87
87
  `runProgram` (also in `/loops`) is the recursive op-set (`sample`, `steer`, `fork`, `parallel`, `select`, `seq`, `stop`) plus a tree executor, for programs that compose sub-loops.
88
88
 
@@ -171,7 +171,7 @@ One entrypoint, `runExperiment(adapter, { sandboxClient, agentRun, arms, ... })`
171
171
  | Router base URL | `https://router.tangle.tools/v1` | `TANGLE_ROUTER_BASE_URL` env |
172
172
  | Sandbox base URL | `https://sandbox.tangle.tools` | `SANDBOX_API_URL` env |
173
173
  | Loop iteration cap | 10 (`runLoop`), 8 (dynamic driver) | `runLoop({ maxIterations })` |
174
- | Driver | none, required by `runLoop` | `createRefineDriver`, `createFanoutVoteDriver`, `createDynamicDriver` |
174
+ | Driver | none, required by `runLoop` | `createRefineDriver`, `createFanoutVoteDriver`, `createDriver` |
175
175
  | Winner selection (coder delegate) | `highest-score` | `winnerSelection` option |
176
176
  | KB gate min passage | 12 chars | `createKbGate({ minPassageChars })` |
177
177
  | `selfImprove` gate | held-out gate (default) | pass `gate: defaultProductionGate` for red-team hardening |
package/dist/agent.d.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
2
2
  import { TraceAnalystKindSpec, AnalystFinding } from '@tangle-network/agent-eval';
3
- import { R as RuntimeStreamEvent, b as LoopSandboxClient, O as OutputAdapter, A as AgentRunSpec } from './types-DdzkffAm.js';
4
- import { A as AgentSurfaces } from './improvement-adapter-CWegd3vw.js';
5
- export { C as CreateSurfaceImprovementAdapterOpts, D as DraftPatchInput, a as DraftPatchOutput, R as ResolvedSurface, S as SurfaceImprovementEdit, b as SurfaceValidationIssue, c as createSurfaceImprovementAdapter, r as renderSurfaceIssues, d as resolveSubjectPath, v as validateSurfaces } from './improvement-adapter-CWegd3vw.js';
6
- import { K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-BtRLF2U3.js';
3
+ import { R as RuntimeStreamEvent, S as SandboxClient, O as OutputAdapter, A as AgentRunSpec } from './types-nBMuollC.js';
4
+ import { A as AgentSurfaces } from './improvement-adapter-BC4HhuAR.js';
5
+ export { C as CreateSurfaceImprovementAdapterOpts, D as DraftPatchInput, a as DraftPatchOutput, R as ResolvedSurface, S as SurfaceImprovementEdit, b as SurfaceValidationIssue, c as createSurfaceImprovementAdapter, r as renderSurfaceIssues, d as resolveSubjectPath, v as validateSurfaces } from './improvement-adapter-BC4HhuAR.js';
6
+ import { K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
7
7
  import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
8
8
  import { C as ComposeProductionAgentProfileOptions } from './delegation-profile-1GbW5yA3.js';
9
9
  import './runtime-hooks-C7JwKb9E.js';
@@ -404,7 +404,7 @@ interface CreateSandboxActOptions<TPersona, TRunOutput> {
404
404
  /** Canonical agent profile — the same one the prod chat turn composes from. */
405
405
  baseProfile: AgentProfile;
406
406
  /** Sandbox client used to boot the per-run sandbox. */
407
- sandboxClient: LoopSandboxClient;
407
+ sandboxClient: SandboxClient;
408
408
  /** Persona → prompt. Pure; the eval cell's input. */
409
409
  buildPrompt: (persona: TPersona) => string;
410
410
  /** Sandbox event stream → typed output the rubric scores. */
package/dist/agent.js CHANGED
@@ -3,10 +3,10 @@ import {
3
3
  } from "./chunk-7JITYN6T.js";
4
4
  import {
5
5
  createSandboxForSpec
6
- } from "./chunk-KEWO4KI6.js";
6
+ } from "./chunk-72JQCHOZ.js";
7
7
  import {
8
8
  mapSandboxEvent
9
- } from "./chunk-PRX45WE2.js";
9
+ } from "./chunk-GSUO5QS6.js";
10
10
  import {
11
11
  __require
12
12
  } from "./chunk-DGUM43GV.js";
package/dist/agent.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/agent/surfaces.ts","../src/agent/define-agent.ts","../src/agent/improvement-adapter.ts","../src/agent/knowledge-adapter.ts","../src/agent/outcome.ts","../src/agent/sandbox-act.ts"],"sourcesContent":["/**\n * `AgentSurfaces` — declarative map of the mutable file/directory paths\n * the self-improvement loop can edit on behalf of an agent.\n *\n * The substrate uses this map to resolve every parsed `FindingSubject`\n * (from agent-eval) to a real on-disk path. No per-vertical glue;\n * no fabricated paths; no silent `existsSync(...)` skips that hide\n * misconfiguration from the operator.\n *\n * Surfaces are validated at `defineAgent` time — missing paths fail\n * loud with a list of every offender. A surface that's not needed\n * (e.g. an agent with no RAG corpora) is simply omitted; the loop\n * refuses to route those subjects rather than fabricating a target.\n */\n\nimport { existsSync } from 'node:fs'\nimport { isAbsolute, join } from 'node:path'\nimport type { FindingSubject } from '@tangle-network/agent-eval'\n\n/**\n * Surface declarations. Every path is repo-relative (or absolute) at\n * `defineAgent` time. At resolution time, paths are joined against the\n * agent's `repoRoot`.\n *\n * `systemPrompt`, `tools`, `personas` are DIRECTORIES; the loop appends\n * `<section>.md`, `<tool>/README.md`, `<persona-id>.yaml` etc.\n * `rubric`, `outputSchema` are SINGLE FILES; the loop edits them in\n * place.\n *\n * `knowledge` is the agent-knowledge root (typically `.agent-knowledge`);\n * `applyKnowledgeWriteBlocks` writes pages relative to it.\n *\n * Optional surfaces (`scaffolding`, `memory`, `rag`, `outputSchema`)\n * can be omitted — the loop will reject findings targeting them with a\n * clear log message instead of fabricating a path.\n */\nexport interface AgentSurfaces {\n /** Directory containing one markdown file per system-prompt section. 
*/\n systemPrompt: string\n /** Directory containing one subdir per tool (`<tool>/README.md`). */\n tools: string\n /** Single file (TypeScript module) defining the rubric weights + dimensions. */\n rubric: string\n /** Knowledge-base root; typically `.agent-knowledge`. */\n knowledge: string\n /** Directory containing one YAML/JSON file per persona. */\n personas: string\n /** Optional: directory containing scaffolding rules (precondition checks, retry policies). */\n scaffolding?: string\n /** Optional: memory store path (JSONL / SQLite / DB). */\n memory?: string\n /** Optional: directory containing RAG corpora (`<corpus>/<doc-id>.md`). */\n rag?: string\n /** Optional: single file defining the output schema (Zod / JSON Schema). */\n outputSchema?: string\n}\n\nexport interface ResolvedSurface {\n /** Absolute filesystem path the operator can `cat` / `vim`. */\n absolutePath: string\n /** Repo-relative path for PR descriptions, diffs, audit logs. */\n repoRelativePath: string\n /** Whether the path currently exists on disk. */\n exists: boolean\n /** The substrate's intent: edit an existing file or create a new one. */\n intent: 'edit-existing' | 'create-new'\n}\n\n/**\n * Resolve a parsed `FindingSubject` to the file path the substrate\n * should edit (or create) on disk.\n *\n * Returns `null` when:\n * - the subject targets a surface the agent didn't declare\n * (e.g. `rag:*` when `surfaces.rag` is undefined), OR\n * - the subject is a `cluster` (failure-mode emits these as evidence,\n * not actionable mutations — they don't route to a file).\n *\n * Returns a `ResolvedSurface` with `intent: 'create-new'` when the\n * subject names a path that doesn't yet exist (e.g. 
a new wiki page).\n * The caller chooses whether to honour the create — for tightly-managed\n * surfaces like `systemPrompt` it's usually a contract violation\n * (the analyst named a section that doesn't exist); for `knowledge`\n * it's the whole point.\n */\nexport function resolveSubjectPath(\n subject: FindingSubject,\n surfaces: AgentSurfaces,\n repoRoot: string,\n): ResolvedSurface | null {\n const candidates = candidatePathsForSubject(subject, surfaces)\n if (candidates.length === 0) return null\n\n // Probe candidates in order, preferring the first one that exists on disk.\n // Lets the substrate accept both the flat `<section>.md` convention and\n // the skill-dir `<section>/SKILL.md` convention without forcing one layout.\n // When none exists, fall back to the first candidate (canonical create-new).\n for (const rel of candidates) {\n const abs = isAbsolute(rel) ? rel : join(repoRoot, rel)\n if (existsSync(abs)) {\n return { absolutePath: abs, repoRelativePath: rel, exists: true, intent: 'edit-existing' }\n }\n }\n const fallback = candidates[0]!\n const fallbackAbs = isAbsolute(fallback) ? 
fallback : join(repoRoot, fallback)\n return {\n absolutePath: fallbackAbs,\n repoRelativePath: fallback,\n exists: false,\n intent: 'create-new',\n }\n}\n\nfunction candidatePathsForSubject(\n subject: FindingSubject,\n surfaces: AgentSurfaces,\n): ReadonlyArray<string> {\n switch (subject.kind) {\n case 'knowledge.wiki':\n case 'knowledge.stale':\n return [join(surfaces.knowledge, `${subject.slug}.md`)]\n case 'knowledge.claim':\n // Claims land in a per-topic claims directory under the knowledge root.\n return [join(surfaces.knowledge, 'claims', `${slugify(subject.topic)}.md`)]\n case 'knowledge.raw':\n return [join(surfaces.knowledge, 'raw', `${subject.sourceId}.md`)]\n case 'system-prompt': {\n const slug = slugify(subject.section)\n // Prefer flat layout for create-new (canonical); probe skill-dir layout\n // in case the existing repo (tax/legal/gtm/creative) uses\n // `<section>/SKILL.md` already.\n return [\n join(surfaces.systemPrompt, `${slug}.md`),\n join(surfaces.systemPrompt, slug, 'SKILL.md'),\n join(surfaces.systemPrompt, slug, 'index.md'),\n ]\n }\n case 'tool-doc':\n if (subject.aspect) {\n return [join(surfaces.tools, subject.tool, `${slugify(subject.aspect)}.md`)]\n }\n // tool-doc default: `<tool>/README.md`; also probe `<tool>.md` for flat\n // tool-list repos.\n return [\n join(surfaces.tools, subject.tool, 'README.md'),\n join(surfaces.tools, `${subject.tool}.md`),\n ]\n case 'new-tool':\n return [join(surfaces.tools, subject.name, 'README.md')]\n case 'rag':\n if (!surfaces.rag) return []\n return [join(surfaces.rag, subject.corpus, `${subject.docId}.md`)]\n case 'memory':\n if (!surfaces.memory) return []\n return [join(surfaces.memory, `${slugify(subject.key)}.json`)]\n case 'scaffolding':\n if (!surfaces.scaffolding) return []\n return [join(surfaces.scaffolding, `${slugify(subject.concern)}.md`)]\n case 'output-schema':\n if (!surfaces.outputSchema) return []\n return [surfaces.outputSchema]\n case 'websearch.outdated':\n case 
'prior-run-summary':\n // Stale signals don't map to a single file — handled by the knowledge\n // adapter as `agent-knowledge:stale:*` after operator review.\n return []\n case 'cluster':\n // failure-mode cluster labels are evidence, not mutations.\n return []\n }\n}\n\nfunction slugify(s: string): string {\n return (\n s\n .toLowerCase()\n .replace(/[^a-z0-9-]+/g, '-')\n .replace(/^-+|-+$/g, '')\n .slice(0, 200) || 'untitled'\n )\n}\n\n/**\n * Validate that every declared surface exists on disk under `repoRoot`.\n *\n * Returns an array of `SurfaceValidationIssue` — empty when all required\n * surfaces resolve. `defineAgent` throws with the issues rendered, so\n * a misconfigured manifest fails at startup (not at the first finding\n * the loop produces 20 minutes later).\n */\nexport interface SurfaceValidationIssue {\n surface: keyof AgentSurfaces\n path: string\n reason: 'missing' | 'not-directory' | 'not-file'\n}\n\nexport function validateSurfaces(\n surfaces: AgentSurfaces,\n repoRoot: string,\n): ReadonlyArray<SurfaceValidationIssue> {\n const issues: SurfaceValidationIssue[] = []\n const dirSurfaces: ReadonlyArray<keyof AgentSurfaces> = [\n 'systemPrompt',\n 'tools',\n 'personas',\n 'knowledge',\n ]\n const fileSurfaces: ReadonlyArray<keyof AgentSurfaces> = ['rubric']\n const optionalDirSurfaces: ReadonlyArray<keyof AgentSurfaces> = ['scaffolding', 'memory', 'rag']\n const optionalFileSurfaces: ReadonlyArray<keyof AgentSurfaces> = ['outputSchema']\n\n for (const key of dirSurfaces) {\n const p = surfaces[key] as string | undefined\n if (!p) {\n issues.push({ surface: key, path: '', reason: 'missing' })\n continue\n }\n const abs = isAbsolute(p) ? 
p : join(repoRoot, p)\n if (!existsSync(abs)) {\n issues.push({ surface: key, path: p, reason: 'missing' })\n }\n }\n for (const key of fileSurfaces) {\n const p = surfaces[key] as string | undefined\n if (!p) {\n issues.push({ surface: key, path: '', reason: 'missing' })\n continue\n }\n const abs = isAbsolute(p) ? p : join(repoRoot, p)\n if (!existsSync(abs)) {\n issues.push({ surface: key, path: p, reason: 'missing' })\n }\n }\n for (const key of [...optionalDirSurfaces, ...optionalFileSurfaces]) {\n const p = surfaces[key] as string | undefined\n if (p === undefined) continue\n const abs = isAbsolute(p) ? p : join(repoRoot, p)\n if (!existsSync(abs)) {\n issues.push({ surface: key, path: p, reason: 'missing' })\n }\n }\n return issues\n}\n\nexport function renderSurfaceIssues(\n issues: ReadonlyArray<SurfaceValidationIssue>,\n repoRoot: string,\n): string {\n if (issues.length === 0) return ''\n const lines = issues.map(\n (i) => ` - ${i.surface}: ${i.path ? `\"${i.path}\"` : '<not set>'} (${i.reason})`,\n )\n return [\n `Agent surface validation failed against repoRoot=${repoRoot}:`,\n ...lines,\n '',\n 'Fix the manifest: every required surface must point at an existing',\n 'directory (systemPrompt / tools / personas / knowledge) or file',\n '(rubric). Optional surfaces (scaffolding / memory / rag / outputSchema)',\n 'may be omitted; the loop will reject findings targeting omitted',\n 'surfaces rather than fabricating a path.',\n ].join('\\n')\n}\n","/**\n * `defineAgent` — typed, validated manifest API for declarative agent\n * configuration. The substrate consumes this manifest to wire the\n * canonical eval pattern + analyst self-improvement loop without any\n * per-vertical glue.\n *\n * Design goal: scale to 1000s of vertical agents. Every agent declares\n * its surfaces, rubric, runtime, and analyst configuration in ~50 lines.\n * No per-vertical `ImprovementAdapter`. No per-vertical CLI. 
No\n * fabricated paths.\n *\n * Validation: `defineAgent` runs `validateSurfaces` synchronously and\n * throws a structured error if any required surface is missing on\n * disk. The cost is one filesystem stat per surface (cheap); the\n * benefit is a manifest that can't ship broken.\n */\n\nimport type { TraceAnalystKindSpec } from '@tangle-network/agent-eval'\nimport type { RuntimeStreamEvent } from '../types'\nimport { type AgentSurfaces, renderSurfaceIssues, validateSurfaces } from './surfaces'\n\n// ── manifest ─────────────────────────────────────────────────────────\n\n/**\n * The full agent manifest. Each agent ships ONE of these.\n *\n * Generics:\n * `TPersona` — the agent's persona shape (loaded from\n * `surfaces.personas`). Defaults to `unknown` so the substrate's\n * persona discovery (`loadPersonas`) can accept anything; per-agent\n * code re-narrows when it matters.\n * `TRunOutput` — the shape `runtime.act` returns. Used by the rubric\n * scorers and emitted into the trace.\n */\nexport interface AgentManifest<TPersona = unknown, TRunOutput = unknown> {\n /**\n * Stable identifier — used as `projectId` in traces, as the analyst\n * loop's `runId` prefix, and as the namespace under which findings\n * are persisted. MUST match the agent's repo name to keep\n * cross-repo telemetry joinable.\n */\n id: string\n\n /**\n * Filesystem root the substrate resolves surface paths against.\n * Typically `process.cwd()` or a fixed absolute path. Use an\n * absolute path when the agent's tests may run from subdirectories\n * (vitest sometimes shifts cwd).\n */\n repoRoot: string\n\n /**\n * Map of mutable surfaces the self-improvement loop can edit. See\n * `AgentSurfaces` — required: `systemPrompt`, `tools`, `rubric`,\n * `knowledge`, `personas`. Optional: `scaffolding`, `memory`, `rag`,\n * `outputSchema`.\n *\n * Every required path is validated at `defineAgent` time. 
Missing\n * paths throw with the full list of offenders.\n */\n surfaces: AgentSurfaces\n\n /**\n * Rubric the substrate uses to score each run. Dimensions × weights\n * × judges. The substrate computes the weighted composite and\n * stamps it into the RunRecord.\n */\n rubric: AgentRubric<TRunOutput>\n\n /**\n * Runtime adapter — how the substrate INVOKES the agent against a\n * persona. The `act` function takes a persona + a context (with the\n * tracer the substrate threads through for span emission) and\n * returns the run output the rubric will score.\n *\n * The agent's existing production runtime goes in here; the\n * substrate is intentionally thin around it.\n */\n runtime: AgentRuntime<TPersona, TRunOutput>\n\n /**\n * Persona discovery — the substrate loads personas via this function\n * at eval start. Can read from `surfaces.personas`, an API, or be\n * hardcoded. The substrate calls it once per `runAgentEval` call;\n * persona ordering is preserved.\n */\n personas: () => Promise<ReadonlyArray<TPersona>>\n\n /**\n * Analyst kinds the substrate runs against each persona's trace.\n * Defaults to `DEFAULT_TRACE_ANALYST_KINDS` from agent-eval. Per-agent\n * authors can prune (e.g. skip `knowledge-poisoning` when there's no\n * knowledge base) or extend (custom domain kinds).\n *\n * Empty array disables the loop — useful for `pnpm eval --no-analyst`.\n */\n analystKinds: ReadonlyArray<TraceAnalystKindSpec>\n\n /**\n * Analyst LLM configuration. The substrate uses these for all four\n * kinds (override per-kind via `analystKinds` if needed).\n */\n analyst: AnalystConfig\n\n /**\n * Auto-apply policy. Knowledge / improvement edits land only when\n * `enabled === true` AND the source finding's confidence meets the\n * threshold. 
`mode` controls how applies happen: `'write'` mutates\n * files in-place; `'open-pr'` writes to a branch and opens a PR.\n *\n * Default: knowledge auto-applies at confidence ≥0.85 in `'write'`\n * mode (wiki edits are git-reversible); improvement stays at\n * `enabled: false` until the agent author has measured precision.\n */\n autoApply?: AutoApplyPolicy\n}\n\nexport interface AgentRubric<TRunOutput> {\n /** Dimensions composing the weighted score. Weights sum to 1.0 by convention. */\n dimensions: ReadonlyArray<RubricDimension<TRunOutput>>\n /**\n * Optional judges layered on top of deterministic dimensions. Each\n * judge returns a score per dimension; the substrate averages judges\n * (mean by default) for the LLM contribution.\n */\n judges?: ReadonlyArray<JudgeConfig<TRunOutput>>\n}\n\nexport interface RubricDimension<TRunOutput> {\n /** Unique identifier — appears in finding subjects (`rubric:<id>`). */\n id: string\n /** 0..1 — weight in the composite. */\n weight: number\n /**\n * Deterministic scorer: given the persona + run output, returns a\n * 0..1 score. The substrate sums weight × score across dimensions\n * for the deterministic composite; judges supplement subjective dims.\n */\n score: (input: { persona: unknown; output: TRunOutput }) => number\n /** Optional human-readable label for reports. */\n label?: string\n}\n\nexport interface JudgeConfig<TRunOutput> {\n /** Judge identifier — appears in trace spans + manifest. */\n id: string\n /** Model snapshot to invoke. Pin the snapshot (`claude-sonnet-4-6@2025-04-15`); the validator rejects bare aliases. */\n model: string\n /** Dimensions this judge scores. */\n dimensions: ReadonlyArray<string>\n /**\n * Optional rubric anchors — text examples the judge sees as a\n * few-shot prompt to calibrate. 
STRONGLY recommended for subjective\n * dimensions; required by the calibration gate (Pearson ≥0.7).\n */\n anchors?: ReadonlyArray<{ input: string; output: TRunOutput; expected: Record<string, number> }>\n}\n\nexport interface AgentRuntime<TPersona, TRunOutput> {\n /**\n * Invoke the agent against one persona. Returns BOTH:\n * - `events`: an `AsyncIterable<RuntimeStreamEvent>` the chat-centric\n * product consumes verbatim (SSE / WebSocket / inline render).\n * **Streaming is mandatory — never collapse this to a single Promise.**\n * The agent's existing `runChatTurn` (or equivalent async generator)\n * plugs in here directly.\n * - `output`: a `Promise<TRunOutput>` resolved AFTER the event stream\n * drains. The eval substrate awaits this for rubric scoring; chat\n * products usually ignore it (they already rendered incrementally).\n *\n * Implementation contract:\n * 1. `act` MUST return immediately (synchronous construction of the\n * `events` iterator + the `output` promise).\n * 2. Iterating `events` drives the underlying LLM/tool calls — the\n * caller chooses when to consume.\n * 3. `output` resolves only after the iterator yields its terminal\n * event (typically `task_end`); see `collectAgentRun` helper.\n *\n * `ctx.emitter` is the substrate-threaded `TraceEmitter` — runtimes\n * SHOULD record LLM/tool spans through it for capture integrity.\n * `ctx.deadlineMs` is wall-clock; the runtime SHOULD honour for graceful\n * cancel. `ctx.signal` is the standard abort signal.\n */\n act: (persona: TPersona, ctx: AgentRunContext) => AgentRunInvocation<TRunOutput>\n}\n\nexport interface AgentRunInvocation<TRunOutput> {\n /** Live stream of typed runtime events. Consumed by chat UX directly. */\n events: AsyncIterable<RuntimeStreamEvent>\n /** Final structured output the rubric scores. Resolves after `events` drains. */\n output: Promise<TRunOutput>\n}\n\n/**\n * Stub for agents whose `runtime.act` is not yet wired to the substrate's\n * eval path. 
Preserves the streaming contract (empty event stream + a\n * rejected `output` promise that tells the caller exactly what to fix).\n *\n * Per-vertical manifests usually start with this stub and replace it with\n * the agent's real streaming runtime (`runChatTurn` or equivalent) once\n * the eval path consumes the manifest end-to-end.\n */\nexport function unimplementedAgentRun<TRunOutput = unknown>(\n reason = 'AgentRuntime.act is not yet wired for this manifest',\n): AgentRunInvocation<TRunOutput> {\n return {\n events: (async function* empty(): AsyncIterable<RuntimeStreamEvent> {})(),\n output: Promise.reject(new Error(reason)),\n }\n}\n\n/**\n * Drain `act`'s `events` into an array AND await its `output`. Useful for\n * eval / outcome-measurement code paths that don't care about live\n * rendering. The events array is preserved so the substrate can inspect\n * tool calls / readiness / questions retrospectively.\n *\n * IMPORTANT: chat-centric UX MUST NOT call this — it defeats streaming\n * (no incremental render). Use `for await (const ev of invocation.events)`\n * directly in the chat surface.\n */\nexport async function collectAgentRun<TRunOutput>(\n invocation: AgentRunInvocation<TRunOutput>,\n): Promise<{ events: ReadonlyArray<RuntimeStreamEvent>; output: TRunOutput }> {\n const events: RuntimeStreamEvent[] = []\n for await (const ev of invocation.events) events.push(ev)\n const output = await invocation.output\n return { events, output }\n}\n\nexport interface AgentRunContext {\n /** Substrate-managed trace emitter. */\n emitter: import('@tangle-network/agent-eval').TraceEmitter\n /** Stable run id for this persona × variant cell. */\n runId: string\n /** Variant the runtime is exercising (e.g. `'baseline'`, `'source-grounded'`). */\n variantId?: string\n /** Wall-clock deadline (epoch ms). The runtime SHOULD honour for graceful cancel. */\n deadlineMs?: number\n /** Optional abort signal. 
*/\n signal?: AbortSignal\n}\n\nexport interface AnalystConfig {\n /** Model the analyst kinds use. Override per-kind via `analystKinds[i].cost.models`. */\n model: string\n /** Optional total budget across all kinds for one run. Substrate enforces via `BudgetGuard`. */\n budgetUsd?: number\n /** Backend hint for the AxAIService factory — same shape every kind uses. */\n backend?: {\n name?: 'openai' | 'router'\n apiKey?: string\n baseUrl?: string\n }\n}\n\nexport interface AutoApplyPolicy {\n knowledge?: {\n enabled: boolean\n confidenceThreshold?: number\n mode?: 'write' | 'open-pr'\n }\n improvement?: {\n enabled: boolean\n confidenceThreshold?: number\n mode?: 'write' | 'open-pr'\n }\n}\n\n// ── factory + validation ─────────────────────────────────────────────\n\nexport class AgentManifestError extends Error {\n constructor(\n message: string,\n public readonly agentId: string,\n public readonly issues: ReadonlyArray<unknown> = [],\n ) {\n super(message)\n this.name = 'AgentManifestError'\n }\n}\n\n/**\n * Construct a validated agent manifest. Throws `AgentManifestError`\n * if any required surface is missing on disk.\n *\n * Generics: pass your persona / output types if you want narrowed\n * `runtime.act` signatures:\n * `defineAgent<TaxPersona, TaxRunOutput>({ ... })`\n *\n * Most callers don't need the generics — the substrate operates on\n * `unknown` payloads internally and the manifest's `score` /\n * `runtime.act` see the typed shapes via TypeScript inference at\n * the call site.\n */\nexport function defineAgent<TPersona = unknown, TRunOutput = unknown>(\n manifest: AgentManifest<TPersona, TRunOutput>,\n): AgentManifest<TPersona, TRunOutput> {\n if (!manifest.id || manifest.id.trim().length === 0) {\n throw new AgentManifestError('defineAgent: `id` is required', manifest.id ?? 
'')\n }\n if (!manifest.repoRoot || manifest.repoRoot.trim().length === 0) {\n throw new AgentManifestError('defineAgent: `repoRoot` is required', manifest.id)\n }\n const issues = validateSurfaces(manifest.surfaces, manifest.repoRoot)\n if (issues.length > 0) {\n throw new AgentManifestError(\n renderSurfaceIssues(issues, manifest.repoRoot),\n manifest.id,\n issues,\n )\n }\n // Lightweight rubric sanity: weights sum to ~1.0 (no hard requirement —\n // the substrate normalizes — but flag wildly miscalibrated weights).\n const total = manifest.rubric.dimensions.reduce((acc, d) => acc + d.weight, 0)\n if (manifest.rubric.dimensions.length > 0 && (total < 0.5 || total > 1.5)) {\n throw new AgentManifestError(\n `defineAgent(${manifest.id}): rubric dimension weights sum to ${total.toFixed(3)} — should be ~1.0`,\n manifest.id,\n )\n }\n return manifest\n}\n","/**\n * Substrate-default `ImprovementAdapter` — surfaces-driven, LLM-drafted\n * patches, optional auto-apply or PR-open.\n *\n * This is the one ImprovementAdapter every vertical agent uses. The\n * substrate parses each finding's `subject` via\n * `parseFindingSubject` (agent-eval), resolves it to a real file path\n * via the agent's `AgentSurfaces`, reads the current content, and asks\n * an LLM to draft a unified-diff patch given the finding + current\n * content + per-kind editing-discipline rules.\n *\n * Auto-apply gates on the source-finding's confidence and the\n * autoApply.improvement policy. Two modes:\n * `write` — apply the patch in-place via `git apply -p0`. 
Operator\n * reviews via `git diff`.\n * `open-pr` — write to a branch, commit, push, open a PR via `gh`.\n * Operator reviews via the PR UI.\n *\n * Fail-loud rules:\n * - Findings whose subject doesn't parse → counted in `errors`.\n * - Findings whose subject targets an undeclared surface → counted in\n * `errors` with the offending kind in the message.\n * - Findings whose target path doesn't exist AND the kind isn't a\n * create-new variant (`new-tool`, `knowledge.wiki`) → counted in\n * `errors` with the resolved path in the message.\n * - LLM drafts that fail JSON-schema validation → counted in\n * `errors` with the schema issue.\n *\n * No silent skips. Every dropped finding has a recorded reason the\n * loop's report surfaces.\n */\n\nimport { spawnSync } from 'node:child_process'\nimport { readFileSync } from 'node:fs'\nimport type { AnalystFinding, FindingSubject } from '@tangle-network/agent-eval'\nimport { parseFindingSubject } from '@tangle-network/agent-eval/analyst'\nimport type { ImprovementAdapter } from '../analyst-loop/types'\nimport type { AgentSurfaces, ResolvedSurface } from './surfaces'\nimport { resolveSubjectPath } from './surfaces'\n\n// ── proposal shape ───────────────────────────────────────────────────\n\nexport interface SurfaceImprovementEdit {\n /** Stable id derived from the source finding so re-proposals are idempotent. */\n id: string\n /** The finding that produced this edit — for revert + audit trail. */\n sourceFindingId: string\n /** Parsed subject; included so the apply step doesn't re-parse. */\n subject: FindingSubject\n /** Resolved on-disk target. */\n target: ResolvedSurface\n /** SHA-256 of the current file content the patch was drafted against. */\n baseSha256: string\n /** Unified-diff patch the LLM drafted (relative to `target.absolutePath`). */\n patch: string\n /** One-line summary the operator sees in the report / PR title. 
*/\n summary: string\n /** Multi-line rationale for the PR body — finding context + LLM reasoning. */\n rationale: string\n /** Carry-forward from the finding so the apply gate can check the threshold. */\n confidence: number\n /** Carry-forward severity for prioritization. */\n severity: AnalystFinding['severity']\n}\n\nexport interface CreateSurfaceImprovementAdapterOpts {\n surfaces: AgentSurfaces\n repoRoot: string\n /**\n * LLM-draft callback. Given a finding + current file content + the\n * resolved target, returns a unified-diff patch + summary + rationale.\n *\n * Required — the substrate doesn't ship a hardcoded prompt; the agent\n * author picks the model (Haiku for cheap routine drafts, Sonnet for\n * substantive prompt rewrites, etc.) via this callback.\n */\n draftPatch: (input: DraftPatchInput) => Promise<DraftPatchOutput>\n /**\n * Apply mode:\n * `write` — `git apply` in-place; operator reviews via `git diff`\n * `open-pr` — branch + commit + push + `gh pr create`\n * `none` — never apply; collect proposals for the report only\n *\n * The `apply` method honours this even when the loop calls it; the\n * effective behaviour is also gated on the per-finding confidence\n * threshold via `runAnalystLoop`'s `autoApply` policy.\n */\n mode?: 'write' | 'open-pr' | 'none'\n /** When `mode === 'open-pr'`, the base branch new PRs target. Default: `main`. */\n baseBranch?: string\n /** Required for `mode === 'open-pr'` — the GH owner/repo (`tangle-network/tax-agent`). */\n ghRepo?: string\n /**\n * When the resolved target doesn't exist, allow the substrate to\n * CREATE the file (for `knowledge.wiki`, `new-tool` subjects). 
Default\n * true for those kinds, false for `system-prompt` / `rubric` / etc.\n * (named sections that don't exist are a contract violation, not a\n * scaffolding opportunity).\n */\n allowCreateForKinds?: ReadonlyArray<FindingSubject['kind']>\n}\n\nexport interface DraftPatchInput {\n finding: AnalystFinding\n subject: FindingSubject\n target: ResolvedSurface\n /** Current file content (empty string when `intent === 'create-new'`). */\n currentContent: string\n}\n\nexport interface DraftPatchOutput {\n /** Unified diff against the current file content. Empty string skips this finding. */\n patch: string\n /** One-line summary for the operator. */\n summary: string\n /** Multi-line rationale for the PR body. */\n rationale: string\n}\n\n// ── factory ──────────────────────────────────────────────────────────\n\nconst DEFAULT_CREATE_KINDS: ReadonlyArray<FindingSubject['kind']> = [\n 'knowledge.wiki',\n 'knowledge.claim',\n 'knowledge.raw',\n 'new-tool',\n]\n\nexport function createSurfaceImprovementAdapter(\n opts: CreateSurfaceImprovementAdapterOpts,\n): ImprovementAdapter<SurfaceImprovementEdit> {\n const mode = opts.mode ?? 'none'\n const allowCreate = opts.allowCreateForKinds ?? 
DEFAULT_CREATE_KINDS\n\n return {\n async proposeFromFindings(findings) {\n const edits: SurfaceImprovementEdit[] = []\n const errors: Array<{ findingId: string; subject: string; message: string }> = []\n let skipped = 0\n\n for (const f of findings) {\n const subject = parseFindingSubject(f.subject)\n if (subject === null) {\n if (f.subject !== undefined) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject,\n message: 'subject does not parse against the finding-subject grammar',\n })\n } else {\n // Subject-less findings are descriptive, not actionable —\n // legitimate; count in `skipped` not `errors`.\n skipped += 1\n }\n continue\n }\n\n // `cluster` findings (failure-mode) are evidence, not mutations.\n if (subject.kind === 'cluster') {\n skipped += 1\n continue\n }\n\n // `agent-knowledge:*` findings flow to the KnowledgeAdapter;\n // the ImprovementAdapter skips them so subjects don't double-route.\n if (subject.kind.startsWith('knowledge.')) {\n skipped += 1\n continue\n }\n\n const target = resolveSubjectPath(subject, opts.surfaces, opts.repoRoot)\n if (target === null) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject ?? '',\n message: `subject kind \"${subject.kind}\" targets an undeclared surface; declare it in AgentSurfaces or stop emitting this subject`,\n })\n continue\n }\n\n if (target.intent === 'create-new' && !allowCreate.includes(subject.kind)) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject ?? '',\n message: `target ${target.repoRelativePath} does not exist; the kind \"${subject.kind}\" requires an existing target (analyst named a section that isn't in the codebase)`,\n })\n continue\n }\n\n const currentContent = target.exists ? readFileSync(target.absolutePath, 'utf-8') : ''\n\n let draft: DraftPatchOutput\n try {\n draft = await opts.draftPatch({ finding: f, subject, target, currentContent })\n } catch (err) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject ?? 
'',\n message: `draftPatch threw: ${err instanceof Error ? err.message : String(err)}`,\n })\n continue\n }\n\n if (draft.patch.trim().length === 0) {\n skipped += 1\n continue\n }\n\n edits.push({\n id: `imp-${f.finding_id}`,\n sourceFindingId: f.finding_id,\n subject,\n target,\n baseSha256: sha256(currentContent),\n patch: draft.patch,\n summary: draft.summary,\n rationale: draft.rationale,\n confidence: f.confidence,\n severity: f.severity,\n })\n }\n\n return { edits, skipped, errors }\n },\n\n async apply(edits) {\n const applied: string[] = []\n const warnings: string[] = []\n\n if (mode === 'none') {\n warnings.push(\n 'createSurfaceImprovementAdapter: mode=none; no edits applied — adjust manifest.autoApply.improvement.mode',\n )\n return { applied, warnings }\n }\n\n if (mode === 'open-pr' && !opts.ghRepo) {\n warnings.push(\n 'createSurfaceImprovementAdapter: mode=open-pr requires `ghRepo`; falling back to no-op',\n )\n return { applied, warnings }\n }\n\n for (const edit of edits) {\n // Race-detection: confirm the file content hasn't moved since the\n // patch was drafted. A diff applied against drifted content is a\n // recipe for silent corruption.\n const current = edit.target.exists ? readFileSync(edit.target.absolutePath, 'utf-8') : ''\n if (sha256(current) !== edit.baseSha256) {\n warnings.push(\n `${edit.target.repoRelativePath}: base SHA mismatch; file changed after draft. Skipping.`,\n )\n continue\n }\n\n const ok = applyPatchInPlace(edit, opts.repoRoot)\n if (!ok) {\n warnings.push(`${edit.target.repoRelativePath}: git apply failed`)\n continue\n }\n applied.push(edit.target.repoRelativePath)\n }\n\n if (mode === 'open-pr' && applied.length > 0 && opts.ghRepo) {\n const prUrl = openPullRequest(\n applied,\n edits.filter((e) => applied.includes(e.target.repoRelativePath)),\n opts.repoRoot,\n opts.ghRepo,\n opts.baseBranch ?? 
'main',\n )\n if (prUrl) warnings.push(`opened PR: ${prUrl}`)\n else warnings.push('PR creation failed; edits are committed to a local branch only')\n }\n\n return { applied, warnings }\n },\n }\n}\n\n// ── apply helpers ────────────────────────────────────────────────────\n\nfunction applyPatchInPlace(edit: SurfaceImprovementEdit, repoRoot: string): boolean {\n const result = spawnSync('git', ['apply', '--whitespace=fix', '-p0', '-'], {\n cwd: repoRoot,\n input: edit.patch,\n encoding: 'utf-8',\n })\n return result.status === 0\n}\n\nfunction openPullRequest(\n paths: ReadonlyArray<string>,\n edits: ReadonlyArray<SurfaceImprovementEdit>,\n repoRoot: string,\n ghRepo: string,\n baseBranch: string,\n): string | null {\n const branch = `analyst-loop/${Date.now()}-${edits[0]?.sourceFindingId.slice(0, 12) ?? 'edits'}`\n // Create branch, stage, commit\n const checkout = spawnSync('git', ['checkout', '-b', branch], { cwd: repoRoot })\n if (checkout.status !== 0) return null\n const add = spawnSync('git', ['add', ...paths], { cwd: repoRoot })\n if (add.status !== 0) return null\n const title = `analyst-loop: ${edits[0]?.summary ?? 
`${edits.length} improvement edits`}`\n const body = [\n `Automated analyst-loop edits — review carefully before merge.`,\n '',\n `Source findings:`,\n ...edits.map(\n (e) =>\n ` - ${e.sourceFindingId} (confidence ${e.confidence.toFixed(2)}, severity ${e.severity})`,\n ),\n '',\n 'Rationales:',\n ...edits.map((e) => `\\n## ${e.target.repoRelativePath}\\n\\n${e.rationale}`),\n ].join('\\n')\n const commit = spawnSync('git', ['commit', '-m', title, '-m', body], { cwd: repoRoot })\n if (commit.status !== 0) return null\n const push = spawnSync('git', ['push', '-u', 'origin', branch], { cwd: repoRoot })\n if (push.status !== 0) return null\n const pr = spawnSync(\n 'gh',\n [\n 'pr',\n 'create',\n '--repo',\n ghRepo,\n '--title',\n title,\n '--body',\n body,\n '--base',\n baseBranch,\n '--head',\n branch,\n ],\n { cwd: repoRoot, encoding: 'utf-8' },\n )\n if (pr.status !== 0) return null\n return pr.stdout.trim()\n}\n\nfunction sha256(s: string): string {\n // node:crypto is dynamic-imported lazily so the adapter can be tested in\n // environments without crypto (browser tests, mocked envs).\n const crypto = require('node:crypto') as typeof import('node:crypto')\n return crypto.createHash('sha256').update(s, 'utf-8').digest('hex')\n}\n","/**\n * Substrate-default `KnowledgeAdapter` — wraps agent-knowledge's\n * `proposeFromFindings` + `applyKnowledgeWriteBlocks` with substrate\n * defaults (auto-lint after apply, source linkage via finding id).\n *\n * Every agent that ships a `.agent-knowledge/` tree uses this adapter\n * unmodified. Per-agent customization happens at the manifest level\n * (`autoApply.knowledge.confidenceThreshold`, etc.), not by writing a\n * new adapter.\n *\n * Lint discipline: after each apply we run agent-knowledge's\n * `lintKnowledgeIndex` to catch broken links / circular claims /\n * duplicate pages introduced by the new writes. 
Findings that fail the\n * post-apply lint are recorded in `warnings`; the apply itself is not\n * rolled back (lint failures are soft — humans review the wiki state).\n */\n\nimport type { AnalystFinding } from '@tangle-network/agent-eval'\nimport type { KnowledgeAdapter } from '../analyst-loop/types'\n\nexport interface CreateSurfaceKnowledgeAdapterOpts {\n /** `.agent-knowledge/` root (absolute path the substrate writes blocks against). */\n knowledgeRoot: string\n}\n\n/**\n * Build the adapter. We accept the agent-knowledge functions as DI so\n * the substrate stays decoupled from a specific agent-knowledge\n * version — the agent author imports them in their manifest module\n * and hands them to the factory.\n *\n * `proposeFromFindings(findings)` returns\n * `{ proposals: KnowledgeProposal[]; skipped: number; errors: ... }`.\n *\n * `applyKnowledgeWriteBlocks(root, content)` returns\n * `{ written: string[]; warnings: string[] }`.\n *\n * `lintKnowledgeIndex(index)` (optional) returns `KnowledgeLintFinding[]`.\n */\nexport interface KnowledgeAdapterDeps<TProposal> {\n proposeFromFindings: (findings: ReadonlyArray<AnalystFinding>) => {\n proposals: TProposal[]\n skipped: number\n errors: Array<{ findingId: string; subject: string; message: string }>\n }\n applyKnowledgeWriteBlocks: (\n root: string,\n proposalText: string,\n ) => Promise<{ written: string[]; warnings: string[] }>\n /**\n * Optional post-apply lint hook. 
The substrate runs it after each\n * batch of writes; failures land in `warnings` (the apply is not\n * rolled back — lint signals drift to review, not block).\n */\n lintAfterApply?: (root: string) => Promise<ReadonlyArray<string>>\n}\n\nexport function createSurfaceKnowledgeAdapter<TProposal>(\n opts: CreateSurfaceKnowledgeAdapterOpts,\n deps: KnowledgeAdapterDeps<TProposal>,\n): KnowledgeAdapter<TProposal> {\n return {\n proposeFromFindings(findings) {\n const batch = deps.proposeFromFindings(findings)\n return {\n proposals: batch.proposals,\n skipped: batch.skipped,\n errors: batch.errors,\n }\n },\n async apply(proposals) {\n const written: string[] = []\n const warnings: string[] = []\n for (const p of proposals) {\n const proposalText = renderProposalAsWriteBlock(p)\n if (proposalText === null) {\n warnings.push(\n `proposal has no writeBlocks/content; skipping (sourceFindingId=${getSourceFindingId(p)})`,\n )\n continue\n }\n try {\n const r = await deps.applyKnowledgeWriteBlocks(opts.knowledgeRoot, proposalText)\n written.push(...r.written)\n warnings.push(...r.warnings)\n } catch (err) {\n warnings.push(\n `applyKnowledgeWriteBlocks failed: ${err instanceof Error ? err.message : String(err)}`,\n )\n }\n }\n if (deps.lintAfterApply && written.length > 0) {\n try {\n const lintIssues = await deps.lintAfterApply(opts.knowledgeRoot)\n for (const issue of lintIssues) warnings.push(`lint: ${issue}`)\n } catch (err) {\n warnings.push(\n `lintAfterApply failed: ${err instanceof Error ? err.message : String(err)}`,\n )\n }\n }\n return { written, warnings }\n },\n }\n}\n\n/**\n * Pluck the canonical write-block text from a proposal regardless of\n * which exact agent-knowledge version produced it. 
We accept either:\n * - `{ writeBlocks: Array<{ path, content }> }` — the typed shape\n * 1.3.0+ emits\n * - `{ proposalText: string }` — legacy single-block shape\n * - `{ content: string }` — minimal raw form\n *\n * Returns `null` when nothing parseable is present (warned upstream).\n */\nfunction renderProposalAsWriteBlock(p: unknown): string | null {\n if (!p || typeof p !== 'object') return null\n const obj = p as Record<string, unknown>\n if (Array.isArray(obj.writeBlocks)) {\n const blocks = obj.writeBlocks as Array<{ path?: string; content?: string }>\n if (blocks.length === 0) return null\n return blocks\n .map((b) => (typeof b.content === 'string' ? b.content : ''))\n .filter((s) => s.length > 0)\n .join('\\n\\n')\n }\n if (typeof obj.proposalText === 'string') return obj.proposalText\n if (typeof obj.content === 'string') return obj.content\n return null\n}\n\nfunction getSourceFindingId(p: unknown): string {\n if (!p || typeof p !== 'object') return '<unknown>'\n const obj = p as Record<string, unknown>\n if (typeof obj.sourceFindingId === 'string') return obj.sourceFindingId\n if (typeof obj.id === 'string') return obj.id\n return '<unknown>'\n}\n","/**\n * `OutcomeMeasurement` — the missing metric that turns the analyst\n * loop from \"observability\" into \"self-improvement\".\n *\n * Without this hook, the loop reports process counts (`findings: 42`,\n * `applied: 7`) and never proves the applied edits actually improved\n * anything. With this hook, the substrate re-runs the cohort against\n * the same personas after each apply pass and reports a composite\n * score delta. A negative delta is the substrate's strongest signal\n * to either roll back or surface for review.\n *\n * Wiring is intentionally simple: pass the manifest + the `runAgentEval`\n * function and a list of `personaIds` to re-run. The wrapper:\n * 1. Captures the baseline composite from the just-finished run.\n * 2. 
After `runAnalystLoop` returns, re-invokes `runAgentEval` against\n * the same persona slice.\n * 3. Computes the delta and appends to `loop-report.json`.\n * 4. If `rollbackOnRegression` and delta < 0, reverts applied edits.\n */\n\nimport type { RunAnalystLoopResult } from '../analyst-loop/types'\n\nexport interface OutcomeMeasurement {\n /** Baseline composite before applies — captured from the most-recent eval run. */\n baselineComposite: number\n /** Composite after re-running the cohort with applied edits. */\n afterComposite: number\n /** `afterComposite - baselineComposite`. Positive = the loop improved the agent. */\n delta: number\n /** Per-persona deltas for finer-grained review. */\n perPersona: ReadonlyArray<{ personaId: string; before: number; after: number; delta: number }>\n /** When the substrate rolled back applies due to regression, the paths reverted. */\n rolledBackPaths: ReadonlyArray<string>\n}\n\nexport interface OutcomeMeasurementOpts {\n /** Composite scores from the run that produced the findings. */\n baseline: ReadonlyArray<{ personaId: string; composite: number }>\n /**\n * Re-run callback — the substrate invokes this after applies. The\n * agent author provides their `runAgentEval`-equivalent so the\n * substrate can ask \"score this persona slice now.\"\n *\n * The callback SHOULD reuse the same cohort + judges + variant as\n * the baseline run; only the agent's mutable surfaces have changed.\n */\n reRunCohort: (\n personaIds: ReadonlyArray<string>,\n ) => Promise<ReadonlyArray<{ personaId: string; composite: number }>>\n /** When `true`, applied edits are reverted on negative delta. Default `false`. */\n rollbackOnRegression?: boolean\n /** Callback to revert a list of paths (typically `git checkout HEAD --`). */\n revert?: (paths: ReadonlyArray<string>) => Promise<void>\n}\n\n/**\n * Run `runAnalystLoop` and stamp an `OutcomeMeasurement` onto the\n * result. 
The substrate calls this after each canonical eval; the\n * delta lands in `loop-report.json` for cross-run trend analysis.\n *\n * The function returns the original `RunAnalystLoopResult` enriched\n * with `outcome` so callers stay backwards-compatible (the field is\n * optional on the type; missing means no measurement was wired).\n */\nexport async function measureOutcome<TProposal, TEdit>(\n result: RunAnalystLoopResult<TProposal, TEdit>,\n opts: OutcomeMeasurementOpts,\n): Promise<RunAnalystLoopResult<TProposal, TEdit> & { outcome: OutcomeMeasurement }> {\n const applied = result.knowledge?.applied ?? []\n const improvementsApplied = result.improvement?.applied ?? []\n const allApplied = [...applied, ...improvementsApplied]\n\n // No applies → no outcome to measure. Return a zero-delta to keep the\n // shape stable for consumers; baseline / after equal.\n if (allApplied.length === 0) {\n return {\n ...result,\n outcome: {\n baselineComposite: meanComposite(opts.baseline),\n afterComposite: meanComposite(opts.baseline),\n delta: 0,\n perPersona: opts.baseline.map((b) => ({\n personaId: b.personaId,\n before: b.composite,\n after: b.composite,\n delta: 0,\n })),\n rolledBackPaths: [],\n },\n }\n }\n\n const personaIds = opts.baseline.map((b) => b.personaId)\n const after = await opts.reRunCohort(personaIds)\n const afterByPersona = new Map(after.map((r) => [r.personaId, r.composite]))\n\n const perPersona = opts.baseline.map((b) => {\n const a = afterByPersona.get(b.personaId) ?? 
b.composite\n return { personaId: b.personaId, before: b.composite, after: a, delta: a - b.composite }\n })\n const baselineComposite = meanComposite(opts.baseline)\n const afterComposite = meanComposite(after)\n const delta = afterComposite - baselineComposite\n\n let rolledBackPaths: string[] = []\n if (delta < 0 && opts.rollbackOnRegression && opts.revert) {\n await opts.revert(allApplied)\n rolledBackPaths = [...allApplied]\n }\n\n return {\n ...result,\n outcome: {\n baselineComposite,\n afterComposite,\n delta,\n perPersona,\n rolledBackPaths,\n },\n }\n}\n\nfunction meanComposite(rows: ReadonlyArray<{ composite: number }>): number {\n if (rows.length === 0) return 0\n return rows.reduce((acc, r) => acc + r.composite, 0) / rows.length\n}\n","/**\n * Sandbox bridge for `AgentRuntime.act` — prod-faithful eval execution.\n *\n * The point of this adapter is parity: the eval substrate must run the agent\n * through the SAME profile the production chat turn uses, or scorecard numbers\n * grade a profile that never ships. 
`createSandboxAct` composes the production\n * profile via {@link composeProductionAgentProfile}, boots a sandbox with it\n * through the loop kernel's own {@link createSandboxForSpec}, streams the\n * `streamPrompt` events mapped to the `RuntimeStreamEvent` vocabulary, and\n * resolves the `OutputAdapter`-parsed output for rubric scoring — satisfying\n * the `act` streaming contract with one code path shared by chat and eval.\n *\n * Agents with a bespoke streaming chat turn should wire THAT into `act`\n * directly (the contract is designed for it); this adapter is the default for\n * agents whose turn is a plain prod-profile sandbox dispatch — notably the\n * agents agent-builder generates.\n */\n\nimport type { AgentProfile, SandboxEvent } from '@tangle-network/sandbox'\nimport type { ComposeProductionAgentProfileOptions } from '../mcp/delegation-profile'\nimport { composeProductionAgentProfile } from '../mcp/delegation-profile'\nimport type { AgentRunSpec, LoopSandboxClient, OutputAdapter } from '../runtime'\nimport { createSandboxForSpec, mapSandboxEvent } from '../runtime'\nimport type { RuntimeStreamEvent } from '../types'\nimport type { AgentRunContext, AgentRunInvocation } from './define-agent'\n\nexport interface CreateSandboxActOptions<TPersona, TRunOutput> {\n /** Canonical agent profile — the same one the prod chat turn composes from. */\n baseProfile: AgentProfile\n /** Sandbox client used to boot the per-run sandbox. */\n sandboxClient: LoopSandboxClient\n /** Persona → prompt. Pure; the eval cell's input. */\n buildPrompt: (persona: TPersona) => string\n /** Sandbox event stream → typed output the rubric scores. */\n output: OutputAdapter<TRunOutput>\n /**\n * Per-persona composition overrides (workspace-augmented system prompt,\n * extra file mounts, sandbox key). 
Merged into\n * {@link composeProductionAgentProfile}; `env` here is overridden by the\n * top-level `env` option when both are set.\n */\n compose?: (persona: TPersona) => ComposeProductionAgentProfileOptions\n /** Sandbox-SDK overrides forwarded to `createSandboxForSpec`. */\n sandboxOverrides?: AgentRunSpec<unknown>['sandboxOverrides']\n /** Stable run name surfaced in mapped `llm_call` events. */\n name?: string\n /** Override the `SandboxEvent → RuntimeStreamEvent` mapper. */\n mapEvent?: (\n event: SandboxEvent,\n opts: { agentRunName?: string },\n ) => RuntimeStreamEvent | undefined\n /** Environment source for delegation-MCP composition. Defaults to `process.env`. */\n env?: Record<string, string | undefined>\n}\n\n/**\n * Build an `AgentRuntime.act` implementation backed by a single prod-profile\n * sandbox run. The returned function honours the `act` contract: it returns\n * synchronously with a live `events` iterator and an `output` promise that\n * resolves only after the iterator drains.\n */\nexport function createSandboxAct<TPersona, TRunOutput>(\n options: CreateSandboxActOptions<TPersona, TRunOutput>,\n): (persona: TPersona, ctx: AgentRunContext) => AgentRunInvocation<TRunOutput> {\n const mapEvent = options.mapEvent ?? mapSandboxEvent\n\n return (persona: TPersona, ctx: AgentRunContext): AgentRunInvocation<TRunOutput> => {\n const profile = composeProductionAgentProfile(options.baseProfile, {\n ...(options.compose?.(persona) ?? {}),\n ...(options.env ? { env: options.env } : {}),\n })\n const agentRunName = options.name ?? profile.name ?? 'agent'\n const message = options.buildPrompt(persona)\n const signal = ctx.signal ?? 
new AbortController().signal\n\n const raw: SandboxEvent[] = []\n let settle!: (value: TRunOutput) => void\n let fail!: (err: unknown) => void\n const output = new Promise<TRunOutput>((resolve, reject) => {\n settle = resolve\n fail = reject\n })\n // The output promise rejects when the stream errors; if the caller ignores\n // `output` (chat UX) the rejection is still observed by the events iterator\n // throwing. Attach a no-op catch so an ignored rejection is never \"unhandled\".\n output.catch(() => {})\n\n const spec: AgentRunSpec<unknown> = {\n profile,\n taskToPrompt: () => message,\n name: agentRunName,\n ...(options.sandboxOverrides ? { sandboxOverrides: options.sandboxOverrides } : {}),\n }\n\n async function* events(): AsyncIterable<RuntimeStreamEvent> {\n try {\n const box = await createSandboxForSpec(options.sandboxClient, spec, signal)\n for await (const event of box.streamPrompt(message, { signal })) {\n raw.push(event)\n const mapped = mapEvent(event, { agentRunName })\n if (mapped) yield mapped\n }\n settle(options.output.parse(raw))\n } catch (err) {\n fail(err)\n throw err\n }\n }\n\n return { events: events(), output }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;AAeA,SAAS,kBAAkB;AAC3B,SAAS,YAAY,YAAY;AAqE1B,SAAS,mBACd,SACA,UACA,UACwB;AACxB,QAAM,aAAa,yBAAyB,SAAS,QAAQ;AAC7D,MAAI,WAAW,WAAW,EAAG,QAAO;AAMpC,aAAW,OAAO,YAAY;AAC5B,UAAM,MAAM,WAAW,GAAG,IAAI,MAAM,KAAK,UAAU,GAAG;AACtD,QAAI,WAAW,GAAG,GAAG;AACnB,aAAO,EAAE,cAAc,KAAK,kBAAkB,KAAK,QAAQ,MAAM,QAAQ,gBAAgB;AAAA,IAC3F;AAAA,EACF;AACA,QAAM,WAAW,WAAW,CAAC;AAC7B,QAAM,cAAc,WAAW,QAAQ,IAAI,WAAW,KAAK,UAAU,QAAQ;AAC7E,SAAO;AAAA,IACL,cAAc;AAAA,IACd,kBAAkB;AAAA,IAClB,QAAQ;AAAA,IACR,QAAQ;AAAA,EACV;AACF;AAEA,SAAS,yBACP,SACA,UACuB;AACvB,UAAQ,QAAQ,MAAM;AAAA,IACpB,KAAK;AAAA,IACL,KAAK;AACH,aAAO,CAAC,KAAK,SAAS,WAAW,GAAG,QAAQ,IAAI,KAAK,CAAC;AAAA,IACxD,KAAK;AAEH,aAAO,CAAC,KAAK,SAAS,WAAW,UAAU,GAAG,QAAQ,QAAQ,KAAK,CAAC,KAAK,CAAC;AAAA,IAC5E,KAAK;AACH,aAAO,CAAC,KAAK,SAAS,WAAW,OAAO,GAAG,QAAQ,QAAQ,KAAK,CAAC;AAAA,IACnE,KAAK,iBAAiB;AACpB,YAAM,OAAO,QAAQ,QAAQ,OAAO;AAIpC,aAAO;AAAA,QACL,KAAK,SAAS,cAAc,GAAG,IAAI,KAAK;AAAA,QACxC,KAAK,SAAS,cAAc,MAAM,UAAU;AAAA,QAC5C,KAAK,SAAS,cAAc,MAAM,UAAU;AAAA,MAC9C;AAAA,IACF;AAAA,IACA,KAAK;AACH,UAAI,QAAQ,QAAQ;AAClB,eAAO,CAAC,KAAK,SAAS,OAAO,QAAQ,MAAM,GAAG,QAAQ,QAAQ,MAAM,CAAC,KAAK,CAAC;AAAA,MAC7E;AAGA,aAAO;AAAA,QACL,KAAK,SAAS,OAAO,QAAQ,MAAM,WAAW;AAAA,QAC9C,KAAK,SAAS,OAAO,GAAG,QAAQ,IAAI,KAAK;AAAA,MAC3C;AAAA,IACF,KAAK;AACH,aAAO,CAAC,KAAK,SAAS,OAAO,QAAQ,MAAM,WAAW,CAAC;AAAA,IACzD,KAAK;AACH,UAAI,CAAC,SAAS,IAAK,QAAO,CAAC;AAC3B,aAAO,CAAC,KAAK,SAAS,KAAK,QAAQ,QAAQ,GAAG,QAAQ,KAAK,KAAK,CAAC;AAAA,IACnE,KAAK;AACH,UAAI,CAAC,SAAS,OAAQ,QAAO,CAAC;AAC9B,aAAO,CAAC,KAAK,SAAS,QAAQ,GAAG,QAAQ,QAAQ,GAAG,CAAC,OAAO,CAAC;AAAA,IAC/D,KAAK;AACH,UAAI,CAAC,SAAS,YAAa,QAAO,CAAC;AACnC,aAAO,CAAC,KAAK,SAAS,aAAa,GAAG,QAAQ,QAAQ,OAAO,CAAC,KAAK,CAAC;AAAA,IACtE,KAAK;AACH,UAAI,CAAC,SAAS,aAAc,QAAO,CAAC;AACpC,aAAO,CAAC,SAAS,YAAY;AAAA,IAC/B,KAAK;AAAA,IACL,KAAK;AAGH,aAAO,CAAC;AAAA,IACV,KAAK;AAEH,aAAO,CAAC;AAAA,EACZ;AACF;AAEA,SAAS,QAAQ,GAAmB;AAClC,SACE,EACG,YAAY,EACZ,QAAQ,gBAAgB,GAAG,EAC3B,QAAQ,YAAY,EAAE,EACtB,MAAM,GAAG,GAAG,KAAK;AAExB;AAgBO,SAAS,iBACd,UACA,UACuC;AACvC,QAAM,SAAmC,CAAC;AAC1C,QAAM,
cAAkD;AAAA,IACtD;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,eAAmD,CAAC,QAAQ;AAClE,QAAM,sBAA0D,CAAC,eAAe,UAAU,KAAK;AAC/F,QAAM,uBAA2D,CAAC,cAAc;AAEhF,aAAW,OAAO,aAAa;AAC7B,UAAM,IAAI,SAAS,GAAG;AACtB,QAAI,CAAC,GAAG;AACN,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,IAAI,QAAQ,UAAU,CAAC;AACzD;AAAA,IACF;AACA,UAAM,MAAM,WAAW,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC;AAChD,QAAI,CAAC,WAAW,GAAG,GAAG;AACpB,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,QAAQ,UAAU,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,aAAW,OAAO,cAAc;AAC9B,UAAM,IAAI,SAAS,GAAG;AACtB,QAAI,CAAC,GAAG;AACN,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,IAAI,QAAQ,UAAU,CAAC;AACzD;AAAA,IACF;AACA,UAAM,MAAM,WAAW,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC;AAChD,QAAI,CAAC,WAAW,GAAG,GAAG;AACpB,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,QAAQ,UAAU,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,aAAW,OAAO,CAAC,GAAG,qBAAqB,GAAG,oBAAoB,GAAG;AACnE,UAAM,IAAI,SAAS,GAAG;AACtB,QAAI,MAAM,OAAW;AACrB,UAAM,MAAM,WAAW,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC;AAChD,QAAI,CAAC,WAAW,GAAG,GAAG;AACpB,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,QAAQ,UAAU,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,oBACd,QACA,UACQ;AACR,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,QAAM,QAAQ,OAAO;AAAA,IACnB,CAAC,MAAM,OAAO,EAAE,OAAO,KAAK,EAAE,OAAO,IAAI,EAAE,IAAI,MAAM,WAAW,KAAK,EAAE,MAAM;AAAA,EAC/E;AACA,SAAO;AAAA,IACL,oDAAoD,QAAQ;AAAA,IAC5D,GAAG;AAAA,IACH;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;;;AC5DO,SAAS,sBACd,SAAS,uDACuB;AAChC,SAAO;AAAA,IACL,SAAS,gBAAgB,QAA2C;AAAA,IAAC,GAAG;AAAA,IACxE,QAAQ,QAAQ,OAAO,IAAI,MAAM,MAAM,CAAC;AAAA,EAC1C;AACF;AAYA,eAAsB,gBACpB,YAC4E;AAC5E,QAAM,SAA+B,CAAC;AACtC,mBAAiB,MAAM,WAAW,OAAQ,QAAO,KAAK,EAAE;AACxD,QAAM,SAAS,MAAM,WAAW;AAChC,SAAO,EAAE,QAAQ,OAAO;AAC1B;AA2CO,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5C,YACE,SACgB,SACA,SAAiC,CAAC,GAClD;AACA,UAAM,OAAO;AAHG;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EALkB;AAAA,EACA;AAKpB;AAeO,SAAS,YACd,UACqC;AACrC,MAAI,CAAC,SAAS,MAAM,SAAS,GAAG,KAAK,EAAE,WAAW,GAAG;AACnD,UAAM,IAAI,mBAAmB,iCAAiC,SAAS,MAAM,EAAE;AAAA,EACjF;AACA,MAAI,CAAC,SAAS,YAAY,SAAS,SAAS,KAAK,EAAE,WAAW,GAAG;AAC/D,UAAM,IAAI,mBAAmB,uCAAuC,SAAS,EAAE
;AAAA,EACjF;AACA,QAAM,SAAS,iBAAiB,SAAS,UAAU,SAAS,QAAQ;AACpE,MAAI,OAAO,SAAS,GAAG;AACrB,UAAM,IAAI;AAAA,MACR,oBAAoB,QAAQ,SAAS,QAAQ;AAAA,MAC7C,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAGA,QAAM,QAAQ,SAAS,OAAO,WAAW,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC;AAC7E,MAAI,SAAS,OAAO,WAAW,SAAS,MAAM,QAAQ,OAAO,QAAQ,MAAM;AACzE,UAAM,IAAI;AAAA,MACR,eAAe,SAAS,EAAE,sCAAsC,MAAM,QAAQ,CAAC,CAAC;AAAA,MAChF,SAAS;AAAA,IACX;AAAA,EACF;AACA,SAAO;AACT;;;AClSA,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAE7B,SAAS,2BAA2B;AAsFpC,IAAM,uBAA8D;AAAA,EAClE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,SAAS,gCACd,MAC4C;AAC5C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,cAAc,KAAK,uBAAuB;AAEhD,SAAO;AAAA,IACL,MAAM,oBAAoB,UAAU;AAClC,YAAM,QAAkC,CAAC;AACzC,YAAM,SAAyE,CAAC;AAChF,UAAI,UAAU;AAEd,iBAAW,KAAK,UAAU;AACxB,cAAM,UAAU,oBAAoB,EAAE,OAAO;AAC7C,YAAI,YAAY,MAAM;AACpB,cAAI,EAAE,YAAY,QAAW;AAC3B,mBAAO,KAAK;AAAA,cACV,WAAW,EAAE;AAAA,cACb,SAAS,EAAE;AAAA,cACX,SAAS;AAAA,YACX,CAAC;AAAA,UACH,OAAO;AAGL,uBAAW;AAAA,UACb;AACA;AAAA,QACF;AAGA,YAAI,QAAQ,SAAS,WAAW;AAC9B,qBAAW;AACX;AAAA,QACF;AAIA,YAAI,QAAQ,KAAK,WAAW,YAAY,GAAG;AACzC,qBAAW;AACX;AAAA,QACF;AAEA,cAAM,SAAS,mBAAmB,SAAS,KAAK,UAAU,KAAK,QAAQ;AACvE,YAAI,WAAW,MAAM;AACnB,iBAAO,KAAK;AAAA,YACV,WAAW,EAAE;AAAA,YACb,SAAS,EAAE,WAAW;AAAA,YACtB,SAAS,iBAAiB,QAAQ,IAAI;AAAA,UACxC,CAAC;AACD;AAAA,QACF;AAEA,YAAI,OAAO,WAAW,gBAAgB,CAAC,YAAY,SAAS,QAAQ,IAAI,GAAG;AACzE,iBAAO,KAAK;AAAA,YACV,WAAW,EAAE;AAAA,YACb,SAAS,EAAE,WAAW;AAAA,YACtB,SAAS,UAAU,OAAO,gBAAgB,8BAA8B,QAAQ,IAAI;AAAA,UACtF,CAAC;AACD;AAAA,QACF;AAEA,cAAM,iBAAiB,OAAO,SAAS,aAAa,OAAO,cAAc,OAAO,IAAI;AAEpF,YAAI;AACJ,YAAI;AACF,kBAAQ,MAAM,KAAK,WAAW,EAAE,SAAS,GAAG,SAAS,QAAQ,eAAe,CAAC;AAAA,QAC/E,SAAS,KAAK;AACZ,iBAAO,KAAK;AAAA,YACV,WAAW,EAAE;AAAA,YACb,SAAS,EAAE,WAAW;AAAA,YACtB,SAAS,qBAAqB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UAChF,CAAC;AACD;AAAA,QACF;AAEA,YAAI,MAAM,MAAM,KAAK,EAAE,WAAW,GAAG;AACnC,qBAAW;AACX;AAAA,QACF;AAEA,cAAM,KAAK;AAAA,UACT,IAAI,OAAO,EAAE,UAAU;AAAA,UACvB,iBAAiB,EAAE;AAAA,UACnB;AAAA,UACA;AAAA,UACA,YAAY,OAAO,cAAc;AAAA,UACjC,OAAO,MAAM;AAAA,UACb,SAAS,MAAM;AAAA,UACf,WA
AW,MAAM;AAAA,UACjB,YAAY,EAAE;AAAA,UACd,UAAU,EAAE;AAAA,QACd,CAAC;AAAA,MACH;AAEA,aAAO,EAAE,OAAO,SAAS,OAAO;AAAA,IAClC;AAAA,IAEA,MAAM,MAAM,OAAO;AACjB,YAAM,UAAoB,CAAC;AAC3B,YAAM,WAAqB,CAAC;AAE5B,UAAI,SAAS,QAAQ;AACnB,iBAAS;AAAA,UACP;AAAA,QACF;AACA,eAAO,EAAE,SAAS,SAAS;AAAA,MAC7B;AAEA,UAAI,SAAS,aAAa,CAAC,KAAK,QAAQ;AACtC,iBAAS;AAAA,UACP;AAAA,QACF;AACA,eAAO,EAAE,SAAS,SAAS;AAAA,MAC7B;AAEA,iBAAW,QAAQ,OAAO;AAIxB,cAAM,UAAU,KAAK,OAAO,SAAS,aAAa,KAAK,OAAO,cAAc,OAAO,IAAI;AACvF,YAAI,OAAO,OAAO,MAAM,KAAK,YAAY;AACvC,mBAAS;AAAA,YACP,GAAG,KAAK,OAAO,gBAAgB;AAAA,UACjC;AACA;AAAA,QACF;AAEA,cAAM,KAAK,kBAAkB,MAAM,KAAK,QAAQ;AAChD,YAAI,CAAC,IAAI;AACP,mBAAS,KAAK,GAAG,KAAK,OAAO,gBAAgB,oBAAoB;AACjE;AAAA,QACF;AACA,gBAAQ,KAAK,KAAK,OAAO,gBAAgB;AAAA,MAC3C;AAEA,UAAI,SAAS,aAAa,QAAQ,SAAS,KAAK,KAAK,QAAQ;AAC3D,cAAM,QAAQ;AAAA,UACZ;AAAA,UACA,MAAM,OAAO,CAAC,MAAM,QAAQ,SAAS,EAAE,OAAO,gBAAgB,CAAC;AAAA,UAC/D,KAAK;AAAA,UACL,KAAK;AAAA,UACL,KAAK,cAAc;AAAA,QACrB;AACA,YAAI,MAAO,UAAS,KAAK,cAAc,KAAK,EAAE;AAAA,YACzC,UAAS,KAAK,gEAAgE;AAAA,MACrF;AAEA,aAAO,EAAE,SAAS,SAAS;AAAA,IAC7B;AAAA,EACF;AACF;AAIA,SAAS,kBAAkB,MAA8B,UAA2B;AAClF,QAAM,SAAS,UAAU,OAAO,CAAC,SAAS,oBAAoB,OAAO,GAAG,GAAG;AAAA,IACzE,KAAK;AAAA,IACL,OAAO,KAAK;AAAA,IACZ,UAAU;AAAA,EACZ,CAAC;AACD,SAAO,OAAO,WAAW;AAC3B;AAEA,SAAS,gBACP,OACA,OACA,UACA,QACA,YACe;AACf,QAAM,SAAS,gBAAgB,KAAK,IAAI,CAAC,IAAI,MAAM,CAAC,GAAG,gBAAgB,MAAM,GAAG,EAAE,KAAK,OAAO;AAE9F,QAAM,WAAW,UAAU,OAAO,CAAC,YAAY,MAAM,MAAM,GAAG,EAAE,KAAK,SAAS,CAAC;AAC/E,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,MAAM,UAAU,OAAO,CAAC,OAAO,GAAG,KAAK,GAAG,EAAE,KAAK,SAAS,CAAC;AACjE,MAAI,IAAI,WAAW,EAAG,QAAO;AAC7B,QAAM,QAAQ,iBAAiB,MAAM,CAAC,GAAG,WAAW,GAAG,MAAM,MAAM,oBAAoB;AACvF,QAAM,OAAO;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG,MAAM;AAAA,MACP,CAAC,MACC,OAAO,EAAE,eAAe,gBAAgB,EAAE,WAAW,QAAQ,CAAC,CAAC,cAAc,EAAE,QAAQ;AAAA,IAC3F;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG,MAAM,IAAI,CAAC,MAAM;AAAA,KAAQ,EAAE,OAAO,gBAAgB;AAAA;AAAA,EAAO,EAAE,SAAS,EAAE;AAAA,EAC3E,EAAE,KAAK,IAAI;AACX,QAAM,SAAS,UAAU,OAAO,CAAC,UAAU,MAAM,OAAO,MAAM,IAAI,GAAG,EAAE,KAAK,SAA
S,CAAC;AACtF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,QAAM,OAAO,UAAU,OAAO,CAAC,QAAQ,MAAM,UAAU,MAAM,GAAG,EAAE,KAAK,SAAS,CAAC;AACjF,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,QAAM,KAAK;AAAA,IACT;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,EAAE,KAAK,UAAU,UAAU,QAAQ;AAAA,EACrC;AACA,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,SAAO,GAAG,OAAO,KAAK;AACxB;AAEA,SAAS,OAAO,GAAmB;AAGjC,QAAM,SAAS,UAAQ,QAAa;AACpC,SAAO,OAAO,WAAW,QAAQ,EAAE,OAAO,GAAG,OAAO,EAAE,OAAO,KAAK;AACpE;;;ACnSO,SAAS,8BACd,MACA,MAC6B;AAC7B,SAAO;AAAA,IACL,oBAAoB,UAAU;AAC5B,YAAM,QAAQ,KAAK,oBAAoB,QAAQ;AAC/C,aAAO;AAAA,QACL,WAAW,MAAM;AAAA,QACjB,SAAS,MAAM;AAAA,QACf,QAAQ,MAAM;AAAA,MAChB;AAAA,IACF;AAAA,IACA,MAAM,MAAM,WAAW;AACrB,YAAM,UAAoB,CAAC;AAC3B,YAAM,WAAqB,CAAC;AAC5B,iBAAW,KAAK,WAAW;AACzB,cAAM,eAAe,2BAA2B,CAAC;AACjD,YAAI,iBAAiB,MAAM;AACzB,mBAAS;AAAA,YACP,kEAAkE,mBAAmB,CAAC,CAAC;AAAA,UACzF;AACA;AAAA,QACF;AACA,YAAI;AACF,gBAAM,IAAI,MAAM,KAAK,0BAA0B,KAAK,eAAe,YAAY;AAC/E,kBAAQ,KAAK,GAAG,EAAE,OAAO;AACzB,mBAAS,KAAK,GAAG,EAAE,QAAQ;AAAA,QAC7B,SAAS,KAAK;AACZ,mBAAS;AAAA,YACP,qCAAqC,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UACvF;AAAA,QACF;AAAA,MACF;AACA,UAAI,KAAK,kBAAkB,QAAQ,SAAS,GAAG;AAC7C,YAAI;AACF,gBAAM,aAAa,MAAM,KAAK,eAAe,KAAK,aAAa;AAC/D,qBAAW,SAAS,WAAY,UAAS,KAAK,SAAS,KAAK,EAAE;AAAA,QAChE,SAAS,KAAK;AACZ,mBAAS;AAAA,YACP,0BAA0B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UAC5E;AAAA,QACF;AAAA,MACF;AACA,aAAO,EAAE,SAAS,SAAS;AAAA,IAC7B;AAAA,EACF;AACF;AAYA,SAAS,2BAA2B,GAA2B;AAC7D,MAAI,CAAC,KAAK,OAAO,MAAM,SAAU,QAAO;AACxC,QAAM,MAAM;AACZ,MAAI,MAAM,QAAQ,IAAI,WAAW,GAAG;AAClC,UAAM,SAAS,IAAI;AACnB,QAAI,OAAO,WAAW,EAAG,QAAO;AAChC,WAAO,OACJ,IAAI,CAAC,MAAO,OAAO,EAAE,YAAY,WAAW,EAAE,UAAU,EAAG,EAC3D,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAC1B,KAAK,MAAM;AAAA,EAChB;AACA,MAAI,OAAO,IAAI,iBAAiB,SAAU,QAAO,IAAI;AACrD,MAAI,OAAO,IAAI,YAAY,SAAU,QAAO,IAAI;AAChD,SAAO;AACT;AAEA,SAAS,mBAAmB,GAAoB;AAC9C,MAAI,CAAC,KAAK,OAAO,MAAM,SAAU,QAAO;AACxC,QAAM,MAAM;AACZ,MAAI,OAAO,IAAI,oBAAoB,SAAU,QAAO,IAAI;AACxD,MA
AI,OAAO,IAAI,OAAO,SAAU,QAAO,IAAI;AAC3C,SAAO;AACT;;;AC1EA,eAAsB,eACpB,QACA,MACmF;AACnF,QAAM,UAAU,OAAO,WAAW,WAAW,CAAC;AAC9C,QAAM,sBAAsB,OAAO,aAAa,WAAW,CAAC;AAC5D,QAAM,aAAa,CAAC,GAAG,SAAS,GAAG,mBAAmB;AAItD,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO;AAAA,MACL,GAAG;AAAA,MACH,SAAS;AAAA,QACP,mBAAmB,cAAc,KAAK,QAAQ;AAAA,QAC9C,gBAAgB,cAAc,KAAK,QAAQ;AAAA,QAC3C,OAAO;AAAA,QACP,YAAY,KAAK,SAAS,IAAI,CAAC,OAAO;AAAA,UACpC,WAAW,EAAE;AAAA,UACb,QAAQ,EAAE;AAAA,UACV,OAAO,EAAE;AAAA,UACT,OAAO;AAAA,QACT,EAAE;AAAA,QACF,iBAAiB,CAAC;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAEA,QAAM,aAAa,KAAK,SAAS,IAAI,CAAC,MAAM,EAAE,SAAS;AACvD,QAAM,QAAQ,MAAM,KAAK,YAAY,UAAU;AAC/C,QAAM,iBAAiB,IAAI,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;AAE3E,QAAM,aAAa,KAAK,SAAS,IAAI,CAAC,MAAM;AAC1C,UAAM,IAAI,eAAe,IAAI,EAAE,SAAS,KAAK,EAAE;AAC/C,WAAO,EAAE,WAAW,EAAE,WAAW,QAAQ,EAAE,WAAW,OAAO,GAAG,OAAO,IAAI,EAAE,UAAU;AAAA,EACzF,CAAC;AACD,QAAM,oBAAoB,cAAc,KAAK,QAAQ;AACrD,QAAM,iBAAiB,cAAc,KAAK;AAC1C,QAAM,QAAQ,iBAAiB;AAE/B,MAAI,kBAA4B,CAAC;AACjC,MAAI,QAAQ,KAAK,KAAK,wBAAwB,KAAK,QAAQ;AACzD,UAAM,KAAK,OAAO,UAAU;AAC5B,sBAAkB,CAAC,GAAG,UAAU;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,cAAc,MAAoD;AACzE,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,SAAO,KAAK,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,WAAW,CAAC,IAAI,KAAK;AAC9D;;;AChEO,SAAS,iBACd,SAC6E;AAC7E,QAAM,WAAW,QAAQ,YAAY;AAErC,SAAO,CAAC,SAAmB,QAAyD;AAClF,UAAM,UAAU,8BAA8B,QAAQ,aAAa;AAAA,MACjE,GAAI,QAAQ,UAAU,OAAO,KAAK,CAAC;AAAA,MACnC,GAAI,QAAQ,MAAM,EAAE,KAAK,QAAQ,IAAI,IAAI,CAAC;AAAA,IAC5C,CAAC;AACD,UAAM,eAAe,QAAQ,QAAQ,QAAQ,QAAQ;AACrD,UAAM,UAAU,QAAQ,YAAY,OAAO;AAC3C,UAAM,SAAS,IAAI,UAAU,IAAI,gBAAgB,EAAE;AAEnD,UAAM,MAAsB,CAAC;AAC7B,QAAI;AACJ,QAAI;AACJ,UAAM,SAAS,IAAI,QAAoB,CAAC,SAAS,WAAW;AAC1D,eAAS;AACT,aAAO;AAAA,IACT,CAAC;AAID,WAAO,MAAM,MAAM;AAAA,IAAC,CAAC;AAErB,UAAM,OAA8B;AAAA,MAClC;AAAA,MACA,cAAc,MAAM;AAAA,MACpB,MAAM;AAAA,MACN,GAAI,QAAQ,mBAAmB,EAAE,kBAAkB,QAAQ,iBAAiB,IAAI,CAAC;AAAA,IACnF;AAEA,oBAAgB,SAA4C;AAC1D,UAAI;AACF,cAAM,MAAM,M
AAM,qBAAqB,QAAQ,eAAe,MAAM,MAAM;AAC1E,yBAAiB,SAAS,IAAI,aAAa,SAAS,EAAE,OAAO,CAAC,GAAG;AAC/D,cAAI,KAAK,KAAK;AACd,gBAAM,SAAS,SAAS,OAAO,EAAE,aAAa,CAAC;AAC/C,cAAI,OAAQ,OAAM;AAAA,QACpB;AACA,eAAO,QAAQ,OAAO,MAAM,GAAG,CAAC;AAAA,MAClC,SAAS,KAAK;AACZ,aAAK,GAAG;AACR,cAAM;AAAA,MACR;AAAA,IACF;AAEA,WAAO,EAAE,QAAQ,OAAO,GAAG,OAAO;AAAA,EACpC;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/agent/surfaces.ts","../src/agent/define-agent.ts","../src/agent/improvement-adapter.ts","../src/agent/knowledge-adapter.ts","../src/agent/outcome.ts","../src/agent/sandbox-act.ts"],"sourcesContent":["/**\n * `AgentSurfaces` — declarative map of the mutable file/directory paths\n * the self-improvement loop can edit on behalf of an agent.\n *\n * The substrate uses this map to resolve every parsed `FindingSubject`\n * (from agent-eval) to a real on-disk path. No per-vertical glue;\n * no fabricated paths; no silent `existsSync(...)` skips that hide\n * misconfiguration from the operator.\n *\n * Surfaces are validated at `defineAgent` time — missing paths fail\n * loud with a list of every offender. A surface that's not needed\n * (e.g. an agent with no RAG corpora) is simply omitted; the loop\n * refuses to route those subjects rather than fabricating a target.\n */\n\nimport { existsSync } from 'node:fs'\nimport { isAbsolute, join } from 'node:path'\nimport type { FindingSubject } from '@tangle-network/agent-eval'\n\n/**\n * Surface declarations. Every path is repo-relative (or absolute) at\n * `defineAgent` time. At resolution time, paths are joined against the\n * agent's `repoRoot`.\n *\n * `systemPrompt`, `tools`, `personas` are DIRECTORIES; the loop appends\n * `<section>.md`, `<tool>/README.md`, `<persona-id>.yaml` etc.\n * `rubric`, `outputSchema` are SINGLE FILES; the loop edits them in\n * place.\n *\n * `knowledge` is the agent-knowledge root (typically `.agent-knowledge`);\n * `applyKnowledgeWriteBlocks` writes pages relative to it.\n *\n * Optional surfaces (`scaffolding`, `memory`, `rag`, `outputSchema`)\n * can be omitted — the loop will reject findings targeting them with a\n * clear log message instead of fabricating a path.\n */\nexport interface AgentSurfaces {\n /** Directory containing one markdown file per system-prompt section. 
*/\n systemPrompt: string\n /** Directory containing one subdir per tool (`<tool>/README.md`). */\n tools: string\n /** Single file (TypeScript module) defining the rubric weights + dimensions. */\n rubric: string\n /** Knowledge-base root; typically `.agent-knowledge`. */\n knowledge: string\n /** Directory containing one YAML/JSON file per persona. */\n personas: string\n /** Optional: directory containing scaffolding rules (precondition checks, retry policies). */\n scaffolding?: string\n /** Optional: memory store path (JSONL / SQLite / DB). */\n memory?: string\n /** Optional: directory containing RAG corpora (`<corpus>/<doc-id>.md`). */\n rag?: string\n /** Optional: single file defining the output schema (Zod / JSON Schema). */\n outputSchema?: string\n}\n\nexport interface ResolvedSurface {\n /** Absolute filesystem path the operator can `cat` / `vim`. */\n absolutePath: string\n /** Repo-relative path for PR descriptions, diffs, audit logs. */\n repoRelativePath: string\n /** Whether the path currently exists on disk. */\n exists: boolean\n /** The substrate's intent: edit an existing file or create a new one. */\n intent: 'edit-existing' | 'create-new'\n}\n\n/**\n * Resolve a parsed `FindingSubject` to the file path the substrate\n * should edit (or create) on disk.\n *\n * Returns `null` when:\n * - the subject targets a surface the agent didn't declare\n * (e.g. `rag:*` when `surfaces.rag` is undefined), OR\n * - the subject is a `cluster` (failure-mode emits these as evidence,\n * not actionable mutations — they don't route to a file).\n *\n * Returns a `ResolvedSurface` with `intent: 'create-new'` when the\n * subject names a path that doesn't yet exist (e.g. 
a new wiki page).\n * The caller chooses whether to honour the create — for tightly-managed\n * surfaces like `systemPrompt` it's usually a contract violation\n * (the analyst named a section that doesn't exist); for `knowledge`\n * it's the whole point.\n */\nexport function resolveSubjectPath(\n subject: FindingSubject,\n surfaces: AgentSurfaces,\n repoRoot: string,\n): ResolvedSurface | null {\n const candidates = candidatePathsForSubject(subject, surfaces)\n if (candidates.length === 0) return null\n\n // Probe candidates in order, preferring the first one that exists on disk.\n // Lets the substrate accept both the flat `<section>.md` convention and\n // the skill-dir `<section>/SKILL.md` convention without forcing one layout.\n // When none exists, fall back to the first candidate (canonical create-new).\n for (const rel of candidates) {\n const abs = isAbsolute(rel) ? rel : join(repoRoot, rel)\n if (existsSync(abs)) {\n return { absolutePath: abs, repoRelativePath: rel, exists: true, intent: 'edit-existing' }\n }\n }\n const fallback = candidates[0]!\n const fallbackAbs = isAbsolute(fallback) ? 
fallback : join(repoRoot, fallback)\n return {\n absolutePath: fallbackAbs,\n repoRelativePath: fallback,\n exists: false,\n intent: 'create-new',\n }\n}\n\nfunction candidatePathsForSubject(\n subject: FindingSubject,\n surfaces: AgentSurfaces,\n): ReadonlyArray<string> {\n switch (subject.kind) {\n case 'knowledge.wiki':\n case 'knowledge.stale':\n return [join(surfaces.knowledge, `${subject.slug}.md`)]\n case 'knowledge.claim':\n // Claims land in a per-topic claims directory under the knowledge root.\n return [join(surfaces.knowledge, 'claims', `${slugify(subject.topic)}.md`)]\n case 'knowledge.raw':\n return [join(surfaces.knowledge, 'raw', `${subject.sourceId}.md`)]\n case 'system-prompt': {\n const slug = slugify(subject.section)\n // Prefer flat layout for create-new (canonical); probe skill-dir layout\n // in case the existing repo (tax/legal/gtm/creative) uses\n // `<section>/SKILL.md` already.\n return [\n join(surfaces.systemPrompt, `${slug}.md`),\n join(surfaces.systemPrompt, slug, 'SKILL.md'),\n join(surfaces.systemPrompt, slug, 'index.md'),\n ]\n }\n case 'tool-doc':\n if (subject.aspect) {\n return [join(surfaces.tools, subject.tool, `${slugify(subject.aspect)}.md`)]\n }\n // tool-doc default: `<tool>/README.md`; also probe `<tool>.md` for flat\n // tool-list repos.\n return [\n join(surfaces.tools, subject.tool, 'README.md'),\n join(surfaces.tools, `${subject.tool}.md`),\n ]\n case 'new-tool':\n return [join(surfaces.tools, subject.name, 'README.md')]\n case 'rag':\n if (!surfaces.rag) return []\n return [join(surfaces.rag, subject.corpus, `${subject.docId}.md`)]\n case 'memory':\n if (!surfaces.memory) return []\n return [join(surfaces.memory, `${slugify(subject.key)}.json`)]\n case 'scaffolding':\n if (!surfaces.scaffolding) return []\n return [join(surfaces.scaffolding, `${slugify(subject.concern)}.md`)]\n case 'output-schema':\n if (!surfaces.outputSchema) return []\n return [surfaces.outputSchema]\n case 'websearch.outdated':\n case 
'prior-run-summary':\n // Stale signals don't map to a single file — handled by the knowledge\n // adapter as `agent-knowledge:stale:*` after operator review.\n return []\n case 'cluster':\n // failure-mode cluster labels are evidence, not mutations.\n return []\n }\n}\n\nfunction slugify(s: string): string {\n return (\n s\n .toLowerCase()\n .replace(/[^a-z0-9-]+/g, '-')\n .replace(/^-+|-+$/g, '')\n .slice(0, 200) || 'untitled'\n )\n}\n\n/**\n * Validate that every declared surface exists on disk under `repoRoot`.\n *\n * Returns an array of `SurfaceValidationIssue` — empty when all required\n * surfaces resolve. `defineAgent` throws with the issues rendered, so\n * a misconfigured manifest fails at startup (not at the first finding\n * the loop produces 20 minutes later).\n */\nexport interface SurfaceValidationIssue {\n surface: keyof AgentSurfaces\n path: string\n reason: 'missing' | 'not-directory' | 'not-file'\n}\n\nexport function validateSurfaces(\n surfaces: AgentSurfaces,\n repoRoot: string,\n): ReadonlyArray<SurfaceValidationIssue> {\n const issues: SurfaceValidationIssue[] = []\n const dirSurfaces: ReadonlyArray<keyof AgentSurfaces> = [\n 'systemPrompt',\n 'tools',\n 'personas',\n 'knowledge',\n ]\n const fileSurfaces: ReadonlyArray<keyof AgentSurfaces> = ['rubric']\n const optionalDirSurfaces: ReadonlyArray<keyof AgentSurfaces> = ['scaffolding', 'memory', 'rag']\n const optionalFileSurfaces: ReadonlyArray<keyof AgentSurfaces> = ['outputSchema']\n\n for (const key of dirSurfaces) {\n const p = surfaces[key] as string | undefined\n if (!p) {\n issues.push({ surface: key, path: '', reason: 'missing' })\n continue\n }\n const abs = isAbsolute(p) ? 
p : join(repoRoot, p)\n if (!existsSync(abs)) {\n issues.push({ surface: key, path: p, reason: 'missing' })\n }\n }\n for (const key of fileSurfaces) {\n const p = surfaces[key] as string | undefined\n if (!p) {\n issues.push({ surface: key, path: '', reason: 'missing' })\n continue\n }\n const abs = isAbsolute(p) ? p : join(repoRoot, p)\n if (!existsSync(abs)) {\n issues.push({ surface: key, path: p, reason: 'missing' })\n }\n }\n for (const key of [...optionalDirSurfaces, ...optionalFileSurfaces]) {\n const p = surfaces[key] as string | undefined\n if (p === undefined) continue\n const abs = isAbsolute(p) ? p : join(repoRoot, p)\n if (!existsSync(abs)) {\n issues.push({ surface: key, path: p, reason: 'missing' })\n }\n }\n return issues\n}\n\nexport function renderSurfaceIssues(\n issues: ReadonlyArray<SurfaceValidationIssue>,\n repoRoot: string,\n): string {\n if (issues.length === 0) return ''\n const lines = issues.map(\n (i) => ` - ${i.surface}: ${i.path ? `\"${i.path}\"` : '<not set>'} (${i.reason})`,\n )\n return [\n `Agent surface validation failed against repoRoot=${repoRoot}:`,\n ...lines,\n '',\n 'Fix the manifest: every required surface must point at an existing',\n 'directory (systemPrompt / tools / personas / knowledge) or file',\n '(rubric). Optional surfaces (scaffolding / memory / rag / outputSchema)',\n 'may be omitted; the loop will reject findings targeting omitted',\n 'surfaces rather than fabricating a path.',\n ].join('\\n')\n}\n","/**\n * `defineAgent` — typed, validated manifest API for declarative agent\n * configuration. The substrate consumes this manifest to wire the\n * canonical eval pattern + analyst self-improvement loop without any\n * per-vertical glue.\n *\n * Design goal: scale to 1000s of vertical agents. Every agent declares\n * its surfaces, rubric, runtime, and analyst configuration in ~50 lines.\n * No per-vertical `ImprovementAdapter`. No per-vertical CLI. 
No\n * fabricated paths.\n *\n * Validation: `defineAgent` runs `validateSurfaces` synchronously and\n * throws a structured error if any required surface is missing on\n * disk. The cost is one filesystem stat per surface (cheap); the\n * benefit is a manifest that can't ship broken.\n */\n\nimport type { TraceAnalystKindSpec } from '@tangle-network/agent-eval'\nimport type { RuntimeStreamEvent } from '../types'\nimport { type AgentSurfaces, renderSurfaceIssues, validateSurfaces } from './surfaces'\n\n// ── manifest ─────────────────────────────────────────────────────────\n\n/**\n * The full agent manifest. Each agent ships ONE of these.\n *\n * Generics:\n * `TPersona` — the agent's persona shape (loaded from\n * `surfaces.personas`). Defaults to `unknown` so the substrate's\n * persona discovery (`loadPersonas`) can accept anything; per-agent\n * code re-narrows when it matters.\n * `TRunOutput` — the shape `runtime.act` returns. Used by the rubric\n * scorers and emitted into the trace.\n */\nexport interface AgentManifest<TPersona = unknown, TRunOutput = unknown> {\n /**\n * Stable identifier — used as `projectId` in traces, as the analyst\n * loop's `runId` prefix, and as the namespace under which findings\n * are persisted. MUST match the agent's repo name to keep\n * cross-repo telemetry joinable.\n */\n id: string\n\n /**\n * Filesystem root the substrate resolves surface paths against.\n * Typically `process.cwd()` or a fixed absolute path. Use an\n * absolute path when the agent's tests may run from subdirectories\n * (vitest sometimes shifts cwd).\n */\n repoRoot: string\n\n /**\n * Map of mutable surfaces the self-improvement loop can edit. See\n * `AgentSurfaces` — required: `systemPrompt`, `tools`, `rubric`,\n * `knowledge`, `personas`. Optional: `scaffolding`, `memory`, `rag`,\n * `outputSchema`.\n *\n * Every required path is validated at `defineAgent` time. 
Missing\n * paths throw with the full list of offenders.\n */\n surfaces: AgentSurfaces\n\n /**\n * Rubric the substrate uses to score each run. Dimensions × weights\n * × judges. The substrate computes the weighted composite and\n * stamps it into the RunRecord.\n */\n rubric: AgentRubric<TRunOutput>\n\n /**\n * Runtime adapter — how the substrate INVOKES the agent against a\n * persona. The `act` function takes a persona + a context (with the\n * tracer the substrate threads through for span emission) and\n * returns the run output the rubric will score.\n *\n * The agent's existing production runtime goes in here; the\n * substrate is intentionally thin around it.\n */\n runtime: AgentRuntime<TPersona, TRunOutput>\n\n /**\n * Persona discovery — the substrate loads personas via this function\n * at eval start. Can read from `surfaces.personas`, an API, or be\n * hardcoded. The substrate calls it once per `runAgentEval` call;\n * persona ordering is preserved.\n */\n personas: () => Promise<ReadonlyArray<TPersona>>\n\n /**\n * Analyst kinds the substrate runs against each persona's trace.\n * Defaults to `DEFAULT_TRACE_ANALYST_KINDS` from agent-eval. Per-agent\n * authors can prune (e.g. skip `knowledge-poisoning` when there's no\n * knowledge base) or extend (custom domain kinds).\n *\n * Empty array disables the loop — useful for `pnpm eval --no-analyst`.\n */\n analystKinds: ReadonlyArray<TraceAnalystKindSpec>\n\n /**\n * Analyst LLM configuration. The substrate uses these for all four\n * kinds (override per-kind via `analystKinds` if needed).\n */\n analyst: AnalystConfig\n\n /**\n * Auto-apply policy. Knowledge / improvement edits land only when\n * `enabled === true` AND the source finding's confidence meets the\n * threshold. 
`mode` controls how applies happen: `'write'` mutates\n * files in-place; `'open-pr'` writes to a branch and opens a PR.\n *\n * Default: knowledge auto-applies at confidence ≥0.85 in `'write'`\n * mode (wiki edits are git-reversible); improvement stays at\n * `enabled: false` until the agent author has measured precision.\n */\n autoApply?: AutoApplyPolicy\n}\n\nexport interface AgentRubric<TRunOutput> {\n /** Dimensions composing the weighted score. Weights sum to 1.0 by convention. */\n dimensions: ReadonlyArray<RubricDimension<TRunOutput>>\n /**\n * Optional judges layered on top of deterministic dimensions. Each\n * judge returns a score per dimension; the substrate averages judges\n * (mean by default) for the LLM contribution.\n */\n judges?: ReadonlyArray<JudgeConfig<TRunOutput>>\n}\n\nexport interface RubricDimension<TRunOutput> {\n /** Unique identifier — appears in finding subjects (`rubric:<id>`). */\n id: string\n /** 0..1 — weight in the composite. */\n weight: number\n /**\n * Deterministic scorer: given the persona + run output, returns a\n * 0..1 score. The substrate sums weight × score across dimensions\n * for the deterministic composite; judges supplement subjective dims.\n */\n score: (input: { persona: unknown; output: TRunOutput }) => number\n /** Optional human-readable label for reports. */\n label?: string\n}\n\nexport interface JudgeConfig<TRunOutput> {\n /** Judge identifier — appears in trace spans + manifest. */\n id: string\n /** Model snapshot to invoke. Pin the snapshot (`claude-sonnet-4-6@2025-04-15`); the validator rejects bare aliases. */\n model: string\n /** Dimensions this judge scores. */\n dimensions: ReadonlyArray<string>\n /**\n * Optional rubric anchors — text examples the judge sees as a\n * few-shot prompt to calibrate. 
STRONGLY recommended for subjective\n * dimensions; required by the calibration gate (Pearson ≥0.7).\n */\n anchors?: ReadonlyArray<{ input: string; output: TRunOutput; expected: Record<string, number> }>\n}\n\nexport interface AgentRuntime<TPersona, TRunOutput> {\n /**\n * Invoke the agent against one persona. Returns BOTH:\n * - `events`: an `AsyncIterable<RuntimeStreamEvent>` the chat-centric\n * product consumes verbatim (SSE / WebSocket / inline render).\n * **Streaming is mandatory — never collapse this to a single Promise.**\n * The agent's existing `runChatTurn` (or equivalent async generator)\n * plugs in here directly.\n * - `output`: a `Promise<TRunOutput>` resolved AFTER the event stream\n * drains. The eval substrate awaits this for rubric scoring; chat\n * products usually ignore it (they already rendered incrementally).\n *\n * Implementation contract:\n * 1. `act` MUST return immediately (synchronous construction of the\n * `events` iterator + the `output` promise).\n * 2. Iterating `events` drives the underlying LLM/tool calls — the\n * caller chooses when to consume.\n * 3. `output` resolves only after the iterator yields its terminal\n * event (typically `task_end`); see `collectAgentRun` helper.\n *\n * `ctx.emitter` is the substrate-threaded `TraceEmitter` — runtimes\n * SHOULD record LLM/tool spans through it for capture integrity.\n * `ctx.deadlineMs` is wall-clock; the runtime SHOULD honour for graceful\n * cancel. `ctx.signal` is the standard abort signal.\n */\n act: (persona: TPersona, ctx: AgentRunContext) => AgentRunInvocation<TRunOutput>\n}\n\nexport interface AgentRunInvocation<TRunOutput> {\n /** Live stream of typed runtime events. Consumed by chat UX directly. */\n events: AsyncIterable<RuntimeStreamEvent>\n /** Final structured output the rubric scores. Resolves after `events` drains. */\n output: Promise<TRunOutput>\n}\n\n/**\n * Stub for agents whose `runtime.act` is not yet wired to the substrate's\n * eval path. 
Preserves the streaming contract (empty event stream + a\n * rejected `output` promise that tells the caller exactly what to fix).\n *\n * Per-vertical manifests usually start with this stub and replace it with\n * the agent's real streaming runtime (`runChatTurn` or equivalent) once\n * the eval path consumes the manifest end-to-end.\n */\nexport function unimplementedAgentRun<TRunOutput = unknown>(\n reason = 'AgentRuntime.act is not yet wired for this manifest',\n): AgentRunInvocation<TRunOutput> {\n return {\n events: (async function* empty(): AsyncIterable<RuntimeStreamEvent> {})(),\n output: Promise.reject(new Error(reason)),\n }\n}\n\n/**\n * Drain `act`'s `events` into an array AND await its `output`. Useful for\n * eval / outcome-measurement code paths that don't care about live\n * rendering. The events array is preserved so the substrate can inspect\n * tool calls / readiness / questions retrospectively.\n *\n * IMPORTANT: chat-centric UX MUST NOT call this — it defeats streaming\n * (no incremental render). Use `for await (const ev of invocation.events)`\n * directly in the chat surface.\n */\nexport async function collectAgentRun<TRunOutput>(\n invocation: AgentRunInvocation<TRunOutput>,\n): Promise<{ events: ReadonlyArray<RuntimeStreamEvent>; output: TRunOutput }> {\n const events: RuntimeStreamEvent[] = []\n for await (const ev of invocation.events) events.push(ev)\n const output = await invocation.output\n return { events, output }\n}\n\nexport interface AgentRunContext {\n /** Substrate-managed trace emitter. */\n emitter: import('@tangle-network/agent-eval').TraceEmitter\n /** Stable run id for this persona × variant cell. */\n runId: string\n /** Variant the runtime is exercising (e.g. `'baseline'`, `'source-grounded'`). */\n variantId?: string\n /** Wall-clock deadline (epoch ms). The runtime SHOULD honour for graceful cancel. */\n deadlineMs?: number\n /** Optional abort signal. 
*/\n signal?: AbortSignal\n}\n\nexport interface AnalystConfig {\n /** Model the analyst kinds use. Override per-kind via `analystKinds[i].cost.models`. */\n model: string\n /** Optional total budget across all kinds for one run. Substrate enforces via `BudgetGuard`. */\n budgetUsd?: number\n /** Backend hint for the AxAIService factory — same shape every kind uses. */\n backend?: {\n name?: 'openai' | 'router'\n apiKey?: string\n baseUrl?: string\n }\n}\n\nexport interface AutoApplyPolicy {\n knowledge?: {\n enabled: boolean\n confidenceThreshold?: number\n mode?: 'write' | 'open-pr'\n }\n improvement?: {\n enabled: boolean\n confidenceThreshold?: number\n mode?: 'write' | 'open-pr'\n }\n}\n\n// ── factory + validation ─────────────────────────────────────────────\n\nexport class AgentManifestError extends Error {\n constructor(\n message: string,\n public readonly agentId: string,\n public readonly issues: ReadonlyArray<unknown> = [],\n ) {\n super(message)\n this.name = 'AgentManifestError'\n }\n}\n\n/**\n * Construct a validated agent manifest. Throws `AgentManifestError`\n * if any required surface is missing on disk.\n *\n * Generics: pass your persona / output types if you want narrowed\n * `runtime.act` signatures:\n * `defineAgent<TaxPersona, TaxRunOutput>({ ... })`\n *\n * Most callers don't need the generics — the substrate operates on\n * `unknown` payloads internally and the manifest's `score` /\n * `runtime.act` see the typed shapes via TypeScript inference at\n * the call site.\n */\nexport function defineAgent<TPersona = unknown, TRunOutput = unknown>(\n manifest: AgentManifest<TPersona, TRunOutput>,\n): AgentManifest<TPersona, TRunOutput> {\n if (!manifest.id || manifest.id.trim().length === 0) {\n throw new AgentManifestError('defineAgent: `id` is required', manifest.id ?? 
'')\n }\n if (!manifest.repoRoot || manifest.repoRoot.trim().length === 0) {\n throw new AgentManifestError('defineAgent: `repoRoot` is required', manifest.id)\n }\n const issues = validateSurfaces(manifest.surfaces, manifest.repoRoot)\n if (issues.length > 0) {\n throw new AgentManifestError(\n renderSurfaceIssues(issues, manifest.repoRoot),\n manifest.id,\n issues,\n )\n }\n // Lightweight rubric sanity: weights sum to ~1.0 (no hard requirement —\n // the substrate normalizes — but flag wildly miscalibrated weights).\n const total = manifest.rubric.dimensions.reduce((acc, d) => acc + d.weight, 0)\n if (manifest.rubric.dimensions.length > 0 && (total < 0.5 || total > 1.5)) {\n throw new AgentManifestError(\n `defineAgent(${manifest.id}): rubric dimension weights sum to ${total.toFixed(3)} — should be ~1.0`,\n manifest.id,\n )\n }\n return manifest\n}\n","/**\n * Substrate-default `ImprovementAdapter` — surfaces-driven, LLM-drafted\n * patches, optional auto-apply or PR-open.\n *\n * This is the one ImprovementAdapter every vertical agent uses. The\n * substrate parses each finding's `subject` via\n * `parseFindingSubject` (agent-eval), resolves it to a real file path\n * via the agent's `AgentSurfaces`, reads the current content, and asks\n * an LLM to draft a unified-diff patch given the finding + current\n * content + per-kind editing-discipline rules.\n *\n * Auto-apply gates on the source-finding's confidence and the\n * autoApply.improvement policy. Two modes:\n * `write` — apply the patch in-place via `git apply -p0`. 
Operator\n * reviews via `git diff`.\n * `open-pr` — write to a branch, commit, push, open a PR via `gh`.\n * Operator reviews via the PR UI.\n *\n * Fail-loud rules:\n * - Findings whose subject doesn't parse → counted in `errors`.\n * - Findings whose subject targets an undeclared surface → counted in\n * `errors` with the offending kind in the message.\n * - Findings whose target path doesn't exist AND the kind isn't a\n * create-new variant (`new-tool`, `knowledge.wiki`) → counted in\n * `errors` with the resolved path in the message.\n * - LLM drafts that fail JSON-schema validation → counted in\n * `errors` with the schema issue.\n *\n * No silent skips. Every dropped finding has a recorded reason the\n * loop's report surfaces.\n */\n\nimport { spawnSync } from 'node:child_process'\nimport { readFileSync } from 'node:fs'\nimport type { AnalystFinding, FindingSubject } from '@tangle-network/agent-eval'\nimport { parseFindingSubject } from '@tangle-network/agent-eval/analyst'\nimport type { ImprovementAdapter } from '../analyst-loop/types'\nimport type { AgentSurfaces, ResolvedSurface } from './surfaces'\nimport { resolveSubjectPath } from './surfaces'\n\n// ── proposal shape ───────────────────────────────────────────────────\n\nexport interface SurfaceImprovementEdit {\n /** Stable id derived from the source finding so re-proposals are idempotent. */\n id: string\n /** The finding that produced this edit — for revert + audit trail. */\n sourceFindingId: string\n /** Parsed subject; included so the apply step doesn't re-parse. */\n subject: FindingSubject\n /** Resolved on-disk target. */\n target: ResolvedSurface\n /** SHA-256 of the current file content the patch was drafted against. */\n baseSha256: string\n /** Unified-diff patch the LLM drafted (relative to `target.absolutePath`). */\n patch: string\n /** One-line summary the operator sees in the report / PR title. 
*/\n summary: string\n /** Multi-line rationale for the PR body — finding context + LLM reasoning. */\n rationale: string\n /** Carry-forward from the finding so the apply gate can check the threshold. */\n confidence: number\n /** Carry-forward severity for prioritization. */\n severity: AnalystFinding['severity']\n}\n\nexport interface CreateSurfaceImprovementAdapterOpts {\n surfaces: AgentSurfaces\n repoRoot: string\n /**\n * LLM-draft callback. Given a finding + current file content + the\n * resolved target, returns a unified-diff patch + summary + rationale.\n *\n * Required — the substrate doesn't ship a hardcoded prompt; the agent\n * author picks the model (Haiku for cheap routine drafts, Sonnet for\n * substantive prompt rewrites, etc.) via this callback.\n */\n draftPatch: (input: DraftPatchInput) => Promise<DraftPatchOutput>\n /**\n * Apply mode:\n * `write` — `git apply` in-place; operator reviews via `git diff`\n * `open-pr` — branch + commit + push + `gh pr create`\n * `none` — never apply; collect proposals for the report only\n *\n * The `apply` method honours this even when the loop calls it; the\n * effective behaviour is also gated on the per-finding confidence\n * threshold via `runAnalystLoop`'s `autoApply` policy.\n */\n mode?: 'write' | 'open-pr' | 'none'\n /** When `mode === 'open-pr'`, the base branch new PRs target. Default: `main`. */\n baseBranch?: string\n /** Required for `mode === 'open-pr'` — the GH owner/repo (`tangle-network/tax-agent`). */\n ghRepo?: string\n /**\n * When the resolved target doesn't exist, allow the substrate to\n * CREATE the file (for `knowledge.wiki`, `new-tool` subjects). 
Default\n * true for those kinds, false for `system-prompt` / `rubric` / etc.\n * (named sections that don't exist are a contract violation, not a\n * scaffolding opportunity).\n */\n allowCreateForKinds?: ReadonlyArray<FindingSubject['kind']>\n}\n\nexport interface DraftPatchInput {\n finding: AnalystFinding\n subject: FindingSubject\n target: ResolvedSurface\n /** Current file content (empty string when `intent === 'create-new'`). */\n currentContent: string\n}\n\nexport interface DraftPatchOutput {\n /** Unified diff against the current file content. Empty string skips this finding. */\n patch: string\n /** One-line summary for the operator. */\n summary: string\n /** Multi-line rationale for the PR body. */\n rationale: string\n}\n\n// ── factory ──────────────────────────────────────────────────────────\n\nconst DEFAULT_CREATE_KINDS: ReadonlyArray<FindingSubject['kind']> = [\n 'knowledge.wiki',\n 'knowledge.claim',\n 'knowledge.raw',\n 'new-tool',\n]\n\nexport function createSurfaceImprovementAdapter(\n opts: CreateSurfaceImprovementAdapterOpts,\n): ImprovementAdapter<SurfaceImprovementEdit> {\n const mode = opts.mode ?? 'none'\n const allowCreate = opts.allowCreateForKinds ?? 
DEFAULT_CREATE_KINDS\n\n return {\n async proposeFromFindings(findings) {\n const edits: SurfaceImprovementEdit[] = []\n const errors: Array<{ findingId: string; subject: string; message: string }> = []\n let skipped = 0\n\n for (const f of findings) {\n const subject = parseFindingSubject(f.subject)\n if (subject === null) {\n if (f.subject !== undefined) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject,\n message: 'subject does not parse against the finding-subject grammar',\n })\n } else {\n // Subject-less findings are descriptive, not actionable —\n // legitimate; count in `skipped` not `errors`.\n skipped += 1\n }\n continue\n }\n\n // `cluster` findings (failure-mode) are evidence, not mutations.\n if (subject.kind === 'cluster') {\n skipped += 1\n continue\n }\n\n // `agent-knowledge:*` findings flow to the KnowledgeAdapter;\n // the ImprovementAdapter skips them so subjects don't double-route.\n if (subject.kind.startsWith('knowledge.')) {\n skipped += 1\n continue\n }\n\n const target = resolveSubjectPath(subject, opts.surfaces, opts.repoRoot)\n if (target === null) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject ?? '',\n message: `subject kind \"${subject.kind}\" targets an undeclared surface; declare it in AgentSurfaces or stop emitting this subject`,\n })\n continue\n }\n\n if (target.intent === 'create-new' && !allowCreate.includes(subject.kind)) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject ?? '',\n message: `target ${target.repoRelativePath} does not exist; the kind \"${subject.kind}\" requires an existing target (analyst named a section that isn't in the codebase)`,\n })\n continue\n }\n\n const currentContent = target.exists ? readFileSync(target.absolutePath, 'utf-8') : ''\n\n let draft: DraftPatchOutput\n try {\n draft = await opts.draftPatch({ finding: f, subject, target, currentContent })\n } catch (err) {\n errors.push({\n findingId: f.finding_id,\n subject: f.subject ?? 
'',\n message: `draftPatch threw: ${err instanceof Error ? err.message : String(err)}`,\n })\n continue\n }\n\n if (draft.patch.trim().length === 0) {\n skipped += 1\n continue\n }\n\n edits.push({\n id: `imp-${f.finding_id}`,\n sourceFindingId: f.finding_id,\n subject,\n target,\n baseSha256: sha256(currentContent),\n patch: draft.patch,\n summary: draft.summary,\n rationale: draft.rationale,\n confidence: f.confidence,\n severity: f.severity,\n })\n }\n\n return { edits, skipped, errors }\n },\n\n async apply(edits) {\n const applied: string[] = []\n const warnings: string[] = []\n\n if (mode === 'none') {\n warnings.push(\n 'createSurfaceImprovementAdapter: mode=none; no edits applied — adjust manifest.autoApply.improvement.mode',\n )\n return { applied, warnings }\n }\n\n if (mode === 'open-pr' && !opts.ghRepo) {\n warnings.push(\n 'createSurfaceImprovementAdapter: mode=open-pr requires `ghRepo`; falling back to no-op',\n )\n return { applied, warnings }\n }\n\n for (const edit of edits) {\n // Race-detection: confirm the file content hasn't moved since the\n // patch was drafted. A diff applied against drifted content is a\n // recipe for silent corruption.\n const current = edit.target.exists ? readFileSync(edit.target.absolutePath, 'utf-8') : ''\n if (sha256(current) !== edit.baseSha256) {\n warnings.push(\n `${edit.target.repoRelativePath}: base SHA mismatch; file changed after draft. Skipping.`,\n )\n continue\n }\n\n const ok = applyPatchInPlace(edit, opts.repoRoot)\n if (!ok) {\n warnings.push(`${edit.target.repoRelativePath}: git apply failed`)\n continue\n }\n applied.push(edit.target.repoRelativePath)\n }\n\n if (mode === 'open-pr' && applied.length > 0 && opts.ghRepo) {\n const prUrl = openPullRequest(\n applied,\n edits.filter((e) => applied.includes(e.target.repoRelativePath)),\n opts.repoRoot,\n opts.ghRepo,\n opts.baseBranch ?? 
'main',\n )\n if (prUrl) warnings.push(`opened PR: ${prUrl}`)\n else warnings.push('PR creation failed; edits are committed to a local branch only')\n }\n\n return { applied, warnings }\n },\n }\n}\n\n// ── apply helpers ────────────────────────────────────────────────────\n\nfunction applyPatchInPlace(edit: SurfaceImprovementEdit, repoRoot: string): boolean {\n const result = spawnSync('git', ['apply', '--whitespace=fix', '-p0', '-'], {\n cwd: repoRoot,\n input: edit.patch,\n encoding: 'utf-8',\n })\n return result.status === 0\n}\n\nfunction openPullRequest(\n paths: ReadonlyArray<string>,\n edits: ReadonlyArray<SurfaceImprovementEdit>,\n repoRoot: string,\n ghRepo: string,\n baseBranch: string,\n): string | null {\n const branch = `analyst-loop/${Date.now()}-${edits[0]?.sourceFindingId.slice(0, 12) ?? 'edits'}`\n // Create branch, stage, commit\n const checkout = spawnSync('git', ['checkout', '-b', branch], { cwd: repoRoot })\n if (checkout.status !== 0) return null\n const add = spawnSync('git', ['add', ...paths], { cwd: repoRoot })\n if (add.status !== 0) return null\n const title = `analyst-loop: ${edits[0]?.summary ?? 
`${edits.length} improvement edits`}`\n const body = [\n `Automated analyst-loop edits — review carefully before merge.`,\n '',\n `Source findings:`,\n ...edits.map(\n (e) =>\n ` - ${e.sourceFindingId} (confidence ${e.confidence.toFixed(2)}, severity ${e.severity})`,\n ),\n '',\n 'Rationales:',\n ...edits.map((e) => `\\n## ${e.target.repoRelativePath}\\n\\n${e.rationale}`),\n ].join('\\n')\n const commit = spawnSync('git', ['commit', '-m', title, '-m', body], { cwd: repoRoot })\n if (commit.status !== 0) return null\n const push = spawnSync('git', ['push', '-u', 'origin', branch], { cwd: repoRoot })\n if (push.status !== 0) return null\n const pr = spawnSync(\n 'gh',\n [\n 'pr',\n 'create',\n '--repo',\n ghRepo,\n '--title',\n title,\n '--body',\n body,\n '--base',\n baseBranch,\n '--head',\n branch,\n ],\n { cwd: repoRoot, encoding: 'utf-8' },\n )\n if (pr.status !== 0) return null\n return pr.stdout.trim()\n}\n\nfunction sha256(s: string): string {\n // node:crypto is dynamic-imported lazily so the adapter can be tested in\n // environments without crypto (browser tests, mocked envs).\n const crypto = require('node:crypto') as typeof import('node:crypto')\n return crypto.createHash('sha256').update(s, 'utf-8').digest('hex')\n}\n","/**\n * Substrate-default `KnowledgeAdapter` — wraps agent-knowledge's\n * `proposeFromFindings` + `applyKnowledgeWriteBlocks` with substrate\n * defaults (auto-lint after apply, source linkage via finding id).\n *\n * Every agent that ships a `.agent-knowledge/` tree uses this adapter\n * unmodified. Per-agent customization happens at the manifest level\n * (`autoApply.knowledge.confidenceThreshold`, etc.), not by writing a\n * new adapter.\n *\n * Lint discipline: after each apply we run agent-knowledge's\n * `lintKnowledgeIndex` to catch broken links / circular claims /\n * duplicate pages introduced by the new writes. 
Findings that fail the\n * post-apply lint are recorded in `warnings`; the apply itself is not\n * rolled back (lint failures are soft — humans review the wiki state).\n */\n\nimport type { AnalystFinding } from '@tangle-network/agent-eval'\nimport type { KnowledgeAdapter } from '../analyst-loop/types'\n\nexport interface CreateSurfaceKnowledgeAdapterOpts {\n /** `.agent-knowledge/` root (absolute path the substrate writes blocks against). */\n knowledgeRoot: string\n}\n\n/**\n * Build the adapter. We accept the agent-knowledge functions as DI so\n * the substrate stays decoupled from a specific agent-knowledge\n * version — the agent author imports them in their manifest module\n * and hands them to the factory.\n *\n * `proposeFromFindings(findings)` returns\n * `{ proposals: KnowledgeProposal[]; skipped: number; errors: ... }`.\n *\n * `applyKnowledgeWriteBlocks(root, content)` returns\n * `{ written: string[]; warnings: string[] }`.\n *\n * `lintKnowledgeIndex(index)` (optional) returns `KnowledgeLintFinding[]`.\n */\nexport interface KnowledgeAdapterDeps<TProposal> {\n proposeFromFindings: (findings: ReadonlyArray<AnalystFinding>) => {\n proposals: TProposal[]\n skipped: number\n errors: Array<{ findingId: string; subject: string; message: string }>\n }\n applyKnowledgeWriteBlocks: (\n root: string,\n proposalText: string,\n ) => Promise<{ written: string[]; warnings: string[] }>\n /**\n * Optional post-apply lint hook. 
The substrate runs it after each\n * batch of writes; failures land in `warnings` (the apply is not\n * rolled back — lint signals drift to review, not block).\n */\n lintAfterApply?: (root: string) => Promise<ReadonlyArray<string>>\n}\n\nexport function createSurfaceKnowledgeAdapter<TProposal>(\n opts: CreateSurfaceKnowledgeAdapterOpts,\n deps: KnowledgeAdapterDeps<TProposal>,\n): KnowledgeAdapter<TProposal> {\n return {\n proposeFromFindings(findings) {\n const batch = deps.proposeFromFindings(findings)\n return {\n proposals: batch.proposals,\n skipped: batch.skipped,\n errors: batch.errors,\n }\n },\n async apply(proposals) {\n const written: string[] = []\n const warnings: string[] = []\n for (const p of proposals) {\n const proposalText = renderProposalAsWriteBlock(p)\n if (proposalText === null) {\n warnings.push(\n `proposal has no writeBlocks/content; skipping (sourceFindingId=${getSourceFindingId(p)})`,\n )\n continue\n }\n try {\n const r = await deps.applyKnowledgeWriteBlocks(opts.knowledgeRoot, proposalText)\n written.push(...r.written)\n warnings.push(...r.warnings)\n } catch (err) {\n warnings.push(\n `applyKnowledgeWriteBlocks failed: ${err instanceof Error ? err.message : String(err)}`,\n )\n }\n }\n if (deps.lintAfterApply && written.length > 0) {\n try {\n const lintIssues = await deps.lintAfterApply(opts.knowledgeRoot)\n for (const issue of lintIssues) warnings.push(`lint: ${issue}`)\n } catch (err) {\n warnings.push(\n `lintAfterApply failed: ${err instanceof Error ? err.message : String(err)}`,\n )\n }\n }\n return { written, warnings }\n },\n }\n}\n\n/**\n * Pluck the canonical write-block text from a proposal regardless of\n * which exact agent-knowledge version produced it. 
We accept either:\n * - `{ writeBlocks: Array<{ path, content }> }` — the typed shape\n * 1.3.0+ emits\n * - `{ proposalText: string }` — legacy single-block shape\n * - `{ content: string }` — minimal raw form\n *\n * Returns `null` when nothing parseable is present (warned upstream).\n */\nfunction renderProposalAsWriteBlock(p: unknown): string | null {\n if (!p || typeof p !== 'object') return null\n const obj = p as Record<string, unknown>\n if (Array.isArray(obj.writeBlocks)) {\n const blocks = obj.writeBlocks as Array<{ path?: string; content?: string }>\n if (blocks.length === 0) return null\n return blocks\n .map((b) => (typeof b.content === 'string' ? b.content : ''))\n .filter((s) => s.length > 0)\n .join('\\n\\n')\n }\n if (typeof obj.proposalText === 'string') return obj.proposalText\n if (typeof obj.content === 'string') return obj.content\n return null\n}\n\nfunction getSourceFindingId(p: unknown): string {\n if (!p || typeof p !== 'object') return '<unknown>'\n const obj = p as Record<string, unknown>\n if (typeof obj.sourceFindingId === 'string') return obj.sourceFindingId\n if (typeof obj.id === 'string') return obj.id\n return '<unknown>'\n}\n","/**\n * `OutcomeMeasurement` — the missing metric that turns the analyst\n * loop from \"observability\" into \"self-improvement\".\n *\n * Without this hook, the loop reports process counts (`findings: 42`,\n * `applied: 7`) and never proves the applied edits actually improved\n * anything. With this hook, the substrate re-runs the cohort against\n * the same personas after each apply pass and reports a composite\n * score delta. A negative delta is the substrate's strongest signal\n * to either roll back or surface for review.\n *\n * Wiring is intentionally simple: pass the manifest + the `runAgentEval`\n * function and a list of `personaIds` to re-run. The wrapper:\n * 1. Captures the baseline composite from the just-finished run.\n * 2. 
After `runAnalystLoop` returns, re-invokes `runAgentEval` against\n * the same persona slice.\n * 3. Computes the delta and appends to `loop-report.json`.\n * 4. If `rollbackOnRegression` and delta < 0, reverts applied edits.\n */\n\nimport type { RunAnalystLoopResult } from '../analyst-loop/types'\n\nexport interface OutcomeMeasurement {\n /** Baseline composite before applies — captured from the most-recent eval run. */\n baselineComposite: number\n /** Composite after re-running the cohort with applied edits. */\n afterComposite: number\n /** `afterComposite - baselineComposite`. Positive = the loop improved the agent. */\n delta: number\n /** Per-persona deltas for finer-grained review. */\n perPersona: ReadonlyArray<{ personaId: string; before: number; after: number; delta: number }>\n /** When the substrate rolled back applies due to regression, the paths reverted. */\n rolledBackPaths: ReadonlyArray<string>\n}\n\nexport interface OutcomeMeasurementOpts {\n /** Composite scores from the run that produced the findings. */\n baseline: ReadonlyArray<{ personaId: string; composite: number }>\n /**\n * Re-run callback — the substrate invokes this after applies. The\n * agent author provides their `runAgentEval`-equivalent so the\n * substrate can ask \"score this persona slice now.\"\n *\n * The callback SHOULD reuse the same cohort + judges + variant as\n * the baseline run; only the agent's mutable surfaces have changed.\n */\n reRunCohort: (\n personaIds: ReadonlyArray<string>,\n ) => Promise<ReadonlyArray<{ personaId: string; composite: number }>>\n /** When `true`, applied edits are reverted on negative delta. Default `false`. */\n rollbackOnRegression?: boolean\n /** Callback to revert a list of paths (typically `git checkout HEAD --`). */\n revert?: (paths: ReadonlyArray<string>) => Promise<void>\n}\n\n/**\n * Run `runAnalystLoop` and stamp an `OutcomeMeasurement` onto the\n * result. 
The substrate calls this after each canonical eval; the\n * delta lands in `loop-report.json` for cross-run trend analysis.\n *\n * The function returns the original `RunAnalystLoopResult` enriched\n * with `outcome` so callers stay backwards-compatible (the field is\n * optional on the type; missing means no measurement was wired).\n */\nexport async function measureOutcome<TProposal, TEdit>(\n result: RunAnalystLoopResult<TProposal, TEdit>,\n opts: OutcomeMeasurementOpts,\n): Promise<RunAnalystLoopResult<TProposal, TEdit> & { outcome: OutcomeMeasurement }> {\n const applied = result.knowledge?.applied ?? []\n const improvementsApplied = result.improvement?.applied ?? []\n const allApplied = [...applied, ...improvementsApplied]\n\n // No applies → no outcome to measure. Return a zero-delta to keep the\n // shape stable for consumers; baseline / after equal.\n if (allApplied.length === 0) {\n return {\n ...result,\n outcome: {\n baselineComposite: meanComposite(opts.baseline),\n afterComposite: meanComposite(opts.baseline),\n delta: 0,\n perPersona: opts.baseline.map((b) => ({\n personaId: b.personaId,\n before: b.composite,\n after: b.composite,\n delta: 0,\n })),\n rolledBackPaths: [],\n },\n }\n }\n\n const personaIds = opts.baseline.map((b) => b.personaId)\n const after = await opts.reRunCohort(personaIds)\n const afterByPersona = new Map(after.map((r) => [r.personaId, r.composite]))\n\n const perPersona = opts.baseline.map((b) => {\n const a = afterByPersona.get(b.personaId) ?? 
b.composite\n return { personaId: b.personaId, before: b.composite, after: a, delta: a - b.composite }\n })\n const baselineComposite = meanComposite(opts.baseline)\n const afterComposite = meanComposite(after)\n const delta = afterComposite - baselineComposite\n\n let rolledBackPaths: string[] = []\n if (delta < 0 && opts.rollbackOnRegression && opts.revert) {\n await opts.revert(allApplied)\n rolledBackPaths = [...allApplied]\n }\n\n return {\n ...result,\n outcome: {\n baselineComposite,\n afterComposite,\n delta,\n perPersona,\n rolledBackPaths,\n },\n }\n}\n\nfunction meanComposite(rows: ReadonlyArray<{ composite: number }>): number {\n if (rows.length === 0) return 0\n return rows.reduce((acc, r) => acc + r.composite, 0) / rows.length\n}\n","/**\n * Sandbox bridge for `AgentRuntime.act` — prod-faithful eval execution.\n *\n * The point of this adapter is parity: the eval substrate must run the agent\n * through the SAME profile the production chat turn uses, or scorecard numbers\n * grade a profile that never ships. 
`createSandboxAct` composes the production\n * profile via {@link composeProductionAgentProfile}, boots a sandbox with it\n * through the loop kernel's own {@link createSandboxForSpec}, streams the\n * `streamPrompt` events mapped to the `RuntimeStreamEvent` vocabulary, and\n * resolves the `OutputAdapter`-parsed output for rubric scoring — satisfying\n * the `act` streaming contract with one code path shared by chat and eval.\n *\n * Agents with a bespoke streaming chat turn should wire THAT into `act`\n * directly (the contract is designed for it); this adapter is the default for\n * agents whose turn is a plain prod-profile sandbox dispatch — notably the\n * agents agent-builder generates.\n */\n\nimport type { AgentProfile, SandboxEvent } from '@tangle-network/sandbox'\nimport type { ComposeProductionAgentProfileOptions } from '../mcp/delegation-profile'\nimport { composeProductionAgentProfile } from '../mcp/delegation-profile'\nimport type { AgentRunSpec, OutputAdapter, SandboxClient } from '../runtime'\nimport { createSandboxForSpec, mapSandboxEvent } from '../runtime'\nimport type { RuntimeStreamEvent } from '../types'\nimport type { AgentRunContext, AgentRunInvocation } from './define-agent'\n\nexport interface CreateSandboxActOptions<TPersona, TRunOutput> {\n /** Canonical agent profile — the same one the prod chat turn composes from. */\n baseProfile: AgentProfile\n /** Sandbox client used to boot the per-run sandbox. */\n sandboxClient: SandboxClient\n /** Persona → prompt. Pure; the eval cell's input. */\n buildPrompt: (persona: TPersona) => string\n /** Sandbox event stream → typed output the rubric scores. */\n output: OutputAdapter<TRunOutput>\n /**\n * Per-persona composition overrides (workspace-augmented system prompt,\n * extra file mounts, sandbox key). 
Merged into\n * {@link composeProductionAgentProfile}; `env` here is overridden by the\n * top-level `env` option when both are set.\n */\n compose?: (persona: TPersona) => ComposeProductionAgentProfileOptions\n /** Sandbox-SDK overrides forwarded to `createSandboxForSpec`. */\n sandboxOverrides?: AgentRunSpec<unknown>['sandboxOverrides']\n /** Stable run name surfaced in mapped `llm_call` events. */\n name?: string\n /** Override the `SandboxEvent → RuntimeStreamEvent` mapper. */\n mapEvent?: (\n event: SandboxEvent,\n opts: { agentRunName?: string },\n ) => RuntimeStreamEvent | undefined\n /** Environment source for delegation-MCP composition. Defaults to `process.env`. */\n env?: Record<string, string | undefined>\n}\n\n/**\n * Build an `AgentRuntime.act` implementation backed by a single prod-profile\n * sandbox run. The returned function honours the `act` contract: it returns\n * synchronously with a live `events` iterator and an `output` promise that\n * resolves only after the iterator drains.\n */\nexport function createSandboxAct<TPersona, TRunOutput>(\n options: CreateSandboxActOptions<TPersona, TRunOutput>,\n): (persona: TPersona, ctx: AgentRunContext) => AgentRunInvocation<TRunOutput> {\n const mapEvent = options.mapEvent ?? mapSandboxEvent\n\n return (persona: TPersona, ctx: AgentRunContext): AgentRunInvocation<TRunOutput> => {\n const profile = composeProductionAgentProfile(options.baseProfile, {\n ...(options.compose?.(persona) ?? {}),\n ...(options.env ? { env: options.env } : {}),\n })\n const agentRunName = options.name ?? profile.name ?? 'agent'\n const message = options.buildPrompt(persona)\n const signal = ctx.signal ?? 
new AbortController().signal\n\n const raw: SandboxEvent[] = []\n let settle!: (value: TRunOutput) => void\n let fail!: (err: unknown) => void\n const output = new Promise<TRunOutput>((resolve, reject) => {\n settle = resolve\n fail = reject\n })\n // The output promise rejects when the stream errors; if the caller ignores\n // `output` (chat UX) the rejection is still observed by the events iterator\n // throwing. Attach a no-op catch so an ignored rejection is never \"unhandled\".\n output.catch(() => {})\n\n const spec: AgentRunSpec<unknown> = {\n profile,\n taskToPrompt: () => message,\n name: agentRunName,\n ...(options.sandboxOverrides ? { sandboxOverrides: options.sandboxOverrides } : {}),\n }\n\n async function* events(): AsyncIterable<RuntimeStreamEvent> {\n try {\n const box = await createSandboxForSpec(options.sandboxClient, spec, signal)\n for await (const event of box.streamPrompt(message, { signal })) {\n raw.push(event)\n const mapped = mapEvent(event, { agentRunName })\n if (mapped) yield mapped\n }\n settle(options.output.parse(raw))\n } catch (err) {\n fail(err)\n throw err\n }\n }\n\n return { events: events(), output }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;AAeA,SAAS,kBAAkB;AAC3B,SAAS,YAAY,YAAY;AAqE1B,SAAS,mBACd,SACA,UACA,UACwB;AACxB,QAAM,aAAa,yBAAyB,SAAS,QAAQ;AAC7D,MAAI,WAAW,WAAW,EAAG,QAAO;AAMpC,aAAW,OAAO,YAAY;AAC5B,UAAM,MAAM,WAAW,GAAG,IAAI,MAAM,KAAK,UAAU,GAAG;AACtD,QAAI,WAAW,GAAG,GAAG;AACnB,aAAO,EAAE,cAAc,KAAK,kBAAkB,KAAK,QAAQ,MAAM,QAAQ,gBAAgB;AAAA,IAC3F;AAAA,EACF;AACA,QAAM,WAAW,WAAW,CAAC;AAC7B,QAAM,cAAc,WAAW,QAAQ,IAAI,WAAW,KAAK,UAAU,QAAQ;AAC7E,SAAO;AAAA,IACL,cAAc;AAAA,IACd,kBAAkB;AAAA,IAClB,QAAQ;AAAA,IACR,QAAQ;AAAA,EACV;AACF;AAEA,SAAS,yBACP,SACA,UACuB;AACvB,UAAQ,QAAQ,MAAM;AAAA,IACpB,KAAK;AAAA,IACL,KAAK;AACH,aAAO,CAAC,KAAK,SAAS,WAAW,GAAG,QAAQ,IAAI,KAAK,CAAC;AAAA,IACxD,KAAK;AAEH,aAAO,CAAC,KAAK,SAAS,WAAW,UAAU,GAAG,QAAQ,QAAQ,KAAK,CAAC,KAAK,CAAC;AAAA,IAC5E,KAAK;AACH,aAAO,CAAC,KAAK,SAAS,WAAW,OAAO,GAAG,QAAQ,QAAQ,KAAK,CAAC;AAAA,IACnE,KAAK,iBAAiB;AACpB,YAAM,OAAO,QAAQ,QAAQ,OAAO;AAIpC,aAAO;AAAA,QACL,KAAK,SAAS,cAAc,GAAG,IAAI,KAAK;AAAA,QACxC,KAAK,SAAS,cAAc,MAAM,UAAU;AAAA,QAC5C,KAAK,SAAS,cAAc,MAAM,UAAU;AAAA,MAC9C;AAAA,IACF;AAAA,IACA,KAAK;AACH,UAAI,QAAQ,QAAQ;AAClB,eAAO,CAAC,KAAK,SAAS,OAAO,QAAQ,MAAM,GAAG,QAAQ,QAAQ,MAAM,CAAC,KAAK,CAAC;AAAA,MAC7E;AAGA,aAAO;AAAA,QACL,KAAK,SAAS,OAAO,QAAQ,MAAM,WAAW;AAAA,QAC9C,KAAK,SAAS,OAAO,GAAG,QAAQ,IAAI,KAAK;AAAA,MAC3C;AAAA,IACF,KAAK;AACH,aAAO,CAAC,KAAK,SAAS,OAAO,QAAQ,MAAM,WAAW,CAAC;AAAA,IACzD,KAAK;AACH,UAAI,CAAC,SAAS,IAAK,QAAO,CAAC;AAC3B,aAAO,CAAC,KAAK,SAAS,KAAK,QAAQ,QAAQ,GAAG,QAAQ,KAAK,KAAK,CAAC;AAAA,IACnE,KAAK;AACH,UAAI,CAAC,SAAS,OAAQ,QAAO,CAAC;AAC9B,aAAO,CAAC,KAAK,SAAS,QAAQ,GAAG,QAAQ,QAAQ,GAAG,CAAC,OAAO,CAAC;AAAA,IAC/D,KAAK;AACH,UAAI,CAAC,SAAS,YAAa,QAAO,CAAC;AACnC,aAAO,CAAC,KAAK,SAAS,aAAa,GAAG,QAAQ,QAAQ,OAAO,CAAC,KAAK,CAAC;AAAA,IACtE,KAAK;AACH,UAAI,CAAC,SAAS,aAAc,QAAO,CAAC;AACpC,aAAO,CAAC,SAAS,YAAY;AAAA,IAC/B,KAAK;AAAA,IACL,KAAK;AAGH,aAAO,CAAC;AAAA,IACV,KAAK;AAEH,aAAO,CAAC;AAAA,EACZ;AACF;AAEA,SAAS,QAAQ,GAAmB;AAClC,SACE,EACG,YAAY,EACZ,QAAQ,gBAAgB,GAAG,EAC3B,QAAQ,YAAY,EAAE,EACtB,MAAM,GAAG,GAAG,KAAK;AAExB;AAgBO,SAAS,iBACd,UACA,UACuC;AACvC,QAAM,SAAmC,CAAC;AAC1C,QAAM,
cAAkD;AAAA,IACtD;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,eAAmD,CAAC,QAAQ;AAClE,QAAM,sBAA0D,CAAC,eAAe,UAAU,KAAK;AAC/F,QAAM,uBAA2D,CAAC,cAAc;AAEhF,aAAW,OAAO,aAAa;AAC7B,UAAM,IAAI,SAAS,GAAG;AACtB,QAAI,CAAC,GAAG;AACN,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,IAAI,QAAQ,UAAU,CAAC;AACzD;AAAA,IACF;AACA,UAAM,MAAM,WAAW,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC;AAChD,QAAI,CAAC,WAAW,GAAG,GAAG;AACpB,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,QAAQ,UAAU,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,aAAW,OAAO,cAAc;AAC9B,UAAM,IAAI,SAAS,GAAG;AACtB,QAAI,CAAC,GAAG;AACN,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,IAAI,QAAQ,UAAU,CAAC;AACzD;AAAA,IACF;AACA,UAAM,MAAM,WAAW,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC;AAChD,QAAI,CAAC,WAAW,GAAG,GAAG;AACpB,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,QAAQ,UAAU,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,aAAW,OAAO,CAAC,GAAG,qBAAqB,GAAG,oBAAoB,GAAG;AACnE,UAAM,IAAI,SAAS,GAAG;AACtB,QAAI,MAAM,OAAW;AACrB,UAAM,MAAM,WAAW,CAAC,IAAI,IAAI,KAAK,UAAU,CAAC;AAChD,QAAI,CAAC,WAAW,GAAG,GAAG;AACpB,aAAO,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,QAAQ,UAAU,CAAC;AAAA,IAC1D;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,oBACd,QACA,UACQ;AACR,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,QAAM,QAAQ,OAAO;AAAA,IACnB,CAAC,MAAM,OAAO,EAAE,OAAO,KAAK,EAAE,OAAO,IAAI,EAAE,IAAI,MAAM,WAAW,KAAK,EAAE,MAAM;AAAA,EAC/E;AACA,SAAO;AAAA,IACL,oDAAoD,QAAQ;AAAA,IAC5D,GAAG;AAAA,IACH;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;;;AC5DO,SAAS,sBACd,SAAS,uDACuB;AAChC,SAAO;AAAA,IACL,SAAS,gBAAgB,QAA2C;AAAA,IAAC,GAAG;AAAA,IACxE,QAAQ,QAAQ,OAAO,IAAI,MAAM,MAAM,CAAC;AAAA,EAC1C;AACF;AAYA,eAAsB,gBACpB,YAC4E;AAC5E,QAAM,SAA+B,CAAC;AACtC,mBAAiB,MAAM,WAAW,OAAQ,QAAO,KAAK,EAAE;AACxD,QAAM,SAAS,MAAM,WAAW;AAChC,SAAO,EAAE,QAAQ,OAAO;AAC1B;AA2CO,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5C,YACE,SACgB,SACA,SAAiC,CAAC,GAClD;AACA,UAAM,OAAO;AAHG;AACA;AAGhB,SAAK,OAAO;AAAA,EACd;AAAA,EALkB;AAAA,EACA;AAKpB;AAeO,SAAS,YACd,UACqC;AACrC,MAAI,CAAC,SAAS,MAAM,SAAS,GAAG,KAAK,EAAE,WAAW,GAAG;AACnD,UAAM,IAAI,mBAAmB,iCAAiC,SAAS,MAAM,EAAE;AAAA,EACjF;AACA,MAAI,CAAC,SAAS,YAAY,SAAS,SAAS,KAAK,EAAE,WAAW,GAAG;AAC/D,UAAM,IAAI,mBAAmB,uCAAuC,SAAS,EAAE
;AAAA,EACjF;AACA,QAAM,SAAS,iBAAiB,SAAS,UAAU,SAAS,QAAQ;AACpE,MAAI,OAAO,SAAS,GAAG;AACrB,UAAM,IAAI;AAAA,MACR,oBAAoB,QAAQ,SAAS,QAAQ;AAAA,MAC7C,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAGA,QAAM,QAAQ,SAAS,OAAO,WAAW,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,QAAQ,CAAC;AAC7E,MAAI,SAAS,OAAO,WAAW,SAAS,MAAM,QAAQ,OAAO,QAAQ,MAAM;AACzE,UAAM,IAAI;AAAA,MACR,eAAe,SAAS,EAAE,sCAAsC,MAAM,QAAQ,CAAC,CAAC;AAAA,MAChF,SAAS;AAAA,IACX;AAAA,EACF;AACA,SAAO;AACT;;;AClSA,SAAS,iBAAiB;AAC1B,SAAS,oBAAoB;AAE7B,SAAS,2BAA2B;AAsFpC,IAAM,uBAA8D;AAAA,EAClE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,SAAS,gCACd,MAC4C;AAC5C,QAAM,OAAO,KAAK,QAAQ;AAC1B,QAAM,cAAc,KAAK,uBAAuB;AAEhD,SAAO;AAAA,IACL,MAAM,oBAAoB,UAAU;AAClC,YAAM,QAAkC,CAAC;AACzC,YAAM,SAAyE,CAAC;AAChF,UAAI,UAAU;AAEd,iBAAW,KAAK,UAAU;AACxB,cAAM,UAAU,oBAAoB,EAAE,OAAO;AAC7C,YAAI,YAAY,MAAM;AACpB,cAAI,EAAE,YAAY,QAAW;AAC3B,mBAAO,KAAK;AAAA,cACV,WAAW,EAAE;AAAA,cACb,SAAS,EAAE;AAAA,cACX,SAAS;AAAA,YACX,CAAC;AAAA,UACH,OAAO;AAGL,uBAAW;AAAA,UACb;AACA;AAAA,QACF;AAGA,YAAI,QAAQ,SAAS,WAAW;AAC9B,qBAAW;AACX;AAAA,QACF;AAIA,YAAI,QAAQ,KAAK,WAAW,YAAY,GAAG;AACzC,qBAAW;AACX;AAAA,QACF;AAEA,cAAM,SAAS,mBAAmB,SAAS,KAAK,UAAU,KAAK,QAAQ;AACvE,YAAI,WAAW,MAAM;AACnB,iBAAO,KAAK;AAAA,YACV,WAAW,EAAE;AAAA,YACb,SAAS,EAAE,WAAW;AAAA,YACtB,SAAS,iBAAiB,QAAQ,IAAI;AAAA,UACxC,CAAC;AACD;AAAA,QACF;AAEA,YAAI,OAAO,WAAW,gBAAgB,CAAC,YAAY,SAAS,QAAQ,IAAI,GAAG;AACzE,iBAAO,KAAK;AAAA,YACV,WAAW,EAAE;AAAA,YACb,SAAS,EAAE,WAAW;AAAA,YACtB,SAAS,UAAU,OAAO,gBAAgB,8BAA8B,QAAQ,IAAI;AAAA,UACtF,CAAC;AACD;AAAA,QACF;AAEA,cAAM,iBAAiB,OAAO,SAAS,aAAa,OAAO,cAAc,OAAO,IAAI;AAEpF,YAAI;AACJ,YAAI;AACF,kBAAQ,MAAM,KAAK,WAAW,EAAE,SAAS,GAAG,SAAS,QAAQ,eAAe,CAAC;AAAA,QAC/E,SAAS,KAAK;AACZ,iBAAO,KAAK;AAAA,YACV,WAAW,EAAE;AAAA,YACb,SAAS,EAAE,WAAW;AAAA,YACtB,SAAS,qBAAqB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UAChF,CAAC;AACD;AAAA,QACF;AAEA,YAAI,MAAM,MAAM,KAAK,EAAE,WAAW,GAAG;AACnC,qBAAW;AACX;AAAA,QACF;AAEA,cAAM,KAAK;AAAA,UACT,IAAI,OAAO,EAAE,UAAU;AAAA,UACvB,iBAAiB,EAAE;AAAA,UACnB;AAAA,UACA;AAAA,UACA,YAAY,OAAO,cAAc;AAAA,UACjC,OAAO,MAAM;AAAA,UACb,SAAS,MAAM;AAAA,UACf,WA
AW,MAAM;AAAA,UACjB,YAAY,EAAE;AAAA,UACd,UAAU,EAAE;AAAA,QACd,CAAC;AAAA,MACH;AAEA,aAAO,EAAE,OAAO,SAAS,OAAO;AAAA,IAClC;AAAA,IAEA,MAAM,MAAM,OAAO;AACjB,YAAM,UAAoB,CAAC;AAC3B,YAAM,WAAqB,CAAC;AAE5B,UAAI,SAAS,QAAQ;AACnB,iBAAS;AAAA,UACP;AAAA,QACF;AACA,eAAO,EAAE,SAAS,SAAS;AAAA,MAC7B;AAEA,UAAI,SAAS,aAAa,CAAC,KAAK,QAAQ;AACtC,iBAAS;AAAA,UACP;AAAA,QACF;AACA,eAAO,EAAE,SAAS,SAAS;AAAA,MAC7B;AAEA,iBAAW,QAAQ,OAAO;AAIxB,cAAM,UAAU,KAAK,OAAO,SAAS,aAAa,KAAK,OAAO,cAAc,OAAO,IAAI;AACvF,YAAI,OAAO,OAAO,MAAM,KAAK,YAAY;AACvC,mBAAS;AAAA,YACP,GAAG,KAAK,OAAO,gBAAgB;AAAA,UACjC;AACA;AAAA,QACF;AAEA,cAAM,KAAK,kBAAkB,MAAM,KAAK,QAAQ;AAChD,YAAI,CAAC,IAAI;AACP,mBAAS,KAAK,GAAG,KAAK,OAAO,gBAAgB,oBAAoB;AACjE;AAAA,QACF;AACA,gBAAQ,KAAK,KAAK,OAAO,gBAAgB;AAAA,MAC3C;AAEA,UAAI,SAAS,aAAa,QAAQ,SAAS,KAAK,KAAK,QAAQ;AAC3D,cAAM,QAAQ;AAAA,UACZ;AAAA,UACA,MAAM,OAAO,CAAC,MAAM,QAAQ,SAAS,EAAE,OAAO,gBAAgB,CAAC;AAAA,UAC/D,KAAK;AAAA,UACL,KAAK;AAAA,UACL,KAAK,cAAc;AAAA,QACrB;AACA,YAAI,MAAO,UAAS,KAAK,cAAc,KAAK,EAAE;AAAA,YACzC,UAAS,KAAK,gEAAgE;AAAA,MACrF;AAEA,aAAO,EAAE,SAAS,SAAS;AAAA,IAC7B;AAAA,EACF;AACF;AAIA,SAAS,kBAAkB,MAA8B,UAA2B;AAClF,QAAM,SAAS,UAAU,OAAO,CAAC,SAAS,oBAAoB,OAAO,GAAG,GAAG;AAAA,IACzE,KAAK;AAAA,IACL,OAAO,KAAK;AAAA,IACZ,UAAU;AAAA,EACZ,CAAC;AACD,SAAO,OAAO,WAAW;AAC3B;AAEA,SAAS,gBACP,OACA,OACA,UACA,QACA,YACe;AACf,QAAM,SAAS,gBAAgB,KAAK,IAAI,CAAC,IAAI,MAAM,CAAC,GAAG,gBAAgB,MAAM,GAAG,EAAE,KAAK,OAAO;AAE9F,QAAM,WAAW,UAAU,OAAO,CAAC,YAAY,MAAM,MAAM,GAAG,EAAE,KAAK,SAAS,CAAC;AAC/E,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,MAAM,UAAU,OAAO,CAAC,OAAO,GAAG,KAAK,GAAG,EAAE,KAAK,SAAS,CAAC;AACjE,MAAI,IAAI,WAAW,EAAG,QAAO;AAC7B,QAAM,QAAQ,iBAAiB,MAAM,CAAC,GAAG,WAAW,GAAG,MAAM,MAAM,oBAAoB;AACvF,QAAM,OAAO;AAAA,IACX;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG,MAAM;AAAA,MACP,CAAC,MACC,OAAO,EAAE,eAAe,gBAAgB,EAAE,WAAW,QAAQ,CAAC,CAAC,cAAc,EAAE,QAAQ;AAAA,IAC3F;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG,MAAM,IAAI,CAAC,MAAM;AAAA,KAAQ,EAAE,OAAO,gBAAgB;AAAA;AAAA,EAAO,EAAE,SAAS,EAAE;AAAA,EAC3E,EAAE,KAAK,IAAI;AACX,QAAM,SAAS,UAAU,OAAO,CAAC,UAAU,MAAM,OAAO,MAAM,IAAI,GAAG,EAAE,KAAK,SAA
S,CAAC;AACtF,MAAI,OAAO,WAAW,EAAG,QAAO;AAChC,QAAM,OAAO,UAAU,OAAO,CAAC,QAAQ,MAAM,UAAU,MAAM,GAAG,EAAE,KAAK,SAAS,CAAC;AACjF,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,QAAM,KAAK;AAAA,IACT;AAAA,IACA;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,EAAE,KAAK,UAAU,UAAU,QAAQ;AAAA,EACrC;AACA,MAAI,GAAG,WAAW,EAAG,QAAO;AAC5B,SAAO,GAAG,OAAO,KAAK;AACxB;AAEA,SAAS,OAAO,GAAmB;AAGjC,QAAM,SAAS,UAAQ,QAAa;AACpC,SAAO,OAAO,WAAW,QAAQ,EAAE,OAAO,GAAG,OAAO,EAAE,OAAO,KAAK;AACpE;;;ACnSO,SAAS,8BACd,MACA,MAC6B;AAC7B,SAAO;AAAA,IACL,oBAAoB,UAAU;AAC5B,YAAM,QAAQ,KAAK,oBAAoB,QAAQ;AAC/C,aAAO;AAAA,QACL,WAAW,MAAM;AAAA,QACjB,SAAS,MAAM;AAAA,QACf,QAAQ,MAAM;AAAA,MAChB;AAAA,IACF;AAAA,IACA,MAAM,MAAM,WAAW;AACrB,YAAM,UAAoB,CAAC;AAC3B,YAAM,WAAqB,CAAC;AAC5B,iBAAW,KAAK,WAAW;AACzB,cAAM,eAAe,2BAA2B,CAAC;AACjD,YAAI,iBAAiB,MAAM;AACzB,mBAAS;AAAA,YACP,kEAAkE,mBAAmB,CAAC,CAAC;AAAA,UACzF;AACA;AAAA,QACF;AACA,YAAI;AACF,gBAAM,IAAI,MAAM,KAAK,0BAA0B,KAAK,eAAe,YAAY;AAC/E,kBAAQ,KAAK,GAAG,EAAE,OAAO;AACzB,mBAAS,KAAK,GAAG,EAAE,QAAQ;AAAA,QAC7B,SAAS,KAAK;AACZ,mBAAS;AAAA,YACP,qCAAqC,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UACvF;AAAA,QACF;AAAA,MACF;AACA,UAAI,KAAK,kBAAkB,QAAQ,SAAS,GAAG;AAC7C,YAAI;AACF,gBAAM,aAAa,MAAM,KAAK,eAAe,KAAK,aAAa;AAC/D,qBAAW,SAAS,WAAY,UAAS,KAAK,SAAS,KAAK,EAAE;AAAA,QAChE,SAAS,KAAK;AACZ,mBAAS;AAAA,YACP,0BAA0B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,UAC5E;AAAA,QACF;AAAA,MACF;AACA,aAAO,EAAE,SAAS,SAAS;AAAA,IAC7B;AAAA,EACF;AACF;AAYA,SAAS,2BAA2B,GAA2B;AAC7D,MAAI,CAAC,KAAK,OAAO,MAAM,SAAU,QAAO;AACxC,QAAM,MAAM;AACZ,MAAI,MAAM,QAAQ,IAAI,WAAW,GAAG;AAClC,UAAM,SAAS,IAAI;AACnB,QAAI,OAAO,WAAW,EAAG,QAAO;AAChC,WAAO,OACJ,IAAI,CAAC,MAAO,OAAO,EAAE,YAAY,WAAW,EAAE,UAAU,EAAG,EAC3D,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAC1B,KAAK,MAAM;AAAA,EAChB;AACA,MAAI,OAAO,IAAI,iBAAiB,SAAU,QAAO,IAAI;AACrD,MAAI,OAAO,IAAI,YAAY,SAAU,QAAO,IAAI;AAChD,SAAO;AACT;AAEA,SAAS,mBAAmB,GAAoB;AAC9C,MAAI,CAAC,KAAK,OAAO,MAAM,SAAU,QAAO;AACxC,QAAM,MAAM;AACZ,MAAI,OAAO,IAAI,oBAAoB,SAAU,QAAO,IAAI;AACxD,MA
AI,OAAO,IAAI,OAAO,SAAU,QAAO,IAAI;AAC3C,SAAO;AACT;;;AC1EA,eAAsB,eACpB,QACA,MACmF;AACnF,QAAM,UAAU,OAAO,WAAW,WAAW,CAAC;AAC9C,QAAM,sBAAsB,OAAO,aAAa,WAAW,CAAC;AAC5D,QAAM,aAAa,CAAC,GAAG,SAAS,GAAG,mBAAmB;AAItD,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO;AAAA,MACL,GAAG;AAAA,MACH,SAAS;AAAA,QACP,mBAAmB,cAAc,KAAK,QAAQ;AAAA,QAC9C,gBAAgB,cAAc,KAAK,QAAQ;AAAA,QAC3C,OAAO;AAAA,QACP,YAAY,KAAK,SAAS,IAAI,CAAC,OAAO;AAAA,UACpC,WAAW,EAAE;AAAA,UACb,QAAQ,EAAE;AAAA,UACV,OAAO,EAAE;AAAA,UACT,OAAO;AAAA,QACT,EAAE;AAAA,QACF,iBAAiB,CAAC;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAEA,QAAM,aAAa,KAAK,SAAS,IAAI,CAAC,MAAM,EAAE,SAAS;AACvD,QAAM,QAAQ,MAAM,KAAK,YAAY,UAAU;AAC/C,QAAM,iBAAiB,IAAI,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;AAE3E,QAAM,aAAa,KAAK,SAAS,IAAI,CAAC,MAAM;AAC1C,UAAM,IAAI,eAAe,IAAI,EAAE,SAAS,KAAK,EAAE;AAC/C,WAAO,EAAE,WAAW,EAAE,WAAW,QAAQ,EAAE,WAAW,OAAO,GAAG,OAAO,IAAI,EAAE,UAAU;AAAA,EACzF,CAAC;AACD,QAAM,oBAAoB,cAAc,KAAK,QAAQ;AACrD,QAAM,iBAAiB,cAAc,KAAK;AAC1C,QAAM,QAAQ,iBAAiB;AAE/B,MAAI,kBAA4B,CAAC;AACjC,MAAI,QAAQ,KAAK,KAAK,wBAAwB,KAAK,QAAQ;AACzD,UAAM,KAAK,OAAO,UAAU;AAC5B,sBAAkB,CAAC,GAAG,UAAU;AAAA,EAClC;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS;AAAA,MACP;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,cAAc,MAAoD;AACzE,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,SAAO,KAAK,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,WAAW,CAAC,IAAI,KAAK;AAC9D;;;AChEO,SAAS,iBACd,SAC6E;AAC7E,QAAM,WAAW,QAAQ,YAAY;AAErC,SAAO,CAAC,SAAmB,QAAyD;AAClF,UAAM,UAAU,8BAA8B,QAAQ,aAAa;AAAA,MACjE,GAAI,QAAQ,UAAU,OAAO,KAAK,CAAC;AAAA,MACnC,GAAI,QAAQ,MAAM,EAAE,KAAK,QAAQ,IAAI,IAAI,CAAC;AAAA,IAC5C,CAAC;AACD,UAAM,eAAe,QAAQ,QAAQ,QAAQ,QAAQ;AACrD,UAAM,UAAU,QAAQ,YAAY,OAAO;AAC3C,UAAM,SAAS,IAAI,UAAU,IAAI,gBAAgB,EAAE;AAEnD,UAAM,MAAsB,CAAC;AAC7B,QAAI;AACJ,QAAI;AACJ,UAAM,SAAS,IAAI,QAAoB,CAAC,SAAS,WAAW;AAC1D,eAAS;AACT,aAAO;AAAA,IACT,CAAC;AAID,WAAO,MAAM,MAAM;AAAA,IAAC,CAAC;AAErB,UAAM,OAA8B;AAAA,MAClC;AAAA,MACA,cAAc,MAAM;AAAA,MACpB,MAAM;AAAA,MACN,GAAI,QAAQ,mBAAmB,EAAE,kBAAkB,QAAQ,iBAAiB,IAAI,CAAC;AAAA,IACnF;AAEA,oBAAgB,SAA4C;AAC1D,UAAI;AACF,cAAM,MAAM,M
AAM,qBAAqB,QAAQ,eAAe,MAAM,MAAM;AAC1E,yBAAiB,SAAS,IAAI,aAAa,SAAS,EAAE,OAAO,CAAC,GAAG;AAC/D,cAAI,KAAK,KAAK;AACd,gBAAM,SAAS,SAAS,OAAO,EAAE,aAAa,CAAC;AAC/C,cAAI,OAAQ,OAAM;AAAA,QACpB;AACA,eAAO,QAAQ,OAAO,MAAM,GAAG,CAAC;AAAA,MAClC,SAAS,KAAK;AACZ,aAAK,GAAG;AACR,cAAM;AAAA,MACR;AAAA,IACF;AAEA,WAAO,EAAE,QAAQ,OAAO,GAAG,OAAO;AAAA,EACpC;AACF;","names":[]}
@@ -1,45 +1,10 @@
1
- import { AnalystFinding, TraceAnalystByteBudgets, TraceAnalysisStore } from '@tangle-network/agent-eval';
2
- import { A as AnalyzeInput } from './dynamic-BvllHV6M.js';
3
- import { A as AnalystRegistryLike, F as FindingsStoreLike, R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-BtRLF2U3.js';
4
- export { b as AnalystLoopEvent, c as AnalystRegistryStreamingLike, d as AutoApplyPolicy, I as ImprovementAdapter, e as ImprovementEditBatch, f as ImprovementReport, K as KnowledgeAdapter, g as KnowledgeProposalBatch, h as KnowledgeReport } from './types-BtRLF2U3.js';
5
- import { I as Iteration } from './types-DdzkffAm.js';
1
+ import { TraceAnalystByteBudgets, TraceAnalysisStore } from '@tangle-network/agent-eval';
2
+ import { I as Iteration } from './types-nBMuollC.js';
3
+ import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
4
+ export { A as AnalystLoopEvent, b as AnalystRegistryLike, c as AnalystRegistryStreamingLike, d as AutoApplyPolicy, F as FindingsStoreLike, I as ImprovementAdapter, e as ImprovementEditBatch, f as ImprovementReport, K as KnowledgeAdapter, g as KnowledgeProposalBatch, h as KnowledgeReport } from './types-p8dWBIXL.js';
6
5
  import '@tangle-network/sandbox';
7
6
  import './runtime-hooks-C7JwKb9E.js';
8
7
 
9
- /**
10
- * @experimental
11
- *
12
- * STEP 2 of closing the autonomous loop: `createAnalystDriverHook` returns the exact
13
- * `analyze` callback `createDynamicDriver` already accepts. Each round it projects the
14
- * round's iterations into a `TraceAnalysisStore` (STEP 1), runs `runAnalystLoop` over the
15
- * registry, threads the prior round's findings as the baseline (cross-round memory), and
16
- * returns the findings — which the driver feeds to the planner via `PlannerContext.analyses`.
17
- *
18
- * This is `runAnalystLoop`'s first consumer: the wire that turns "trace analysts exist" into
19
- * "trace analysts drive the next prompt." The driver enforces the steer-firewall
20
- * (`assertTraceDerivedFindings`) on what this returns, so a kind that leaks judge-derived
21
- * evidence fails loud here — selector ≠ judge stays intact for free.
22
- */
23
-
24
- interface AnalystDriverHookOptions {
25
- /** The analyst registry, pre-populated with the trace-analyst kinds to run each round. */
26
- registry: AnalystRegistryLike;
27
- /**
28
- * Durable findings ledger. When set, each round's findings are appended and diffed
29
- * against the prior round (cross-run memory). `null`/omitted = one-shot, no persistence.
30
- */
31
- findingsStore?: FindingsStoreLike | null;
32
- /** How prior-round findings reach the analysts. Default `'per-kind'` (threads memory). */
33
- priorFindingsStrategy?: 'per-kind' | 'wildcard' | 'none';
34
- /** Base run id; each round runs as `${runId}-r{round}`. Default = a random base. */
35
- runId?: string;
36
- }
37
- /**
38
- * Build the `analyze` hook for `createDynamicDriver({ planner, analyze })`. Fail-loud: an
39
- * empty round throws inside `iterationsToTraceStore` rather than returning empty findings.
40
- */
41
- declare function createAnalystDriverHook<Task, Output>(opts: AnalystDriverHookOptions): (input: AnalyzeInput<Task, Output>) => Promise<ReadonlyArray<AnalystFinding>>;
42
-
43
8
  /**
44
9
  * @experimental
45
10
  *
@@ -84,4 +49,4 @@ declare function iterationsToTraceStore<Task, Output>(iterations: ReadonlyArray<
84
49
 
85
50
  declare function runAnalystLoop<TProposal = unknown, TEdit = unknown>(opts: RunAnalystLoopOpts): Promise<RunAnalystLoopResult<TProposal, TEdit>>;
86
51
 
87
- export { type AnalystDriverHookOptions, AnalystRegistryLike, FindingsStoreLike, RunAnalystLoopOpts, RunAnalystLoopResult, createAnalystDriverHook, iterationsToTraceStore, runAnalystLoop };
52
+ export { RunAnalystLoopOpts, RunAnalystLoopResult, iterationsToTraceStore, runAnalystLoop };
@@ -1,12 +1,10 @@
1
1
  import {
2
- createAnalystDriverHook,
3
2
  iterationsToTraceStore,
4
3
  runAnalystLoop
5
- } from "./chunk-FK53TXOP.js";
6
- import "./chunk-PRX45WE2.js";
4
+ } from "./chunk-HNUXAZIJ.js";
5
+ import "./chunk-GSUO5QS6.js";
7
6
  import "./chunk-DGUM43GV.js";
8
7
  export {
9
- createAnalystDriverHook,
10
8
  iterationsToTraceStore,
11
9
  runAnalystLoop
12
10
  };
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  coderProfile,
3
3
  multiHarnessCoderFanout
4
- } from "./chunk-QR4UUC5P.js";
4
+ } from "./chunk-KADIJAD4.js";
5
5
  import {
6
6
  runLoop
7
- } from "./chunk-KEWO4KI6.js";
7
+ } from "./chunk-72JQCHOZ.js";
8
8
 
9
9
  // src/mcp/executor.ts
10
10
  function createSiblingSandboxExecutor(options) {
@@ -91,7 +91,11 @@ function createDefaultCoderDelegate(options) {
91
91
  const variants = Math.max(1, Math.trunc(args.variants ?? 1));
92
92
  ctx.report({ iteration: 0, phase: "starting" });
93
93
  if (variants <= 1) {
94
- const { agentRunSpec, output, validator } = coderProfile({ task });
94
+ const { agentRunSpec, output, validator } = coderProfile({
95
+ task,
96
+ ...options.harness ? { harness: options.harness } : {},
97
+ ...options.model ? { model: options.model } : {}
98
+ });
95
99
  const result2 = await runLoop({
96
100
  driver: singleShotDriver,
97
101
  agentRun: agentRunSpec,
@@ -113,9 +117,10 @@ function createDefaultCoderDelegate(options) {
113
117
  ctx.report({ iteration: 1, phase: "completed" });
114
118
  return chosen2;
115
119
  }
116
- const fanout = multiHarnessCoderFanout(
117
- fanoutHarnesses && fanoutHarnesses.length > 0 ? { harnesses: fanoutHarnesses.slice(0, variants) } : { harnesses: void 0 }
118
- );
120
+ const fanout = multiHarnessCoderFanout({
121
+ ...fanoutHarnesses && fanoutHarnesses.length > 0 ? { harnesses: fanoutHarnesses.slice(0, variants) } : {},
122
+ ...options.fanoutModels ? { models: options.fanoutModels.slice(0, variants) } : {}
123
+ });
119
124
  const agentRuns = fanout.agentRuns.slice(0, variants);
120
125
  const result = await runLoop({
121
126
  driver: fanout.driver,
@@ -210,4 +215,4 @@ export {
210
215
  createFleetWorkspaceExecutor,
211
216
  createDefaultCoderDelegate
212
217
  };
213
- //# sourceMappingURL=chunk-IJ6FGOPO.js.map
218
+ //# sourceMappingURL=chunk-5YDS7BLC.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/mcp/executor.ts","../src/mcp/delegates.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Delegation executors — the layer between MCP delegates and the sandbox\n * substrate. Each executor exposes a {@link SandboxClient} the kernel\n * consumes plus a placement tag so the trace pipeline can correlate workers\n * with their physical placement.\n *\n * Two implementations ship in-box:\n *\n * - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh\n * sandbox sibling to the caller. Default when the MCP server runs as a\n * standalone CLI mounted outside a fleet.\n *\n * - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines\n * in the caller's existing fleet so worker diffs land directly on the\n * caller's filesystem (the fleet's shared workspace). Selected when the\n * parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.\n */\n\nimport type { CreateSandboxOptions, SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopSandboxPlacement, SandboxClient } from '../runtime'\n\n/** @experimental */\nexport interface DelegationExecutor {\n /** Sandbox client the kernel calls. Returned with `describePlacement` set. */\n readonly client: SandboxClient\n /** Best-effort one-liner used in stderr boot logs and diagnostics. 
*/\n describe(): string\n}\n\n/** @experimental */\nexport interface SiblingSandboxExecutorOptions {\n client: SandboxClient\n}\n\n/**\n * Wrap a raw sandbox SDK client so the kernel emits\n * `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.\n *\n * The returned client `.create()` delegates to the underlying client; the\n * only added behavior is a `describePlacement` tag the kernel reads.\n *\n * @experimental\n */\nexport function createSiblingSandboxExecutor(\n options: SiblingSandboxExecutorOptions,\n): DelegationExecutor {\n const underlying = options.client\n const client: SandboxClient = {\n create(opts?: CreateSandboxOptions): Promise<SandboxInstance> {\n return underlying.create(opts)\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n return { kind: 'sibling', sandboxId: readId(box) }\n },\n }\n return {\n client,\n describe(): string {\n return 'sibling-sandbox (each delegation = fresh sandbox via client.create)'\n },\n }\n}\n\n/**\n * Minimal `SandboxFleet` surface the fleet executor calls. Declared\n * structurally so tests can pass an in-memory stub without instantiating the\n * sandbox SDK.\n *\n * @experimental\n */\nexport interface FleetHandle {\n readonly fleetId: string\n /** Machine ids in dispatch-eligible order. The executor round-robins. */\n readonly ids: ReadonlyArray<string>\n /** Resolve a machine id to its `SandboxInstance` — that machine is mounted\n * on the fleet's shared workspace, so any diff the worker writes lands on\n * every other fleet machine's filesystem too. */\n sandbox(machineId: string): Promise<SandboxInstance>\n}\n\n/** @experimental */\nexport interface FleetWorkspaceExecutorOptions {\n fleet: FleetHandle\n /**\n * Override the machine-selection policy. 
Default = round-robin across\n * `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the\n * coordinator machine the MCP server is running on).\n */\n selectMachine?: (call: { callIndex: number; ids: ReadonlyArray<string> }) => string\n /**\n * Machine ids to skip during default round-robin. Set to the caller's own\n * machineId so workers don't compete with the orchestrator on the same VM.\n */\n excludeMachineIds?: ReadonlyArray<string>\n}\n\n/**\n * Build an executor that resolves each delegated iteration to an existing\n * machine in `fleet`. The fleet's shared-workspace policy means the worker\n * machine sees the caller's filesystem — diffs land in-place with no\n * cross-sandbox copy step.\n *\n * @experimental\n */\nexport function createFleetWorkspaceExecutor(\n options: FleetWorkspaceExecutorOptions,\n): DelegationExecutor {\n const fleet = options.fleet\n const exclude = new Set(options.excludeMachineIds ?? [])\n let callIndex = 0\n // machineId-by-sandboxId, populated as we resolve machines so\n // `describePlacement` can recover the assignment from the SandboxInstance\n // the kernel hands back.\n const placementBySandboxId = new Map<string, { machineId: string }>()\n\n const client: SandboxClient = {\n async create(): Promise<SandboxInstance> {\n const ids = fleet.ids.filter((id) => !exclude.has(id))\n if (ids.length === 0) {\n throw new Error(\n `agent-runtime: fleet ${fleet.fleetId} has no eligible worker machines (ids=[${fleet.ids.join(',')}], excluded=[${[...exclude].join(',')}])`,\n )\n }\n const selector = options.selectMachine\n const machineId = selector ? 
selector({ callIndex, ids }) : ids[callIndex % ids.length]\n callIndex += 1\n if (typeof machineId !== 'string' || machineId.length === 0) {\n throw new Error('agent-runtime: fleet executor selectMachine returned an empty machine id')\n }\n const box = await fleet.sandbox(machineId)\n const sandboxId = readId(box)\n if (sandboxId) placementBySandboxId.set(sandboxId, { machineId })\n return box\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n const sandboxId = readId(box)\n const recorded = sandboxId ? placementBySandboxId.get(sandboxId) : undefined\n return {\n kind: 'fleet',\n sandboxId,\n fleetId: fleet.fleetId,\n machineId: recorded?.machineId,\n }\n },\n }\n\n return {\n client,\n describe(): string {\n const excluded = exclude.size > 0 ? ` (excluded=[${[...exclude].join(',')}])` : ''\n return `fleet-workspace (fleetId=${fleet.fleetId}, machines=[${fleet.ids.join(',')}]${excluded})`\n },\n }\n}\n\nfunction readId(box: SandboxInstance): string | undefined {\n const raw = (box as unknown as { id?: unknown }).id\n return typeof raw === 'string' && raw.length > 0 ? raw : undefined\n}\n","/**\n * @experimental\n *\n * Delegate factories — the layer between MCP tool handlers and the\n * underlying `runLoop` runners.\n *\n * The MCP server is profile-agnostic: it owns the task queue + feedback\n * store + transport. Each `*Delegate` is the closure that the queue\n * invokes when a task runs. Consumers can override either delegate to\n * inject custom drivers, mocks, fleet-aware dispatchers, etc.\n *\n * The default coder delegate is wired here because we own\n * `coderProfile` / `multiHarnessCoderFanout`. The default researcher\n * delegate is **not** wired in this file — `agent-knowledge` cannot be\n * imported from `agent-runtime` without inducing a cycle. 
Consumers\n * pass `researcherDelegate` explicitly when constructing the server.\n */\n\nimport { type CoderOutput, coderProfile, multiHarnessCoderFanout } from '../profiles/coder'\nimport type { Iteration, LoopTraceEmitter, SandboxClient } from '../runtime'\nimport { runLoop } from '../runtime'\nimport { createSiblingSandboxExecutor, type DelegationExecutor } from './executor'\nimport type {\n CoderTask,\n DelegateCodeArgs,\n DelegateResearchArgs,\n DelegateUiAuditArgs,\n DelegationProgress,\n ResearchOutputShape,\n UiAuditorDelegationOutput,\n} from './types'\n\n/** @experimental */\nexport interface DelegateRunCtx {\n signal: AbortSignal\n report(progress: DelegationProgress): void\n}\n\n/** @experimental */\nexport type CoderDelegate = (\n args: DelegateCodeArgs,\n ctx: DelegateRunCtx,\n) => Promise<import('../profiles/coder').CoderOutput>\n\n/** @experimental */\nexport type ResearcherDelegate = (\n args: DelegateResearchArgs,\n ctx: DelegateRunCtx,\n) => Promise<ResearchOutputShape>\n\n/**\n * UI-auditor delegate — fully consumer-injected. agent-runtime ships no\n * default factory because the inputs are workspace path + judge function\n * + (optionally) a `SandboxClient`, and the judge is the consumer's\n * model seam. See `createInProcessUiAuditClient` + `uiAuditorProfile` in\n * `@tangle-network/agent-runtime/profiles` for the canonical wiring.\n *\n * @experimental\n */\nexport type UiAuditorDelegate = (\n args: DelegateUiAuditArgs,\n ctx: DelegateRunCtx,\n) => Promise<UiAuditorDelegationOutput>\n\n/** @experimental Structured review verdict over a coder candidate. */\nexport interface CoderReview {\n /** Gate: only approved candidates are eligible to win. */\n approved: boolean\n /** Reviewer's recommendation — surfaced in traces. */\n recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject'\n /** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. 
*/\n readiness: number\n notes?: string\n}\n\n/**\n * @experimental\n *\n * Optional adversarial reviewer over a coder candidate that already passed\n * mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded\n * from the ai-trading-blueprint delegation MCP: a candidate is only eligible to\n * win if the reviewer approves it. The reviewer is the consumer's seam — an LLM\n * judge, a `pnpm review` command, anything returning a `CoderReview`.\n */\nexport type CoderReviewer = (\n output: import('../profiles/coder').CoderOutput,\n task: CoderTask,\n ctx: { signal: AbortSignal },\n) => Promise<CoderReview> | CoderReview\n\n/**\n * @experimental Winner-selection strategy among validated (+ reviewed)\n * candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`\n * (the kernel's behavior — preserves backward compatibility).\n */\nexport type CoderWinnerSelection =\n | 'highest-score'\n | 'smallest-diff'\n | 'highest-readiness'\n | 'first-approved'\n\n/** @experimental */\nexport interface CreateDefaultCoderDelegateOptions {\n /**\n * Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)\n * to control where worker iterations land. `sandboxClient` is a\n * convenience shorthand that wraps the client in a sibling executor — pass\n * one or the other, not both.\n */\n executor?: DelegationExecutor\n /**\n * Convenience shorthand for sibling placement. Equivalent to\n * `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.\n */\n sandboxClient?: SandboxClient\n /** Backend harness for the single-coder path. Default comes from `coderProfile`. */\n harness?: string\n /** Model override for the single-coder path. */\n model?: string\n /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */\n fanoutHarnesses?: string[]\n /** Optional per-harness model override for `variants > 1`. 
*/\n fanoutModels?: (string | undefined)[]\n /** Hard cap on the kernel's per-batch concurrency. Default 4. */\n maxConcurrency?: number\n /**\n * Optional adversarial reviewer. When set, a candidate must pass mechanical\n * validation AND `reviewer.approved` to be eligible to win — empty/secret/\n * test-failing patches are already gone; this catches the \"compiles + passes\n * but wrong/unsafe\" class the deterministic validator can't see.\n */\n reviewer?: CoderReviewer\n /** Winner-selection strategy among eligible candidates. Default `highest-score`. */\n winnerSelection?: CoderWinnerSelection\n /**\n * Loop trace emitter forwarded into every delegated `runLoop`. Wire\n * `createPropagatingTraceEmitter(readTraceContextFromEnv())` here (the bin\n * does) so delegated build-loops export their topology spans to the OTLP /\n * Tangle Intelligence sink when `OTEL_EXPORTER_OTLP_ENDPOINT` is set — and\n * are a cheap no-op when it isn't. Configurable by construction.\n */\n traceEmitter?: LoopTraceEmitter\n}\n\n/**\n * Build a coder delegate that drives `runLoop` against the project's\n * sandbox client + coder profile. When `args.variants > 1` it switches\n * to the multi-harness fanout topology.\n *\n * @experimental\n */\nexport function createDefaultCoderDelegate(\n options: CreateDefaultCoderDelegateOptions,\n): CoderDelegate {\n const executor = resolveExecutor(options)\n const sandboxClient = executor.client\n const fanoutHarnesses = options.fanoutHarnesses\n const maxConcurrency = options.maxConcurrency ?? 4\n const traceEmitter = options.traceEmitter\n return async (args, ctx) => {\n const task: CoderTask = {\n goal: buildCoderGoal(args),\n repoRoot: args.repoRoot,\n testCmd: args.config?.testCmd,\n typecheckCmd: args.config?.typecheckCmd,\n forbiddenPaths: args.config?.forbiddenPaths,\n maxDiffLines: args.config?.maxDiffLines,\n }\n const variants = Math.max(1, Math.trunc(args.variants ?? 
1))\n ctx.report({ iteration: 0, phase: 'starting' })\n if (variants <= 1) {\n const { agentRunSpec, output, validator } = coderProfile({\n task,\n ...(options.harness ? { harness: options.harness } : {}),\n ...(options.model ? { model: options.model } : {}),\n })\n const result = await runLoop({\n driver: singleShotDriver,\n agentRun: agentRunSpec,\n output,\n validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal, ...(traceEmitter ? { traceEmitter } : {}) },\n maxIterations: 1,\n maxConcurrency,\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: 1, phase: 'completed' })\n return chosen\n }\n const fanout = multiHarnessCoderFanout({\n ...(fanoutHarnesses && fanoutHarnesses.length > 0\n ? { harnesses: fanoutHarnesses.slice(0, variants) }\n : {}),\n ...(options.fanoutModels ? { models: options.fanoutModels.slice(0, variants) } : {}),\n })\n const agentRuns = fanout.agentRuns.slice(0, variants)\n const result = await runLoop({\n driver: fanout.driver,\n agentRuns,\n output: fanout.output,\n validator: fanout.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal, ...(traceEmitter ? { traceEmitter } : {}) },\n maxIterations: variants,\n maxConcurrency: Math.min(maxConcurrency, variants),\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 
'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: agentRuns.length, phase: 'completed' })\n return chosen\n }\n}\n\ninterface PickCoderWinnerArgs {\n iterations: ReadonlyArray<Iteration<CoderTask, CoderOutput>>\n reviewer: CoderReviewer | undefined\n selection: CoderWinnerSelection\n task: CoderTask\n signal: AbortSignal\n}\n\ninterface CoderCandidate {\n index: number\n output: CoderOutput\n score: number\n readiness: number\n}\n\n/**\n * Pick the winning coder candidate from a finished loop's iterations:\n * 1. keep only mechanically-VALID candidates (the validator already gated\n * tests/typecheck/forbidden/diff/no-op/secrets),\n * 2. if a `reviewer` is wired, keep only those it APPROVES,\n * 3. select among survivors by the chosen strategy.\n * Returns `undefined` when nothing survives — the delegate fails loud.\n */\nasync function pickCoderWinner(args: PickCoderWinnerArgs): Promise<CoderOutput | undefined> {\n const valid: CoderCandidate[] = []\n for (const iter of args.iterations) {\n if (iter.output === undefined || iter.error || iter.verdict?.valid !== true) continue\n valid.push({\n index: iter.index,\n output: iter.output,\n score: iter.verdict.score ?? 0,\n readiness: iter.verdict.score ?? 0,\n })\n }\n if (valid.length === 0) return undefined\n\n let eligible = valid\n if (args.reviewer) {\n eligible = []\n for (const c of valid) {\n const review = await args.reviewer(c.output, args.task, { signal: args.signal })\n if (review.approved) eligible.push({ ...c, readiness: review.readiness })\n }\n if (eligible.length === 0) return undefined\n }\n\n return selectCoderCandidate(eligible, args.selection).output\n}\n\n/** Apply the winner-selection strategy; ties broken by earliest iteration. 
*/\nfunction selectCoderCandidate(\n candidates: CoderCandidate[],\n selection: CoderWinnerSelection,\n): CoderCandidate {\n const diffLines = (c: CoderCandidate) =>\n c.output.diffStats.insertions + c.output.diffStats.deletions\n const sorted = [...candidates].sort((a, b) => {\n switch (selection) {\n case 'smallest-diff':\n return diffLines(a) - diffLines(b) || a.index - b.index\n case 'highest-readiness':\n return b.readiness - a.readiness || a.index - b.index\n case 'first-approved':\n return a.index - b.index\n default:\n return b.score - a.score || a.index - b.index\n }\n })\n return sorted[0]!\n}\n\nfunction noWinnerMessage(reviewer: CoderReviewer | undefined): string {\n return reviewer\n ? 'coder delegate: no candidate passed validation + review'\n : 'coder delegate: no candidate passed validation'\n}\n\nfunction buildCoderGoal(args: DelegateCodeArgs): string {\n if (!args.contextHint) return args.goal\n return [args.goal, '', '## Context', args.contextHint].join('\\n')\n}\n\nfunction resolveExecutor(options: CreateDefaultCoderDelegateOptions): DelegationExecutor {\n if (options.executor && options.sandboxClient) {\n throw new Error('createDefaultCoderDelegate: pass exactly one of `executor` or `sandboxClient`')\n }\n if (options.executor) return options.executor\n if (options.sandboxClient) {\n return createSiblingSandboxExecutor({ client: options.sandboxClient })\n }\n throw new Error('createDefaultCoderDelegate: `executor` or `sandboxClient` is required')\n}\n\n/**\n * Single-shot driver — plan one task on iteration 0, stop after one\n * iteration. Used by the coder delegate when `variants <= 1`. Keeps the\n * runLoop kernel-level accounting (timing, cost, trace emission) while\n * skipping fanout/refine topology overhead.\n */\nconst singleShotDriver = {\n name: 'mcp-single-shot',\n async plan<Task>(task: Task, history: ReadonlyArray<unknown>): Promise<Task[]> {\n return history.length === 0 ? 
[task] : []\n },\n decide(history: ReadonlyArray<unknown>): 'pick-winner' | 'fail' {\n return history.length > 0 ? 'pick-winner' : 'fail'\n },\n}\n"],"mappings":";;;;;;;;;AA6CO,SAAS,6BACd,SACoB;AACpB,QAAM,aAAa,QAAQ;AAC3B,QAAM,SAAwB;AAAA,IAC5B,OAAO,MAAuD;AAC5D,aAAO,WAAW,OAAO,IAAI;AAAA,IAC/B;AAAA,IACA,kBAAkB,KAA4C;AAC5D,aAAO,EAAE,MAAM,WAAW,WAAW,OAAO,GAAG,EAAE;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,aAAO;AAAA,IACT;AAAA,EACF;AACF;AA2CO,SAAS,6BACd,SACoB;AACpB,QAAM,QAAQ,QAAQ;AACtB,QAAM,UAAU,IAAI,IAAI,QAAQ,qBAAqB,CAAC,CAAC;AACvD,MAAI,YAAY;AAIhB,QAAM,uBAAuB,oBAAI,IAAmC;AAEpE,QAAM,SAAwB;AAAA,IAC5B,MAAM,SAAmC;AACvC,YAAM,MAAM,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;AACrD,UAAI,IAAI,WAAW,GAAG;AACpB,cAAM,IAAI;AAAA,UACR,wBAAwB,MAAM,OAAO,0CAA0C,MAAM,IAAI,KAAK,GAAG,CAAC,gBAAgB,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC;AAAA,QAC1I;AAAA,MACF;AACA,YAAM,WAAW,QAAQ;AACzB,YAAM,YAAY,WAAW,SAAS,EAAE,WAAW,IAAI,CAAC,IAAI,IAAI,YAAY,IAAI,MAAM;AACtF,mBAAa;AACb,UAAI,OAAO,cAAc,YAAY,UAAU,WAAW,GAAG;AAC3D,cAAM,IAAI,MAAM,0EAA0E;AAAA,MAC5F;AACA,YAAM,MAAM,MAAM,MAAM,QAAQ,SAAS;AACzC,YAAM,YAAY,OAAO,GAAG;AAC5B,UAAI,UAAW,sBAAqB,IAAI,WAAW,EAAE,UAAU,CAAC;AAChE,aAAO;AAAA,IACT;AAAA,IACA,kBAAkB,KAA4C;AAC5D,YAAM,YAAY,OAAO,GAAG;AAC5B,YAAM,WAAW,YAAY,qBAAqB,IAAI,SAAS,IAAI;AACnE,aAAO;AAAA,QACL,MAAM;AAAA,QACN;AAAA,QACA,SAAS,MAAM;AAAA,QACf,WAAW,UAAU;AAAA,MACvB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,YAAM,WAAW,QAAQ,OAAO,IAAI,eAAe,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC,OAAO;AAChF,aAAO,4BAA4B,MAAM,OAAO,eAAe,MAAM,IAAI,KAAK,GAAG,CAAC,IAAI,QAAQ;AAAA,IAChG;AAAA,EACF;AACF;AAEA,SAAS,OAAO,KAA0C;AACxD,QAAM,MAAO,IAAoC;AACjD,SAAO,OAAO,QAAQ,YAAY,IAAI,SAAS,IAAI,MAAM;AAC3D;;;ACTO,SAAS,2BACd,SACe;AACf,QAAM,WAAW,gBAAgB,OAAO;AACxC,QAAM,gBAAgB,SAAS;AAC/B,QAAM,kBAAkB,QAAQ;AAChC,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,eAAe,QAAQ;AAC7B,SAAO,OAAO,MAAM,QAAQ;AAC1B,UAAM,OAAkB;AAAA,MACtB,MAAM,eAAe,IAAI;AAAA,MACzB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK,QAAQ;AAAA,MACtB,cAAc,KAAK,QAAQ;AAAA,MAC3B,gBAAgB,KAAK,QAAQ;AAAA,MAC7B,c
AAc,KAAK,QAAQ;AAAA,IAC7B;AACA,UAAM,WAAW,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAC3D,QAAI,OAAO,EAAE,WAAW,GAAG,OAAO,WAAW,CAAC;AAC9C,QAAI,YAAY,GAAG;AACjB,YAAM,EAAE,cAAc,QAAQ,UAAU,IAAI,aAAa;AAAA,QACvD;AAAA,QACA,GAAI,QAAQ,UAAU,EAAE,SAAS,QAAQ,QAAQ,IAAI,CAAC;AAAA,QACtD,GAAI,QAAQ,QAAQ,EAAE,OAAO,QAAQ,MAAM,IAAI,CAAC;AAAA,MAClD,CAAC;AACD,YAAMA,UAAS,MAAM,QAAQ;AAAA,QAC3B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA,KAAK,EAAE,eAAe,QAAQ,IAAI,QAAQ,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC,EAAG;AAAA,QACpF,eAAe;AAAA,QACf;AAAA,MACF,CAAC;AACD,YAAMC,UAAS,MAAM,gBAAgB;AAAA,QACnC,YAAYD,QAAO;AAAA,QACnB,UAAU,QAAQ;AAAA,QAClB,WAAW,QAAQ,mBAAmB;AAAA,QACtC;AAAA,QACA,QAAQ,IAAI;AAAA,MACd,CAAC;AACD,UAAI,CAACC,QAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,UAAI,OAAO,EAAE,WAAW,GAAG,OAAO,YAAY,CAAC;AAC/C,aAAOA;AAAA,IACT;AACA,UAAM,SAAS,wBAAwB;AAAA,MACrC,GAAI,mBAAmB,gBAAgB,SAAS,IAC5C,EAAE,WAAW,gBAAgB,MAAM,GAAG,QAAQ,EAAE,IAChD,CAAC;AAAA,MACL,GAAI,QAAQ,eAAe,EAAE,QAAQ,QAAQ,aAAa,MAAM,GAAG,QAAQ,EAAE,IAAI,CAAC;AAAA,IACpF,CAAC;AACD,UAAM,YAAY,OAAO,UAAU,MAAM,GAAG,QAAQ;AACpD,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO;AAAA,MAClB;AAAA,MACA,KAAK,EAAE,eAAe,QAAQ,IAAI,QAAQ,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC,EAAG;AAAA,MACpF,eAAe;AAAA,MACf,gBAAgB,KAAK,IAAI,gBAAgB,QAAQ;AAAA,IACnD,CAAC;AACD,UAAM,SAAS,MAAM,gBAAgB;AAAA,MACnC,YAAY,OAAO;AAAA,MACnB,UAAU,QAAQ;AAAA,MAClB,WAAW,QAAQ,mBAAmB;AAAA,MACtC;AAAA,MACA,QAAQ,IAAI;AAAA,IACd,CAAC;AACD,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,QAAI,OAAO,EAAE,WAAW,UAAU,QAAQ,OAAO,YAAY,CAAC;AAC9D,WAAO;AAAA,EACT;AACF;AAyBA,eAAe,gBAAgB,MAA6D;AAC1F,QAAM,QAA0B,CAAC;AACjC,aAAW,QAAQ,KAAK,YAAY;AAClC,QAAI,KAAK,WAAW,UAAa,KAAK,SAAS,KAAK,SAAS,UAAU,KAAM;AAC7E,UAAM,KAAK;AAAA,MACT,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,OAAO,KAAK,QAAQ,SAAS;AAAA,MAC7B,WAAW,KAAK,QAAQ,SAAS;AAAA,IACnC,CAAC;AAAA,EACH;AACA,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,MAAI,WAAW;AACf,MAAI,KAAK,UAAU;AACjB,eAAW,CAAC;AACZ,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,MAAM,KAAK,SAAS,EAAE,QAAQ,KAAK,MAA
M,EAAE,QAAQ,KAAK,OAAO,CAAC;AAC/E,UAAI,OAAO,SAAU,UAAS,KAAK,EAAE,GAAG,GAAG,WAAW,OAAO,UAAU,CAAC;AAAA,IAC1E;AACA,QAAI,SAAS,WAAW,EAAG,QAAO;AAAA,EACpC;AAEA,SAAO,qBAAqB,UAAU,KAAK,SAAS,EAAE;AACxD;AAGA,SAAS,qBACP,YACA,WACgB;AAChB,QAAM,YAAY,CAAC,MACjB,EAAE,OAAO,UAAU,aAAa,EAAE,OAAO,UAAU;AACrD,QAAM,SAAS,CAAC,GAAG,UAAU,EAAE,KAAK,CAAC,GAAG,MAAM;AAC5C,YAAQ,WAAW;AAAA,MACjB,KAAK;AACH,eAAO,UAAU,CAAC,IAAI,UAAU,CAAC,KAAK,EAAE,QAAQ,EAAE;AAAA,MACpD,KAAK;AACH,eAAO,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE;AAAA,MAClD,KAAK;AACH,eAAO,EAAE,QAAQ,EAAE;AAAA,MACrB;AACE,eAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,IAC5C;AAAA,EACF,CAAC;AACD,SAAO,OAAO,CAAC;AACjB;AAEA,SAAS,gBAAgB,UAA6C;AACpE,SAAO,WACH,4DACA;AACN;AAEA,SAAS,eAAe,MAAgC;AACtD,MAAI,CAAC,KAAK,YAAa,QAAO,KAAK;AACnC,SAAO,CAAC,KAAK,MAAM,IAAI,cAAc,KAAK,WAAW,EAAE,KAAK,IAAI;AAClE;AAEA,SAAS,gBAAgB,SAAgE;AACvF,MAAI,QAAQ,YAAY,QAAQ,eAAe;AAC7C,UAAM,IAAI,MAAM,+EAA+E;AAAA,EACjG;AACA,MAAI,QAAQ,SAAU,QAAO,QAAQ;AACrC,MAAI,QAAQ,eAAe;AACzB,WAAO,6BAA6B,EAAE,QAAQ,QAAQ,cAAc,CAAC;AAAA,EACvE;AACA,QAAM,IAAI,MAAM,uEAAuE;AACzF;AAQA,IAAM,mBAAmB;AAAA,EACvB,MAAM;AAAA,EACN,MAAM,KAAW,MAAY,SAAkD;AAC7E,WAAO,QAAQ,WAAW,IAAI,CAAC,IAAI,IAAI,CAAC;AAAA,EAC1C;AAAA,EACA,OAAO,SAAyD;AAC9D,WAAO,QAAQ,SAAS,IAAI,gBAAgB;AAAA,EAC9C;AACF;","names":["result","chosen"]}