@tangle-network/agent-runtime 0.39.0 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +265 -69
  2. package/dist/{chunk-AXWGLYSF.js → chunk-4GI7C36B.js} +3 -3
  3. package/dist/{chunk-VLXRXMTF.js → chunk-FJ4GDNVN.js} +5 -4
  4. package/dist/chunk-FJ4GDNVN.js.map +1 -0
  5. package/dist/{chunk-HSX6PFZR.js → chunk-HVYOHJHK.js} +338 -2
  6. package/dist/chunk-HVYOHJHK.js.map +1 -0
  7. package/dist/chunk-NRZOXCJK.js +64 -0
  8. package/dist/chunk-NRZOXCJK.js.map +1 -0
  9. package/dist/{chunk-7JBDJQLO.js → chunk-OISRXLWI.js} +8 -3
  10. package/dist/chunk-OISRXLWI.js.map +1 -0
  11. package/dist/{chunk-PK5DYSNO.js → chunk-WSJJGSD3.js} +51 -5
  12. package/dist/chunk-WSJJGSD3.js.map +1 -0
  13. package/dist/{dynamic-DcrwVGuV.d.ts → dynamic-CazTl_Zp.d.ts} +3 -1
  14. package/dist/index.d.ts +4 -4
  15. package/dist/index.js +9 -8
  16. package/dist/index.js.map +1 -1
  17. package/dist/{kb-gate-YdPNEagq.d.ts → kb-gate-NzOJSnOk.d.ts} +9 -1
  18. package/dist/{loop-runner-bin-DgZj0zfJ.d.ts → loop-runner-bin-DYRzk2cT.d.ts} +3 -3
  19. package/dist/loop-runner-bin.d.ts +4 -4
  20. package/dist/loop-runner-bin.js +3 -3
  21. package/dist/loops.d.ts +4 -4
  22. package/dist/loops.js +1 -1
  23. package/dist/mcp/bin.js +28 -17
  24. package/dist/mcp/bin.js.map +1 -1
  25. package/dist/mcp/index.d.ts +3 -3
  26. package/dist/mcp/index.js +9 -49
  27. package/dist/mcp/index.js.map +1 -1
  28. package/dist/profiles.d.ts +1 -1
  29. package/dist/{types-B9O7l-ij.d.ts → types-BrJKXXI8.d.ts} +8 -1
  30. package/package.json +1 -1
  31. package/dist/chunk-7JBDJQLO.js.map +0 -1
  32. package/dist/chunk-7ZECSZ3C.js +0 -400
  33. package/dist/chunk-7ZECSZ3C.js.map +0 -1
  34. package/dist/chunk-HSX6PFZR.js.map +0 -1
  35. package/dist/chunk-PK5DYSNO.js.map +0 -1
  36. package/dist/chunk-VLXRXMTF.js.map +0 -1
  37. package/dist/{chunk-AXWGLYSF.js.map → chunk-4GI7C36B.js.map} +0 -0
package/README.md CHANGED
@@ -1,14 +1,38 @@
1
1
  # @tangle-network/agent-runtime
2
2
 
3
- Production runtime substrate for domain agents. Owns the chat-turn engine, task lifecycle, knowledge readiness, sanitized telemetry, OTEL export, model admission, and the declarative `defineAgent` manifest. Long-running execution durability lives in `@tangle-network/sandbox`.
3
+ The task-lifecycle substrate for domain agents. It owns the **chat-turn engine**, the **driven-loop kernel** (refine / fanout-vote / agent-authored *dynamic* topologies), **delegated loops** (build-in-a-loop, valid-only research, review, audit, self-improve), **identity-gated prompt optimization**, **OpenTelemetry GenAI tracing**, knowledge readiness, sanitized telemetry, and the declarative `defineAgent` manifest — and delegates domain behavior (models, tools, KB) to adapters. Long-running execution durability lives in [`@tangle-network/sandbox`](https://www.npmjs.com/package/@tangle-network/sandbox); evals + gates in [`@tangle-network/agent-eval`](https://www.npmjs.com/package/@tangle-network/agent-eval).
4
4
 
5
5
  ```bash
6
6
  pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval @tangle-network/sandbox
7
7
  ```
8
8
 
9
- ## Hello world
9
+ ---
10
10
 
11
- Every product agent is a `handleChatTurn` call inside a route. This 20-line snippet is what gtm / creative / legal / tax all run:
11
+ ## Contents
12
+
13
+ - [Getting started](#getting-started) — the 20-line production chat turn
14
+ - [Which entry point do I reach for?](#which-entry-point-do-i-reach-for)
15
+ - [Capabilities](#capabilities)
16
+ - [1. Chat turns — `handleChatTurn`](#1-chat-turns--handlechatturn)
17
+ - [2. Driven loops + topology drivers](#2-driven-loops--topology-drivers)
18
+ - [3. Agent-authored topology — `createDynamicDriver`](#3-agent-authored-topology--createdynamicdriver)
19
+ - [4. Delegated loop-runner — `runDelegatedLoop`](#4-delegated-loop-runner--rundelegatedloop)
20
+ - [5. Reliable build-in-a-loop — the coder delegate](#5-reliable-build-in-a-loop--the-coder-delegate)
21
+ - [6. Valid-only research — `createKbGate`](#6-valid-only-research--createkbgate)
22
+ - [7. Identity-gated prompt optimization — `optimizePrompt`](#7-identity-gated-prompt-optimization--optimizeprompt)
23
+ - [8. OpenTelemetry GenAI topology tracing](#8-opentelemetry-genai-topology-tracing)
24
+ - [9. MCP delegation server — `agent-runtime-mcp`](#9-mcp-delegation-server--agent-runtime-mcp)
25
+ - [Defaults](#defaults)
26
+ - [Composition with the stack](#composition-with-the-stack)
27
+ - [Subpath exports](#subpath-exports)
28
+ - [Adoption skill](#adoption-skill)
29
+ - [Stability · Tests · Docs](#stability--tests--docs)
30
+
31
+ ---
32
+
33
+ ## Getting started
34
+
35
+ Every product agent is a `handleChatTurn` call inside a route. This is what gtm / creative / legal / tax all run in production:
12
36
 
13
37
  ```ts
14
38
  import { handleChatTurn } from '@tangle-network/agent-runtime'
@@ -33,106 +57,278 @@ export async function POST({ request, env, ctx }: { request: Request; env: Env;
33
57
  }
34
58
  ```
35
59
 
36
- That's the centerpiece. Everything else is "when chat alone isn't enough."
60
+ That's the centerpiece. Everything below is *"when one chat turn isn't enough"* — multi-shot loops, delegation, optimization, and the telemetry that makes them auditable.
61
+
62
+ ---
37
63
 
38
64
  ## Which entry point do I reach for?
39
65
 
66
+ | You want to… | Reach for | Subpath |
67
+ |---|---|---|
68
+ | Run a production chat turn (90% of products) | `handleChatTurn` | root |
69
+ | Declare an agent (profile + surfaces + adapters) | `defineAgent` | `/agent` |
70
+ | One-shot task with verification + eval | `runAgentTask` | root |
71
+ | Multi-shot loop (refine / fanout-vote) | `runLoop` + a driver | `/loops` |
72
+ | Let the **agent choose** the loop shape per round | `createDynamicDriver` + `createSandboxPlanner` | `/loops` |
73
+ | Delegate a disciplined loop by mode (code/research/…) | `runDelegatedLoop` / `agent-runtime-loop` | root |
74
+ | Build code reliably (reviewed, gated) | `createDefaultCoderDelegate` | `/mcp` |
75
+ | Grow a KB with only grounded facts | `createKbGate` | `/mcp` |
76
+ | Improve a prompt safely (identity-gated) | `optimizePrompt` | `/improvement` |
77
+ | Ship loop traces to a GenAI viewer | `buildLoopOtelSpans` + `createOtelExporter` | root |
78
+ | Expose delegation as MCP tools to a sandbox agent | `createMcpServer` / `agent-runtime-mcp` | `/mcp` |
79
+ | Mutate surfaces from trace findings | `runAnalystLoop` | `/analyst-loop` |
80
+ | Persist a run + cost ledger | `startRuntimeRun` | root |
81
+
82
+ ---
83
+
84
+ ## Capabilities
85
+
86
+ ### 1. Chat turns — `handleChatTurn`
87
+
88
+ The production turn envelope: frames a producer with the `session.run.*` NDJSON protocol, the persist → post-process → trace-flush hook order, and a stable execution id for client-retry replay. See [Getting started](#getting-started) and [`examples/chat-handler/`](./examples/chat-handler/).
89
+
90
+ ### 2. Driven loops + topology drivers
91
+
92
+ `runLoop` is a topology-agnostic kernel: each iteration spawns a sandbox on an `AgentRunSpec`, decodes the output, validates it, and asks a **driver** what to do next. The driver owns topology; the validator owns scoring; the kernel owns iteration accounting, concurrency, cost/token aggregation, and trace emission.
93
+
94
+ ```ts
95
+ import { runLoop, createFanoutVoteDriver } from '@tangle-network/agent-runtime/loops'
96
+
97
+ const result = await runLoop({
98
+ driver: createFanoutVoteDriver({ n: 3 }), // 3 parallel attempts, pick the best valid one
99
+ agentRuns: [claudeSpec, codexSpec, glmSpec], // heterogeneous: one harness per branch
100
+ output, // events → typed Output
101
+ validator, // Output → { valid, score }
102
+ task,
103
+ ctx: { sandboxClient: sandbox },
104
+ })
105
+ result.winner // highest-scoring valid attempt
106
+ ```
107
+
108
+ Shipped drivers (`/loops/drivers`): **`createRefineDriver`** (single task, iterate until valid) and **`createFanoutVoteDriver`** (N parallel, vote). See [`examples/coder-loop/`](./examples/coder-loop/) and [`examples/researcher-loop/`](./examples/researcher-loop/).
109
+
110
+ ### 3. Agent-authored topology — `createDynamicDriver`
111
+
112
+ The third driver lets the **agent author the loop topology at runtime** — refine, fan out, or stop, decided per round by an injected planner. Topology is orthogonal to harness: the planner never names a backend; the kernel's `agentRuns` round-robin decides which harness runs each branch.
113
+
114
+ ```ts
115
+ import { runLoop, createDynamicDriver, createSandboxPlanner } from '@tangle-network/agent-runtime/loops'
116
+
117
+ const planner = createSandboxPlanner({
118
+ client: sandbox,
119
+ profile: { name: 'planner', metadata: { backendType: 'claude-code' } }, // cheap model is fine
120
+ decodeTask: (raw) => raw as Task,
121
+ })
122
+
123
+ const result = await runLoop({
124
+ driver: createDynamicDriver({ planner, maxIterations: 8 }),
125
+ agentRuns: [claudeSpec, codexSpec], // the planner can fan a single round across both
126
+ output, validator, task,
127
+ ctx: { sandboxClient: sandbox },
128
+ })
129
+ ```
130
+
131
+ The planner emits one `TopologyMove` per round (`refine` | `fanout` | `stop`) with a rationale; a malformed move throws `PlannerError` (the loop never runs a topology nobody chose).
132
+
133
+ ### 4. Delegated loop-runner — `runDelegatedLoop`
134
+
135
+ One configured entrypoint a worker agent (or a scheduled routine) calls to run a disciplined loop in a chosen **mode**, over the hardened engines below. It fails loudly on an unwired mode; an exception thrown by an engine is captured as `{ ok: false }` so unattended runs *record* the failure rather than crash.
136
+
137
+ ```ts
138
+ import {
139
+ runDelegatedLoop, coderLoopRunner, researchLoopRunner, type DelegatedLoopRegistry,
140
+ } from '@tangle-network/agent-runtime'
141
+
142
+ const registry: DelegatedLoopRegistry = {
143
+ code: coderLoopRunner({
144
+ sandboxClient,
145
+ args: { goal: 'fix the flaky retry test', repoRoot: '/repo' },
146
+ reviewer, // optional adversarial gate
147
+ winnerSelection: 'smallest-diff',
148
+ }),
149
+ research: researchLoopRunner({ research, gate: { selfArtifactKinds: ['spec'] }, maxRounds: 3 }),
150
+ }
151
+
152
+ const result = await runDelegatedLoop('code', registry)
153
+ // → { mode: 'code', ok: true, output: CoderOutput, durationMs }
154
+ ```
155
+
156
+ Modes: `code` · `review` · `research` · `audit` · `self-improve` · `dynamic` — each with a default factory (`coderLoopRunner`, `reviewLoopRunner`, `researchLoopRunner`, `dynamicLoopRunner`, `selfImproveLoopRunner`, `auditLoopRunner`).
157
+
158
+ **Schedulable**: the `agent-runtime-loop` bin runs it from a cron/routine. The config module wires the registry (with full env/creds access):
159
+
160
+ ```bash
161
+ agent-runtime-loop --mode research --config ./loops.config.js
162
+ # exits 0 (ok) · 1 (recorded failure) · 2 (usage/config error); prints the result as JSON
163
+ ```
164
+
165
+ ```ts
166
+ // loops.config.js — default-exports a DelegatedLoopRegistry (or a factory)
167
+ import { researchLoopRunner } from '@tangle-network/agent-runtime'
168
+ export default { research: researchLoopRunner({ research: myResearchEngine, maxRounds: 3 }) }
169
+ ```
170
+
171
+ ### 5. Reliable build-in-a-loop — the coder delegate
172
+
173
+ `createDefaultCoderDelegate` drives a coder loop with **default-on safety gates** so it never ships junk:
174
+
175
+ - **no-op rejection** — an empty patch can't "pass" trivially,
176
+ - **secret-path floor** — always-on, independent of `forbiddenPaths` (`.env`, keys, wallets, …),
177
+ - optional **`reviewer`** gate — a candidate must pass tests/typecheck **and** be approved to win,
178
+ - **`winnerSelection`** — `highest-score` (default) · `smallest-diff` · `highest-readiness` · `first-approved`.
179
+
180
+ ```ts
181
+ import { createDefaultCoderDelegate } from '@tangle-network/agent-runtime/mcp'
182
+
183
+ const coder = createDefaultCoderDelegate({
184
+ sandboxClient,
185
+ fanoutHarnesses: ['claude-code', 'codex'],
186
+ reviewer: async (output, task) => ({ approved: output.testResult.passed, recommendation: 'ship', readiness: 0.9 }),
187
+ winnerSelection: 'highest-readiness',
188
+ })
189
+ const out = await coder({ goal: 'add a retry with backoff', repoRoot: '/repo', variants: 2 }, ctx)
190
+ ```
191
+
192
+ See [`examples/coder-loop/`](./examples/coder-loop/) and [`examples/agent-into-reviewer/`](./examples/agent-into-reviewer/).
193
+
194
+ ### 6. Valid-only research — `createKbGate`
195
+
196
+ A fail-closed gate so a knowledge base grows with **only grounded facts**. The always-on floor: a fact's `verbatimPassage` must literally appear in its `sourceText` (anti-hallucination), the asserted value must be in the passage, and citations can't point at self-generated artifacts (laundering). You can plug in your own judges. The gate is verdict-only: it returns an accept/veto verdict and leaves remediation to the caller.
197
+
198
+ ```ts
199
+ import { createKbGate } from '@tangle-network/agent-runtime/mcp'
200
+
201
+ const gate = createKbGate({ selfArtifactKinds: ['spec', 'cad_params'] })
202
+ const verdict = await gate({
203
+ claim: 'revenue was $1.2B in 2025',
204
+ value: 1_200_000_000,
205
+ verbatimPassage: 'total revenue was $1,200,000,000 for the fiscal year',
206
+ sourceText: rawSource,
207
+ })
208
+ if (verdict.accepted) writeToKb(fact)
209
+ else console.warn('vetoed by', verdict.vetoedBy, verdict.reason)
210
+ ```
211
+
212
+ `researchLoopRunner` (mode `research`) wraps this with a correct-on-veto remediation loop: research → gate → re-research the vetoed gaps up to `maxRounds`, then **return** the unverified ones (escalate, never silently drop).
213
+
214
+ ### 7. Identity-gated prompt optimization — `optimizePrompt`
215
+
216
+ Optimize any text prompt over agent-eval's `runImprovementLoop`, **identity-gated by construction**: it runs evals, proposes candidates (default `gepaDriver`), and the held-out gate compares candidate vs baseline. `result.prompt` is the **baseline unless the gate decided `ship`** — so registering a prompt for optimization can never regress it.
217
+
218
+ ```ts
219
+ import { optimizePrompt } from '@tangle-network/agent-runtime/improvement'
220
+
221
+ const { prompt, improved, delta } = await optimizePrompt({
222
+ baselinePrompt: CURRENT_SYSTEM_PROMPT,
223
+ runWithPrompt: (candidate, scenario, ctx) => runYourThing(candidate, scenario),
224
+ scenarios, holdoutScenarios, judges, runDir,
225
+ reflection: { llm, model: 'claude-sonnet-4-6' },
226
+ })
227
+ // assign `prompt` unconditionally — it's the safe one
228
+ ```
229
+
230
+ See [`examples/self-improving-loop/`](./examples/self-improving-loop/).
231
+
232
+ ### 8. OpenTelemetry GenAI topology tracing
233
+
234
+ `runLoop` emits a structured event stream; `buildLoopOtelSpans` turns it into a **nested, real-duration span tree** that any GenAI trace viewer (Phoenix, Langfuse, Grafana Tempo, Tangle Intelligence) renders natively. Attributes follow the current GenAI semantic conventions (`gen_ai.operation.name`, `gen_ai.agent.name`, `gen_ai.usage.input_tokens/output_tokens`) plus a `tangle.loop.*` extension for the topology (move kind/rationale, edge lineage, verdict, placement, cost).
235
+
236
+ ```ts
237
+ import { buildLoopOtelSpans, createOtelExporter } from '@tangle-network/agent-runtime'
238
+
239
+ const exporter = createOtelExporter() // reads OTEL_EXPORTER_OTLP_ENDPOINT
240
+ for (const span of buildLoopOtelSpans(loopEvents, traceId)) exporter?.exportSpan(span)
241
+ await exporter?.flush()
40
242
  ```
41
- Production chat turn (90% of products) → handleChatTurn
42
- Declarative agent manifest → defineAgent (/agent)
43
- Cross-process reconnect (X-Execution-ID) → deriveExecutionId
44
- One-shot task with verification + eval → runAgentTask
45
- Streaming task without chat-turn envelope → runAgentTaskStream
46
- Multi-iteration parallel fanout (coders /
47
- researchers proposing N variants) → runLoop + a Driver (/loops)
48
- Tool/MCP delegation server (stdio) → createMcpServer (/mcp)
49
- Analyst surface mutations → runAnalystLoop (/analyst-loop)
50
- Production-run persistence + cost ledger → startRuntimeRun
51
- Cross-site SSO / integrations hub → PlatformAuthClient (/platform)
243
+
244
+ The shape: `loop → loop.round (move + rationale) → loop.iteration (agent, usage, verdict, cost, parent edge)`. See [`examples/with-intelligence-export/`](./examples/with-intelligence-export/).
245
+
246
+ ### 9. MCP delegation server — `agent-runtime-mcp`
247
+
248
+ Expose the five delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) to a sandbox coding-harness agent — mount the canonical server, don't fork delegation logic.
249
+
250
+ ```ts
251
+ import { createMcpServer, createDefaultCoderDelegate } from '@tangle-network/agent-runtime/mcp'
252
+
253
+ const server = createMcpServer({
254
+ coderDelegate: createDefaultCoderDelegate({ sandboxClient }),
255
+ researcherDelegate, // wire your KB-backed researcher
256
+ })
52
257
  ```
53
258
 
259
+ Or mount the `agent-runtime-mcp` stdio bin on a production `AgentProfile.mcp`. See [`examples/mcp-delegation/`](./examples/mcp-delegation/) and [`examples/fleet-delegation/`](./examples/fleet-delegation/).
260
+
261
+ ---
262
+
54
263
  ## Defaults
55
264
 
56
265
  When nothing is specified:
57
266
 
58
267
  | Knob | Default | Override |
59
268
  |---|---|---|
60
- | Backend model | `gpt-4o-mini` (when via `createOpenAICompatibleBackend`) | `model` option, or `MODEL_NAME` env |
61
- | Backend provider | `openai-compat` when `TANGLE_API_KEY` present, else `openai` if `OPENAI_API_KEY` | `MODEL_PROVIDER` env |
269
+ | Backend model | `gpt-4o-mini` (via `createOpenAICompatibleBackend`) | `model` option / `MODEL_NAME` env |
270
+ | Backend provider | `openai-compat` when `TANGLE_API_KEY`, else `openai` if `OPENAI_API_KEY` | `MODEL_PROVIDER` env |
62
271
  | Router base URL | `https://router.tangle.tools/v1` | `TANGLE_ROUTER_BASE_URL` env |
63
272
  | Sandbox base URL | `https://sandbox.tangle.tools` | `SANDBOX_API_URL` env |
64
- | Loop iteration cap | 8 | `runLoop({ maxIterations })` |
65
- | Driver | none — required to pass `Refine` or `FanoutVote` | `createRefineDriver()` or `createFanoutVoteDriver({ n })` |
66
- | Validator | none required if using `runLoop` | profile preset (e.g., `coderProfile().validator`) or your own |
273
+ | Loop iteration cap | 10 (`runLoop`); dynamic driver 8 | `runLoop({ maxIterations })` |
274
+ | Driver | none — required by `runLoop` | `createRefineDriver` / `createFanoutVoteDriver` / `createDynamicDriver` |
275
+ | Winner selection (coder delegate) | `highest-score` | `winnerSelection` option |
276
+ | KB gate min passage | 12 chars | `createKbGate({ minPassageChars })` |
277
+ | `optimizePrompt` gate | `heldOutGate` | `defaultProductionGate` for red-team hardening |
67
278
  | OTEL export | off | set `OTEL_EXPORTER_OTLP_ENDPOINT` |
68
- | Trace propagation through MCP subprocess | off until product wires it | `env.TRACE_ID` + `env.PARENT_SPAN_ID` at MCP launch |
69
-
70
- ## Composition with the rest of the stack
71
-
72
- ```
73
- agent-runtime ──── handleChatTurn (chat turn lifecycle)
74
- defineAgent (declarative manifest)
75
- runLoop (multi-shot kernel)
76
- createMcpServer (delegation tools server)
77
- OTEL export (trace pipeline)
279
+ | Loop-runner mode failure | recorded as `{ ok: false }` | `runDelegatedLoop` never crashes on a thrown engine |
78
280
 
79
- agent-eval ──── runEvalCampaign / runProductionLoop / runAgentMatrix
80
- (consumes agent-runtime traces, scores, gates promotion)
281
+ ---
81
282
 
82
- agent-knowledge ─── proposeKnowledgeWrites / applyKnowledgeWriteBlocks
83
- (analyst-loop produces these; runtime consumes them)
283
+ ## Composition with the stack
84
284
 
85
- sandbox ──── AgentProfile (substrate type), Sandbox.create, exportTraceBundle
86
- (provides the harness execution surface)
87
285
  ```
286
+ agent-runtime ── handleChatTurn · runLoop + drivers · runDelegatedLoop · createMcpServer
287
+ optimizePrompt · createKbGate · buildLoopOtelSpans · defineAgent
88
288
 
89
- Self-improving products consume all four. This package ships a self-contained adoption skill at [`skills/agent-runtime-adoption/SKILL.md`](./skills/agent-runtime-adoption/SKILL.md) — driven loops, topology drivers (refine / fanout-vote / dynamic), the `loopDispatch` campaign bridge, MCP delegation, and identity-gated `optimizePrompt`; it needs only this package + `@tangle-network/agent-eval`. For the end-to-end self-improving pipeline (trace sink → analyst loop → scorecard → production loop → CI), see the broader `agent-eval-adoption` / `agent-stack-adoption` skills.
289
+ agent-eval ── runEvalCampaign · runImprovementLoop (gepaDriver) · heldOutGate · runAgentMatrix
290
+ (consumes runtime traces, scores, gates promotion)
90
291
 
91
- ## Examples
292
+ agent-knowledge ─ proposeKnowledgeWrites / applyKnowledgeWriteBlocks
293
+ (analyst-loop produces these; runtime + createKbGate consume them)
92
294
 
93
- Ordered as a learning progression — each example introduces one concept.
295
+ sandbox ── AgentProfile · Sandbox.create · streamPrompt · exportTraceBundle
296
+ (the harness execution surface every loop runs on)
297
+ ```
94
298
 
95
- **Start here:**
96
- - [`chat-handler/`](./examples/chat-handler/) — `handleChatTurn`, the production centerpiece
299
+ ---
97
300
 
98
- **Add observability + readiness:**
99
- - [`with-knowledge-readiness/`](./examples/with-knowledge-readiness/) — `requiredKnowledge` + `decideKnowledgeReadiness`
100
- - [`sanitized-telemetry-streaming/`](./examples/sanitized-telemetry-streaming/) — `createRuntimeStreamEventCollector` + redaction
101
- - [`runtime-run/`](./examples/runtime-run/) — `startRuntimeRun` + cost ledger persistence
301
+ ## Subpath exports
102
302
 
103
- **Add delegation:**
104
- - [`mcp-delegation/`](./examples/mcp-delegation/) — mount `agent-runtime-mcp` in an `AgentProfile`
303
+ | Import | Owns |
304
+ |---|---|
305
+ | `@tangle-network/agent-runtime` | chat turns, delegated loop-runner, OTEL export, errors, model resolution |
306
+ | `…/agent` | `defineAgent` + surfaces / outcome adapters |
307
+ | `…/loops` | `runLoop` kernel + `refine` / `fanout-vote` / **`dynamic`** drivers + `loopDispatch` |
308
+ | `…/profiles` | `coderProfile`, `researcherProfile` presets |
309
+ | `…/mcp` | `createMcpServer`, `createDefaultCoderDelegate`, **`createKbGate`**, `agent-runtime-mcp` bin |
310
+ | `…/improvement` | **`optimizePrompt`** (text) + `improvementDriver` (code/worktree) |
311
+ | `…/analyst-loop` | `runAnalystLoop` — analyst registry driver |
312
+ | `…/platform` | cross-site SSO + integrations hub |
105
313
 
106
- **Multi-agent fanout (advanced):**
107
- - [`coder-loop/`](./examples/coder-loop/) — `coderProfile` + `runLoop` + `FanoutVote`
108
- - [`researcher-loop/`](./examples/researcher-loop/) — `researcherProfile` + `runLoop` (peer dep: `@tangle-network/agent-knowledge`)
109
- - [`fleet-delegation/`](./examples/fleet-delegation/) — `TANGLE_FLEET_ID` + `createFleetWorkspaceExecutor`
314
+ Bins: `agent-runtime-mcp` (delegation MCP server) · `agent-runtime-loop` (schedulable delegated loop-runner).
110
315
 
111
- ## Stability
316
+ ---
112
317
 
113
- Every public export is annotated `@stable` or `@experimental`. `@stable` exports do not change shape inside a minor. `@experimental` exports may change inside a minor and require a deliberate consumer bump.
318
+ ## Adoption skill
114
319
 
115
- ## Package boundaries
320
+ This package ships a **self-contained adoption skill** at [`skills/agent-runtime-adoption/SKILL.md`](./skills/agent-runtime-adoption/SKILL.md) — driven loops, topology drivers, the `loopDispatch` campaign bridge, MCP delegation, and identity-gated `optimizePrompt`. It needs only this package + `@tangle-network/agent-eval`, so external consumers need nothing private. For the full self-improving pipeline (trace sink → analyst loop → scorecard → production loop → CI), see the `agent-eval-adoption` / `agent-stack-adoption` skills.
116
321
 
117
- | Package | Owns |
118
- |---|---|
119
- | `agent-runtime` | Task lifecycle, adapters, backends, chat-turn engine, model resolution, trace bridge, `defineAgent` |
120
- | `agent-runtime/platform` | Cross-site SSO + integrations hub |
121
- | `agent-runtime/agent` | `defineAgent` + surfaces / outcome adapters |
122
- | `agent-runtime/analyst-loop` | `runAnalystLoop` — analyst registry driver |
123
- | `agent-runtime/loops` | `runLoop` kernel + `Refine` / `FanoutVote` drivers |
124
- | `agent-runtime/profiles` | `coderProfile`, `researcherProfile` presets |
125
- | `agent-runtime/mcp` | `createMcpServer` + `agent-runtime-mcp` bin (5 delegation tools) |
126
- | `agent-eval` | Evals, judges, scorecards, RL bridge, release evidence, matrix |
127
- | `agent-knowledge` | Evidence, claims, wiki pages, retrieval |
128
- | `sandbox` | `AgentProfile`, `Sandbox.create`, `streamPrompt`, `exportTraceBundle` |
322
+ ---
129
323
 
130
- See [`docs/concepts.md`](./docs/concepts.md) for the deeper mental model. For multi-agent conversations see [`docs/agent-bus-protocol.md`](./docs/agent-bus-protocol.md) (cross-gateway header contract), [`docs/conversation-economics.md`](./docs/conversation-economics.md) (who pays for what — `authSource`), and [`docs/durability-adapters.md`](./docs/durability-adapters.md) (SQL-backed `ConversationJournal` for D1, postgres, sqlite, libSQL).
324
+ ## Stability · Tests · Docs
131
325
 
132
- ## Tests
326
+ Every public export is annotated `@stable` or `@experimental`. `@stable` exports don't change shape inside a minor; `@experimental` ones may and require a deliberate consumer bump.
133
327
 
134
328
  ```bash
135
- pnpm test # 283+ tests across the kernel + drivers + MCP + backends + analyst-loop
329
+ pnpm test # full suite across the kernel, drivers, MCP, delegate hardening, kb-gate, loop-runner, backends
136
330
  pnpm typecheck
137
331
  pnpm build
138
332
  ```
333
+
334
+ Deeper docs: [`docs/concepts.md`](./docs/concepts.md) (mental model) · [`docs/agent-bus-protocol.md`](./docs/agent-bus-protocol.md) (cross-gateway header contract) · [`docs/conversation-economics.md`](./docs/conversation-economics.md) (who pays — `authSource`) · [`docs/durability-adapters.md`](./docs/durability-adapters.md) (SQL-backed `ConversationJournal`).
@@ -9,11 +9,11 @@ import {
9
9
  } from "./chunk-FNMGYYSS.js";
10
10
  import {
11
11
  createDefaultCoderDelegate
12
- } from "./chunk-VLXRXMTF.js";
12
+ } from "./chunk-FJ4GDNVN.js";
13
13
  import {
14
14
  createDynamicDriver,
15
15
  runLoop
16
- } from "./chunk-7JBDJQLO.js";
16
+ } from "./chunk-OISRXLWI.js";
17
17
  import {
18
18
  ConfigError
19
19
  } from "./chunk-SQSCRJ7U.js";
@@ -198,4 +198,4 @@ export {
198
198
  runLoopRunnerCli,
199
199
  parseLoopRunnerArgv
200
200
  };
201
- //# sourceMappingURL=chunk-AXWGLYSF.js.map
201
+ //# sourceMappingURL=chunk-4GI7C36B.js.map
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  runLoop
3
- } from "./chunk-7JBDJQLO.js";
3
+ } from "./chunk-OISRXLWI.js";
4
4
  import {
5
5
  coderProfile,
6
6
  multiHarnessCoderFanout
@@ -78,6 +78,7 @@ function createDefaultCoderDelegate(options) {
78
78
  const sandboxClient = executor.client;
79
79
  const fanoutHarnesses = options.fanoutHarnesses;
80
80
  const maxConcurrency = options.maxConcurrency ?? 4;
81
+ const traceEmitter = options.traceEmitter;
81
82
  return async (args, ctx) => {
82
83
  const task = {
83
84
  goal: buildCoderGoal(args),
@@ -97,7 +98,7 @@ function createDefaultCoderDelegate(options) {
97
98
  output,
98
99
  validator,
99
100
  task,
100
- ctx: { sandboxClient, signal: ctx.signal },
101
+ ctx: { sandboxClient, signal: ctx.signal, ...traceEmitter ? { traceEmitter } : {} },
101
102
  maxIterations: 1,
102
103
  maxConcurrency
103
104
  });
@@ -122,7 +123,7 @@ function createDefaultCoderDelegate(options) {
122
123
  output: fanout.output,
123
124
  validator: fanout.validator,
124
125
  task,
125
- ctx: { sandboxClient, signal: ctx.signal },
126
+ ctx: { sandboxClient, signal: ctx.signal, ...traceEmitter ? { traceEmitter } : {} },
126
127
  maxIterations: variants,
127
128
  maxConcurrency: Math.min(maxConcurrency, variants)
128
129
  });
@@ -209,4 +210,4 @@ export {
209
210
  createFleetWorkspaceExecutor,
210
211
  createDefaultCoderDelegate
211
212
  };
212
- //# sourceMappingURL=chunk-VLXRXMTF.js.map
213
+ //# sourceMappingURL=chunk-FJ4GDNVN.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/mcp/executor.ts","../src/mcp/delegates.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Delegation executors — the layer between MCP delegates and the sandbox\n * substrate. Each executor exposes a {@link LoopSandboxClient} the kernel\n * consumes plus a placement tag so the trace pipeline can correlate workers\n * with their physical placement.\n *\n * Two implementations ship in-box:\n *\n * - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh\n * sandbox sibling to the caller. Default when the MCP server runs as a\n * standalone CLI mounted outside a fleet.\n *\n * - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines\n * in the caller's existing fleet so worker diffs land directly on the\n * caller's filesystem (the fleet's shared workspace). Selected when the\n * parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.\n */\n\nimport type { CreateSandboxOptions, SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopSandboxClient, LoopSandboxPlacement } from '../loops'\n\n/** @experimental */\nexport interface DelegationExecutor {\n /** Sandbox client the kernel calls. Returned with `describePlacement` set. */\n readonly client: LoopSandboxClient\n /** Best-effort one-liner used in stderr boot logs and diagnostics. 
*/\n describe(): string\n}\n\n/** @experimental */\nexport interface SiblingSandboxExecutorOptions {\n client: LoopSandboxClient\n}\n\n/**\n * Wrap a raw sandbox SDK client so the kernel emits\n * `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.\n *\n * The returned client `.create()` delegates to the underlying client; the\n * only added behavior is a `describePlacement` tag the kernel reads.\n *\n * @experimental\n */\nexport function createSiblingSandboxExecutor(\n options: SiblingSandboxExecutorOptions,\n): DelegationExecutor {\n const underlying = options.client\n const client: LoopSandboxClient = {\n create(opts?: CreateSandboxOptions): Promise<SandboxInstance> {\n return underlying.create(opts)\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n return { kind: 'sibling', sandboxId: readId(box) }\n },\n }\n return {\n client,\n describe(): string {\n return 'sibling-sandbox (each delegation = fresh sandbox via client.create)'\n },\n }\n}\n\n/**\n * Minimal `SandboxFleet` surface the fleet executor calls. Declared\n * structurally so tests can pass an in-memory stub without instantiating the\n * sandbox SDK.\n *\n * @experimental\n */\nexport interface FleetHandle {\n readonly fleetId: string\n /** Machine ids in dispatch-eligible order. The executor round-robins. */\n readonly ids: ReadonlyArray<string>\n /** Resolve a machine id to its `SandboxInstance` — that machine is mounted\n * on the fleet's shared workspace, so any diff the worker writes lands on\n * every other fleet machine's filesystem too. */\n sandbox(machineId: string): Promise<SandboxInstance>\n}\n\n/** @experimental */\nexport interface FleetWorkspaceExecutorOptions {\n fleet: FleetHandle\n /**\n * Override the machine-selection policy. 
Default = round-robin across\n * `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the\n * coordinator machine the MCP server is running on).\n */\n selectMachine?: (call: { callIndex: number; ids: ReadonlyArray<string> }) => string\n /**\n * Machine ids to skip during default round-robin. Set to the caller's own\n * machineId so workers don't compete with the orchestrator on the same VM.\n */\n excludeMachineIds?: ReadonlyArray<string>\n}\n\n/**\n * Build an executor that resolves each delegated iteration to an existing\n * machine in `fleet`. The fleet's shared-workspace policy means the worker\n * machine sees the caller's filesystem — diffs land in-place with no\n * cross-sandbox copy step.\n *\n * @experimental\n */\nexport function createFleetWorkspaceExecutor(\n options: FleetWorkspaceExecutorOptions,\n): DelegationExecutor {\n const fleet = options.fleet\n const exclude = new Set(options.excludeMachineIds ?? [])\n let callIndex = 0\n // machineId-by-sandboxId, populated as we resolve machines so\n // `describePlacement` can recover the assignment from the SandboxInstance\n // the kernel hands back.\n const placementBySandboxId = new Map<string, { machineId: string }>()\n\n const client: LoopSandboxClient = {\n async create(): Promise<SandboxInstance> {\n const ids = fleet.ids.filter((id) => !exclude.has(id))\n if (ids.length === 0) {\n throw new Error(\n `agent-runtime: fleet ${fleet.fleetId} has no eligible worker machines (ids=[${fleet.ids.join(',')}], excluded=[${[...exclude].join(',')}])`,\n )\n }\n const selector = options.selectMachine\n const machineId = selector ? 
selector({ callIndex, ids }) : ids[callIndex % ids.length]\n callIndex += 1\n if (typeof machineId !== 'string' || machineId.length === 0) {\n throw new Error('agent-runtime: fleet executor selectMachine returned an empty machine id')\n }\n const box = await fleet.sandbox(machineId)\n const sandboxId = readId(box)\n if (sandboxId) placementBySandboxId.set(sandboxId, { machineId })\n return box\n },\n describePlacement(box: SandboxInstance): LoopSandboxPlacement {\n const sandboxId = readId(box)\n const recorded = sandboxId ? placementBySandboxId.get(sandboxId) : undefined\n return {\n kind: 'fleet',\n sandboxId,\n fleetId: fleet.fleetId,\n machineId: recorded?.machineId,\n }\n },\n }\n\n return {\n client,\n describe(): string {\n const excluded = exclude.size > 0 ? ` (excluded=[${[...exclude].join(',')}])` : ''\n return `fleet-workspace (fleetId=${fleet.fleetId}, machines=[${fleet.ids.join(',')}]${excluded})`\n },\n }\n}\n\nfunction readId(box: SandboxInstance): string | undefined {\n const raw = (box as unknown as { id?: unknown }).id\n return typeof raw === 'string' && raw.length > 0 ? raw : undefined\n}\n","/**\n * @experimental\n *\n * Delegate factories — the layer between MCP tool handlers and the\n * underlying `runLoop` runners.\n *\n * The MCP server is profile-agnostic: it owns the task queue + feedback\n * store + transport. Each `*Delegate` is the closure that the queue\n * invokes when a task runs. Consumers can override either delegate to\n * inject custom drivers, mocks, fleet-aware dispatchers, etc.\n *\n * The default coder delegate is wired here because we own\n * `coderProfile` / `multiHarnessCoderFanout`. The default researcher\n * delegate is **not** wired in this file — `agent-knowledge` cannot be\n * imported from `agent-runtime` without inducing a cycle. 
Consumers\n * pass `researcherDelegate` explicitly when constructing the server.\n */\n\nimport type { Iteration, LoopSandboxClient, LoopTraceEmitter } from '../loops'\nimport { runLoop } from '../loops'\nimport { type CoderOutput, coderProfile, multiHarnessCoderFanout } from '../profiles/coder'\nimport { createSiblingSandboxExecutor, type DelegationExecutor } from './executor'\nimport type {\n CoderTask,\n DelegateCodeArgs,\n DelegateResearchArgs,\n DelegationProgress,\n ResearchOutputShape,\n} from './types'\n\n/** @experimental */\nexport interface DelegateRunCtx {\n signal: AbortSignal\n report(progress: DelegationProgress): void\n}\n\n/** @experimental */\nexport type CoderDelegate = (\n args: DelegateCodeArgs,\n ctx: DelegateRunCtx,\n) => Promise<import('../profiles/coder').CoderOutput>\n\n/** @experimental */\nexport type ResearcherDelegate = (\n args: DelegateResearchArgs,\n ctx: DelegateRunCtx,\n) => Promise<ResearchOutputShape>\n\n/** @experimental Structured review verdict over a coder candidate. */\nexport interface CoderReview {\n /** Gate: only approved candidates are eligible to win. */\n approved: boolean\n /** Reviewer's recommendation — surfaced in traces. */\n recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject'\n /** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. */\n readiness: number\n notes?: string\n}\n\n/**\n * @experimental\n *\n * Optional adversarial reviewer over a coder candidate that already passed\n * mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded\n * from the ai-trading-blueprint delegation MCP: a candidate is only eligible to\n * win if the reviewer approves it. 
The reviewer is the consumer's seam — an LLM\n * judge, a `pnpm review` command, anything returning a `CoderReview`.\n */\nexport type CoderReviewer = (\n output: import('../profiles/coder').CoderOutput,\n task: CoderTask,\n ctx: { signal: AbortSignal },\n) => Promise<CoderReview> | CoderReview\n\n/**\n * @experimental Winner-selection strategy among validated (+ reviewed)\n * candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`\n * (the kernel's behavior — preserves backward compatibility).\n */\nexport type CoderWinnerSelection =\n | 'highest-score'\n | 'smallest-diff'\n | 'highest-readiness'\n | 'first-approved'\n\n/** @experimental */\nexport interface CreateDefaultCoderDelegateOptions {\n /**\n * Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)\n * to control where worker iterations land. `sandboxClient` is a\n * convenience shorthand that wraps the client in a sibling executor — pass\n * one or the other, not both.\n */\n executor?: DelegationExecutor\n /**\n * Convenience shorthand for sibling placement. Equivalent to\n * `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.\n */\n sandboxClient?: LoopSandboxClient\n /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */\n fanoutHarnesses?: string[]\n /** Hard cap on the kernel's per-batch concurrency. Default 4. */\n maxConcurrency?: number\n /**\n * Optional adversarial reviewer. When set, a candidate must pass mechanical\n * validation AND `reviewer.approved` to be eligible to win — empty/secret/\n * test-failing patches are already gone; this catches the \"compiles + passes\n * but wrong/unsafe\" class the deterministic validator can't see.\n */\n reviewer?: CoderReviewer\n /** Winner-selection strategy among eligible candidates. Default `highest-score`. */\n winnerSelection?: CoderWinnerSelection\n /**\n * Loop trace emitter forwarded into every delegated `runLoop`. 
Wire\n * `createPropagatingTraceEmitter(readTraceContextFromEnv())` here (the bin\n * does) so delegated build-loops export their topology spans to the OTLP /\n * Tangle Intelligence sink when `OTEL_EXPORTER_OTLP_ENDPOINT` is set — and\n * are a cheap no-op when it isn't. Configurable by construction.\n */\n traceEmitter?: LoopTraceEmitter\n}\n\n/**\n * Build a coder delegate that drives `runLoop` against the project's\n * sandbox client + coder profile. When `args.variants > 1` it switches\n * to the multi-harness fanout topology.\n *\n * @experimental\n */\nexport function createDefaultCoderDelegate(\n options: CreateDefaultCoderDelegateOptions,\n): CoderDelegate {\n const executor = resolveExecutor(options)\n const sandboxClient = executor.client\n const fanoutHarnesses = options.fanoutHarnesses\n const maxConcurrency = options.maxConcurrency ?? 4\n const traceEmitter = options.traceEmitter\n return async (args, ctx) => {\n const task: CoderTask = {\n goal: buildCoderGoal(args),\n repoRoot: args.repoRoot,\n testCmd: args.config?.testCmd,\n typecheckCmd: args.config?.typecheckCmd,\n forbiddenPaths: args.config?.forbiddenPaths,\n maxDiffLines: args.config?.maxDiffLines,\n }\n const variants = Math.max(1, Math.trunc(args.variants ?? 1))\n ctx.report({ iteration: 0, phase: 'starting' })\n if (variants <= 1) {\n const { agentRunSpec, output, validator } = coderProfile({ task })\n const result = await runLoop({\n driver: singleShotDriver,\n agentRun: agentRunSpec,\n output,\n validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal, ...(traceEmitter ? { traceEmitter } : {}) },\n maxIterations: 1,\n maxConcurrency,\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 
'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: 1, phase: 'completed' })\n return chosen\n }\n const fanout = multiHarnessCoderFanout(\n fanoutHarnesses && fanoutHarnesses.length > 0\n ? { harnesses: fanoutHarnesses.slice(0, variants) }\n : { harnesses: undefined },\n )\n const agentRuns = fanout.agentRuns.slice(0, variants)\n const result = await runLoop({\n driver: fanout.driver,\n agentRuns,\n output: fanout.output,\n validator: fanout.validator,\n task,\n ctx: { sandboxClient, signal: ctx.signal, ...(traceEmitter ? { traceEmitter } : {}) },\n maxIterations: variants,\n maxConcurrency: Math.min(maxConcurrency, variants),\n })\n const chosen = await pickCoderWinner({\n iterations: result.iterations,\n reviewer: options.reviewer,\n selection: options.winnerSelection ?? 'highest-score',\n task,\n signal: ctx.signal,\n })\n if (!chosen) throw new Error(noWinnerMessage(options.reviewer))\n ctx.report({ iteration: agentRuns.length, phase: 'completed' })\n return chosen\n }\n}\n\ninterface PickCoderWinnerArgs {\n iterations: ReadonlyArray<Iteration<CoderTask, CoderOutput>>\n reviewer: CoderReviewer | undefined\n selection: CoderWinnerSelection\n task: CoderTask\n signal: AbortSignal\n}\n\ninterface CoderCandidate {\n index: number\n output: CoderOutput\n score: number\n readiness: number\n}\n\n/**\n * Pick the winning coder candidate from a finished loop's iterations:\n * 1. keep only mechanically-VALID candidates (the validator already gated\n * tests/typecheck/forbidden/diff/no-op/secrets),\n * 2. if a `reviewer` is wired, keep only those it APPROVES,\n * 3. 
select among survivors by the chosen strategy.\n * Returns `undefined` when nothing survives — the delegate fails loud.\n */\nasync function pickCoderWinner(args: PickCoderWinnerArgs): Promise<CoderOutput | undefined> {\n const valid: CoderCandidate[] = []\n for (const iter of args.iterations) {\n if (iter.output === undefined || iter.error || iter.verdict?.valid !== true) continue\n valid.push({\n index: iter.index,\n output: iter.output,\n score: iter.verdict.score ?? 0,\n readiness: iter.verdict.score ?? 0,\n })\n }\n if (valid.length === 0) return undefined\n\n let eligible = valid\n if (args.reviewer) {\n eligible = []\n for (const c of valid) {\n const review = await args.reviewer(c.output, args.task, { signal: args.signal })\n if (review.approved) eligible.push({ ...c, readiness: review.readiness })\n }\n if (eligible.length === 0) return undefined\n }\n\n return selectCoderCandidate(eligible, args.selection).output\n}\n\n/** Apply the winner-selection strategy; ties broken by earliest iteration. */\nfunction selectCoderCandidate(\n candidates: CoderCandidate[],\n selection: CoderWinnerSelection,\n): CoderCandidate {\n const diffLines = (c: CoderCandidate) =>\n c.output.diffStats.insertions + c.output.diffStats.deletions\n const sorted = [...candidates].sort((a, b) => {\n switch (selection) {\n case 'smallest-diff':\n return diffLines(a) - diffLines(b) || a.index - b.index\n case 'highest-readiness':\n return b.readiness - a.readiness || a.index - b.index\n case 'first-approved':\n return a.index - b.index\n default:\n return b.score - a.score || a.index - b.index\n }\n })\n return sorted[0]!\n}\n\nfunction noWinnerMessage(reviewer: CoderReviewer | undefined): string {\n return reviewer\n ? 
'coder delegate: no candidate passed validation + review'\n : 'coder delegate: no candidate passed validation'\n}\n\nfunction buildCoderGoal(args: DelegateCodeArgs): string {\n if (!args.contextHint) return args.goal\n return [args.goal, '', '## Context', args.contextHint].join('\\n')\n}\n\nfunction resolveExecutor(options: CreateDefaultCoderDelegateOptions): DelegationExecutor {\n if (options.executor && options.sandboxClient) {\n throw new Error('createDefaultCoderDelegate: pass exactly one of `executor` or `sandboxClient`')\n }\n if (options.executor) return options.executor\n if (options.sandboxClient) {\n return createSiblingSandboxExecutor({ client: options.sandboxClient })\n }\n throw new Error('createDefaultCoderDelegate: `executor` or `sandboxClient` is required')\n}\n\n/**\n * Single-shot driver — plan one task on iteration 0, stop after one\n * iteration. Used by the coder delegate when `variants <= 1`. Keeps the\n * runLoop kernel-level accounting (timing, cost, trace emission) while\n * skipping fanout/refine topology overhead.\n */\nconst singleShotDriver = {\n name: 'mcp-single-shot',\n async plan<Task>(task: Task, history: ReadonlyArray<unknown>): Promise<Task[]> {\n return history.length === 0 ? [task] : []\n },\n decide(history: ReadonlyArray<unknown>): 'pick-winner' | 'fail' {\n return history.length > 0 ? 
'pick-winner' : 'fail'\n },\n}\n"],"mappings":";;;;;;;;;AA6CO,SAAS,6BACd,SACoB;AACpB,QAAM,aAAa,QAAQ;AAC3B,QAAM,SAA4B;AAAA,IAChC,OAAO,MAAuD;AAC5D,aAAO,WAAW,OAAO,IAAI;AAAA,IAC/B;AAAA,IACA,kBAAkB,KAA4C;AAC5D,aAAO,EAAE,MAAM,WAAW,WAAW,OAAO,GAAG,EAAE;AAAA,IACnD;AAAA,EACF;AACA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,aAAO;AAAA,IACT;AAAA,EACF;AACF;AA2CO,SAAS,6BACd,SACoB;AACpB,QAAM,QAAQ,QAAQ;AACtB,QAAM,UAAU,IAAI,IAAI,QAAQ,qBAAqB,CAAC,CAAC;AACvD,MAAI,YAAY;AAIhB,QAAM,uBAAuB,oBAAI,IAAmC;AAEpE,QAAM,SAA4B;AAAA,IAChC,MAAM,SAAmC;AACvC,YAAM,MAAM,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;AACrD,UAAI,IAAI,WAAW,GAAG;AACpB,cAAM,IAAI;AAAA,UACR,wBAAwB,MAAM,OAAO,0CAA0C,MAAM,IAAI,KAAK,GAAG,CAAC,gBAAgB,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC;AAAA,QAC1I;AAAA,MACF;AACA,YAAM,WAAW,QAAQ;AACzB,YAAM,YAAY,WAAW,SAAS,EAAE,WAAW,IAAI,CAAC,IAAI,IAAI,YAAY,IAAI,MAAM;AACtF,mBAAa;AACb,UAAI,OAAO,cAAc,YAAY,UAAU,WAAW,GAAG;AAC3D,cAAM,IAAI,MAAM,0EAA0E;AAAA,MAC5F;AACA,YAAM,MAAM,MAAM,MAAM,QAAQ,SAAS;AACzC,YAAM,YAAY,OAAO,GAAG;AAC5B,UAAI,UAAW,sBAAqB,IAAI,WAAW,EAAE,UAAU,CAAC;AAChE,aAAO;AAAA,IACT;AAAA,IACA,kBAAkB,KAA4C;AAC5D,YAAM,YAAY,OAAO,GAAG;AAC5B,YAAM,WAAW,YAAY,qBAAqB,IAAI,SAAS,IAAI;AACnE,aAAO;AAAA,QACL,MAAM;AAAA,QACN;AAAA,QACA,SAAS,MAAM;AAAA,QACf,WAAW,UAAU;AAAA,MACvB;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AAAA,IACL;AAAA,IACA,WAAmB;AACjB,YAAM,WAAW,QAAQ,OAAO,IAAI,eAAe,CAAC,GAAG,OAAO,EAAE,KAAK,GAAG,CAAC,OAAO;AAChF,aAAO,4BAA4B,MAAM,OAAO,eAAe,MAAM,IAAI,KAAK,GAAG,CAAC,IAAI,QAAQ;AAAA,IAChG;AAAA,EACF;AACF;AAEA,SAAS,OAAO,KAA0C;AACxD,QAAM,MAAO,IAAoC;AACjD,SAAO,OAAO,QAAQ,YAAY,IAAI,SAAS,IAAI,MAAM;AAC3D;;;AC/BO,SAAS,2BACd,SACe;AACf,QAAM,WAAW,gBAAgB,OAAO;AACxC,QAAM,gBAAgB,SAAS;AAC/B,QAAM,kBAAkB,QAAQ;AAChC,QAAM,iBAAiB,QAAQ,kBAAkB;AACjD,QAAM,eAAe,QAAQ;AAC7B,SAAO,OAAO,MAAM,QAAQ;AAC1B,UAAM,OAAkB;AAAA,MACtB,MAAM,eAAe,IAAI;AAAA,MACzB,UAAU,KAAK;AAAA,MACf,SAAS,KAAK,QAAQ;AAAA,MACtB,cAAc,KAAK,QAAQ;AAAA,MAC3B,gBAAgB,KAAK,QAAQ;AAAA,MAC7B,cAAc,KAAK,QAAQ;AAAA,IAC7B;AACA,UAAM,WAAW,KAAK,IAAI,GAAG,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAC3D,QAAI,OAAO,EAAE,WAAW,GAA
G,OAAO,WAAW,CAAC;AAC9C,QAAI,YAAY,GAAG;AACjB,YAAM,EAAE,cAAc,QAAQ,UAAU,IAAI,aAAa,EAAE,KAAK,CAAC;AACjE,YAAMA,UAAS,MAAM,QAAQ;AAAA,QAC3B,QAAQ;AAAA,QACR,UAAU;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA,KAAK,EAAE,eAAe,QAAQ,IAAI,QAAQ,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC,EAAG;AAAA,QACpF,eAAe;AAAA,QACf;AAAA,MACF,CAAC;AACD,YAAMC,UAAS,MAAM,gBAAgB;AAAA,QACnC,YAAYD,QAAO;AAAA,QACnB,UAAU,QAAQ;AAAA,QAClB,WAAW,QAAQ,mBAAmB;AAAA,QACtC;AAAA,QACA,QAAQ,IAAI;AAAA,MACd,CAAC;AACD,UAAI,CAACC,QAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,UAAI,OAAO,EAAE,WAAW,GAAG,OAAO,YAAY,CAAC;AAC/C,aAAOA;AAAA,IACT;AACA,UAAM,SAAS;AAAA,MACb,mBAAmB,gBAAgB,SAAS,IACxC,EAAE,WAAW,gBAAgB,MAAM,GAAG,QAAQ,EAAE,IAChD,EAAE,WAAW,OAAU;AAAA,IAC7B;AACA,UAAM,YAAY,OAAO,UAAU,MAAM,GAAG,QAAQ;AACpD,UAAM,SAAS,MAAM,QAAQ;AAAA,MAC3B,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,QAAQ,OAAO;AAAA,MACf,WAAW,OAAO;AAAA,MAClB;AAAA,MACA,KAAK,EAAE,eAAe,QAAQ,IAAI,QAAQ,GAAI,eAAe,EAAE,aAAa,IAAI,CAAC,EAAG;AAAA,MACpF,eAAe;AAAA,MACf,gBAAgB,KAAK,IAAI,gBAAgB,QAAQ;AAAA,IACnD,CAAC;AACD,UAAM,SAAS,MAAM,gBAAgB;AAAA,MACnC,YAAY,OAAO;AAAA,MACnB,UAAU,QAAQ;AAAA,MAClB,WAAW,QAAQ,mBAAmB;AAAA,MACtC;AAAA,MACA,QAAQ,IAAI;AAAA,IACd,CAAC;AACD,QAAI,CAAC,OAAQ,OAAM,IAAI,MAAM,gBAAgB,QAAQ,QAAQ,CAAC;AAC9D,QAAI,OAAO,EAAE,WAAW,UAAU,QAAQ,OAAO,YAAY,CAAC;AAC9D,WAAO;AAAA,EACT;AACF;AAyBA,eAAe,gBAAgB,MAA6D;AAC1F,QAAM,QAA0B,CAAC;AACjC,aAAW,QAAQ,KAAK,YAAY;AAClC,QAAI,KAAK,WAAW,UAAa,KAAK,SAAS,KAAK,SAAS,UAAU,KAAM;AAC7E,UAAM,KAAK;AAAA,MACT,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,OAAO,KAAK,QAAQ,SAAS;AAAA,MAC7B,WAAW,KAAK,QAAQ,SAAS;AAAA,IACnC,CAAC;AAAA,EACH;AACA,MAAI,MAAM,WAAW,EAAG,QAAO;AAE/B,MAAI,WAAW;AACf,MAAI,KAAK,UAAU;AACjB,eAAW,CAAC;AACZ,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,MAAM,KAAK,SAAS,EAAE,QAAQ,KAAK,MAAM,EAAE,QAAQ,KAAK,OAAO,CAAC;AAC/E,UAAI,OAAO,SAAU,UAAS,KAAK,EAAE,GAAG,GAAG,WAAW,OAAO,UAAU,CAAC;AAAA,IAC1E;AACA,QAAI,SAAS,WAAW,EAAG,QAAO;AAAA,EACpC;AAEA,SAAO,qBAAqB,UAAU,KAAK,SAAS,EAAE;AACxD;AAGA,SAAS,qBACP,YACA,WACgB;AAChB,QAAM,YAAY,CAAC,MACjB,EAAE,OAAO,UAAU,aAAa,EAAE,OAAO,UAAU;AACrD,QAAM,SAAS,CAAC,GAAG,UAAU,EAAE,KAAK,C
AAC,GAAG,MAAM;AAC5C,YAAQ,WAAW;AAAA,MACjB,KAAK;AACH,eAAO,UAAU,CAAC,IAAI,UAAU,CAAC,KAAK,EAAE,QAAQ,EAAE;AAAA,MACpD,KAAK;AACH,eAAO,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE;AAAA,MAClD,KAAK;AACH,eAAO,EAAE,QAAQ,EAAE;AAAA,MACrB;AACE,eAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,IAC5C;AAAA,EACF,CAAC;AACD,SAAO,OAAO,CAAC;AACjB;AAEA,SAAS,gBAAgB,UAA6C;AACpE,SAAO,WACH,4DACA;AACN;AAEA,SAAS,eAAe,MAAgC;AACtD,MAAI,CAAC,KAAK,YAAa,QAAO,KAAK;AACnC,SAAO,CAAC,KAAK,MAAM,IAAI,cAAc,KAAK,WAAW,EAAE,KAAK,IAAI;AAClE;AAEA,SAAS,gBAAgB,SAAgE;AACvF,MAAI,QAAQ,YAAY,QAAQ,eAAe;AAC7C,UAAM,IAAI,MAAM,+EAA+E;AAAA,EACjG;AACA,MAAI,QAAQ,SAAU,QAAO,QAAQ;AACrC,MAAI,QAAQ,eAAe;AACzB,WAAO,6BAA6B,EAAE,QAAQ,QAAQ,cAAc,CAAC;AAAA,EACvE;AACA,QAAM,IAAI,MAAM,uEAAuE;AACzF;AAQA,IAAM,mBAAmB;AAAA,EACvB,MAAM;AAAA,EACN,MAAM,KAAW,MAAY,SAAkD;AAC7E,WAAO,QAAQ,WAAW,IAAI,CAAC,IAAI,IAAI,CAAC;AAAA,EAC1C;AAAA,EACA,OAAO,SAAyD;AAC9D,WAAO,QAAQ,SAAS,IAAI,gBAAgB;AAAA,EAC9C;AACF;","names":["result","chosen"]}