@tangle-network/agent-runtime 0.51.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/agent.d.ts +2 -2
  2. package/dist/agent.js +2 -2
  3. package/dist/analyst-loop.d.ts +1 -1
  4. package/dist/{chunk-HAA4KZUD.js → chunk-5M2WDWBI.js} +3 -3
  5. package/dist/{chunk-HYG4ISNS.js → chunk-AYRQZRDV.js} +2 -2
  6. package/dist/{chunk-47SWANFA.js → chunk-FO4DCM7R.js} +2 -2
  7. package/dist/{chunk-7JITYN6T.js → chunk-JFIYKDXF.js} +17 -2
  8. package/dist/chunk-JFIYKDXF.js.map +1 -0
  9. package/dist/{chunk-XEI7AIHU.js → chunk-K5M3SHEU.js} +3 -3
  10. package/dist/{chunk-FQH33M5N.js → chunk-K6WP7PYW.js} +67 -61
  11. package/dist/chunk-K6WP7PYW.js.map +1 -0
  12. package/dist/{chunk-FKHNHUXP.js → chunk-P4QNEXFC.js} +2 -2
  13. package/dist/{coder-_YCf3BAK.d.ts → coder-LKm3Mczw.d.ts} +1 -1
  14. package/dist/{delegation-profile-1GbW5yA3.d.ts → delegation-profile-Bvfro2m1.d.ts} +28 -2
  15. package/dist/{driver-DLI1io57.d.ts → driver-B2RKkVJW.d.ts} +1 -1
  16. package/dist/index.d.ts +121 -11
  17. package/dist/index.js +143 -16
  18. package/dist/index.js.map +1 -1
  19. package/dist/intelligence.d.ts +475 -5
  20. package/dist/intelligence.js +547 -3
  21. package/dist/intelligence.js.map +1 -1
  22. package/dist/{kb-gate-CHAyt4aI.d.ts → kb-gate-CKfykcYQ.d.ts} +2 -2
  23. package/dist/{loop-runner-bin-DFUNgpeK.d.ts → loop-runner-bin-D4Ir7b00.d.ts} +4 -4
  24. package/dist/loop-runner-bin.d.ts +5 -5
  25. package/dist/loop-runner-bin.js +3 -3
  26. package/dist/loops.d.ts +7 -5
  27. package/dist/loops.js +3 -1
  28. package/dist/mcp/bin.js +4 -4
  29. package/dist/mcp/index.d.ts +7 -7
  30. package/dist/mcp/index.js +6 -6
  31. package/dist/{openai-tools-D4HLDWgw.d.ts → openai-tools-CKfR3EMh.d.ts} +1 -1
  32. package/dist/profiles.d.ts +2 -2
  33. package/dist/router-client-B0Qi1NiN.d.ts +120 -0
  34. package/dist/{run-loop-BIineL1T.d.ts → run-loop-DgVhucoR.d.ts} +1 -1
  35. package/dist/runtime.d.ts +62 -126
  36. package/dist/runtime.js +3 -1
  37. package/dist/{types-5MGt5KTY.d.ts → types-CNDJCL_0.d.ts} +1 -1
  38. package/dist/{types-BEQsBhOE.d.ts → types-CklkW4Eh.d.ts} +2 -1
  39. package/dist/workflow.d.ts +2 -2
  40. package/dist/workflow.js +1 -1
  41. package/package.json +1 -1
  42. package/skills/agent-runtime-adoption/SKILL.md +41 -26
  43. package/skills/build-with-agent-runtime/SKILL.md +143 -0
  44. package/skills/loop-writer/SKILL.md +6 -7
  45. package/dist/chunk-7JITYN6T.js.map +0 -1
  46. package/dist/chunk-FQH33M5N.js.map +0 -1
  47. package/dist/{chunk-HAA4KZUD.js.map → chunk-5M2WDWBI.js.map} +0 -0
  48. package/dist/{chunk-HYG4ISNS.js.map → chunk-AYRQZRDV.js.map} +0 -0
  49. package/dist/{chunk-47SWANFA.js.map → chunk-FO4DCM7R.js.map} +0 -0
  50. package/dist/{chunk-XEI7AIHU.js.map → chunk-K5M3SHEU.js.map} +0 -0
  51. package/dist/{chunk-FKHNHUXP.js.map → chunk-P4QNEXFC.js.map} +0 -0
@@ -35,14 +35,24 @@ A `Driver<Task, Output, Decision>` is just `plan(task, history) → Task[]`
35
35
  (`[task]`→refine, N copies→fanout, `[]`→stop) + `decide(history) → Decision`.
36
36
  Topology is data; the kernel is topology-agnostic.
37
37
 
38
- ### Three shipped drivers — `@tangle-network/agent-runtime/loops`
39
-
40
- - **`createRefineDriver({ maxIterations?, refineTask? })`** one task/iteration,
41
- validator-gated; replay or rewrite the task until valid or capped. Use for
42
- incremental patches, document revision, anything monotonic.
43
- - **`createFanoutVoteDriver({ n, selector? })`** N parallel attempts in
44
- iteration 0, score once, pick the winner (default: highest valid score). Use
45
- for multi-harness coder fanout, redundant research with disagreement detection.
38
+ ### Topology drivers — `@tangle-network/agent-runtime/loops`
39
+
40
+ > **Stale-name correction (gen-6 consolidation, #165):** the standalone
41
+ > `createRefineDriver` / `createFanoutVoteDriver` factories were **removed** —
42
+ > refine/fanout collapsed into the one recursive agent tree. Canonical today:
43
+ > the personify combinators `loopUntil` (depth/refine) / `fanout` (breadth/vote)
44
+ > and the `Strategy` values `refine` / `sample`, plus `createDriver` for an
45
+ > agent-authored topology. Verify names in `src/runtime/index.ts`; see
46
+ > `build-with-agent-runtime` + `docs/canonical-api.md` §3.1/§3.3 for the live
47
+ > signatures. Likewise `createSandboxPlanner` is gone — pass a `TopologyPlanner`
48
+ > to `createDriver({ planner })` directly.
49
+
50
+ - **`refine` / `loopUntil`** — one attempt/round, validator-gated; iterate over
51
+ one evolving artifact until valid or budget-capped. Use for incremental
52
+ patches, document revision, anything monotonic. (Replaces `createRefineDriver`.)
53
+ - **`sample` / `fanout`** — N attempts at equal budget, score once, pick the
54
+ winner via the single-sourced selector. Use for multi-harness coder fanout,
55
+ redundant research with disagreement detection. (Replaces `createFanoutVoteDriver`.)
46
56
  - **`createDriver({ planner, maxIterations?, maxFanout? })`** — **the
47
57
  agent authors the topology.** `plan`/`decide` are backed by an injected
48
58
  `TopologyPlanner` that emits one `TopologyMove` per round
@@ -56,16 +66,18 @@ round-robins `agentRuns[]` to decide which harness (claude-code / codex /
56
66
  opencode / pi) runs each branch. One driver spans all backends, including
57
67
  fanning a single round across several.
58
68
 
59
- ### Wiring an LLM planner — `createSandboxPlanner`
69
+ ### Wiring an LLM planner — inject a `TopologyPlanner`
70
+
71
+ `createDriver({ planner })` takes an injected `TopologyPlanner` (the standalone
72
+ `createSandboxPlanner` factory was removed in the gen-6 consolidation — verify
73
+ the live shape in `src/runtime/driver.ts` / `src/runtime/index.ts`). The planner
74
+ is the brain (it may call any harness/LLM to author the move); the driver maps
75
+ each `TopologyMove` onto kernel structure.
60
76
 
61
77
  ```ts
62
- import { createDriver, createSandboxPlanner, runLoop } from '@tangle-network/agent-runtime/loops'
78
+ import { createDriver, runLoop, type TopologyPlanner } from '@tangle-network/agent-runtime/loops'
63
79
 
64
- const planner = createSandboxPlanner<Task, Out>({
65
- client, profile: plannerProfile, // any harness; cheap model is fine
66
- decodeTask: (raw) => raw as Task, // envelope task → domain Task
67
- // buildPrompt? — defaults to a history-summary prompt; override to customize
68
- })
80
+ const planner: TopologyPlanner<Task, Out> = {/* plan() → one {kind:'refine'|'fanout'|'stop',…} per round */}
69
81
  const result = await runLoop({
70
82
  driver: createDriver({ planner, maxIterations: 8 }),
71
83
  agentRuns: workerSpecs, output, validator, task, ctx: { sandboxClient: client },
@@ -88,23 +100,25 @@ a topology nobody chose.
88
100
  - Dynamic driver: set the kernel's `runLoop` `maxIterations >=` the driver's so
89
101
  the driver's cap governs and the loop closes on a clean `'done'`.
90
102
 
91
- ## Campaign bridge — `loopDispatch` / `loopCampaignDispatch`
103
+ ## Campaign bridge — `loopDispatch`
92
104
 
93
105
  To run `runLoop` as an agent-eval campaign cell, do NOT hand-build the ExecCtx +
94
106
  forward trace + report usage every time (the third is silent — forgetting it
95
- yields a `{0,0}` cell `assertRealBackend` reads as a stub). Use the adapter:
107
+ yields a `{0,0}` cell `assertRealBackend` reads as a stub). Use the one bridge,
108
+ `loopDispatch` (the old `loopCampaignDispatch` name was consolidated away; verify
109
+ in `src/runtime/index.ts`):
96
110
 
97
111
  ```ts
98
- import { loopCampaignDispatch } from '@tangle-network/agent-runtime/loops'
99
- const dispatch = loopCampaignDispatch({
112
+ import { loopDispatch } from '@tangle-network/agent-runtime/loops'
113
+ const dispatch = loopDispatch({
100
114
  sandboxClient,
101
- toLoopOptions: (scenario) => ({ driver, agentRun, output, validator, task: toTask(scenario) }),
115
+ toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task: toTask(scenario) }),
102
116
  // toArtifact? — defaults to result.winner?.output
103
117
  })
104
118
  // pass `dispatch` to runCampaign / runEvalCampaign; usage + trace are auto-forwarded
105
119
  ```
106
120
 
107
- `loopDispatch` is the `runProfileMatrix` variant (profile is an axis).
121
+ `loopDispatch` doubles as the `runProfileMatrix` variant (the `profile` arg is an axis).
108
122
 
109
123
  ## Identity-gated optimization — agent-eval's `selfImprove`
110
124
 
@@ -159,11 +173,12 @@ Mount it on a production `AgentProfile.mcp`; do not re-implement delegation.
159
173
 
160
174
  ## Acceptance checklist
161
175
 
162
- - [ ] Topology is a `Driver`, not hard-coded control flow. Reuse refine /
163
- fanout-vote / dynamic; build a custom `Driver` against
164
- `loops/types.ts:Driver` only when none fit — never fork the kernel.
165
- - [ ] `runLoop` is bridged to campaigns via `loopDispatch` / `loopCampaignDispatch`
166
- (usage + trace auto-forwarded), not a hand-rolled ExecCtx.
176
+ - [ ] Topology is a `Driver`/combinator, not hard-coded control flow. Reuse
177
+ `refine`/`loopUntil`, `sample`/`fanout`, or the agent-authored `createDriver`;
178
+ build a custom `Driver` against `loops/types.ts:Driver` only when none fit —
179
+ never fork the kernel.
180
+ - [ ] `runLoop` is bridged to campaigns via `loopDispatch` (usage + trace
181
+ auto-forwarded), not a hand-rolled ExecCtx.
167
182
  - [ ] Every optimizable prompt is registered through `selfImprove` (or the
168
183
  product's existing `runImprovementLoop`), identity-gated on a held-out set.
169
184
  - [ ] Boundaries fail loud: no `null` sandbox client, no silent adapter return,
@@ -0,0 +1,143 @@
1
+ ---
2
+ name: build-with-agent-runtime
3
+ description: Use before hand-rolling a tool loop, driver, corpus, or optimizer wrapper. Create an agent genome, run it on a benchmark, optimize+gate it, observe/ship it with @tangle-network/agent-runtime. The genome→run→optimize→observe spine.
4
+ ---
5
+
6
+ # build-with-agent-runtime
7
+
8
+ The one create→run→optimize→observe seam for `@tangle-network/agent-runtime`. A
9
+ **genome** (an `AgentProfile`/`AgentSurfaces` — systemPrompt + skills + tools +
10
+ mcp + knowledge + memory + rag as ONE combined surface) runs as a
11
+ **driver⟷worker** shape over a **benchmark**, gets **optimized by a gated loop**
12
+ that evolves the genome and certifies wins on a **frozen holdout**, and is
13
+ **observed** through the one lifecycle stream. The selector is never the judge;
14
+ observation attaches to the loop, never to the portable genome.
15
+
16
+ If you are about to write a `runConversation`, a "skill optimizer", a
17
+ "profile-seam", a depth-vs-breadth A/B harness, a bootstrap loop, or a
18
+ `new Sandbox(...)` + stream + read dance — **stop.** It already exists, and a parallel implementation
19
+ silently breaks a load-bearing invariant (equal-k, selector≠judge,
20
+ capture-integrity, or eval/prod parity).
21
+
22
+ ## Load order — point at source, never freeze snippets
23
+
24
+ This skill carries **no API snippets**. The barrel MOVES (`./loops` is a
25
+ back-compat alias of `./runtime`), the agent-eval pin drifts, and signatures get
26
+ corrected in place. Freezing a snippet here guarantees rot. Instead, read, in
27
+ order, and re-verify against source:
28
+
29
+ 1. **`docs/canonical-api.md`** — the source of truth: the §2 decision table
30
+ ("I want to X → use Y → NOT Z"), §3 per-subsystem signatures (each cited
31
+ `file:line`), §4 the end-to-end recipe, §5 the recursive atom, §6 the
32
+ two-substrate map. Every signature there was read from source.
33
+ 2. **`grep` the export barrel** — `grep -nE 'export (function|const|type)' src/runtime/index.ts`
34
+ (and `src/agent/index.ts`, `src/improvement/index.ts`, `src/mcp/index.ts`,
35
+ `src/intelligence/index.ts`) for the live names + subpaths. `./loops` and
36
+ `./runtime` resolve to the SAME barrel (`package.json` maps both to
37
+ `src/runtime/index.ts`).
38
+ 3. **`bench/HARNESS.md`** — the experiment-harness map: commands, the
39
+ `rollout → corpus → selector → CI → gate` flow, and the `ADAPTERS` registry
40
+ (a harness-local export, `bench/src/adapters.ts`, not a package export).
41
+
42
+ **Code wins.** If a name, subpath, or signature here or in `docs/canonical-api.md`
43
+ disagrees with source, the **source is right** — fix the map in the *same turn*
44
+ (the anti-rediscovery law). Verify with Read/Edit; don't re-read to confirm.
45
+
46
+ ## Decision table — by altitude (each row → ONE source, not a snippet)
47
+
48
+ Read the cited `docs/canonical-api.md` row before writing; it carries the live
49
+ signature + the exact "do NOT build".
50
+
51
+ | Altitude — I want to… | Use | Source |
52
+ |---|---|---|
53
+ | **Define a genome** (who the agent is + what it can do, ONE surface) | `AgentProfile` (runnable) / `AgentSurfaces` (the editable-coordinate map) — `/runtime`, `/agent` | canonical-api §3.2 |
54
+ | **Define the personified-run record** (model+prompt+tools+role+seams) | `definePersona(input)` — `/runtime` | canonical-api §3.1 |
55
+ | **Run a genome driver⟷worker, end-to-end** | `runPersonified({ persona, shape, task, budget })` — `/runtime` | canonical-api §3.1 |
56
+ | **Loop a worker over one evolving artifact, K rounds, stop-when-good** | `loopUntil(seed, spec)` as the `shape` — `/runtime` | canonical-api §3.1 |
57
+ | **Best-of-N / parallel-research at equal compute** | `fanout(items, opts)` — `/runtime` | canonical-api §3.1 |
58
+ | **Produce-then-gate / multi-judge quorum / fixed chain** | `verify` / `panel` / `pipeline` — `/runtime` | canonical-api §3.1 |
59
+ | **Run depth-vs-breadth (or a custom strategy) over a stateful tool domain** | `runAgentic({ surface, task, mode\|strategy, budget })` — `/loops` | canonical-api §3.3 |
60
+ | **Author a new topology/strategy compactly** | `defineStrategy(name, body)` w/ `ctx.shot()`+`ctx.critique()` — `/loops` | canonical-api §3.3 |
61
+ | **Add a stateful tool-using domain** | implement `AgenticSurface` (5 hooks) — `/loops` | canonical-api §3.3 |
62
+ | **Benchmark: compare strategies + significance + Pareto on a domain** | `runBenchmark({ environment, tasks, worker, strategies })` — `/loops` | canonical-api §3.3 |
63
+ | **Benchmark: add/run an external benchmark from the harness** | `ADAPTERS`/`resolveAdapter(key)` + `runExperiment` — `bench` | canonical-api §3.3 |
64
+ | **Sandbox coding rollout** (fresh box/round, or persistent+resume) | `runLoop(options)` / `openSandboxRun(client, opts, deliverable)` — `/runtime` | canonical-api §3.1 |
65
+ | **Optimize a CODE surface** in a gated loop | `improvementDriver({ worktree, generator })` — `/improvement` | canonical-api §3.4 |
66
+ | **Optimize a PROMPT/config surface** (one call) | `selfImprove({ agent, scenarios, judge, baselineSurface })` — `agent-eval/contract` | canonical-api §3.4 |
67
+ | **Gate: ship/hold a candidate** (campaign ctx) | `defaultProductionGate` / `heldOutGate` / `composeGate` — `agent-eval/contract` | canonical-api §3.4 |
68
+ | **Gate: ship/hold from a `BenchmarkReport`** (per-task cells) | `promotionGate({ report, incumbent, candidate })` — `/runtime` | canonical-api §3.4 |
69
+ | **Run the full multi-generation flywheel + certify** | `runStrategyEvolution(config)` — `/runtime` | canonical-api §3.4 |
70
+ | **Compose the prod sandbox profile** (eval/prod parity) | `composeProductionAgentProfile(base, opts)` — `/mcp` | canonical-api §3.2 |
71
+ | **Observe a run** (cost/time waterfall, live tree, OTLP) | `createWaterfallCollector` / `createTopologyView` / `createOtelExporter` via `composeRuntimeHooks(...)` — root | canonical-api §3.5 |
72
+ | **State any A/B claim** | `pairedLift` (bench) over `pairedBootstrap`/`heldoutSignificance` (substrate) | canonical-api §3.5 |
73
+ | **Observe/ship with billing-boundary** | `withTangleIntelligence(agent, { project, effort })` — `/intelligence` | canonical-api §7 (now live on main — verify) |
74
+
75
+ ## Do-NOT-reinvent — the traps this skill exists to stop
76
+
77
+ Each of these gets hand-rolled every session; the canonical primitive already
78
+ holds the load-bearing invariant the parallel breaks:
79
+
80
+ - `runConversation` / persona-runner / `while(!done)` steering loop **≈**
81
+ `loopUntil` + `runPersonified` (threads executor seams; equal-k; selector≠judge
82
+ firewall; journal/replay — a parallel runner silently fails to wire the seams).
83
+ - "skill optimizer" / "topology mutator" that opens branches + applies patches
84
+ **≈** `improvementDriver` (code surface) or `selfImprove`/`gepaDriver` (prompt
85
+ surface) — both gated on a frozen holdout.
86
+ - "profile-seam" / agent-config wrapper carrying model+prompt+tools+role **≈**
87
+ `AgentProfile` (it IS that bundle) + `definePersona` (the run record);
88
+ `sandboxAgentRun({ profile })` is the box seam — never pass a router key into
89
+ the box.
90
+ - `new Sandbox()` + acquire + stream + `box.fs.read` + delete **≈**
91
+ `openSandboxRun` (persistent + resume) or `runLoop` (fresh box/round).
92
+ - `Promise.all` over N calls + manual argmax/merge **≈** `fanout` (bypassing the
93
+ budget pool breaks equal-compute claims).
94
+ - a per-step cost/token tally over events **≈** `createWaterfallCollector` (the
95
+ sum of spans IS the billed run cost; a parallel tally drifts).
96
+ - your own bootstrap loop / PRNG per gate **≈** `pairedLift` / `promotionGate`
97
+ (seeded, identical run-to-run; never report a point lift without `low/high/pairs`).
98
+
99
+ ## End-to-end recipe
100
+
101
+ `docs/canonical-api.md` §4 is the real composition — copy it from there, don't
102
+ re-derive: **define a genome → run driver⟷worker via the reactive substrate over
103
+ a multi-turn `AgenticSurface` → measure with `runBenchmark` → optimize a prompt
104
+ surface with `selfImprove` → certify on a frozen holdout with the gate.** For the
105
+ multi-generation flywheel, replace the measure/certify steps with one
106
+ `runStrategyEvolution(...)` and read `report.verdict` (NOT `report.trajectory`)
107
+ as the evidence. For a sandbox coding rollout judged by an external deterministic
108
+ checker, use the bench path: `runExperiment({ adapter: resolveAdapter(...),
109
+ sandboxClient, agentRun: sandboxAgentRun({ profile }), arms: [randomArm(...),
110
+ analystArm(...)] })` — `arms[0]` is the mandatory equal-compute control.
111
+
112
+ ## Two substrates — pick one, don't invent a third
113
+
114
+ Both implement the same recursive-decision atom over the one `Executor` port and
115
+ share `defaultSelectWinner`. **Reactive** (`Supervisor`/`Scope` + personify
116
+ combinators: `runPersonified`/`runAgentic`/`runBenchmark`) — prefer for NEW
117
+ recursive work; equal-k by construction. **Round-synchronous** (`runLoop` +
118
+ `createDriver`, `runExperiment`) — sandbox coding rollouts against external
119
+ benchmarks. The full when-which map is `docs/canonical-api.md` §6.
120
+
121
+ ## Observe / ship with the Intelligence SDK
122
+
123
+ One line wraps any agent with trace + billing boundary:
124
+ `withTangleIntelligence(agent, { project, effort })`, `effort ∈
125
+ off|eco|standard|thorough|max` (`'off'` is the provable passthrough floor —
126
+ intelligence spend clamped to 0). It builds on `createOtelExporter` +
127
+ `loopEventToOtelSpan` — don't hand-roll a trace-wrapper or effort/tier config.
128
+ Verify the live subpath against `src/intelligence/index.ts` (canonical-api §7's
129
+ "branch-only" note is stale — it landed on main).
130
+
131
+ ## Final check
132
+
133
+ - Picked a primitive from the decision table, not a hand-rolled parallel?
134
+ - Genome is ONE `AgentProfile`/`AgentSurfaces` surface, not split skill/tool/prompt knobs?
135
+ - Equal compute preserved (budget pool, or `arms[0]` control) — no `Promise.all` zoo?
136
+ - Selector ≠ judge: no judge score feeding a driver/another judge; holdout score write-only?
137
+ - Any win certified on a FROZEN holdout via a gate, never on the training composite?
138
+ - Map fixed in the same turn if source disagreed with `docs/canonical-api.md`?
139
+
140
+ See `_common.md` for shared conventions (frontmatter, fail-loud, no AI attribution).
141
+
142
+ Next: build the genome/loop/optimizer against `docs/canonical-api.md` §3–§4; if a
143
+ strategy beats incumbent on the holdout gate, `/ship` it.
@@ -24,7 +24,7 @@ The driver owns strategy.
24
24
 
25
25
  | Objective | Use |
26
26
  |---|---|
27
- | Try N attempts, pick best | `fanout` or `createFanoutVoteDriver` |
27
+ | Try N attempts, pick best | `fanout` (or the `sample` strategy) |
28
28
  | Ordered stages | `pipeline` |
29
29
  | Improve until executable check passes | `loopUntil` + verifier |
30
30
  | Review from several lenses | `panel` |
@@ -138,16 +138,15 @@ Git is the durable workspace seam:
138
138
  - resume derives completion from git state, not only a side journal
139
139
  - conflicts become blockers/questions, not silent overwrite
140
140
 
141
- Proof command for the local substrate join:
141
+ Proof command (real sandbox, real observe→steer join):
142
142
 
143
143
  ```bash
144
- pnpm exec tsx bench/src/observe-steer-workspace-loop.mts
144
+ TANGLE_API_KEY=... pnpm exec tsx bench/src/cloud-loop.mts
145
145
  ```
146
146
 
147
- It proves `Scope.spawn -> coordination tools -> gitWorkspace -> observe ->
148
- Scope.send -> corrective worker -> integration pass`. Until the same proof runs
149
- with `openSandboxRun` and a remote branch, claim local substrate closure and
150
- serial git accumulation, not full cloud migration safety.
147
+ It proves `openSandboxRun -> observe -> steer -> corrective worker` over a live
148
+ sandbox. The old `observe-steer-workspace-loop.mts` used mock executors and is
149
+ deleted — the live proof is the only valid one.
151
150
 
152
151
  ## Final Check
153
152
 
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/mcp/delegation-profile.ts"],"sourcesContent":["/**\n * Production-profile composition for the agent-runtime delegation MCP.\n *\n * A product agent's sandbox loads the delegation tools (`delegate_code`,\n * `delegate_research`, `delegate_feedback`, `delegation_status`,\n * `delegation_history`) by mounting the `agent-runtime-mcp` stdio server as\n * an MCP entry in its `AgentProfile`. This module is the single composer for\n * that wiring, so every consumer — the fleet agents and agent-builder's\n * generated agents — shares one implementation instead of copying it.\n *\n * The load-bearing invariant: the delegation MCP entry is only ever emitted\n * when a sandbox API key is present. Without the key the kernel's\n * coder/researcher delegate cannot construct an authenticated sandbox client,\n * so we omit the entry rather than ship an MCP child that fails to\n * authenticate on startup. No static profile entry, ever.\n */\n\nimport type {\n AgentProfile,\n AgentProfileFileMount,\n AgentProfileMcpServer,\n} from '@tangle-network/sandbox'\n\n/** MCP server key under which the agent-runtime delegation tools mount. */\nexport const DELEGATION_MCP_SERVER_KEY = 'agent-runtime-delegation'\n\n/**\n * Env vars forwarded into the delegation MCP child so its delegated\n * build/research loops export topology spans to the configured OTLP /\n * Tangle Intelligence sink. Each is forwarded only when present, so the\n * child is a no-op exporter until `OTEL_EXPORTER_OTLP_ENDPOINT` is set in the\n * parent env — never a hardcoded endpoint.\n */\nconst OTEL_FORWARD_KEYS = [\n 'OTEL_EXPORTER_OTLP_ENDPOINT',\n 'OTEL_EXPORTER_OTLP_HEADERS',\n 'TRACE_ID',\n 'PARENT_SPAN_ID',\n] as const\n\nconst DEFAULT_SANDBOX_BASE_URL = 'https://sandbox.tangle.tools'\n\nexport interface BuildDelegationMcpServerOptions {\n /** Sandbox API key forwarded as `TANGLE_API_KEY` to the MCP child. 
The\n * agent-runtime MCP bin reads `TANGLE_API_KEY` and passes it straight to\n * `new Sandbox({ apiKey })`. Defaults to `env.TANGLE_API_KEY`. */\n sandboxApiKey?: string\n /** Sandbox base URL forwarded as `SANDBOX_BASE_URL`. Defaults to\n * `env.SANDBOX_BASE_URL`, then `env.SANDBOX_API_URL`, then the public\n * sandbox endpoint. */\n sandboxBaseUrl?: string\n /** Environment source for key + OTEL resolution. Defaults to `process.env`;\n * injectable for tests and non-process callers. */\n env?: Record<string, string | undefined>\n}\n\n/**\n * Build the delegation MCP entry the sandbox-side agent loads on startup.\n * Returns `undefined` when no sandbox API key is resolvable — callers merge\n * the result into a profile's `mcp` map only when defined.\n */\nexport function buildDelegationMcpServer(\n options: BuildDelegationMcpServerOptions = {},\n): Record<string, AgentProfileMcpServer> | undefined {\n const env = options.env ?? process.env\n const sandboxApiKey = options.sandboxApiKey ?? env.TANGLE_API_KEY\n if (!sandboxApiKey) return undefined\n const baseUrl =\n options.sandboxBaseUrl ??\n env.SANDBOX_BASE_URL ??\n env.SANDBOX_API_URL ??\n DEFAULT_SANDBOX_BASE_URL\n\n const otelEnv: Record<string, string> = {}\n for (const key of OTEL_FORWARD_KEYS) {\n const value = env[key]\n if (value) otelEnv[key] = value\n }\n\n return {\n [DELEGATION_MCP_SERVER_KEY]: {\n transport: 'stdio',\n command: 'npx',\n args: ['-y', '@tangle-network/agent-runtime', 'mcp'],\n env: {\n TANGLE_API_KEY: sandboxApiKey,\n SANDBOX_BASE_URL: baseUrl,\n ...otelEnv,\n },\n enabled: true,\n metadata: {\n surface: 'delegation:dispatch',\n tools: [\n 'delegate_code',\n 'delegate_research',\n 'delegate_feedback',\n 'delegation_status',\n 'delegation_history',\n ],\n },\n },\n }\n}\n\nexport interface ComposeProductionAgentProfileOptions {\n /** Sandbox API key forwarded to the delegation MCP child. Defaults to\n * `env.TANGLE_API_KEY`. When unset, the delegation MCP entry is omitted. 
*/\n sandboxApiKey?: string\n /** Sandbox base URL forwarded as `SANDBOX_BASE_URL` to the MCP child. */\n sandboxBaseUrl?: string\n /** Replace the base profile's system prompt. Used by per-turn calls that\n * swap in workspace-augmented prompts (board summary, learned style). */\n systemPrompt?: string\n /** Extra file mounts layered after the base profile's `resources.files`. */\n extraFiles?: AgentProfileFileMount[]\n /** Override the profile `name`. Defaults to the base profile's name. */\n name?: string\n /** Environment source for key + OTEL resolution. Defaults to `process.env`. */\n env?: Record<string, string | undefined>\n}\n\n/**\n * Compose the production `AgentProfile`: the canonical base profile with the\n * delegation MCP merged into `mcp`. Used by every call site that boots a\n * sandbox or runs a chat turn through the sandbox path, and by eval wiring so\n * the scorecard profile hash reflects the actual production profile.\n *\n * Merge rules:\n * - `mcp`: base map preserved; the delegation entry is appended under\n * {@link DELEGATION_MCP_SERVER_KEY}, and omitted entirely when no sandbox\n * API key resolves.\n * - `prompt.systemPrompt`: replaced when `options.systemPrompt` is set.\n * - `resources.files`: `options.extraFiles` concatenated after base files.\n * - `name`: replaced when `options.name` is set.\n */\nexport function composeProductionAgentProfile(\n baseProfile: AgentProfile,\n options: ComposeProductionAgentProfileOptions = {},\n): AgentProfile {\n const delegationMcp = buildDelegationMcpServer({\n sandboxApiKey: options.sandboxApiKey,\n sandboxBaseUrl: options.sandboxBaseUrl,\n env: options.env,\n })\n\n const baseMcp = baseProfile.mcp ?? {}\n const mergedMcp: Record<string, AgentProfileMcpServer> = delegationMcp\n ? { ...baseMcp, ...delegationMcp }\n : { ...baseMcp }\n\n const baseFiles = baseProfile.resources?.files ?? []\n const mergedFiles: AgentProfileFileMount[] = options.extraFiles?.length\n ? 
[...baseFiles, ...options.extraFiles]\n : [...baseFiles]\n\n const prompt = options.systemPrompt\n ? { ...baseProfile.prompt, systemPrompt: options.systemPrompt }\n : baseProfile.prompt\n\n return {\n ...baseProfile,\n name: options.name ?? baseProfile.name,\n prompt,\n mcp: mergedMcp,\n resources: {\n ...baseProfile.resources,\n files: mergedFiles,\n },\n }\n}\n"],"mappings":";AAwBO,IAAM,4BAA4B;AASzC,IAAM,oBAAoB;AAAA,EACxB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,IAAM,2BAA2B;AAqB1B,SAAS,yBACd,UAA2C,CAAC,GACO;AACnD,QAAM,MAAM,QAAQ,OAAO,QAAQ;AACnC,QAAM,gBAAgB,QAAQ,iBAAiB,IAAI;AACnD,MAAI,CAAC,cAAe,QAAO;AAC3B,QAAM,UACJ,QAAQ,kBACR,IAAI,oBACJ,IAAI,mBACJ;AAEF,QAAM,UAAkC,CAAC;AACzC,aAAW,OAAO,mBAAmB;AACnC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,MAAO,SAAQ,GAAG,IAAI;AAAA,EAC5B;AAEA,SAAO;AAAA,IACL,CAAC,yBAAyB,GAAG;AAAA,MAC3B,WAAW;AAAA,MACX,SAAS;AAAA,MACT,MAAM,CAAC,MAAM,iCAAiC,KAAK;AAAA,MACnD,KAAK;AAAA,QACH,gBAAgB;AAAA,QAChB,kBAAkB;AAAA,QAClB,GAAG;AAAA,MACL;AAAA,MACA,SAAS;AAAA,MACT,UAAU;AAAA,QACR,SAAS;AAAA,QACT,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAiCO,SAAS,8BACd,aACA,UAAgD,CAAC,GACnC;AACd,QAAM,gBAAgB,yBAAyB;AAAA,IAC7C,eAAe,QAAQ;AAAA,IACvB,gBAAgB,QAAQ;AAAA,IACxB,KAAK,QAAQ;AAAA,EACf,CAAC;AAED,QAAM,UAAU,YAAY,OAAO,CAAC;AACpC,QAAM,YAAmD,gBACrD,EAAE,GAAG,SAAS,GAAG,cAAc,IAC/B,EAAE,GAAG,QAAQ;AAEjB,QAAM,YAAY,YAAY,WAAW,SAAS,CAAC;AACnD,QAAM,cAAuC,QAAQ,YAAY,SAC7D,CAAC,GAAG,WAAW,GAAG,QAAQ,UAAU,IACpC,CAAC,GAAG,SAAS;AAEjB,QAAM,SAAS,QAAQ,eACnB,EAAE,GAAG,YAAY,QAAQ,cAAc,QAAQ,aAAa,IAC5D,YAAY;AAEhB,SAAO;AAAA,IACL,GAAG;AAAA,IACH,MAAM,QAAQ,QAAQ,YAAY;AAAA,IAClC;AAAA,IACA,KAAK;AAAA,IACL,WAAW;AAAA,MACT,GAAG,YAAY;AAAA,MACf,OAAO;AAAA,IACT;AAAA,EACF;AACF;","names":[]}