@tangle-network/agent-runtime 0.19.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -22,6 +22,7 @@ pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval
22
22
  | `deriveExecutionId` | Stable substrate executionId for `X-Execution-ID` cross-process reconnect |
23
23
  | `startRuntimeRun` | Canonical production-run row + cost ledger |
24
24
  | `defineAgent` | Declarative per-vertical agent manifest — surfaces, knowledge, rubric, run fn |
25
+ | `createMcpServer` (`/mcp`) + `agent-runtime-mcp` bin | Stdio MCP server with the 5 delegation tools (`delegate_code`, `delegate_research`, `delegate_feedback`, `delegation_status`, `delegation_history`) |
25
26
  | `resolveChatModel` / `validateChatModelId` / `getModels` | Router catalog fetch + fail-closed admission + precedence resolver |
26
27
  | `decideKnowledgeReadiness` | `ready` / `blocked` / `caveat` branch for routes / UI |
27
28
  | `createOpenAICompatibleBackend` | OpenAI-compatible streaming backend (TCloud / cli-bridge) |
@@ -173,6 +174,115 @@ await run.persist({ runtimeEvents: telemetry.events })
173
174
 
174
175
  Full runnable: [`examples/runtime-run/`](./examples/runtime-run/).
175
176
 
177
+ ## Delegation tools (MCP)
178
+
179
+ `@tangle-network/agent-runtime/mcp` ships a stdio MCP server that exposes
180
+ five delegation tools to a sandbox coding-harness agent (claude-code,
181
+ codex, opencode, ...). The product agent itself runs inside a sandbox
182
+ during a chat; when it needs a long-running coder or researcher loop, it
183
+ calls one of these tools instead of doing the work inline.
184
+
185
+ | Tool | Kind | Use |
186
+ |---|---|---|
187
+ | `delegate_code` | async | Code-modification task — returns a `taskId`; poll `delegation_status` for the patch |
188
+ | `delegate_research` | async | Source-grounded research task — returns a `taskId`; poll for items + citations |
189
+ | `delegate_feedback` | sync | Append an agent/user/judge rating against a delegation, artifact, or outcome |
190
+ | `delegation_status` | sync | Snapshot of a delegation's state machine (`pending` → `running` → `completed` \| `failed` \| `cancelled`) |
191
+ | `delegation_history` | sync | Newest-first read of past delegations + attached feedback |
192
+
193
+ Mount the server from a Node entry point:
194
+
195
+ ```ts
196
+ import { Sandbox } from '@tangle-network/sandbox'
197
+ import {
198
+ createMcpServer,
199
+ createDefaultCoderDelegate,
200
+ } from '@tangle-network/agent-runtime/mcp'
201
+
202
+ const sandboxClient = new Sandbox({ apiKey: process.env.SANDBOX_API_KEY! })
203
+ const server = createMcpServer({
204
+ coderDelegate: createDefaultCoderDelegate({ sandboxClient }),
205
+ // researcherDelegate: wire your own — see below.
206
+ })
207
+ await server.serve() // reads JSON-RPC from stdin, writes responses to stdout
208
+ ```
209
+
210
+ Or run the ready-made bin:
211
+
212
+ ```bash
213
+ SANDBOX_API_KEY=sk_sandbox_... agent-runtime-mcp
214
+ ```
215
+
216
+ The bin auto-wires the coder delegate and, when
217
+ `@tangle-network/agent-knowledge` is installed as a peer, the researcher
218
+ delegate. Environment knobs:
219
+
220
+ - `SANDBOX_API_KEY` — required (unless both `MCP_DISABLE_*` are set)
221
+ - `SANDBOX_BASE_URL` — sandbox-SDK base URL override
222
+ - `MCP_MAX_CONCURRENT_SANDBOXES` — kernel `maxConcurrency` cap (default 4)
223
+ - `MCP_CODER_FANOUT_HARNESSES` — comma-separated harness ids for `variants > 1`
224
+ - `MCP_DISABLE_CODER` / `MCP_DISABLE_RESEARCHER` — omit the matching tool
225
+
226
+ ### Async semantics
227
+
228
+ Coder + researcher delegations are **fire-and-poll**. The handler returns
229
+ a `taskId` immediately; the agent calls `delegation_status(taskId)` until
230
+ the state is terminal. Identical inputs return the same `taskId` —
231
+ duplicate-call safety is built in via canonical-form hashing.
232
+
233
+ ```
234
+ agent → delegate_code(goal, repoRoot) → { taskId, estimatedDurationMs }
235
+ agent → delegation_status(taskId) → { status: 'running', progress: { ... } }
236
+ ... (minutes pass)
237
+ agent → delegation_status(taskId) → { status: 'completed', result: { profile: 'coder', output: <CoderOutput> } }
238
+ agent → delegate_feedback(refersTo, rating) → { recorded: true, id }
239
+ ```
240
+
241
+ Task state lives in memory inside the server process. A restart drops
242
+ pending delegations — Phase 2 will move state into SQLite.
243
+
244
+ ### Wiring a researcher delegate
245
+
246
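+ `agent-runtime` cannot depend on `@tangle-network/agent-knowledge` (doing
247
+ so would create a dependency cycle). Wire the researcher delegate from your
248
+ own integration code (`sandboxClient` below is a sandbox client you construct yourself, e.g. the `Sandbox` instance from the mounting example above):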
249
+
250
+ ```ts
251
+ import { runLoop } from '@tangle-network/agent-runtime/loops'
252
+ import { researcherProfile, multiHarnessResearcherFanout } from '@tangle-network/agent-knowledge/profiles'
253
+ import { createMcpServer, type ResearcherDelegate } from '@tangle-network/agent-runtime/mcp'
254
+
255
+ const researcherDelegate: ResearcherDelegate = async (args, ctx) => {
256
+ const task = {
257
+ question: args.question,
258
+ knowledgeNamespace: args.namespace,
259
+ scope: args.scope,
260
+ sources: args.sources,
261
+ /* ...map config.recencyWindow ISO strings to Date objects */
262
+ }
263
+ if ((args.variants ?? 1) <= 1) {
264
+ const preset = researcherProfile({ task })
265
+ const result = await runLoop({
266
+ driver: { /* single-shot */ async plan(t, h) { return h.length === 0 ? [t] : [] }, decide(h) { return h.length > 0 ? 'pick-winner' : 'fail' } },
267
+ agentRun: preset.agentRunSpec, output: preset.output, validator: preset.validator,
268
+ task, ctx: { sandboxClient, signal: ctx.signal }, maxIterations: 1,
269
+ })
270
+ return result.winner!.output
271
+ }
272
+ const fanout = multiHarnessResearcherFanout({ task })
273
+ const result = await runLoop({
274
+ driver: fanout.driver,
275
+ agentRuns: fanout.agentRuns.slice(0, args.variants),
276
+ output: fanout.output, validator: fanout.validator,
277
+ task, ctx: { sandboxClient, signal: ctx.signal },
278
+ maxIterations: args.variants ?? 1,
279
+ })
280
+ return result.winner!.output
281
+ }
282
+
283
+ createMcpServer({ researcherDelegate })
284
+ ```
285
+
176
286
  ## Error taxonomy
177
287
 
178
288
  | Error | When |
package/dist/agent.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import * as _tangle_network_agent_eval from '@tangle-network/agent-eval';
2
2
  import { FindingSubject, TraceAnalystKindSpec, AnalystFinding } from '@tangle-network/agent-eval';
3
- import { R as RuntimeStreamEvent } from './types-DlyPgeI0.js';
3
+ import { R as RuntimeStreamEvent } from './types-DvJIha6w.js';
4
4
  import { I as ImprovementAdapter, K as KnowledgeAdapter, a as RunAnalystLoopResult } from './types-D_MXrmJP.js';
5
5
 
6
6
  /**