npm - @tangle-network/agent-runtime - Versions diffs - 0.47.0 → 0.49.0 - Mend

@tangle-network/agent-runtime 0.47.0 → 0.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/README.md +79 -15
package/dist/agent.js +1 -1
package/dist/chunk-GHX7XOJ2.js +433 -0
package/dist/chunk-GHX7XOJ2.js.map +1 -0
package/dist/{chunk-T4OQQEE3.js → chunk-IQS4HI3F.js} +14 -5
package/dist/chunk-IQS4HI3F.js.map +1 -0
package/dist/{chunk-72JQCHOZ.js → chunk-PXUTIMGJ.js} +2318 -237
package/dist/chunk-PXUTIMGJ.js.map +1 -0
package/dist/{chunk-MGFEUYOH.js → chunk-U2VEWKKK.js} +3 -3
package/dist/{chunk-JNPK46YH.js → chunk-VIEDXELL.js} +408 -6
package/dist/chunk-VIEDXELL.js.map +1 -0
package/dist/{chunk-VR4JIC5H.js → chunk-XTEZ3YJ4.js} +2 -2
package/dist/index.d.ts +29 -4
package/dist/index.js +109 -21
package/dist/index.js.map +1 -1
package/dist/kb-gate-CsXpNRk7.d.ts +1145 -0
package/dist/{loop-runner-bin-DEm4roYF.d.ts → loop-runner-bin-Cgn0A-NW.d.ts} +1 -1
package/dist/loop-runner-bin.d.ts +2 -2
package/dist/loop-runner-bin.js +3 -3
package/dist/loops.d.ts +3 -3
package/dist/loops.js +57 -1
package/dist/mcp/bin.js +187 -24
package/dist/mcp/bin.js.map +1 -1
package/dist/mcp/index.d.ts +28 -125
package/dist/mcp/index.js +28 -6
package/dist/mcp/index.js.map +1 -1
package/dist/platform.js +2 -2
package/dist/platform.js.map +1 -1
package/dist/runtime.d.ts +1100 -62
package/dist/runtime.js +57 -1
package/dist/{types-Cbx3dNK5.d.ts → types-BpDfCPUp.d.ts} +1 -1
package/dist/workflow.js +1 -1
package/package.json +7 -6
package/dist/chunk-5YDS7BLC.js +0 -218
package/dist/chunk-5YDS7BLC.js.map +0 -1
package/dist/chunk-72JQCHOZ.js.map +0 -1
package/dist/chunk-JNPK46YH.js.map +0 -1
package/dist/chunk-T4OQQEE3.js.map +0 -1
package/dist/kb-gate-51BlLlVM.d.ts +0 -529
package/dist/{chunk-MGFEUYOH.js.map → chunk-U2VEWKKK.js.map} +0 -0
package/dist/{chunk-VR4JIC5H.js.map → chunk-XTEZ3YJ4.js.map} +0 -0

package/README.md CHANGED Viewed

@@ -2,7 +2,10 @@
 The shared task-lifecycle skeleton for agents. It runs an agent (a chat turn, a one-shot task, or a multi-attempt loop), captures every run as a trace, and feeds those traces into eval-gated self-improvement.
-It owns the lifecycle and the loop kernel. It delegates domain behavior (models, tools, knowledge) to adapters, scoring and the ship gate to [`@tangle-network/agent-eval`](https://www.npmjs.com/package/@tangle-network/agent-eval), and sandboxed long-running execution to [`@tangle-network/sandbox`](https://www.npmjs.com/package/@tangle-network/sandbox).
+It owns the lifecycle, the loop kernel, and the **optimization suite** — `Environment` + `Strategy` +
+`runBenchmark` + `runStrategyEvolution`, the published surface for measuring and evolving how an agent
+spends compute against a deployable check. It delegates domain behavior (models, tools, knowledge) to
+adapters, scoring statistics and the ship gate to [`@tangle-network/agent-eval`](https://www.npmjs.com/package/@tangle-network/agent-eval), and sandboxed long-running execution to [`@tangle-network/sandbox`](https://www.npmjs.com/package/@tangle-network/sandbox).
 ```bash
 pnpm add @tangle-network/agent-runtime @tangle-network/agent-eval @tangle-network/sandbox
@@ -53,8 +56,9 @@ That is the common case. Everything below is for when one chat turn is not enoug
 | Run a production chat turn (most products) | `handleChatTurn` | root |
 | Declare an agent (profile, surfaces, adapters) | `defineAgent` | `/agent` |
 | Run a one-shot task with verification and eval | `runAgentTask` | root |
-| Run a multi-attempt loop (refine or fanout-vote) | `runLoop` plus a driver | `/loops` |
-| Let the agent choose the loop shape per round | `createDriver` plus `createSandboxPlanner` | `/loops` |
+| Compare optimization strategies on YOUR domain (5 hooks) | `runBenchmark` + `defineStrategy` | `/loops` |
+| Let the system author + evolve its own strategies, gated | `runStrategyEvolution` · `authorStrategy` · `promotionGate` | `/loops` |
+| Run a multi-attempt loop with a custom driver | `runLoop` + `createDriver` | `/loops` |
 | Delegate a disciplined loop by mode (code, research, ...) | `runDelegatedLoop` or `agent-runtime-loop` | root |
 | Build code reliably (reviewed, gated) | `createDefaultCoderDelegate` | `/mcp` |
 | Grow a knowledge base with only grounded facts | `createKbGate` | `/mcp` |
@@ -64,15 +68,50 @@ That is the common case. Everything below is for when one chat turn is not enoug
 | Mutate surfaces from trace findings | `runAnalystLoop` | `/analyst-loop` |
 | Persist a run plus its cost ledger | `startRuntimeRun` | root |
+## The optimization suite
+The canonical surface. A domain is an `Environment` (five hooks: `open`/`tools`/`call`/`score`/`close`);
+a **strategy** is how a compute budget is spent to beat the domain's own deployable check. Two
+built-ins (`sample` = best-of-N, `refine` = critique-and-continue) plus `defineStrategy` to compose
+your own from two steps — and `authorStrategy`, where the system writes new strategies from its own
+per-task losses:
+```ts
+import { defineStrategy, runBenchmark, sample, refine } from '@tangle-network/agent-runtime/loops'
+const doubleCheck = defineStrategy('double-check', async ({ shot, critique }) => {
+  const first = await shot()
+  const steer = first ? await critique(first.messages) : null
+  const second = steer ? await shot({ messages: first?.messages, steer }) : null
+  const score = Math.max(first?.score ?? 0, second?.score ?? 0)
+  return { score, resolved: score >= 1, completions: 2, progression: [first?.score ?? 0, score], shots: 2 }
+})
+const report = await runBenchmark({ environment, tasks, worker, strategies: [sample, refine, doubleCheck], budget: 3 })
+report.perTask // the losses table an author/optimizer consumes
+report.pareto  // the (score, $) frontier
+```
+The measurement invariants are structural, not advisory: every strategy spends through a conserved
+budget pool (equal compute by construction), the deliverable score is **harness-verified** from the
+shots actually brokered (a body cannot fabricate a win), and the critic is firewalled from the check
+(selector ≠ judge). `runStrategyEvolution` runs the multi-generation search — populations of authored
+candidates, cost-aware champion selection, a phase ledger with resume, and ONE promotion decision via
+`promotionGate` (seeded paired bootstrap) on a holdout slice the search never touched.
+`createVerifierEnvironment` adapts answer-shaped domains (one `check` function); `createMcpEnvironment`
+adapts any MCP server. The consumer surface — loops as a service with a CLI, detached runner, and MCP
+server — lives in the [`loops`](https://github.com/drewstone/loops) repo; the experiment harness and
+evidence ledger live in [`bench/HARNESS.md`](./bench/HARNESS.md).
 ## The loop kernel
 `runLoop` is a topology-agnostic kernel. Each iteration spawns a sandbox on an `AgentRunSpec`, decodes the output, validates it, and asks a driver what to do next. The driver owns topology. The validator owns scoring. The kernel owns iteration accounting, concurrency, cost and token aggregation, and trace emission.
 ```ts
-import { runLoop, createFanoutVoteDriver } from '@tangle-network/agent-runtime/loops'
+import { runLoop, createDriver } from '@tangle-network/agent-runtime/loops'
 const result = await runLoop({
-  driver: createFanoutVoteDriver({ n: 3 }),    // 3 parallel attempts, pick the best valid one
+  driver: createDriver({ planner }),           // the planner emits one TopologyMove per round
   agentRuns: [claudeSpec, codexSpec, glmSpec], // heterogeneous: one harness per branch
   output,                                       // events to typed Output
   validator,                                    // Output to { valid, score }
@@ -82,9 +121,13 @@ const result = await runLoop({
 result.winner // highest-scoring valid attempt
 ```
-Shipped drivers (`/loops/drivers`): `createRefineDriver` (single task, iterate until valid), `createFanoutVoteDriver` (N parallel, vote), and `createDriver` (the agent authors the topology at runtime). The dynamic driver emits one `TopologyMove` per round (`refine`, `fanout`, or `stop`) from an injected planner; a malformed move throws `PlannerError`, so the loop never runs a topology nobody chose. Topology is orthogonal to harness: the planner never names a backend, and the kernel's `agentRuns` decide which harness runs each branch.
-`runProgram` (also in `/loops`) is the recursive op-set (`sample`, `steer`, `fork`, `parallel`, `select`, `seq`, `stop`) plus a tree executor, for programs that compose sub-loops.
+`createDriver` lets a planner author the topology at runtime: one `TopologyMove` per round
+(`refine`, `fanout`, `select`, or `stop`); a malformed move throws `PlannerError`, so the loop never
+runs a topology nobody chose. Topology is orthogonal to harness: the planner never names a backend,
+and the kernel's `agentRuns` decide which harness runs each branch. For fixed shapes, write a small
+inline `Driver` (see `examples/coder-loop`) or use the `personify` combinators (`fanout`, `loopUntil`,
+`panel`, `pipeline`) over the recursive `Scope`/`Supervisor` core — the newer canonical path for
+recursive work.
 ## Self-improvement
@@ -106,7 +149,12 @@ const result = await selfImprove({
 // result.winner.surface is the safe one — the baseline unless gateDecision === 'ship'
 ```
-agent-runtime contributes the runtime-specific piece: the **CODE-surface `improvementDriver`** (`/improvement`) — a git-worktree mutator you pass to `selfImprove` as `driver` to optimize code instead of a string.
+agent-runtime contributes the runtime-specific pieces: the **CODE-surface `improvementDriver`**
+(`/improvement`) — a git-worktree mutator you pass to `selfImprove` as `driver` to optimize code
+instead of a string — and **`runStrategyEvolution`** (`/loops`), the multi-generation search over
+STRATEGY space: the system reads its own per-task losses, authors candidate strategies as code,
+plays them against the incumbent at equal budget, and a seeded statistical gate decides promotion
+on a never-touched holdout slice.
 `runAnalystLoop` (`/analyst-loop`) mines real run traces into findings; `createAnalystDriverHook` feeds those findings to a dynamic-driver planner via `PlannerContext.analyses`, with a firewall (`assertTraceDerivedFindings`) that rejects any finding derived from a judge verdict. Production intake — turning real run traces into the corpus `selfImprove` optimizes against — is agent-eval's `analyzeRuns` / `partitionRunsByAuthoringModel` (`/contract`).
@@ -156,9 +204,15 @@ const server = createMcpServer({ coderDelegate: createDefaultCoderDelegate({ san
 Or mount the `agent-runtime-mcp` stdio bin on a production `AgentProfile.mcp`.
+Delegation state is in-memory by default — a server restart drops pending delegations and history. Set `AGENT_RUNTIME_DELEGATION_STATE_FILE=/path/state.json` on the bin (or construct via `DelegationTaskQueue.restore({ store: new FileDelegationStore({ filePath }) })`) to persist records across restarts: `delegation_status`/`delegation_history` keep answering for prior runs, idempotency keys dedupe resubmissions, and in-flight records either resume through the `resumeDelegate` seam (when submitted with a `detachedSessionRef`) or settle as failed with an explicit driver-restart error. A corrupt state file refuses to load (`DelegationStateCorruptError`); `AGENT_RUNTIME_DELEGATION_STATE_RECOVER=1` archives it and starts empty. `AGENT_RUNTIME_DELEGATION_RETAIN_TERMINAL=<n>` caps retained terminal records.
 ## The experiment harness (bench/)
-`bench/` is the internal harness that asks the binding empirical question: does any non-blind topology beat blind compute at equal k, under a deployable (non-oracle) selector, on a real benchmark? It runs through the same kernel, not a reimplementation.
+`bench/` is the internal harness; [`bench/HARNESS.md`](./bench/HARNESS.md) is its map — read that
+first. The canonical path is the optimization suite (`runBenchmark`/`flywheel-evolve` over real
+domains: the EnterpriseOps gym, commit0, answer-shaped math); the older selection-gate paths
+(`runExperiment`, corpus-replay) remain for the legacy evidence. The live evidence ledger is
+`.evolve/current.json` — results never live in this README.
 One entrypoint, `runExperiment(adapter, { sandboxClient, agentRun, arms, ... })`: N instances times a set of arms, each arm a topology driven through `runLoop`, judged by the adapter, written to a durable canonical corpus. An arm is one steer function `f(rootPrompt, history) => nextPrompt`: `random` ignores history (the compute control), `refine` carries the prior answer plus a directive, `diverse` rotates a strategy lens. The cost dial is the backend type (`hermes` for a direct router call, `opencode` or `claude-code` or `codex` for agent CLIs). The deep statistics (paired bootstrap with Benjamini-Hochberg correction, selector replay) come from `corpus-report.mts` and `corpus-replay.mts` over the written corpus, computed once. See `bench/HARNESS.md` and `docs/learning-flywheel.md`.
@@ -170,8 +224,9 @@ One entrypoint, `runExperiment(adapter, { sandboxClient, agentRun, arms, ... })`
 | Backend provider | `openai-compat` when `TANGLE_API_KEY`, else `openai` if `OPENAI_API_KEY` | `MODEL_PROVIDER` env |
 | Router base URL | `https://router.tangle.tools/v1` | `TANGLE_ROUTER_BASE_URL` env |
 | Sandbox base URL | `https://sandbox.tangle.tools` | `SANDBOX_API_URL` env |
-| Loop iteration cap | 10 (`runLoop`), 8 (dynamic driver) | `runLoop({ maxIterations })` |
-| Driver | none, required by `runLoop` | `createRefineDriver`, `createFanoutVoteDriver`, `createDriver` |
+| Loop iteration cap | 10 (`runLoop`) | `runLoop({ maxIterations })` |
+| Driver | none, required by `runLoop` | `createDriver` or an inline `Driver` |
+| Strategy budget (suite) | 3 rollouts/shots per strategy per task | `runBenchmark({ budget })` |
 | Winner selection (coder delegate) | `highest-score` | `winnerSelection` option |
 | KB gate min passage | 12 chars | `createKbGate({ minPassageChars })` |
 | `selfImprove` gate | held-out gate (default) | pass `gate: defaultProductionGate` for red-team hardening |
@@ -202,18 +257,27 @@ sandbox         AgentProfile, Sandbox.create, streamPrompt, exportTraceBundle. T
 |---|---|
 | `@tangle-network/agent-runtime` | chat turns, delegated loop-runner, OTEL export, errors, model resolution |
 | `.../agent` | `defineAgent` plus surface and outcome adapters |
-| `.../loops` | the `runLoop` kernel, the `refine` / `fanout-vote` / `dynamic` drivers, `runProgram`, `loopDispatch` |
+| `.../loops` | **the optimization suite** (`Environment`, `defineStrategy`, `runBenchmark`, `runStrategyEvolution`, `authorStrategy`, `promotionGate`) + the `runLoop` kernel, `createDriver`, `loopDispatch` |
 | `.../profiles` | `coderProfile`, `researcherProfile` presets |
 | `.../mcp` | `createMcpServer`, `createDefaultCoderDelegate`, `createKbGate`, the `agent-runtime-mcp` bin |
 | `.../improvement` | `improvementDriver` (code/worktree `CandidateGenerator`), `agenticGenerator`, `reflectiveGenerator` — the code-surface driver you pass to agent-eval's `selfImprove` |
 | `.../analyst-loop` | `runAnalystLoop`, the analyst registry driver |
 | `.../platform` | cross-site SSO and the integrations hub |
+| `.../runtime` | the recursive core by its own name (same module as `/loops`) |
+| `.../topology` | the live agent-tree viewer (folds spawn/settle events into a renderable tree) |
+| `.../workflow` · `.../audit` | workflow orchestration helpers · audit utilities |
 Bins: `agent-runtime-mcp` (delegation MCP server), `agent-runtime-loop` (schedulable delegated loop-runner).
-## Adoption skill
+## Teaching an agent to build on this
-This package ships a self-contained adoption skill at [`skills/agent-runtime-adoption/SKILL.md`](./skills/agent-runtime-adoption/SKILL.md): driven loops, topology drivers, the `loopDispatch` campaign bridge, MCP delegation, and the code-surface `improvementDriver` for agent-eval's `selfImprove`. It needs only this package plus `@tangle-network/agent-eval`. For the full self-improving pipeline (trace sink, analyst loop, scorecard, production loop, CI), see the `agent-eval-adoption` and `agent-stack-adoption` skills.
+Two agent-consumable skills live in the [`loops`](https://github.com/drewstone/loops) repo:
+**`skills/loop-builder`** (domain → `Environment` → loop → gate → operator surface, with the
+measured foot-gun list) and **`skills/loop-author`** (authoring a strategy body from losses;
+read the contract with `loops contract`). The runnable on-ramp is [`examples/`](./examples/README.md)
+— a learning progression from the production chat turn through the strategy suite to the recursive
+supervisor. For the broader pipeline (trace sink, analyst loop, scorecard, CI), see the
+`agent-eval-adoption` and `agent-stack-adoption` skills.
 ## Stability, tests, docs

package/dist/agent.js CHANGED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-7JITYN6T.js";
 import {
   createSandboxForSpec
-} from "./chunk-72JQCHOZ.js";
+} from "./chunk-PXUTIMGJ.js";
 import {
   mapSandboxEvent
 } from "./chunk-GSUO5QS6.js";

package/dist/chunk-GHX7XOJ2.js ADDED Viewed

@@ -0,0 +1,433 @@
+import {
+  coderProfile,
+  multiHarnessCoderFanout
+} from "./chunk-KADIJAD4.js";
+import {
+  createSandboxForSpec,
+  deleteBoxSafe,
+  runLoop,
+  sleep,
+  throwAbort,
+  throwIfAborted
+} from "./chunk-PXUTIMGJ.js";
+import {
+  ValidationError
+} from "./chunk-GSUO5QS6.js";
+// src/mcp/detached-turn.ts
+// Fallback tick interval in milliseconds (5e3 = 5000), used when the caller
+// supplies no `tickIntervalMs` / `intervalMs` option.
+var DEFAULT_TICK_INTERVAL_MS = 5e3;
+function formatDetachedSessionRef(parts) {
+  assertRefComponent("sessionId", parts.sessionId);
+  if (parts.sandboxId === void 0) return `session=${parts.sessionId}`;
+  assertRefComponent("sandboxId", parts.sandboxId);
+  return `sandbox=${parts.sandboxId};session=${parts.sessionId}`;
+}
+function parseDetachedSessionRef(raw) {
+  const fields = /* @__PURE__ */ new Map();
+  for (const pair of raw.split(";")) {
+    const eq = pair.indexOf("=");
+    const key = eq === -1 ? "" : pair.slice(0, eq);
+    const value = eq === -1 ? "" : pair.slice(eq + 1);
+    if (key !== "session" && key !== "sandbox" || value.length === 0 || fields.has(key)) {
+      throw new ValidationError(
+        `parseDetachedSessionRef: malformed detachedSessionRef ${JSON.stringify(raw)} \u2014 expected "session=<id>" or "sandbox=<id>;session=<id>"`
+      );
+    }
+    fields.set(key, value);
+  }
+  const sessionId = fields.get("session");
+  if (!sessionId) {
+    throw new ValidationError(
+      `parseDetachedSessionRef: detachedSessionRef ${JSON.stringify(raw)} carries no session id`
+    );
+  }
+  const sandboxId = fields.get("sandbox");
+  return { sessionId, ...sandboxId !== void 0 ? { sandboxId } : {} };
+}
+function assertRefComponent(name, value) {
+  if (value.length === 0 || value.includes(";") || value.includes("=")) {
+    throw new ValidationError(
+      `formatDetachedSessionRef: ${name} ${JSON.stringify(value)} must be non-empty and free of ";" / "="`
+    );
+  }
+}
+function detachedTurnEvents(sessionId, turn) {
+  return [
+    {
+      type: "result",
+      id: sessionId,
+      data: {
+        text: turn.text,
+        finalText: turn.text,
+        success: true,
+        result: turn.result
+      }
+    }
+  ];
+}
+/**
+ * Runs one detached turn to completion by repeatedly calling the sandbox's
+ * `driveTurn` until it reports `completed` or `failed`.
+ *
+ * Options read here: `client`, `spec`, `signal`, `sessionId`, `prompt`,
+ * `bindSandbox`, `report`, and the optional `tickIntervalMs` / `wallCapMs`.
+ *
+ * @returns `{ text, result }` taken from the completed tick.
+ * @throws ValidationError when the sandbox has no `driveTurn` function or no
+ *   non-empty string `id`; Error when a tick reports `failed`; whatever
+ *   `throwIfAborted` / `sleep` raise once the signal aborts.
+ */
+async function runDetachedTurn(options) {
+  const intervalMs = options.tickIntervalMs ?? DEFAULT_TICK_INTERVAL_MS;
+  const box = await createSandboxForSpec(options.client, options.spec, options.signal);
+  const drive = box;
+  // Best-effort cancel: the call is optional and its rejection is discarded.
+  const onAbort = () => {
+    void drive._sessionCancel?.(options.sessionId).catch(() => {
+    });
+  };
+  try {
+    if (typeof drive.driveTurn !== "function") {
+      throw new ValidationError(
+        "runDetachedTurn: the acquired sandbox exposes no driveTurn(message, { sessionId }) \u2014 detached dispatch requires @tangle-network/sandbox >= 0.6 and a session-backed placement (sibling/fleet); disable detached dispatch for this executor."
+      );
+    }
+    const sandboxId = box.id;
+    if (typeof sandboxId !== "string" || sandboxId.length === 0) {
+      throw new ValidationError(
+        "runDetachedTurn: the acquired sandbox carries no id \u2014 without it the detached run cannot be resumed after a restart, so refusing to dispatch detached."
+      );
+    }
+    // The sandbox id is handed to the caller before the first driveTurn call.
+    options.bindSandbox(sandboxId);
+    options.signal.addEventListener("abort", onAbort, { once: true });
+    for (; ; ) {
+      throwIfAborted(options.signal);
+      // The session id doubles as the turn id on every tick.
+      const tick = await drive.driveTurn(options.prompt, {
+        sessionId: options.sessionId,
+        turnId: options.sessionId,
+        ...options.wallCapMs !== void 0 ? { wallCapMs: options.wallCapMs } : {}
+      });
+      // Re-check after the await so an abort during the tick wins over its result.
+      throwIfAborted(options.signal);
+      if (tick.state === "completed") return { text: tick.text, result: tick.result };
+      if (tick.state === "failed") {
+        throw new Error(`detached turn ${options.sessionId} failed: ${tick.error}`);
+      }
+      // Any other state: report progress and wait one interval before the next tick.
+      options.report({ iteration: 0, phase: detachedRunningPhase(tick.elapsedMs) });
+      await sleep(intervalMs, options.signal);
+    }
+  } finally {
+    options.signal.removeEventListener("abort", onAbort);
+    // If the signal is aborted at exit, the cancel is issued (again) here, which
+    // also covers a signal that was already aborted before the listener was added.
+    if (options.signal.aborted) onAbort();
+    // The sandbox is released on every exit path.
+    await deleteBoxSafe(box);
+  }
+}
+function detachedRunningPhase(elapsedMs) {
+  return elapsedMs === void 0 ? "detached-running" : `detached-running ${Math.round(elapsedMs / 1e3)}s`;
+}
+/**
+ * Builds a resume driver whose `tick` advances a previously dispatched
+ * detached turn by calling `driveTurn` once on the referenced sandbox.
+ *
+ * Options read here: `resolveSandbox(sandboxId)`, `buildMessage(record)`,
+ * `settleOutput(turn, record, { signal })`, and the optional `intervalMs` /
+ * `wallCapMs`.
+ *
+ * @returns `{ intervalMs, tick }`. `tick` resolves to `{ state: "completed",
+ *   output }`, `{ state: "failed", error: { message, kind } }`, or
+ *   `{ state: "running" }`.
+ */
+function createDriveTurnResumeDriver(options) {
+  // Task ids that already have an abort listener attached, so each task is hooked once.
+  // NOTE(review): nothing in this block removes entries from the set — confirm
+  // whether it can grow without bound in a long-lived process.
+  const cancelHooked = /* @__PURE__ */ new Set();
+  return {
+    intervalMs: options.intervalMs ?? DEFAULT_TICK_INTERVAL_MS,
+    async tick({ record, detachedSessionRef }, ctx) {
+      const ref = parseDetachedSessionRef(detachedSessionRef);
+      // A reference without a sandbox id cannot be resumed; report it as failed
+      // rather than throwing.
+      if (ref.sandboxId === void 0) {
+        return {
+          state: "failed",
+          error: {
+            message: `detached session "${ref.sessionId}" was never bound to a sandbox \u2014 the previous process died before the box was acquired, so the turn was never dispatched and cannot be resumed`,
+            kind: "DetachedSessionUnboundError"
+          }
+        };
+      }
+      const box = await options.resolveSandbox(ref.sandboxId);
+      if (!cancelHooked.has(record.taskId)) {
+        cancelHooked.add(record.taskId);
+        // Best-effort cancel on abort; the listener closes over the `box`
+        // resolved on this first tick, and cancel rejections are discarded.
+        ctx.signal.addEventListener(
+          "abort",
+          () => {
+            void box._sessionCancel?.(ref.sessionId).catch(() => {
+            });
+          },
+          { once: true }
+        );
+      }
+      if (ctx.signal.aborted) throwAbort();
+      // The session id doubles as the turn id.
+      const tick = await box.driveTurn(options.buildMessage(record), {
+        sessionId: ref.sessionId,
+        turnId: ref.sessionId,
+        ...options.wallCapMs !== void 0 ? { wallCapMs: options.wallCapMs } : {}
+      });
+      if (tick.state === "completed") {
+        const output = await options.settleOutput(
+          { text: tick.text, result: tick.result },
+          record,
+          {
+            signal: ctx.signal
+          }
+        );
+        return { state: "completed", output };
+      }
+      if (tick.state === "failed") {
+        return {
+          state: "failed",
+          error: {
+            message: `detached turn ${ref.sessionId} failed: ${tick.error}`,
+            kind: "DetachedTurnFailedError"
+          }
+        };
+      }
+      // Any other state: report progress and let the caller tick again later.
+      ctx.report({ iteration: 0, phase: detachedRunningPhase(tick.elapsedMs) });
+      return { state: "running" };
+    }
+  };
+}
+// src/mcp/executor.ts
+function createSiblingSandboxExecutor(options) {
+  const underlying = options.client;
+  const client = {
+    create(opts) {
+      return underlying.create(opts);
+    },
+    describePlacement(box) {
+      return { kind: "sibling", sandboxId: readId(box) };
+    }
+  };
+  return {
+    client,
+    placement: "sibling",
+    describe() {
+      return "sibling-sandbox (each delegation = fresh sandbox via client.create)";
+    }
+  };
+}
+function createFleetWorkspaceExecutor(options) {
+  const fleet = options.fleet;
+  const exclude = new Set(options.excludeMachineIds ?? []);
+  let callIndex = 0;
+  const placementBySandboxId = /* @__PURE__ */ new Map();
+  const client = {
+    async create() {
+      const ids = fleet.ids.filter((id) => !exclude.has(id));
+      if (ids.length === 0) {
+        throw new Error(
+          `agent-runtime: fleet ${fleet.fleetId} has no eligible worker machines (ids=[${fleet.ids.join(",")}], excluded=[${[...exclude].join(",")}])`
+        );
+      }
+      const selector = options.selectMachine;
+      const machineId = selector ? selector({ callIndex, ids }) : ids[callIndex % ids.length];
+      callIndex += 1;
+      if (typeof machineId !== "string" || machineId.length === 0) {
+        throw new Error("agent-runtime: fleet executor selectMachine returned an empty machine id");
+      }
+      const box = await fleet.sandbox(machineId);
+      const sandboxId = readId(box);
+      if (sandboxId) placementBySandboxId.set(sandboxId, { machineId });
+      return box;
+    },
+    describePlacement(box) {
+      const sandboxId = readId(box);
+      const recorded = sandboxId ? placementBySandboxId.get(sandboxId) : void 0;
+      return {
+        kind: "fleet",
+        sandboxId,
+        fleetId: fleet.fleetId,
+        machineId: recorded?.machineId
+      };
+    }
+  };
+  return {
+    client,
+    placement: "fleet",
+    describe() {
+      const excluded = exclude.size > 0 ? ` (excluded=[${[...exclude].join(",")}])` : "";
+      return `fleet-workspace (fleetId=${fleet.fleetId}, machines=[${fleet.ids.join(",")}]${excluded})`;
+    }
+  };
+}
+function readId(box) {
+  const raw = box.id;
+  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
+}
+// src/mcp/delegates.ts
+/**
+ * Builds the default coder delegate: an async `(args, ctx)` function that
+ * runs a coding task in a sandbox and returns the chosen output.
+ *
+ * Options read here: `executor` / `sandboxClient` (via `resolveExecutor`),
+ * `fanoutHarnesses`, `fanoutModels`, `maxConcurrency` (default 4),
+ * `traceEmitter`, `harness`, `model`, `reviewer`, `winnerSelection`
+ * (default "highest-score"), `detachedTickIntervalMs`, `detachedWallCapMs`.
+ *
+ * The delegate takes one of three paths:
+ *  - one variant with a detached session ref: a detached turn settled by
+ *    `settleDetachedCoderTurn`;
+ *  - one variant otherwise: a single-iteration `runLoop`;
+ *  - several variants: a multi-harness fanout `runLoop`.
+ *
+ * @throws Error (message from `noWinnerMessage`) when no candidate is chosen.
+ */
+function createDefaultCoderDelegate(options) {
+  const executor = resolveExecutor(options);
+  const sandboxClient = executor.client;
+  const fanoutHarnesses = options.fanoutHarnesses;
+  const maxConcurrency = options.maxConcurrency ?? 4;
+  const traceEmitter = options.traceEmitter;
+  return async (args, ctx) => {
+    const task = coderTaskFromArgs(args);
+    // Truncated to an integer and floored at 1.
+    // NOTE(review): a NaN `args.variants` stays NaN through Math.trunc/Math.max
+    // and would skip the `<= 1` branch below — confirm callers validate it.
+    const variants = Math.max(1, Math.trunc(args.variants ?? 1));
+    ctx.report({ iteration: 0, phase: "starting" });
+    if (variants <= 1) {
+      const { agentRunSpec, output, validator } = coderProfile({
+        task,
+        ...options.harness ? { harness: options.harness } : {},
+        ...options.model ? { model: options.model } : {}
+      });
+      // Detached path: taken only when the context carries both a session ref
+      // and a way to update it.
+      if (ctx.detachedSessionRef !== void 0 && ctx.updateDetachedSessionRef) {
+        const { sessionId } = parseDetachedSessionRef(ctx.detachedSessionRef);
+        const rebind = ctx.updateDetachedSessionRef;
+        const turn = await runDetachedTurn({
+          client: sandboxClient,
+          spec: agentRunSpec,
+          prompt: agentRunSpec.taskToPrompt(task),
+          sessionId,
+          // Rewrites the stored ref so it also names the acquired sandbox.
+          bindSandbox: (sandboxId) => rebind(formatDetachedSessionRef({ sandboxId, sessionId })),
+          signal: ctx.signal,
+          report: ctx.report,
+          ...options.detachedTickIntervalMs !== void 0 ? { tickIntervalMs: options.detachedTickIntervalMs } : {},
+          ...options.detachedWallCapMs !== void 0 ? { wallCapMs: options.detachedWallCapMs } : {}
+        });
+        const chosen3 = await settleDetachedCoderTurn(turn, {
+          task,
+          sessionId,
+          signal: ctx.signal,
+          ...options.harness ? { harness: options.harness } : {},
+          ...options.model ? { model: options.model } : {},
+          ...options.reviewer ? { reviewer: options.reviewer } : {}
+        });
+        ctx.report({ iteration: 1, phase: "completed" });
+        return chosen3;
+      }
+      // Attached single-variant path: one loop iteration through singleShotDriver.
+      const result2 = await runLoop({
+        driver: singleShotDriver,
+        agentRun: agentRunSpec,
+        output,
+        validator,
+        task,
+        ctx: { sandboxClient, signal: ctx.signal, ...traceEmitter ? { traceEmitter } : {} },
+        maxIterations: 1,
+        maxConcurrency
+      });
+      const chosen2 = await pickCoderWinner({
+        iterations: result2.iterations,
+        reviewer: options.reviewer,
+        selection: options.winnerSelection ?? "highest-score",
+        task,
+        signal: ctx.signal
+      });
+      if (!chosen2) throw new Error(noWinnerMessage(options.reviewer));
+      ctx.report({ iteration: 1, phase: "completed" });
+      return chosen2;
+    }
+    // Multi-variant path: harness and model lists are each cut to `variants` entries.
+    const fanout = multiHarnessCoderFanout({
+      ...fanoutHarnesses && fanoutHarnesses.length > 0 ? { harnesses: fanoutHarnesses.slice(0, variants) } : {},
+      ...options.fanoutModels ? { models: options.fanoutModels.slice(0, variants) } : {}
+    });
+    const agentRuns = fanout.agentRuns.slice(0, variants);
+    const result = await runLoop({
+      driver: fanout.driver,
+      agentRuns,
+      output: fanout.output,
+      validator: fanout.validator,
+      task,
+      ctx: { sandboxClient, signal: ctx.signal, ...traceEmitter ? { traceEmitter } : {} },
+      maxIterations: variants,
+      // Concurrency never exceeds the number of variants.
+      maxConcurrency: Math.min(maxConcurrency, variants)
+    });
+    const chosen = await pickCoderWinner({
+      iterations: result.iterations,
+      reviewer: options.reviewer,
+      selection: options.winnerSelection ?? "highest-score",
+      task,
+      signal: ctx.signal
+    });
+    if (!chosen) throw new Error(noWinnerMessage(options.reviewer));
+    ctx.report({ iteration: agentRuns.length, phase: "completed" });
+    return chosen;
+  };
+}
+async function pickCoderWinner(args) {
+  const valid = [];
+  for (const iter of args.iterations) {
+    if (iter.output === void 0 || iter.error || iter.verdict?.valid !== true) continue;
+    valid.push({
+      index: iter.index,
+      output: iter.output,
+      score: iter.verdict.score ?? 0,
+      readiness: iter.verdict.score ?? 0
+    });
+  }
+  if (valid.length === 0) return void 0;
+  let eligible = valid;
+  if (args.reviewer) {
+    eligible = [];
+    for (const c of valid) {
+      const review = await args.reviewer(c.output, args.task, { signal: args.signal });
+      if (review.approved) eligible.push({ ...c, readiness: review.readiness });
+    }
+    if (eligible.length === 0) return void 0;
+  }
+  return selectCoderCandidate(eligible, args.selection).output;
+}
+function selectCoderCandidate(candidates, selection) {
+  const diffLines = (c) => c.output.diffStats.insertions + c.output.diffStats.deletions;
+  const sorted = [...candidates].sort((a, b) => {
+    switch (selection) {
+      case "smallest-diff":
+        return diffLines(a) - diffLines(b) || a.index - b.index;
+      case "highest-readiness":
+        return b.readiness - a.readiness || a.index - b.index;
+      case "first-approved":
+        return a.index - b.index;
+      default:
+        return b.score - a.score || a.index - b.index;
+    }
+  });
+  return sorted[0];
+}
+function noWinnerMessage(reviewer) {
+  return reviewer ? "coder delegate: no candidate passed validation + review" : "coder delegate: no candidate passed validation";
+}
+function coderTaskFromArgs(args) {
+  return {
+    goal: buildCoderGoal(args),
+    repoRoot: args.repoRoot,
+    testCmd: args.config?.testCmd,
+    typecheckCmd: args.config?.typecheckCmd,
+    forbiddenPaths: args.config?.forbiddenPaths,
+    maxDiffLines: args.config?.maxDiffLines
+  };
+}
+async function settleDetachedCoderTurn(turn, options) {
+  const { output, validator } = coderProfile({
+    task: options.task,
+    ...options.harness ? { harness: options.harness } : {},
+    ...options.model ? { model: options.model } : {}
+  });
+  const parsed = output.parse(detachedTurnEvents(options.sessionId, turn));
+  const verdict = await validator.validate(parsed, { iteration: 0, signal: options.signal });
+  if (verdict.valid !== true) throw new Error(noWinnerMessage(options.reviewer));
+  if (options.reviewer) {
+    const review = await options.reviewer(parsed, options.task, { signal: options.signal });
+    if (!review.approved) throw new Error(noWinnerMessage(options.reviewer));
+  }
+  return parsed;
+}
+function buildCoderGoal(args) {
+  if (!args.contextHint) return args.goal;
+  return [args.goal, "", "## Context", args.contextHint].join("\n");
+}
+function resolveExecutor(options) {
+  if (options.executor && options.sandboxClient) {
+    throw new Error("createDefaultCoderDelegate: pass exactly one of `executor` or `sandboxClient`");
+  }
+  if (options.executor) return options.executor;
+  if (options.sandboxClient) {
+    return createSiblingSandboxExecutor({ client: options.sandboxClient });
+  }
+  throw new Error("createDefaultCoderDelegate: `executor` or `sandboxClient` is required");
+}
+var singleShotDriver = {
+  name: "mcp-single-shot",
+  async plan(task, history) {
+    return history.length === 0 ? [task] : [];
+  },
+  decide(history) {
+    return history.length > 0 ? "pick-winner" : "fail";
+  }
+};
+export {
+  formatDetachedSessionRef,
+  parseDetachedSessionRef,
+  detachedTurnEvents,
+  runDetachedTurn,
+  createDriveTurnResumeDriver,
+  createSiblingSandboxExecutor,
+  createFleetWorkspaceExecutor,
+  createDefaultCoderDelegate,
+  coderTaskFromArgs,
+  settleDetachedCoderTurn
+};
+//# sourceMappingURL=chunk-GHX7XOJ2.js.map