npm - @forwardimpact/libeval - Versions diffs - 0.1.44 → 0.1.45 - Mend

@forwardimpact/libeval 0.1.44 → 0.1.45

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +212 -13
package/package.json +1 -1
package/src/agent-runner.js +45 -181
package/src/benchmark/runner.js +2 -2
package/src/commands/supervise.js +3 -1
package/src/discuss-tools.js +72 -140
package/src/discusser.js +18 -35
package/src/facilitator.js +26 -43
package/src/index.js +0 -2
package/src/judge.js +1 -1
package/src/message-bus.js +27 -81
package/src/orchestration-loop.js +176 -229
package/src/orchestration-toolkit.js +272 -303
package/src/orchestrator-helpers.js +9 -45
package/src/redaction.js +2 -0
package/src/render/orchestrator-filter.js +1 -9
package/src/supervisor.js +79 -465

package/src/supervisor.js CHANGED

@@ -1,478 +1,99 @@
 /**
- * Supervisor — orchestrates a relay loop between an agent and a supervisor,
- * both running as AgentRunner instances. The supervisor receives the task first,
- * introduces itself, and delegates work to the agent. The loop then alternates:
- * agent → supervisor → agent.
+ * Supervisor — supervise-mode wrapper around `OrchestrationLoop`. One
+ * named participant (`"agent"`) coordinated by a lead participant
+ * (`"supervisor"`). Structurally the same as `Facilitator` with a
+ * single agent; differs only in role names, prompts, and pass-through
+ * accessors.
  *
- * Signaling uses orchestration tools (Ask / Announce / Redirect / Conclude)
- * via in-process MCP servers; the supervisor has no Answer tool — agent replies
- * are routed back through the relay loop. The Ask/Answer contract is enforced
- * at turn boundaries: an unanswered Ask triggers one synthetic reminder and
- * then a `protocol_violation` trace event plus a null-answer injection so the
- * session advances without silent deadlock.
+ * Ask is async (same contract as facilitate / discuss): returns
+ * `{askIds:[N]}` immediately; the agent's reply arrives on the
+ * supervisor's next turn as `[answer#N] agent: <text>`. The supervisor
+ * sees the agent at each Ask boundary, plans the next step, and
+ * eventually calls Conclude.
  *
- * Follows OO+DI: constructor injection, factory function, tests bypass factory.
+ * For tighter feedback loops, size the agent's per-turn budget down
+ * (smaller `maxTurns` on the agent runner) so each Ask returns sooner.
  */
 import { Writable } from "node:stream";
 import { resolve } from "node:path";
 import { createAgentRunner } from "./agent-runner.js";
 import { composeProfilePrompt } from "./profile-prompt.js";
-import { TraceCollector } from "./trace-collector.js";
-import { SequenceCounter } from "./sequence-counter.js";
 import { createMessageBus } from "./message-bus.js";
 import {
   createOrchestrationContext,
-  createSupervisorToolServer,
   createSupervisedAgentToolServer,
-  checkPendingAsk,
+  createSupervisorToolServer,
 } from "./orchestration-toolkit.js";
-import { formatMessages } from "./orchestrator-helpers.js";
+import { OrchestrationLoop } from "./orchestration-loop.js";
 /** System prompt appended for the supervisor runner in supervise mode. */
 export const SUPERVISOR_SYSTEM_PROMPT =
-  "You supervise one agent. " +
-  "Ask sends a question to the agent; the reply arrives via Answer. " +
-  "Answer replies to an ask the agent addressed to you. " +
-  "Announce sends a message with no reply obligation. " +
-  "Redirect interrupts the agent with replacement instructions. " +
-  "Conclude ends the session with a verdict ('success' or 'failure') and a summary; " +
-  "the verdict reflects whether the agent's work meets the criteria stated in the task.";
+  "You supervise one agent named `agent`. " +
+  "Ask sends a question and returns immediately with {askIds:[N]}. The reply arrives on a later turn as `[answer#N] agent: <text>` in your inbox — between turns you can plan and reflect while the agent works. End your turn with text after asking; the orchestrator wakes you when the agent replies. " +
+  "Answer replies to an ask the agent addressed to you (you'll see it tagged `[ask#N] agent: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
+  "Announce delivers a message with no reply obligation. " +
+  "Conclude ends the session with a verdict ('success' or 'failure') and a summary; the verdict reflects whether the agent's work meets the criteria stated in the task. " +
+  "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
+  "If the agent goes off-track, course-correct by issuing a new Ask with corrected instructions; each Ask carries a fresh askId, so a follow-up never collides with an earlier one.";
 /** System prompt appended for the agent runner in supervise mode. */
 export const AGENT_SYSTEM_PROMPT =
   "A supervisor watches your work. " +
-  "Answer replies to an ask addressed to you. " +
-  "Ask sends a question to the supervisor; the reply arrives via Answer. " +
-  "Announce sends a message with no reply expected.";
+  "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
+  "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
+  "Ask sends a question to the supervisor and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] supervisor: <text>` in your inbox. " +
+  "Announce sends a message with no reply expected — use this for unsolicited remarks or to reply to an Announce.";
 /**
- * Maximum number of mid-turn interventions allowed within a single agent turn.
- * Bounded so a looping supervisor exhausts its quota fast (observability) but
- * leaves headroom for legitimate "intervene, observe, intervene again" patterns.
- * The outer exchange budget still bounds overall runtime.
+ * Supervise-mode wrapper around `OrchestrationLoop`. The lead is
+ * `"supervisor"`, one participant is `"agent"`, mode tag is `"supervised"`.
  */
-const MAX_INTERVENTIONS_PER_TURN = 5;
-/**
- * Default cap on supervisor↔agent exchanges in a single run. Not exposed via
- * CLI — `--max-turns` governs the per-runner invocation budget instead. When
- * a `--max-exchanges` flag is added this becomes the default for that flag.
- */
-const DEFAULT_MAX_EXCHANGES = 100;
-/** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
-export class Supervisor {
+export class Supervisor extends OrchestrationLoop {
   /**
    * @param {object} deps
-   * @param {import("./agent-runner.js").AgentRunner} deps.agentRunner - Runs the agent sessions
-   * @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner - Runs the supervisor sessions
-   * @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
-   * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
-   * @param {object} [deps.ctx] - Orchestration context (injected by factory)
-   * @param {import("./message-bus.js").MessageBus} [deps.messageBus] - Two-participant message bus ("supervisor" / "agent")
-   * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
+   * @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner
+   * @param {import("./agent-runner.js").AgentRunner} deps.agentRunner
+   * @param {import("./message-bus.js").MessageBus} deps.messageBus
+   * @param {import("stream").Writable} deps.output
+   * @param {object} deps.ctx
+   * @param {object} deps.redactor
+   * @param {string} [deps.taskAmend]
    */
   constructor({
-    agentRunner,
     supervisorRunner,
+    agentRunner,
+    messageBus,
     output,
-    maxTurns,
     ctx,
-    messageBus,
     taskAmend,
     redactor,
   }) {
     if (!agentRunner) throw new Error("agentRunner is required");
     if (!supervisorRunner) throw new Error("supervisorRunner is required");
     if (!output) throw new Error("output is required");
-    if (!redactor) throw new Error("redactor is required");
-    this.redactor = redactor;
-    this.agentRunner = agentRunner;
-    this.supervisorRunner = supervisorRunner;
-    this.output = output;
-    this.maxTurns = maxTurns ?? 100;
-    this.ctx = ctx ?? createOrchestrationContext();
-    this.messageBus =
-      messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
-    if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
-    this.counter = new SequenceCounter();
-    this.taskAmend = taskAmend ?? null;
-    /** @type {"agent"|"supervisor"} */
-    this.currentSource = "agent";
-    /** @type {number} */
-    this.currentTurn = 0;
-  }
-  /**
-   * Run the supervisor ↔ agent relay loop.
-   * @param {string} task - The initial task for the supervisor
-   * @returns {Promise<{success: boolean, turns: number, concluded: boolean}>}
-   */
-  async run(task) {
-    const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
-    this.currentSource = "supervisor";
-    this.currentTurn = 0;
-    let supervisorResult = await this.supervisorRunner.run(initialTask);
-    if (supervisorResult.error) {
-      this.emitSummary({ success: false, turns: 0 });
-      return { success: false, turns: 0, concluded: false };
-    }
-    if (this.ctx.concluded) {
-      const success = this.ctx.verdict === "success";
-      this.emitSummary({
-        success,
-        verdict: this.ctx.verdict,
-        turns: 0,
-        summary: this.ctx.summary,
-      });
-      return { success, turns: 0, concluded: true };
-    }
-    let pendingRelay = null;
-    const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
-    for (let turn = 1; turn <= turnLimit; turn++) {
-      const relay =
-        pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
-      const turnOutcome = await this.#runAgentTurn(turn, relay);
-      if (turnOutcome.exit) {
-        return { ...turnOutcome.exit, concluded: this.ctx.concluded };
-      }
-      const reviewOutcome = await this.#endOfTurnReview(turn);
-      if (reviewOutcome.exit) {
-        return { ...reviewOutcome.exit, concluded: this.ctx.concluded };
-      }
-      supervisorResult = reviewOutcome.supervisorResult;
-      pendingRelay = reviewOutcome.relay ?? null;
-    }
-    this.emitSummary({ success: false, turns: this.maxTurns });
-    return { success: false, turns: this.maxTurns, concluded: false };
-  }
-  #buildInitialRelay(fallbackText) {
-    const queued = this.messageBus.drain("agent");
-    if (queued.length > 0) return formatMessages(queued);
-    return this.extractLastText(this.supervisorRunner, fallbackText);
-  }
-  #checkAsk(name) {
-    return checkPendingAsk({
-      ctx: this.ctx,
-      messageBus: this.messageBus,
-      addresseeName: name,
+    super({
+      leadRunner: supervisorRunner,
+      agents: [{ name: "agent", role: "agent", runner: agentRunner }],
+      messageBus,
+      output,
+      leadName: "supervisor",
       mode: "supervised",
-      emitViolation: (e) => this.emitOrchestratorEvent(e),
+      ctx,
+      taskAmend,
+      redactor,
     });
   }
-  /**
-   * Drive the agent through one turn, allowing the supervisor to interrupt
-   * via the Redirect tool. Returns either an `exit` outcome (the loop should
-   * return immediately) or `{exit: null}` (proceed to end-of-turn review).
-   * @param {number} turn
-   * @param {string} initialRelay
-   * @returns {Promise<{exit: {success: boolean, turns: number}|null}>}
-   */
-  async #runAgentTurn(turn, initialRelay) {
-    let relay = initialRelay;
-    let interventions = 0;
-    let agentCalled = this.agentRunner.sessionId !== null;
-    this.agentRunner.onBatch = (batchLines, ctx) =>
-      this.#midTurnReview(turn, batchLines, ctx);
-    try {
-      while (true) {
-        this.currentSource = "agent";
-        this.currentTurn = turn;
-        const agentResult = agentCalled
-          ? await this.agentRunner.resume(relay)
-          : await this.agentRunner.run(relay);
-        agentCalled = true;
-        const outcome = this.#classifyAgentOutcome(
-          agentResult,
-          turn,
-          interventions,
-        );
-        if (outcome.type === "exit") return { exit: outcome.exit };
-        if (outcome.type === "intervention_limit") return { exit: null };
-        if (outcome.type === "redirect") {
-          interventions++;
-          relay = outcome.relay;
-          this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
-          continue;
-        }
-        const askRelay = this.#drainAgentAskRelay();
-        if (askRelay) {
-          relay = askRelay;
-          continue;
-        }
-        return { exit: null };
-      }
-    } finally {
-      this.agentRunner.onBatch = null;
-    }
-  }
-  /**
-   * Classify the outcome of a single agent execution within #runAgentTurn.
-   * @returns {{type: string, exit?: object|null, relay?: string}}
-   */
-  #classifyAgentOutcome(agentResult, turn, interventions) {
-    if (agentResult.error && !agentResult.aborted) {
-      this.emitSummary({ success: false, turns: turn });
-      return { type: "exit", exit: { success: false, turns: turn } };
-    }
-    if (this.ctx.concluded) {
-      const success = this.ctx.verdict === "success";
-      this.emitSummary({
-        success,
-        verdict: this.ctx.verdict,
-        turns: turn,
-        summary: this.ctx.summary,
-      });
-      return { type: "exit", exit: { success, turns: turn } };
-    }
-    if (agentResult.aborted && this.ctx.redirect) {
-      const redirect = this.ctx.redirect;
-      this.ctx.redirect = null;
-      if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
-        this.emitOrchestratorEvent({ type: "intervention_limit", turn });
-        return { type: "intervention_limit" };
-      }
-      return { type: "redirect", relay: redirect.message };
-    }
-    return { type: "continue" };
-  }
-  /**
-   * If the agent has an unanswered ask, drain reminders and return a
-   * formatted relay string. Returns null when no relay is needed.
-   * @returns {string|null}
-   */
-  #drainAgentAskRelay() {
-    if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
-      return null;
-    const reminders = this.messageBus.drain("agent");
-    return reminders.length > 0 ? formatMessages(reminders) : null;
-  }
-  /**
-   * Mid-turn supervisor review fired from inside the agent's onBatch hook.
-   * Runs the supervisor's LLM against the batch and aborts the agent if
-   * the supervisor calls Redirect or Conclude.
-   * @param {number} turn
-   * @param {string[]} batchLines
-   * @param {{abort: () => void}} ctx
-   */
-  async #midTurnReview(turn, batchLines, { abort }) {
-    const batchTranscript = this.renderBatch(batchLines);
-    this.emitOrchestratorEvent({ type: "mid_turn_review", turn });
-    this.currentSource = "supervisor";
-    this.ctx.redirect = null;
-    await this.supervisorRunner.resume(
-      `The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
-        `Review and use your tools if action is needed.`,
-    );
-    this.currentSource = "agent";
-    if (this.ctx.redirect) {
-      this.emitOrchestratorEvent({ type: "intervention_requested", turn });
-      abort();
-      return;
-    }
-    if (this.ctx.concluded) {
-      this.emitOrchestratorEvent({ type: "complete_requested", turn });
-      abort();
-    }
-  }
-  /**
-   * End-of-turn supervisor review. Returns either an exit outcome (error or
-   * completion) or the supervisor result so the outer loop can build the
-   * next turn's relay.
-   * @param {number} turn
-   * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
-   */
-  async #endOfTurnReview(turn) {
-    const queuedForSupervisor = this.messageBus.drain("supervisor");
-    const agentTranscript = this.extractTranscript(this.agentRunner);
-    this.currentSource = "supervisor";
-    this.currentTurn = turn;
-    this.ctx.redirect = null;
-    const reviewPrompt =
-      queuedForSupervisor.length > 0
-        ? `The agent reported:\n\n${agentTranscript}\n\n` +
-          `Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
-          `Review and decide how to proceed.`
-        : `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
-    let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
-    if (supervisorResult.error) {
-      this.emitSummary({ success: false, turns: turn });
-      return { exit: { success: false, turns: turn } };
-    }
-    if (this.ctx.concluded) {
-      const success = this.ctx.verdict === "success";
-      this.emitSummary({
-        success,
-        verdict: this.ctx.verdict,
-        turns: turn,
-        summary: this.ctx.summary,
-      });
-      return { exit: { success, turns: turn } };
-    }
-    if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
-      const reminders = this.messageBus.drain("supervisor");
-      if (reminders.length > 0) {
-        supervisorResult = await this.supervisorRunner.resume(
-          formatMessages(reminders),
-        );
-        if (this.ctx.concluded) {
-          const success = this.ctx.verdict === "success";
-          this.emitSummary({
-            success,
-            verdict: this.ctx.verdict,
-            turns: turn,
-            summary: this.ctx.summary,
-          });
-          return { exit: { success, turns: turn } };
-        }
-        this.#checkAsk("supervisor");
-      }
-    }
-    if (this.ctx.redirect) {
-      const redirect = this.ctx.redirect;
-      this.ctx.redirect = null;
-      return { exit: null, supervisorResult, relay: redirect.message };
-    }
-    const queuedForAgent = this.messageBus.drain("agent");
-    const relay =
-      queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
-    return { exit: null, supervisorResult, relay };
+  /** Readability shims for tests that read the runners by their domain names. */
+  /** Readability shim — exposes the lead runner under its mode-specific name. */
+  get supervisorRunner() {
+    return this.leadRunner;
   }
-  /**
-   * Extract a human-readable transcript from an AgentRunner's buffered output.
-   * @param {import("./agent-runner.js").AgentRunner} runner
-   * @returns {string}
-   */
-  extractTranscript(runner) {
-    const lines = runner.drainOutput();
-    const collector = new TraceCollector();
-    for (const line of lines) {
-      collector.addLine(line);
-    }
-    return collector.toText() || "[The agent produced no output.]";
-  }
-  /**
-   * Extract only the last assistant text block from an AgentRunner's buffer.
-   * @param {import("./agent-runner.js").AgentRunner} runner
-   * @param {string} fallback
-   * @returns {string}
-   */
-  extractLastText(runner, fallback) {
-    const lines = runner.buffer;
-    for (let i = lines.length - 1; i >= 0; i--) {
-      const event = JSON.parse(lines[i]);
-      if (event.type !== "assistant") continue;
-      const content = event.message?.content ?? event.content;
-      if (!Array.isArray(content)) continue;
-      for (let j = content.length - 1; j >= 0; j--) {
-        if (content[j].type === "text" && content[j].text) {
-          return content[j].text;
-        }
-      }
-    }
-    return fallback;
-  }
-  /**
-   * Emit a single NDJSON line tagged with the current source and seq.
-   * @param {string} line - Raw NDJSON line from the runner
-   */
-  emitLine(line) {
-    const event = JSON.parse(line);
-    const tagged = {
-      source: this.currentSource,
-      seq: this.counter.next(),
-      event,
-    };
-    this.output.write(JSON.stringify(this.redactor.redactValue(tagged)) + "\n");
-  }
-  /**
-   * Render a batch of buffered NDJSON lines as human-readable text.
-   * @param {string[]} batchLines
-   * @returns {string}
-   */
-  renderBatch(batchLines) {
-    if (batchLines.length === 0) return "[empty]";
-    const collector = new TraceCollector();
-    for (const line of batchLines) {
-      collector.addLine(line);
-    }
-    return collector.toText() || "[empty]";
-  }
-  /**
-   * Emit an orchestrator-source NDJSON line.
-   * @param {{type: string, turn?: number}} event
-   */
-  emitOrchestratorEvent(event) {
-    this.output.write(
-      JSON.stringify(
-        this.redactor.redactValue({
-          source: "orchestrator",
-          seq: this.counter.next(),
-          event,
-        }),
-      ) + "\n",
-    );
-  }
-  /**
-   * Emit a final orchestrator summary line, wrapped in the universal envelope.
-   * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
-   */
-  emitSummary(result) {
-    this.output.write(
-      JSON.stringify(
-        this.redactor.redactValue({
-          source: "orchestrator",
-          seq: this.counter.next(),
-          event: {
-            type: "summary",
-            success: result.success,
-            ...(result.verdict && { verdict: result.verdict }),
-            turns: result.turns,
-            ...(result.summary && { summary: result.summary }),
-          },
-        }),
-      ) + "\n",
-    );
+  /** Readability shim — exposes the single agent runner directly. */
+  get agentRunner() {
+    return this.agents[0].runner;
   }
 }
@@ -483,24 +104,26 @@ const devNull = new Writable({
 });
 /**
- * Factory function — wires both AgentRunners with their respective configs.
+ * Factory — wires the supervisor + agent runners and the orchestration
+ * context. Mirrors the facilitator factory in shape.
+ *
  * @param {object} deps
  * @param {string} deps.supervisorCwd
  * @param {string} deps.agentCwd
  * @param {function} deps.query
  * @param {import("stream").Writable} deps.output
- * @param {string} [deps.model] - Default model for both runners.
- * @param {string} [deps.agentModel] - Agent model override (falls back to `model`).
- * @param {string} [deps.supervisorModel] - Supervisor model override (falls back to `model`).
- * @param {number} [deps.maxTurns] - Per-runner invocation budget for both the supervisor and the agent (default 200; 0 = unlimited). Outer supervisor↔agent exchanges are bounded separately by `DEFAULT_MAX_EXCHANGES` (passes through to unlimited when `maxTurns === 0`).
+ * @param {string} [deps.model]
+ * @param {string} [deps.agentModel]
+ * @param {string} [deps.supervisorModel]
+ * @param {number} [deps.maxTurns] - Per-runner SDK turn budget (default 200).
  * @param {string[]} [deps.allowedTools]
  * @param {string[]} [deps.supervisorAllowedTools]
  * @param {string[]} [deps.supervisorDisallowedTools]
- * @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
- * @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
- * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
- * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
- * @param {Record<string, object>} [deps.agentMcpServers] - Additional MCP servers exposed to the agent (merged alongside the orchestration server).
+ * @param {string} [deps.supervisorProfile]
+ * @param {string} [deps.agentProfile]
+ * @param {string} [deps.profilesDir]
+ * @param {string} [deps.taskAmend]
+ * @param {Record<string, object>} [deps.agentMcpServers]
  * @returns {Supervisor}
  */
 export function createSupervisor({
@@ -513,8 +136,8 @@ export function createSupervisor({
   supervisorModel,
   maxTurns,
   allowedTools,
-  supervisorDisallowedTools,
   supervisorAllowedTools,
+  supervisorDisallowedTools,
   supervisorProfile,
   agentProfile,
   profilesDir,
@@ -534,7 +157,6 @@ export function createSupervisor({
         })
       : { type: "preset", preset: "claude_code", append: trailer };
   };
-  let supervisor;
   const ctx = createOrchestrationContext();
   const messageBus = createMessageBus({
@@ -546,36 +168,29 @@ export function createSupervisor({
     { name: "agent", role: "agent" },
   ];
-  const supervisorServer = createSupervisorToolServer(ctx);
-  const agentServer = createSupervisedAgentToolServer(ctx);
-  const onLine = (line) => supervisor.emitLine(line);
+  let supervisor;
+  const perRunBudget = maxTurns ?? 200;
-  // `maxTurns` is the per-runner invocation budget — matches `run` and
-  // `facilitate` semantics. The outer supervisor↔agent exchange loop is
-  // bounded separately by `DEFAULT_MAX_EXCHANGES`; when --max-exchanges is
-  // added it will become a parameter. `maxTurns === 0` propagates through
-  // to mean unlimited on both axes.
-  const perInvocationTurns = maxTurns ?? 200;
-  const exchangeBudget = maxTurns === 0 ? 0 : DEFAULT_MAX_EXCHANGES;
+  const agentServer = createSupervisedAgentToolServer(ctx);
+  const supervisorServer = createSupervisorToolServer(ctx);
   const agentRunner = createAgentRunner({
     cwd: agentCwd,
     query,
     output: devNull,
     model: agentModel ?? model,
-    maxTurns: perInvocationTurns,
+    maxTurns: perRunBudget,
     allowedTools,
-    onLine,
+    onLine: (line) => supervisor.emitLine("agent", line),
     settingSources: ["project"],
     systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
     mcpServers: { orchestration: agentServer, ...agentMcpServers },
     redactor,
   });
-  // Block the SDK's sub-agent spawn tools on the supervisor: its job is to
-  // coordinate the agent through the libeval orchestration harness, not to
-  // fan work out to ad-hoc Claude Code sub-agents. Mirrors the facilitator.
+  // Block the SDK's sub-agent spawn tools on the supervisor: it should
+  // coordinate the agent through orchestration tools, not fan work out
+  // to ad-hoc Claude Code sub-agents.
   const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
   const disallowedTools = supervisorDisallowedTools
     ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
@@ -586,7 +201,7 @@ export function createSupervisor({
     query,
     output: devNull,
     model: supervisorModel ?? model,
-    maxTurns: perInvocationTurns,
+    maxTurns: perRunBudget,
     allowedTools: supervisorAllowedTools ?? [
       "Bash",
       "Read",
@@ -596,7 +211,7 @@ export function createSupervisor({
       "Edit",
     ],
     disallowedTools,
-    onLine,
+    onLine: (line) => supervisor.emitLine("supervisor", line),
     settingSources: ["project"],
     systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
     mcpServers: { orchestration: supervisorServer },
@@ -604,12 +219,11 @@ export function createSupervisor({
   });
   supervisor = new Supervisor({
-    agentRunner,
     supervisorRunner,
+    agentRunner,
+    messageBus,
     output,
-    maxTurns: exchangeBudget,
     ctx,
-    messageBus,
     taskAmend,
     redactor,
   });