@forwardimpact/libeval 0.1.44 → 0.1.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/supervisor.js CHANGED
@@ -1,478 +1,99 @@
1
1
  /**
2
- * Supervisor — orchestrates a relay loop between an agent and a supervisor,
3
- * both running as AgentRunner instances. The supervisor receives the task first,
4
- * introduces itself, and delegates work to the agent. The loop then alternates:
5
- * agent → supervisor → agent.
2
+ * Supervisor — supervise-mode wrapper around `OrchestrationLoop`. One
3
+ * named participant (`"agent"`) coordinated by a lead participant
4
+ * (`"supervisor"`). Structurally the same as `Facilitator` with a
5
+ * single agent; differs only in role names, prompts, and pass-through
6
+ * accessors.
6
7
  *
7
- * Signaling uses orchestration tools (Ask / Announce / Redirect / Conclude)
8
- * via in-process MCP servers; the supervisor has no Answer tool — agent replies
9
- * are routed back through the relay loop. The Ask/Answer contract is enforced
10
- * at turn boundaries: an unanswered Ask triggers one synthetic reminder and
11
- * then a `protocol_violation` trace event plus a null-answer injection so the
12
- * session advances without silent deadlock.
8
+ * Ask is async (same contract as facilitate / discuss): returns
9
+ * `{askIds:[N]}` immediately; the agent's reply arrives on the
10
+ * supervisor's next turn as `[answer#N] agent: <text>`. The supervisor
11
+ * sees the agent at each Ask boundary, plans the next step, and
12
+ * eventually calls Conclude.
13
13
  *
14
- * Follows OO+DI: constructor injection, factory function, tests bypass factory.
14
+ * For tighter feedback loops, size the agent's per-turn budget down
15
+ * (smaller `maxTurns` on the agent runner) so each Ask returns sooner.
15
16
  */
16
17
 
17
18
  import { Writable } from "node:stream";
18
19
  import { resolve } from "node:path";
19
20
  import { createAgentRunner } from "./agent-runner.js";
20
21
  import { composeProfilePrompt } from "./profile-prompt.js";
21
- import { TraceCollector } from "./trace-collector.js";
22
- import { SequenceCounter } from "./sequence-counter.js";
23
22
  import { createMessageBus } from "./message-bus.js";
24
23
  import {
25
24
  createOrchestrationContext,
26
- createSupervisorToolServer,
27
25
  createSupervisedAgentToolServer,
28
- checkPendingAsk,
26
+ createSupervisorToolServer,
29
27
  } from "./orchestration-toolkit.js";
30
- import { formatMessages } from "./orchestrator-helpers.js";
28
+ import { OrchestrationLoop } from "./orchestration-loop.js";
31
29
 
32
30
  /** System prompt appended for the supervisor runner in supervise mode. */
33
31
  export const SUPERVISOR_SYSTEM_PROMPT =
34
- "You supervise one agent. " +
35
- "Ask sends a question to the agent; the reply arrives via Answer. " +
36
- "Answer replies to an ask the agent addressed to you. " +
37
- "Announce sends a message with no reply obligation. " +
38
- "Redirect interrupts the agent with replacement instructions. " +
39
- "Conclude ends the session with a verdict ('success' or 'failure') and a summary; " +
40
- "the verdict reflects whether the agent's work meets the criteria stated in the task.";
32
+ "You supervise one agent named `agent`. " +
33
+ "Ask sends a question and returns immediately with {askIds:[N]}. The reply arrives on a later turn as `[answer#N] agent: <text>` in your inbox — between turns you can plan and reflect while the agent works. End your turn with text after asking; the orchestrator wakes you when the agent replies. " +
34
+ "Answer replies to an ask the agent addressed to you (you'll see it tagged `[ask#N] agent: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
35
+ "Announce delivers a message with no reply obligation. " +
36
+ "Conclude ends the session with a verdict ('success' or 'failure') and a summary; the verdict reflects whether the agent's work meets the criteria stated in the task. " +
37
+ "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
38
+ "If the agent goes off-track, course-correct by issuing a new Ask with corrected instructions; each Ask carries a fresh askId, so a follow-up never collides with an earlier one.";
41
39
 
42
40
  /** System prompt appended for the agent runner in supervise mode. */
43
41
  export const AGENT_SYSTEM_PROMPT =
44
42
  "A supervisor watches your work. " +
45
- "Answer replies to an ask addressed to you. " +
46
- "Ask sends a question to the supervisor; the reply arrives via Answer. " +
47
- "Announce sends a message with no reply expected.";
43
+ "Each question you receive carries an [ask#N] header quote that N back as the askId field on Answer so the reply pairs with the right question. " +
44
+ "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
45
+ "Ask sends a question to the supervisor and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] supervisor: <text>` in your inbox. " +
46
+ "Announce sends a message with no reply expected — use this for unsolicited remarks or to reply to an Announce.";
48
47
 
49
48
  /**
50
- * Maximum number of mid-turn interventions allowed within a single agent turn.
51
- * Bounded so a looping supervisor exhausts its quota fast (observability) but
52
- * leaves headroom for legitimate "intervene, observe, intervene again" patterns.
53
- * The outer exchange budget still bounds overall runtime.
49
+ * Supervise-mode wrapper around `OrchestrationLoop`. The lead is
50
+ * `"supervisor"`, one participant is `"agent"`, mode tag is `"supervised"`.
54
51
  */
55
- const MAX_INTERVENTIONS_PER_TURN = 5;
56
-
57
- /**
58
- * Default cap on supervisor↔agent exchanges in a single run. Not exposed via
59
- * CLI — `--max-turns` governs the per-runner invocation budget instead. When
60
- * a `--max-exchanges` flag is added this becomes the default for that flag.
61
- */
62
- const DEFAULT_MAX_EXCHANGES = 100;
63
-
64
- /** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
65
- export class Supervisor {
52
+ export class Supervisor extends OrchestrationLoop {
66
53
  /**
67
54
  * @param {object} deps
68
- * @param {import("./agent-runner.js").AgentRunner} deps.agentRunner - Runs the agent sessions
69
- * @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner - Runs the supervisor sessions
70
- * @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
71
- * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
72
- * @param {object} [deps.ctx] - Orchestration context (injected by factory)
73
- * @param {import("./message-bus.js").MessageBus} [deps.messageBus] - Two-participant message bus ("supervisor" / "agent")
74
- * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
55
+ * @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner
56
+ * @param {import("./agent-runner.js").AgentRunner} deps.agentRunner
57
+ * @param {import("./message-bus.js").MessageBus} deps.messageBus
58
+ * @param {import("stream").Writable} deps.output
59
+ * @param {object} deps.ctx
60
+ * @param {object} deps.redactor
61
+ * @param {string} [deps.taskAmend]
75
62
  */
76
63
  constructor({
77
- agentRunner,
78
64
  supervisorRunner,
65
+ agentRunner,
66
+ messageBus,
79
67
  output,
80
- maxTurns,
81
68
  ctx,
82
- messageBus,
83
69
  taskAmend,
84
70
  redactor,
85
71
  }) {
86
72
  if (!agentRunner) throw new Error("agentRunner is required");
87
73
  if (!supervisorRunner) throw new Error("supervisorRunner is required");
88
74
  if (!output) throw new Error("output is required");
89
- if (!redactor) throw new Error("redactor is required");
90
- this.redactor = redactor;
91
- this.agentRunner = agentRunner;
92
- this.supervisorRunner = supervisorRunner;
93
- this.output = output;
94
- this.maxTurns = maxTurns ?? 100;
95
- this.ctx = ctx ?? createOrchestrationContext();
96
- this.messageBus =
97
- messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
98
- if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
99
- this.counter = new SequenceCounter();
100
- this.taskAmend = taskAmend ?? null;
101
- /** @type {"agent"|"supervisor"} */
102
- this.currentSource = "agent";
103
- /** @type {number} */
104
- this.currentTurn = 0;
105
- }
106
-
107
- /**
108
- * Run the supervisor ↔ agent relay loop.
109
- * @param {string} task - The initial task for the supervisor
110
- * @returns {Promise<{success: boolean, turns: number, concluded: boolean}>}
111
- */
112
- async run(task) {
113
- const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
114
- this.currentSource = "supervisor";
115
- this.currentTurn = 0;
116
- let supervisorResult = await this.supervisorRunner.run(initialTask);
117
-
118
- if (supervisorResult.error) {
119
- this.emitSummary({ success: false, turns: 0 });
120
- return { success: false, turns: 0, concluded: false };
121
- }
122
-
123
- if (this.ctx.concluded) {
124
- const success = this.ctx.verdict === "success";
125
- this.emitSummary({
126
- success,
127
- verdict: this.ctx.verdict,
128
- turns: 0,
129
- summary: this.ctx.summary,
130
- });
131
- return { success, turns: 0, concluded: true };
132
- }
133
-
134
- let pendingRelay = null;
135
- const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
136
- for (let turn = 1; turn <= turnLimit; turn++) {
137
- const relay =
138
- pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
139
-
140
- const turnOutcome = await this.#runAgentTurn(turn, relay);
141
- if (turnOutcome.exit) {
142
- return { ...turnOutcome.exit, concluded: this.ctx.concluded };
143
- }
144
-
145
- const reviewOutcome = await this.#endOfTurnReview(turn);
146
- if (reviewOutcome.exit) {
147
- return { ...reviewOutcome.exit, concluded: this.ctx.concluded };
148
- }
149
- supervisorResult = reviewOutcome.supervisorResult;
150
- pendingRelay = reviewOutcome.relay ?? null;
151
- }
152
-
153
- this.emitSummary({ success: false, turns: this.maxTurns });
154
- return { success: false, turns: this.maxTurns, concluded: false };
155
- }
156
-
157
- #buildInitialRelay(fallbackText) {
158
- const queued = this.messageBus.drain("agent");
159
- if (queued.length > 0) return formatMessages(queued);
160
- return this.extractLastText(this.supervisorRunner, fallbackText);
161
- }
162
-
163
- #checkAsk(name) {
164
- return checkPendingAsk({
165
- ctx: this.ctx,
166
- messageBus: this.messageBus,
167
- addresseeName: name,
75
+ super({
76
+ leadRunner: supervisorRunner,
77
+ agents: [{ name: "agent", role: "agent", runner: agentRunner }],
78
+ messageBus,
79
+ output,
80
+ leadName: "supervisor",
168
81
  mode: "supervised",
169
- emitViolation: (e) => this.emitOrchestratorEvent(e),
82
+ ctx,
83
+ taskAmend,
84
+ redactor,
170
85
  });
171
86
  }
172
87
 
173
- /**
174
- * Drive the agent through one turn, allowing the supervisor to interrupt
175
- * via the Redirect tool. Returns either an `exit` outcome (the loop should
176
- * return immediately) or `{exit: null}` (proceed to end-of-turn review).
177
- * @param {number} turn
178
- * @param {string} initialRelay
179
- * @returns {Promise<{exit: {success: boolean, turns: number}|null}>}
180
- */
181
- async #runAgentTurn(turn, initialRelay) {
182
- let relay = initialRelay;
183
- let interventions = 0;
184
- let agentCalled = this.agentRunner.sessionId !== null;
185
-
186
- this.agentRunner.onBatch = (batchLines, ctx) =>
187
- this.#midTurnReview(turn, batchLines, ctx);
188
-
189
- try {
190
- while (true) {
191
- this.currentSource = "agent";
192
- this.currentTurn = turn;
193
- const agentResult = agentCalled
194
- ? await this.agentRunner.resume(relay)
195
- : await this.agentRunner.run(relay);
196
- agentCalled = true;
197
-
198
- const outcome = this.#classifyAgentOutcome(
199
- agentResult,
200
- turn,
201
- interventions,
202
- );
203
-
204
- if (outcome.type === "exit") return { exit: outcome.exit };
205
- if (outcome.type === "intervention_limit") return { exit: null };
206
-
207
- if (outcome.type === "redirect") {
208
- interventions++;
209
- relay = outcome.relay;
210
- this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
211
- continue;
212
- }
213
-
214
- const askRelay = this.#drainAgentAskRelay();
215
- if (askRelay) {
216
- relay = askRelay;
217
- continue;
218
- }
219
-
220
- return { exit: null };
221
- }
222
- } finally {
223
- this.agentRunner.onBatch = null;
224
- }
225
- }
226
-
227
- /**
228
- * Classify the outcome of a single agent execution within #runAgentTurn.
229
- * @returns {{type: string, exit?: object|null, relay?: string}}
230
- */
231
- #classifyAgentOutcome(agentResult, turn, interventions) {
232
- if (agentResult.error && !agentResult.aborted) {
233
- this.emitSummary({ success: false, turns: turn });
234
- return { type: "exit", exit: { success: false, turns: turn } };
235
- }
236
-
237
- if (this.ctx.concluded) {
238
- const success = this.ctx.verdict === "success";
239
- this.emitSummary({
240
- success,
241
- verdict: this.ctx.verdict,
242
- turns: turn,
243
- summary: this.ctx.summary,
244
- });
245
- return { type: "exit", exit: { success, turns: turn } };
246
- }
247
-
248
- if (agentResult.aborted && this.ctx.redirect) {
249
- const redirect = this.ctx.redirect;
250
- this.ctx.redirect = null;
251
- if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
252
- this.emitOrchestratorEvent({ type: "intervention_limit", turn });
253
- return { type: "intervention_limit" };
254
- }
255
- return { type: "redirect", relay: redirect.message };
256
- }
257
-
258
- return { type: "continue" };
259
- }
260
-
261
- /**
262
- * If the agent has an unanswered ask, drain reminders and return a
263
- * formatted relay string. Returns null when no relay is needed.
264
- * @returns {string|null}
265
- */
266
- #drainAgentAskRelay() {
267
- if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
268
- return null;
269
- const reminders = this.messageBus.drain("agent");
270
- return reminders.length > 0 ? formatMessages(reminders) : null;
271
- }
272
-
273
- /**
274
- * Mid-turn supervisor review fired from inside the agent's onBatch hook.
275
- * Runs the supervisor's LLM against the batch and aborts the agent if
276
- * the supervisor calls Redirect or Conclude.
277
- * @param {number} turn
278
- * @param {string[]} batchLines
279
- * @param {{abort: () => void}} ctx
280
- */
281
- async #midTurnReview(turn, batchLines, { abort }) {
282
- const batchTranscript = this.renderBatch(batchLines);
283
- this.emitOrchestratorEvent({ type: "mid_turn_review", turn });
284
-
285
- this.currentSource = "supervisor";
286
- this.ctx.redirect = null;
287
-
288
- await this.supervisorRunner.resume(
289
- `The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
290
- `Review and use your tools if action is needed.`,
291
- );
292
- this.currentSource = "agent";
293
-
294
- if (this.ctx.redirect) {
295
- this.emitOrchestratorEvent({ type: "intervention_requested", turn });
296
- abort();
297
- return;
298
- }
299
- if (this.ctx.concluded) {
300
- this.emitOrchestratorEvent({ type: "complete_requested", turn });
301
- abort();
302
- }
303
- }
304
-
305
- /**
306
- * End-of-turn supervisor review. Returns either an exit outcome (error or
307
- * completion) or the supervisor result so the outer loop can build the
308
- * next turn's relay.
309
- * @param {number} turn
310
- * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
311
- */
312
- async #endOfTurnReview(turn) {
313
- const queuedForSupervisor = this.messageBus.drain("supervisor");
314
- const agentTranscript = this.extractTranscript(this.agentRunner);
315
- this.currentSource = "supervisor";
316
- this.currentTurn = turn;
317
- this.ctx.redirect = null;
318
-
319
- const reviewPrompt =
320
- queuedForSupervisor.length > 0
321
- ? `The agent reported:\n\n${agentTranscript}\n\n` +
322
- `Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
323
- `Review and decide how to proceed.`
324
- : `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
325
-
326
- let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
327
-
328
- if (supervisorResult.error) {
329
- this.emitSummary({ success: false, turns: turn });
330
- return { exit: { success: false, turns: turn } };
331
- }
332
-
333
- if (this.ctx.concluded) {
334
- const success = this.ctx.verdict === "success";
335
- this.emitSummary({
336
- success,
337
- verdict: this.ctx.verdict,
338
- turns: turn,
339
- summary: this.ctx.summary,
340
- });
341
- return { exit: { success, turns: turn } };
342
- }
343
-
344
- if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
345
- const reminders = this.messageBus.drain("supervisor");
346
- if (reminders.length > 0) {
347
- supervisorResult = await this.supervisorRunner.resume(
348
- formatMessages(reminders),
349
- );
350
- if (this.ctx.concluded) {
351
- const success = this.ctx.verdict === "success";
352
- this.emitSummary({
353
- success,
354
- verdict: this.ctx.verdict,
355
- turns: turn,
356
- summary: this.ctx.summary,
357
- });
358
- return { exit: { success, turns: turn } };
359
- }
360
- this.#checkAsk("supervisor");
361
- }
362
- }
363
-
364
- if (this.ctx.redirect) {
365
- const redirect = this.ctx.redirect;
366
- this.ctx.redirect = null;
367
- return { exit: null, supervisorResult, relay: redirect.message };
368
- }
369
-
370
- const queuedForAgent = this.messageBus.drain("agent");
371
- const relay =
372
- queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
373
- return { exit: null, supervisorResult, relay };
88
+ /** Readability shims for tests that read the runners by their domain names. */
89
+ /** Readability shim — exposes the lead runner under its mode-specific name. */
90
+ get supervisorRunner() {
91
+ return this.leadRunner;
374
92
  }
375
93
 
376
- /**
377
- * Extract a human-readable transcript from an AgentRunner's buffered output.
378
- * @param {import("./agent-runner.js").AgentRunner} runner
379
- * @returns {string}
380
- */
381
- extractTranscript(runner) {
382
- const lines = runner.drainOutput();
383
- const collector = new TraceCollector();
384
- for (const line of lines) {
385
- collector.addLine(line);
386
- }
387
- return collector.toText() || "[The agent produced no output.]";
388
- }
389
-
390
- /**
391
- * Extract only the last assistant text block from an AgentRunner's buffer.
392
- * @param {import("./agent-runner.js").AgentRunner} runner
393
- * @param {string} fallback
394
- * @returns {string}
395
- */
396
- extractLastText(runner, fallback) {
397
- const lines = runner.buffer;
398
- for (let i = lines.length - 1; i >= 0; i--) {
399
- const event = JSON.parse(lines[i]);
400
- if (event.type !== "assistant") continue;
401
- const content = event.message?.content ?? event.content;
402
- if (!Array.isArray(content)) continue;
403
- for (let j = content.length - 1; j >= 0; j--) {
404
- if (content[j].type === "text" && content[j].text) {
405
- return content[j].text;
406
- }
407
- }
408
- }
409
- return fallback;
410
- }
411
-
412
- /**
413
- * Emit a single NDJSON line tagged with the current source and seq.
414
- * @param {string} line - Raw NDJSON line from the runner
415
- */
416
- emitLine(line) {
417
- const event = JSON.parse(line);
418
- const tagged = {
419
- source: this.currentSource,
420
- seq: this.counter.next(),
421
- event,
422
- };
423
- this.output.write(JSON.stringify(this.redactor.redactValue(tagged)) + "\n");
424
- }
425
-
426
- /**
427
- * Render a batch of buffered NDJSON lines as human-readable text.
428
- * @param {string[]} batchLines
429
- * @returns {string}
430
- */
431
- renderBatch(batchLines) {
432
- if (batchLines.length === 0) return "[empty]";
433
- const collector = new TraceCollector();
434
- for (const line of batchLines) {
435
- collector.addLine(line);
436
- }
437
- return collector.toText() || "[empty]";
438
- }
439
-
440
- /**
441
- * Emit an orchestrator-source NDJSON line.
442
- * @param {{type: string, turn?: number}} event
443
- */
444
- emitOrchestratorEvent(event) {
445
- this.output.write(
446
- JSON.stringify(
447
- this.redactor.redactValue({
448
- source: "orchestrator",
449
- seq: this.counter.next(),
450
- event,
451
- }),
452
- ) + "\n",
453
- );
454
- }
455
-
456
- /**
457
- * Emit a final orchestrator summary line, wrapped in the universal envelope.
458
- * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
459
- */
460
- emitSummary(result) {
461
- this.output.write(
462
- JSON.stringify(
463
- this.redactor.redactValue({
464
- source: "orchestrator",
465
- seq: this.counter.next(),
466
- event: {
467
- type: "summary",
468
- success: result.success,
469
- ...(result.verdict && { verdict: result.verdict }),
470
- turns: result.turns,
471
- ...(result.summary && { summary: result.summary }),
472
- },
473
- }),
474
- ) + "\n",
475
- );
94
+ /** Readability shim — exposes the single agent runner directly. */
95
+ get agentRunner() {
96
+ return this.agents[0].runner;
476
97
  }
477
98
  }
478
99
 
@@ -483,24 +104,26 @@ const devNull = new Writable({
483
104
  });
484
105
 
485
106
  /**
486
- * Factory function — wires both AgentRunners with their respective configs.
107
+ * Factory — wires the supervisor + agent runners and the orchestration
108
+ * context. Mirrors the facilitator factory in shape.
109
+ *
487
110
  * @param {object} deps
488
111
  * @param {string} deps.supervisorCwd
489
112
  * @param {string} deps.agentCwd
490
113
  * @param {function} deps.query
491
114
  * @param {import("stream").Writable} deps.output
492
- * @param {string} [deps.model] - Default model for both runners.
493
- * @param {string} [deps.agentModel] - Agent model override (falls back to `model`).
494
- * @param {string} [deps.supervisorModel] - Supervisor model override (falls back to `model`).
495
- * @param {number} [deps.maxTurns] - Per-runner invocation budget for both the supervisor and the agent (default 200; 0 = unlimited). Outer supervisor↔agent exchanges are bounded separately by `DEFAULT_MAX_EXCHANGES` (passes through to unlimited when `maxTurns === 0`).
115
+ * @param {string} [deps.model]
116
+ * @param {string} [deps.agentModel]
117
+ * @param {string} [deps.supervisorModel]
118
+ * @param {number} [deps.maxTurns] - Per-runner SDK turn budget (default 200).
496
119
  * @param {string[]} [deps.allowedTools]
497
120
  * @param {string[]} [deps.supervisorAllowedTools]
498
121
  * @param {string[]} [deps.supervisorDisallowedTools]
499
- * @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
500
- * @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
501
- * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
502
- * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
503
- * @param {Record<string, object>} [deps.agentMcpServers] - Additional MCP servers exposed to the agent (merged alongside the orchestration server).
122
+ * @param {string} [deps.supervisorProfile]
123
+ * @param {string} [deps.agentProfile]
124
+ * @param {string} [deps.profilesDir]
125
+ * @param {string} [deps.taskAmend]
126
+ * @param {Record<string, object>} [deps.agentMcpServers]
504
127
  * @returns {Supervisor}
505
128
  */
506
129
  export function createSupervisor({
@@ -513,8 +136,8 @@ export function createSupervisor({
513
136
  supervisorModel,
514
137
  maxTurns,
515
138
  allowedTools,
516
- supervisorDisallowedTools,
517
139
  supervisorAllowedTools,
140
+ supervisorDisallowedTools,
518
141
  supervisorProfile,
519
142
  agentProfile,
520
143
  profilesDir,
@@ -534,7 +157,6 @@ export function createSupervisor({
534
157
  })
535
158
  : { type: "preset", preset: "claude_code", append: trailer };
536
159
  };
537
- let supervisor;
538
160
 
539
161
  const ctx = createOrchestrationContext();
540
162
  const messageBus = createMessageBus({
@@ -546,36 +168,29 @@ export function createSupervisor({
546
168
  { name: "agent", role: "agent" },
547
169
  ];
548
170
 
549
- const supervisorServer = createSupervisorToolServer(ctx);
550
- const agentServer = createSupervisedAgentToolServer(ctx);
551
-
552
- const onLine = (line) => supervisor.emitLine(line);
171
+ let supervisor;
172
+ const perRunBudget = maxTurns ?? 200;
553
173
 
554
- // `maxTurns` is the per-runner invocation budget — matches `run` and
555
- // `facilitate` semantics. The outer supervisor↔agent exchange loop is
556
- // bounded separately by `DEFAULT_MAX_EXCHANGES`; when --max-exchanges is
557
- // added it will become a parameter. `maxTurns === 0` propagates through
558
- // to mean unlimited on both axes.
559
- const perInvocationTurns = maxTurns ?? 200;
560
- const exchangeBudget = maxTurns === 0 ? 0 : DEFAULT_MAX_EXCHANGES;
174
+ const agentServer = createSupervisedAgentToolServer(ctx);
175
+ const supervisorServer = createSupervisorToolServer(ctx);
561
176
 
562
177
  const agentRunner = createAgentRunner({
563
178
  cwd: agentCwd,
564
179
  query,
565
180
  output: devNull,
566
181
  model: agentModel ?? model,
567
- maxTurns: perInvocationTurns,
182
+ maxTurns: perRunBudget,
568
183
  allowedTools,
569
- onLine,
184
+ onLine: (line) => supervisor.emitLine("agent", line),
570
185
  settingSources: ["project"],
571
186
  systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
572
187
  mcpServers: { orchestration: agentServer, ...agentMcpServers },
573
188
  redactor,
574
189
  });
575
190
 
576
- // Block the SDK's sub-agent spawn tools on the supervisor: its job is to
577
- // coordinate the agent through the libeval orchestration harness, not to
578
- // fan work out to ad-hoc Claude Code sub-agents. Mirrors the facilitator.
191
+ // Block the SDK's sub-agent spawn tools on the supervisor: it should
192
+ // coordinate the agent through orchestration tools, not fan work out
193
+ // to ad-hoc Claude Code sub-agents.
579
194
  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
580
195
  const disallowedTools = supervisorDisallowedTools
581
196
  ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
@@ -586,7 +201,7 @@ export function createSupervisor({
586
201
  query,
587
202
  output: devNull,
588
203
  model: supervisorModel ?? model,
589
- maxTurns: perInvocationTurns,
204
+ maxTurns: perRunBudget,
590
205
  allowedTools: supervisorAllowedTools ?? [
591
206
  "Bash",
592
207
  "Read",
@@ -596,7 +211,7 @@ export function createSupervisor({
596
211
  "Edit",
597
212
  ],
598
213
  disallowedTools,
599
- onLine,
214
+ onLine: (line) => supervisor.emitLine("supervisor", line),
600
215
  settingSources: ["project"],
601
216
  systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
602
217
  mcpServers: { orchestration: supervisorServer },
@@ -604,12 +219,11 @@ export function createSupervisor({
604
219
  });
605
220
 
606
221
  supervisor = new Supervisor({
607
- agentRunner,
608
222
  supervisorRunner,
223
+ agentRunner,
224
+ messageBus,
609
225
  output,
610
- maxTurns: exchangeBudget,
611
226
  ctx,
612
- messageBus,
613
227
  taskAmend,
614
228
  redactor,
615
229
  });