@forwardimpact/libeval 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/supervisor.js CHANGED
@@ -4,50 +4,37 @@
4
4
  * introduces itself, and delegates work to the agent. The loop then alternates:
5
5
  * agent → supervisor → agent.
6
6
  *
7
+ * Signaling uses orchestration tools (Conclude, Redirect, Ask) via in-process
8
+ * MCP servers. No text-token detection.
9
+ *
7
10
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
8
11
  */
9
12
 
10
- import { PassThrough } from "node:stream";
13
+ import { Writable } from "node:stream";
11
14
  import { createAgentRunner } from "./agent-runner.js";
12
15
  import { TraceCollector } from "./trace-collector.js";
13
-
14
- /**
15
- * Check if the supervisor's response signals evaluation success.
16
- * Matches EVALUATION_COMPLETE anywhere in the text, tolerating markdown
17
- * formatting (e.g. **EVALUATION_COMPLETE**). Uses word boundaries to
18
- * avoid matching inside longer identifiers.
19
- * @param {string} text
20
- * @returns {boolean}
21
- */
22
- export function isComplete(text) {
23
- return /(?:^|[\s*_~`])EVALUATION_COMPLETE(?:[\s*_~`.,!?]|$)/m.test(text);
24
- }
25
-
26
- /**
27
- * Check if the supervisor's response signals a mid-turn intervention.
28
- * Same tolerance rules as isComplete (markdown formatting, word boundaries),
29
- * but matches the EVALUATION_INTERVENTION keyword instead.
30
- * @param {string} text
31
- * @returns {boolean}
32
- */
33
- export function isIntervention(text) {
34
- return /(?:^|[\s*_~`])EVALUATION_INTERVENTION(?:[\s*_~`.,!?]|$)/m.test(text);
35
- }
16
+ import { SequenceCounter } from "./sequence-counter.js";
17
+ import {
18
+ createOrchestrationContext,
19
+ createSupervisorToolServer,
20
+ createSupervisedAgentToolServer,
21
+ } from "./orchestration-toolkit.js";
36
22
 
37
23
  /** System prompt appended for the supervisor runner in supervise mode. */
38
24
  export const SUPERVISOR_SYSTEM_PROMPT =
39
25
  "You relay messages to one persistent agent session — your only output " +
40
26
  "channel. Spawning sub-agents or restarting the agent is blocked. Do not " +
41
- "do the work yourself. Reply briefly to let the agent continue, write " +
42
- "EVALUATION_INTERVENTION + instructions to interrupt mid-turn, or " +
43
- "EVALUATION_COMPLETE when done. Only your final message each turn is " +
44
- "relayed.";
27
+ "do the work yourself. Reply briefly to let the agent continue. Use your " +
28
+ "Redirect tool to interrupt and correct the agent. Use your Conclude tool " +
29
+ "with a summary when the task is fully done. Only your final message each " +
30
+ "turn is relayed.";
45
31
 
46
32
  /** System prompt appended for the agent runner in supervise mode. */
47
33
  export const AGENT_SYSTEM_PROMPT =
48
34
  "A supervisor watches your work and may interrupt with new instructions " +
49
35
  "mid-task. Treat any new prompt as authoritative and adjust course. " +
50
- "When uncertain, stop and ask a clarifying question.";
36
+ "When uncertain, use your Ask tool to ask the supervisor a clarifying " +
37
+ "question — you will receive a direct answer.";
51
38
 
52
39
  /**
53
40
  * Maximum number of mid-turn interventions allowed within a single agent turn.
@@ -64,8 +51,9 @@ export class Supervisor {
64
51
  * @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner - Runs the supervisor sessions
65
52
  * @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
66
53
  * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
54
+ * @param {object} [deps.ctx] - Orchestration context (injected by factory)
67
55
  */
68
- constructor({ agentRunner, supervisorRunner, output, maxTurns }) {
56
+ constructor({ agentRunner, supervisorRunner, output, maxTurns, ctx }) {
69
57
  if (!agentRunner) throw new Error("agentRunner is required");
70
58
  if (!supervisorRunner) throw new Error("supervisorRunner is required");
71
59
  if (!output) throw new Error("output is required");
@@ -73,51 +61,22 @@ export class Supervisor {
73
61
  this.supervisorRunner = supervisorRunner;
74
62
  this.output = output;
75
63
  this.maxTurns = maxTurns ?? 100;
64
+ this.ctx = ctx ?? createOrchestrationContext();
65
+ this.counter = new SequenceCounter();
76
66
  /** @type {"agent"|"supervisor"} */
77
67
  this.currentSource = "agent";
78
68
  /** @type {number} */
79
69
  this.currentTurn = 0;
80
- /**
81
- * Set to true when any supervisor message contains the success signal.
82
- * The SDK result text only reflects the last assistant message, so when
83
- * the supervisor writes EVALUATION_COMPLETE in an early message and
84
- * then continues with follow-up work, the result text won't contain it.
85
- * This flag captures the signal from the full message stream.
86
- * @type {boolean}
87
- */
88
- this.completeSignalSeen = false;
89
- /**
90
- * Set to true when any supervisor message contains EVALUATION_INTERVENTION.
91
- * Mirrors completeSignalSeen — populated by emitLine when a supervisor
92
- * assistant text block matches isIntervention(...). The mid-turn loop
93
- * reads this flag after each supervisor invocation to decide whether to
94
- * abort the agent's in-flight SDK session.
95
- * @type {boolean}
96
- */
97
- this.interventionSignalSeen = false;
98
- /**
99
- * The most recent supervisor SDK result captured inside the mid-turn
100
- * onBatch callback. The outer loop reads this after the agent aborts to
101
- * build the next relay prompt without re-running the supervisor.
102
- * @type {{success: boolean, text: string}|null}
103
- */
104
- this.lastSupervisorResult = null;
105
70
  }
106
71
 
107
72
  /**
108
73
  * Run the supervisor ↔ agent relay loop.
109
- * The supervisor receives the task first, introduces itself, and delegates
110
- * work to the agent. The loop then alternates: agent → supervisor → agent.
111
74
  * @param {string} task - The initial task for the supervisor
112
75
  * @returns {Promise<{success: boolean, turns: number}>}
113
76
  */
114
77
  async run(task) {
115
- // Turn 0: Supervisor receives the task and introduces it to the agent
116
78
  this.currentSource = "supervisor";
117
79
  this.currentTurn = 0;
118
- this.completeSignalSeen = false;
119
- this.interventionSignalSeen = false;
120
- this.lastSupervisorResult = null;
121
80
  let supervisorResult = await this.supervisorRunner.run(task);
122
81
 
123
82
  if (supervisorResult.error) {
@@ -125,38 +84,25 @@ export class Supervisor {
125
84
  return { success: false, turns: 0 };
126
85
  }
127
86
 
128
- // Check for the success signal in either the SDK result text or the
129
- // streamed message content. The SDK result text only reflects the last
130
- // assistant message, so when the supervisor writes EVALUATION_COMPLETE
131
- // early and then continues (e.g. filing issues), we must also check the
132
- // flag set by emitLine during streaming.
133
- if (this.completeSignalSeen || isComplete(supervisorResult.text)) {
134
- this.emitSummary({ success: true, turns: 0 });
87
+ if (this.ctx.concluded) {
88
+ this.emitSummary({ success: true, turns: 0, summary: this.ctx.summary });
135
89
  return { success: true, turns: 0 };
136
90
  }
137
91
 
92
+ let pendingRelay = null;
138
93
  const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
139
94
  for (let turn = 1; turn <= turnLimit; turn++) {
140
- // Only the supervisor's final message is relayed to the agent.
141
- // Extract the last assistant text block from the buffer to avoid
142
- // leaking intermediate reasoning (research, tool calls, notes).
143
- const relay = this.extractLastText(
144
- this.supervisorRunner,
145
- supervisorResult.text,
146
- );
95
+ const relay =
96
+ pendingRelay ??
97
+ this.extractLastText(this.supervisorRunner, supervisorResult.text);
147
98
 
148
- // Drive the agent through interventions until its SDK session ends
149
- // naturally, the supervisor signals completion mid-turn, or the
150
- // per-turn intervention budget is exhausted.
151
99
  const turnOutcome = await this.#runAgentTurn(turn, relay);
152
100
  if (turnOutcome.exit) return turnOutcome.exit;
153
101
 
154
- // End-of-turn review (existing behaviour). Returns either an exit
155
- // outcome (error or completion) or the supervisor result for the
156
- // next turn's relay.
157
102
  const reviewOutcome = await this.#endOfTurnReview(turn);
158
103
  if (reviewOutcome.exit) return reviewOutcome.exit;
159
104
  supervisorResult = reviewOutcome.supervisorResult;
105
+ pendingRelay = reviewOutcome.relay ?? null;
160
106
  }
161
107
 
162
108
  this.emitSummary({ success: false, turns: this.maxTurns });
@@ -165,9 +111,8 @@ export class Supervisor {
165
111
 
166
112
  /**
167
113
  * Drive the agent through one turn, allowing the supervisor to interrupt
168
- * mid-stream via EVALUATION_INTERVENTION. Returns either an `exit` outcome
169
- * (the loop should return immediately) or `{exit: null}` (proceed to the
170
- * end-of-turn review).
114
+ * via the Redirect tool. Returns either an `exit` outcome (the loop should
115
+ * return immediately) or `{exit: null}` (proceed to end-of-turn review).
171
116
  * @param {number} turn
172
117
  * @param {string} initialRelay
173
118
  * @returns {Promise<{exit: {success: boolean, turns: number}|null}>}
@@ -176,11 +121,6 @@ export class Supervisor {
176
121
  let relay = initialRelay;
177
122
  let interventions = 0;
178
123
 
179
- // Wire the mid-turn observation hook on the agent runner. The bound
180
- // callback captures `turn` so the inner loop's multiple resume(...)
181
- // calls all see the same turn id. The supervisorRunner does NOT get
182
- // an onBatch callback — it only fires onLine, which is enough for
183
- // emitLine to detect EVALUATION_COMPLETE / EVALUATION_INTERVENTION.
184
124
  this.agentRunner.onBatch = (batchLines, ctx) =>
185
125
  this.#midTurnReview(turn, batchLines, ctx);
186
126
 
@@ -198,124 +138,109 @@ export class Supervisor {
198
138
  return { exit: { success: false, turns: turn } };
199
139
  }
200
140
 
201
- // Mid-turn EVALUATION_COMPLETE: end the session immediately.
202
- if (this.completeSignalSeen) {
203
- this.emitSummary({ success: true, turns: turn });
141
+ if (this.ctx.concluded) {
142
+ this.emitSummary({
143
+ success: true,
144
+ turns: turn,
145
+ summary: this.ctx.summary,
146
+ });
204
147
  return { exit: { success: true, turns: turn } };
205
148
  }
206
149
 
207
- if (agentResult.aborted && this.interventionSignalSeen) {
150
+ if (agentResult.aborted && this.ctx.redirect) {
208
151
  interventions++;
152
+ const redirect = this.ctx.redirect;
153
+ this.ctx.redirect = null;
209
154
  if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
210
155
  this.emitOrchestratorEvent({ type: "intervention_limit", turn });
211
156
  return { exit: null };
212
157
  }
213
- relay = this.extractLastText(
214
- this.supervisorRunner,
215
- this.lastSupervisorResult?.text ?? "",
216
- );
158
+ relay = redirect.message;
217
159
  this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
218
160
  continue;
219
161
  }
220
162
 
221
- // Agent's SDK session finished naturally — proceed to end-of-turn.
222
163
  return { exit: null };
223
164
  }
224
165
  } finally {
225
- // Detach onBatch before the end-of-turn review so the supervisor's
226
- // own SDK session does not trigger nested onBatch fires.
227
166
  this.agentRunner.onBatch = null;
228
167
  }
229
168
  }
230
169
 
231
170
  /**
232
171
  * Mid-turn supervisor review fired from inside the agent's onBatch hook.
233
- * Emits a `mid_turn_review` orchestrator marker, runs the supervisor's
234
- * LLM against the batch, and aborts the agent if the supervisor signals
235
- * EVALUATION_INTERVENTION or EVALUATION_COMPLETE.
172
+ * Runs the supervisor's LLM against the batch and aborts the agent if
173
+ * the supervisor calls Redirect or Conclude.
236
174
  * @param {number} turn
237
175
  * @param {string[]} batchLines
238
176
  * @param {{abort: () => void}} ctx
239
177
  */
240
178
  async #midTurnReview(turn, batchLines, { abort }) {
241
179
  const batchTranscript = this.renderBatch(batchLines);
242
-
243
- // Order matters: emit the orchestrator marker BEFORE the supervisor
244
- // LLM call so the trace reads
245
- // agent line → orchestrator:mid_turn_review
246
- // → supervisor lines (tagged turn:N)
247
- // → orchestrator:intervention_requested|complete_requested
248
180
  this.emitOrchestratorEvent({ type: "mid_turn_review", turn });
249
181
 
250
- // currentTurn stays = turn so mid-turn supervisor lines share the
251
- // agent's turn id. They are distinguishable from end-of-turn reviews
252
- // by the surrounding orchestrator events emitted around this call.
253
182
  this.currentSource = "supervisor";
254
- this.completeSignalSeen = false;
255
- this.interventionSignalSeen = false;
183
+ this.ctx.redirect = null;
256
184
 
257
- this.lastSupervisorResult = await this.supervisorRunner.resume(
185
+ await this.supervisorRunner.resume(
258
186
  `The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
259
- `Respond with a brief acknowledgement to let it continue, or write ` +
260
- `EVALUATION_INTERVENTION followed by a corrective message to stop ` +
261
- `and relay a new instruction. Write EVALUATION_COMPLETE only when ` +
262
- `the task is fully done.`,
187
+ `Review and use your tools if action is needed.`,
263
188
  );
264
189
  this.currentSource = "agent";
265
190
 
266
- if (this.interventionSignalSeen) {
191
+ if (this.ctx.redirect) {
267
192
  this.emitOrchestratorEvent({ type: "intervention_requested", turn });
268
193
  abort();
269
194
  return;
270
195
  }
271
- if (this.completeSignalSeen) {
196
+ if (this.ctx.concluded) {
272
197
  this.emitOrchestratorEvent({ type: "complete_requested", turn });
273
198
  abort();
274
199
  }
275
- // Non-intervention: do nothing; the agent loop pulls the next line.
276
200
  }
277
201
 
278
202
  /**
279
- * End-of-turn supervisor review (existing behaviour). Returns either an
280
- * exit outcome (error or completion) or the supervisor result so the
281
- * outer loop can build the next turn's relay.
203
+ * End-of-turn supervisor review. Returns either an exit outcome (error or
204
+ * completion) or the supervisor result so the outer loop can build the
205
+ * next turn's relay.
282
206
  * @param {number} turn
283
- * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object}>}
207
+ * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
284
208
  */
285
209
  async #endOfTurnReview(turn) {
286
- // Build the full agent transcript from buffered NDJSON events so the
287
- // supervisor sees tool calls and reasoning, not just the SDK result.
288
210
  const agentTranscript = this.extractTranscript(this.agentRunner);
289
-
290
- const supervisorPrompt =
291
- `The agent reported:\n\n${agentTranscript}\n\n` +
292
- `Review the agent's work and decide how to proceed.`;
293
-
294
211
  this.currentSource = "supervisor";
295
212
  this.currentTurn = turn;
296
- this.completeSignalSeen = false;
297
- this.interventionSignalSeen = false;
298
- const supervisorResult =
299
- await this.supervisorRunner.resume(supervisorPrompt);
213
+ this.ctx.redirect = null;
214
+
215
+ const supervisorResult = await this.supervisorRunner.resume(
216
+ `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`,
217
+ );
300
218
 
301
219
  if (supervisorResult.error) {
302
220
  this.emitSummary({ success: false, turns: turn });
303
221
  return { exit: { success: false, turns: turn } };
304
222
  }
305
223
 
306
- // The supervisor's turn is fully complete — check for success signal
307
- // in either the SDK result text or streamed messages.
308
- if (this.completeSignalSeen || isComplete(supervisorResult.text)) {
309
- this.emitSummary({ success: true, turns: turn });
224
+ if (this.ctx.concluded) {
225
+ this.emitSummary({
226
+ success: true,
227
+ turns: turn,
228
+ summary: this.ctx.summary,
229
+ });
310
230
  return { exit: { success: true, turns: turn } };
311
231
  }
312
232
 
233
+ if (this.ctx.redirect) {
234
+ const redirect = this.ctx.redirect;
235
+ this.ctx.redirect = null;
236
+ return { exit: null, supervisorResult, relay: redirect.message };
237
+ }
238
+
313
239
  return { exit: null, supervisorResult };
314
240
  }
315
241
 
316
242
  /**
317
243
  * Extract a human-readable transcript from an AgentRunner's buffered output.
318
- * Drains the buffer and replays events through a TraceCollector.
319
244
  * @param {import("./agent-runner.js").AgentRunner} runner
320
245
  * @returns {string}
321
246
  */
@@ -330,11 +255,8 @@ export class Supervisor {
330
255
 
331
256
  /**
332
257
  * Extract only the last assistant text block from an AgentRunner's buffer.
333
- * Scans buffered NDJSON events in reverse to find the final assistant message
334
- * with a text content block. This prevents intermediate reasoning (tool calls,
335
- * research notes) from leaking to the agent.
336
258
  * @param {import("./agent-runner.js").AgentRunner} runner
337
- * @param {string} fallback - Fallback text if no assistant text block is found
259
+ * @param {string} fallback
338
260
  * @returns {string}
339
261
  */
340
262
  extractLastText(runner, fallback) {
@@ -354,43 +276,21 @@ export class Supervisor {
354
276
  }
355
277
 
356
278
  /**
357
- * Emit a single NDJSON line tagged with the current source and turn.
358
- * Called in real-time via the AgentRunner onLine callback.
359
- *
360
- * When the current source is the supervisor, also scans assistant text
361
- * content for the EVALUATION_COMPLETE and EVALUATION_INTERVENTION signals,
362
- * setting completeSignalSeen / interventionSignalSeen respectively.
279
+ * Emit a single NDJSON line tagged with the current source and seq.
363
280
  * @param {string} line - Raw NDJSON line from the runner
364
281
  */
365
282
  emitLine(line) {
366
283
  const event = JSON.parse(line);
367
284
  const tagged = {
368
285
  source: this.currentSource,
369
- turn: this.currentTurn,
286
+ seq: this.counter.next(),
370
287
  event,
371
288
  };
372
289
  this.output.write(JSON.stringify(tagged) + "\n");
373
-
374
- // Scan supervisor assistant messages for the signals in real time.
375
- // The SDK result text only reflects the final assistant message, but the
376
- // supervisor may write EVALUATION_COMPLETE / EVALUATION_INTERVENTION in
377
- // an earlier message and then continue with follow-up tool calls.
378
- if (this.currentSource === "supervisor" && event.type === "assistant") {
379
- const content = event.message?.content ?? event.content ?? [];
380
- if (Array.isArray(content)) {
381
- for (const block of content) {
382
- if (block.type !== "text" || !block.text) continue;
383
- if (isComplete(block.text)) this.completeSignalSeen = true;
384
- if (isIntervention(block.text)) this.interventionSignalSeen = true;
385
- }
386
- }
387
- }
388
290
  }
389
291
 
390
292
  /**
391
- * Render a batch of buffered NDJSON lines as human-readable text for the
392
- * mid-turn supervisor prompt. Reuses the TraceCollector pipeline so the
393
- * supervisor sees tool calls and reasoning, not just raw events.
293
+ * Render a batch of buffered NDJSON lines as human-readable text.
394
294
  * @param {string[]} batchLines
395
295
  * @returns {string}
396
296
  */
@@ -404,53 +304,59 @@ export class Supervisor {
404
304
  }
405
305
 
406
306
  /**
407
- * Emit an orchestrator-source NDJSON line. Used by the mid-turn loop to
408
- * mark mid_turn_review / intervention_requested / intervention_relayed /
409
- * intervention_limit / complete_requested boundaries in the trace, so the
410
- * improvement coach can distinguish mid-turn supervisor activity from
411
- * end-of-turn reviews. Additive to existing trace shape — the parser
412
- * already reads `source` and ignores unknown event types.
307
+ * Emit an orchestrator-source NDJSON line.
413
308
  * @param {{type: string, turn?: number}} event
414
309
  */
415
310
  emitOrchestratorEvent(event) {
416
311
  this.output.write(
417
312
  JSON.stringify({
418
313
  source: "orchestrator",
419
- turn: this.currentTurn,
314
+ seq: this.counter.next(),
420
315
  event,
421
316
  }) + "\n",
422
317
  );
423
318
  }
424
319
 
425
320
  /**
426
- * Emit a final orchestrator summary line.
427
- * @param {{success: boolean, turns: number}} result
321
+ * Emit a final orchestrator summary line, wrapped in the universal envelope.
322
+ * @param {{success: boolean, turns: number, summary?: string}} result
428
323
  */
429
324
  emitSummary(result) {
430
- const summary = {
431
- source: "orchestrator",
432
- type: "summary",
433
- success: result.success,
434
- turns: result.turns,
435
- };
436
- this.output.write(JSON.stringify(summary) + "\n");
325
+ this.output.write(
326
+ JSON.stringify({
327
+ source: "orchestrator",
328
+ seq: this.counter.next(),
329
+ event: {
330
+ type: "summary",
331
+ success: result.success,
332
+ turns: result.turns,
333
+ ...(result.summary && { summary: result.summary }),
334
+ },
335
+ }) + "\n",
336
+ );
437
337
  }
438
338
  }
439
339
 
340
+ const devNull = new Writable({
341
+ write(_chunk, _enc, cb) {
342
+ cb();
343
+ },
344
+ });
345
+
440
346
  /**
441
347
  * Factory function — wires both AgentRunners with their respective configs.
442
348
  * @param {object} deps
443
- * @param {string} deps.supervisorCwd - Supervisor working directory
444
- * @param {string} deps.agentCwd - Agent working directory
445
- * @param {function} deps.query - SDK query function
446
- * @param {import("stream").Writable} deps.output - Final output stream
447
- * @param {string} [deps.model] - Claude model identifier
448
- * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
449
- * @param {string[]} [deps.allowedTools] - Tools the agent may use
450
- * @param {string[]} [deps.supervisorAllowedTools] - Tools the supervisor may use (default: Bash, Read, Glob, Grep, Write, Edit)
451
- * @param {string[]} [deps.supervisorDisallowedTools] - Tools to explicitly block from the supervisor
452
- * @param {string} [deps.supervisorProfile] - Supervisor agent profile name
453
- * @param {string} [deps.agentProfile] - Agent profile name
349
+ * @param {string} deps.supervisorCwd
350
+ * @param {string} deps.agentCwd
351
+ * @param {function} deps.query
352
+ * @param {import("stream").Writable} deps.output
353
+ * @param {string} [deps.model]
354
+ * @param {number} [deps.maxTurns]
355
+ * @param {string[]} [deps.allowedTools]
356
+ * @param {string[]} [deps.supervisorAllowedTools]
357
+ * @param {string[]} [deps.supervisorDisallowedTools]
358
+ * @param {string} [deps.supervisorProfile]
359
+ * @param {string} [deps.agentProfile]
454
360
  * @returns {Supervisor}
455
361
  */
456
362
  export function createSupervisor({
@@ -466,15 +372,31 @@ export function createSupervisor({
466
372
  supervisorProfile,
467
373
  agentProfile,
468
374
  }) {
469
- // Forward-reference: onLine captures `supervisor` before construction completes.
470
- // This is safe because onLine is only called during run(), after construction.
471
375
  let supervisor;
376
+ let supervisorRunner;
377
+
378
+ const ctx = createOrchestrationContext();
379
+
380
+ const supervisorServer = createSupervisorToolServer(ctx);
381
+ const agentServer = createSupervisedAgentToolServer(ctx, {
382
+ onAsk: async (question) => {
383
+ supervisor.currentSource = "supervisor";
384
+ supervisor.emitOrchestratorEvent({ type: "ask_received" });
385
+ await supervisorRunner.resume(
386
+ `The agent asks: "${question}"\n\nAnswer the question directly.`,
387
+ );
388
+ supervisor.currentSource = "agent";
389
+ supervisor.emitOrchestratorEvent({ type: "ask_answered" });
390
+ return supervisor.extractLastText(supervisorRunner, "No answer.");
391
+ },
392
+ });
393
+
472
394
  const onLine = (line) => supervisor.emitLine(line);
473
395
 
474
396
  const agentRunner = createAgentRunner({
475
397
  cwd: agentCwd,
476
398
  query,
477
- output: new PassThrough(),
399
+ output: devNull,
478
400
  model,
479
401
  maxTurns: 50,
480
402
  allowedTools,
@@ -486,24 +408,18 @@ export function createSupervisor({
486
408
  preset: "claude_code",
487
409
  append: AGENT_SYSTEM_PROMPT,
488
410
  },
411
+ mcpServers: { orchestration: agentServer },
489
412
  });
490
413
 
491
- // Block every sub-agent spawning tool so the supervisor cannot bypass the
492
- // relay loop. The current Claude Agent SDK exposes the spawn tool to the
493
- // model as `Agent`; older versions called it `Task`. Both are blocked
494
- // (along with TaskOutput/TaskStop) so the supervisor sees no spawn tool
495
- // regardless of which SDK version is installed. Letting the supervisor
496
- // spawn its own sub-agent would bypass the relay and produce an empty
497
- // agent trace, which is the failure mode that motivated this default.
498
414
  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
499
415
  const disallowedTools = supervisorDisallowedTools
500
416
  ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
501
417
  : defaultDisallowed;
502
418
 
503
- const supervisorRunner = createAgentRunner({
419
+ supervisorRunner = createAgentRunner({
504
420
  cwd: supervisorCwd,
505
421
  query,
506
- output: new PassThrough(),
422
+ output: devNull,
507
423
  model,
508
424
  maxTurns: 20,
509
425
  allowedTools: supervisorAllowedTools ?? [
@@ -523,6 +439,7 @@ export function createSupervisor({
523
439
  preset: "claude_code",
524
440
  append: SUPERVISOR_SYSTEM_PROMPT,
525
441
  },
442
+ mcpServers: { orchestration: supervisorServer },
526
443
  });
527
444
 
528
445
  supervisor = new Supervisor({
@@ -530,6 +447,7 @@ export function createSupervisor({
530
447
  supervisorRunner,
531
448
  output,
532
449
  maxTurns,
450
+ ctx,
533
451
  });
534
452
  return supervisor;
535
453
  }
package/src/tee-writer.js CHANGED
@@ -3,9 +3,9 @@
3
3
  * simultaneously streaming human-readable text to a separate stream (e.g.
4
4
  * process.stdout).
5
5
  *
6
- * Supports two modes:
7
- * - "raw" (default): expects standard stream-json events from AgentRunner
8
- * - "supervised": expects tagged events {source, turn, event} from Supervisor
6
+ * All modes emit the same { source, seq, event } envelope. The `mode`
7
+ * parameter controls display formatting: multi-participant modes show
8
+ * source labels on content lines.
9
9
  *
10
10
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
11
11
  */
@@ -18,7 +18,7 @@ export class TeeWriter extends Writable {
18
18
  * @param {object} deps
19
19
  * @param {import("stream").Writable} deps.fileStream - Stream to write raw NDJSON to
20
20
  * @param {import("stream").Writable} deps.textStream - Stream to write human-readable text to
21
- * @param {"raw"|"supervised"} [deps.mode] - Event format: "raw" or "supervised" (default: "raw")
21
+ * @param {"raw"|"supervised"} [deps.mode] - Display mode: "raw" (no source labels) or "supervised" (source labels) (default: "raw")
22
22
  */
23
23
  constructor({ fileStream, textStream, mode }) {
24
24
  super();
@@ -72,23 +72,10 @@ export class TeeWriter extends Writable {
72
72
  }
73
73
 
74
74
  /**
75
- * Process a single NDJSON line — feed to collector and flush text.
75
+ * Process a single NDJSON line — unified envelope handling for all modes.
76
76
  * @param {string} line
77
77
  */
78
78
  processLine(line) {
79
- if (this.mode === "supervised") {
80
- this.processSupervisedLine(line);
81
- } else {
82
- this.collector.addLine(line);
83
- this.flushTurns();
84
- }
85
- }
86
-
87
- /**
88
- * Handle a tagged supervisor line: unwrap event, show source labels.
89
- * @param {string} line
90
- */
91
- processSupervisedLine(line) {
92
79
  let parsed;
93
80
  try {
94
81
  parsed = JSON.parse(line);
@@ -96,21 +83,28 @@ export class TeeWriter extends Writable {
96
83
  return;
97
84
  }
98
85
 
99
- if (parsed.source === "orchestrator" && parsed.type === "summary") {
100
- const status = parsed.success ? "completed" : "incomplete";
101
- this.textStream.write(
102
- `\n--- Evaluation ${status} after ${parsed.turns} turns ---\n`,
103
- );
104
- return;
105
- }
106
-
86
+ // Universal envelope: { source, seq, event }
107
87
  if (parsed.event) {
88
+ // Orchestrator summary event
89
+ if (parsed.source === "orchestrator" && parsed.event.type === "summary") {
90
+ const status = parsed.event.success ? "completed" : "incomplete";
91
+ this.textStream.write(
92
+ `\n--- Evaluation ${status} after ${parsed.event.turns} turns ---\n`,
93
+ );
94
+ return;
95
+ }
96
+
108
97
  if (parsed.source && parsed.source !== this.lastSource) {
109
98
  this.lastSource = parsed.source;
110
99
  }
111
100
  this.collector.addLine(JSON.stringify(parsed.event));
112
101
  this.flushTurns();
102
+ return;
113
103
  }
104
+
105
+ // Bare event (run mode pre-migration or direct feed)
106
+ this.collector.addLine(line);
107
+ this.flushTurns();
114
108
  }
115
109
 
116
110
  /**