@forwardimpact/libeval 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/supervisor.js CHANGED
@@ -4,8 +4,11 @@
4
4
  * introduces itself, and delegates work to the agent. The loop then alternates:
5
5
  * agent → supervisor → agent.
6
6
  *
7
- * Signaling uses orchestration tools (Conclude, Redirect, Ask) via in-process
8
- * MCP servers. No text-token detection.
7
+ * Signaling uses orchestration tools (Ask / Answer / Announce / Redirect /
8
+ * Conclude) via in-process MCP servers. The Ask/Answer contract is enforced
9
+ * at turn boundaries: an unanswered Ask triggers one synthetic reminder and
10
+ * then a `protocol_violation` trace event plus a null-answer injection so the
11
+ * session advances without silent deadlock.
9
12
  *
10
13
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
11
14
  */
@@ -16,27 +19,30 @@ import { createAgentRunner } from "./agent-runner.js";
16
19
  import { composeProfilePrompt } from "./profile-prompt.js";
17
20
  import { TraceCollector } from "./trace-collector.js";
18
21
  import { SequenceCounter } from "./sequence-counter.js";
22
+ import { createMessageBus } from "./message-bus.js";
19
23
  import {
20
24
  createOrchestrationContext,
21
25
  createSupervisorToolServer,
22
26
  createSupervisedAgentToolServer,
27
+ checkPendingAsk,
23
28
  } from "./orchestration-toolkit.js";
29
+ import { formatMessages } from "./orchestrator-helpers.js";
24
30
 
25
31
  /** System prompt appended for the supervisor runner in supervise mode. */
26
32
  export const SUPERVISOR_SYSTEM_PROMPT =
27
- "You relay messages to one persistent agent session — your only output " +
28
- "channel. Spawning sub-agents or restarting the agent is blocked. Do not " +
29
- "do the work yourself. Reply briefly to let the agent continue. Use your " +
30
- "Redirect tool to interrupt and correct the agent. Use your Conclude tool " +
31
- "with a summary when the task is fully done. Only your final message each " +
32
- "turn is relayed.";
33
+ "You supervise one agent. " +
34
+ "Ask sends a question to the agent; the reply arrives via Answer. " +
35
+ "Answer replies to an ask the agent addressed to you. " +
36
+ "Announce sends a message with no reply obligation. " +
37
+ "Redirect interrupts the agent with replacement instructions. " +
38
+ "Conclude ends the session with a summary.";
33
39
 
34
40
  /** System prompt appended for the agent runner in supervise mode. */
35
41
  export const AGENT_SYSTEM_PROMPT =
36
- "A supervisor watches your work and may interrupt with new instructions " +
37
- "mid-task. Treat any new prompt as authoritative and adjust course. " +
38
- "When uncertain, use your Ask tool to ask the supervisor a clarifying " +
39
- "question you will receive a direct answer.";
42
+ "A supervisor watches your work. " +
43
+ "Answer replies to an ask addressed to you. " +
44
+ "Ask sends a question to the supervisor; the reply arrives via Answer. " +
45
+ "Announce sends a message with no reply expected.";
40
46
 
41
47
  /**
42
48
  * Maximum number of mid-turn interventions allowed within a single agent turn.
@@ -54,8 +60,18 @@ export class Supervisor {
54
60
  * @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
55
61
  * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
56
62
  * @param {object} [deps.ctx] - Orchestration context (injected by factory)
63
+ * @param {import("./message-bus.js").MessageBus} [deps.messageBus] - Two-participant message bus ("supervisor" / "agent")
64
+ * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
57
65
  */
58
- constructor({ agentRunner, supervisorRunner, output, maxTurns, ctx }) {
66
+ constructor({
67
+ agentRunner,
68
+ supervisorRunner,
69
+ output,
70
+ maxTurns,
71
+ ctx,
72
+ messageBus,
73
+ taskAmend,
74
+ }) {
59
75
  if (!agentRunner) throw new Error("agentRunner is required");
60
76
  if (!supervisorRunner) throw new Error("supervisorRunner is required");
61
77
  if (!output) throw new Error("output is required");
@@ -64,7 +80,11 @@ export class Supervisor {
64
80
  this.output = output;
65
81
  this.maxTurns = maxTurns ?? 100;
66
82
  this.ctx = ctx ?? createOrchestrationContext();
83
+ this.messageBus =
84
+ messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
85
+ if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
67
86
  this.counter = new SequenceCounter();
87
+ this.taskAmend = taskAmend ?? null;
68
88
  /** @type {"agent"|"supervisor"} */
69
89
  this.currentSource = "agent";
70
90
  /** @type {number} */
@@ -77,9 +97,10 @@ export class Supervisor {
77
97
  * @returns {Promise<{success: boolean, turns: number}>}
78
98
  */
79
99
  async run(task) {
100
+ const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
80
101
  this.currentSource = "supervisor";
81
102
  this.currentTurn = 0;
82
- let supervisorResult = await this.supervisorRunner.run(task);
103
+ let supervisorResult = await this.supervisorRunner.run(initialTask);
83
104
 
84
105
  if (supervisorResult.error) {
85
106
  this.emitSummary({ success: false, turns: 0 });
@@ -95,8 +116,7 @@ export class Supervisor {
95
116
  const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
96
117
  for (let turn = 1; turn <= turnLimit; turn++) {
97
118
  const relay =
98
- pendingRelay ??
99
- this.extractLastText(this.supervisorRunner, supervisorResult.text);
119
+ pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
100
120
 
101
121
  const turnOutcome = await this.#runAgentTurn(turn, relay);
102
122
  if (turnOutcome.exit) return turnOutcome.exit;
@@ -111,6 +131,22 @@ export class Supervisor {
111
131
  return { success: false, turns: this.maxTurns };
112
132
  }
113
133
 
134
+ #buildInitialRelay(fallbackText) {
135
+ const queued = this.messageBus.drain("agent");
136
+ if (queued.length > 0) return formatMessages(queued);
137
+ return this.extractLastText(this.supervisorRunner, fallbackText);
138
+ }
139
+
140
+ #checkAsk(name) {
141
+ return checkPendingAsk({
142
+ ctx: this.ctx,
143
+ messageBus: this.messageBus,
144
+ addresseeName: name,
145
+ mode: "supervised",
146
+ emitViolation: (e) => this.emitOrchestratorEvent(e),
147
+ });
148
+ }
149
+
114
150
  /**
115
151
  * Drive the agent through one turn, allowing the supervisor to interrupt
116
152
  * via the Redirect tool. Returns either an `exit` outcome (the loop should
@@ -122,6 +158,7 @@ export class Supervisor {
122
158
  async #runAgentTurn(turn, initialRelay) {
123
159
  let relay = initialRelay;
124
160
  let interventions = 0;
161
+ let agentCalled = this.agentRunner.sessionId !== null;
125
162
 
126
163
  this.agentRunner.onBatch = (batchLines, ctx) =>
127
164
  this.#midTurnReview(turn, batchLines, ctx);
@@ -130,10 +167,10 @@ export class Supervisor {
130
167
  while (true) {
131
168
  this.currentSource = "agent";
132
169
  this.currentTurn = turn;
133
- const isFirstAgentCall = turn === 1 && interventions === 0;
134
- const agentResult = isFirstAgentCall
135
- ? await this.agentRunner.run(relay)
136
- : await this.agentRunner.resume(relay);
170
+ const agentResult = agentCalled
171
+ ? await this.agentRunner.resume(relay)
172
+ : await this.agentRunner.run(relay);
173
+ agentCalled = true;
137
174
 
138
175
  if (agentResult.error && !agentResult.aborted) {
139
176
  this.emitSummary({ success: false, turns: turn });
@@ -162,6 +199,14 @@ export class Supervisor {
162
199
  continue;
163
200
  }
164
201
 
202
+ if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
203
+ const reminders = this.messageBus.drain("agent");
204
+ if (reminders.length > 0) {
205
+ relay = formatMessages(reminders);
206
+ continue;
207
+ }
208
+ }
209
+
165
210
  return { exit: null };
166
211
  }
167
212
  } finally {
@@ -209,14 +254,20 @@ export class Supervisor {
209
254
  * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
210
255
  */
211
256
  async #endOfTurnReview(turn) {
257
+ const queuedForSupervisor = this.messageBus.drain("supervisor");
212
258
  const agentTranscript = this.extractTranscript(this.agentRunner);
213
259
  this.currentSource = "supervisor";
214
260
  this.currentTurn = turn;
215
261
  this.ctx.redirect = null;
216
262
 
217
- const supervisorResult = await this.supervisorRunner.resume(
218
- `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`,
219
- );
263
+ const reviewPrompt =
264
+ queuedForSupervisor.length > 0
265
+ ? `The agent reported:\n\n${agentTranscript}\n\n` +
266
+ `Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
267
+ `Review and decide how to proceed.`
268
+ : `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
269
+
270
+ let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
220
271
 
221
272
  if (supervisorResult.error) {
222
273
  this.emitSummary({ success: false, turns: turn });
@@ -232,13 +283,34 @@ export class Supervisor {
232
283
  return { exit: { success: true, turns: turn } };
233
284
  }
234
285
 
286
+ if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
287
+ const reminders = this.messageBus.drain("supervisor");
288
+ if (reminders.length > 0) {
289
+ supervisorResult = await this.supervisorRunner.resume(
290
+ formatMessages(reminders),
291
+ );
292
+ if (this.ctx.concluded) {
293
+ this.emitSummary({
294
+ success: true,
295
+ turns: turn,
296
+ summary: this.ctx.summary,
297
+ });
298
+ return { exit: { success: true, turns: turn } };
299
+ }
300
+ this.#checkAsk("supervisor");
301
+ }
302
+ }
303
+
235
304
  if (this.ctx.redirect) {
236
305
  const redirect = this.ctx.redirect;
237
306
  this.ctx.redirect = null;
238
307
  return { exit: null, supervisorResult, relay: redirect.message };
239
308
  }
240
309
 
241
- return { exit: null, supervisorResult };
310
+ const queuedForAgent = this.messageBus.drain("agent");
311
+ const relay =
312
+ queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
313
+ return { exit: null, supervisorResult, relay };
242
314
  }
243
315
 
244
316
  /**
@@ -360,6 +432,7 @@ const devNull = new Writable({
360
432
  * @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
361
433
  * @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
362
434
  * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
435
+ * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
363
436
  * @returns {Supervisor}
364
437
  */
365
438
  export function createSupervisor({
@@ -375,6 +448,7 @@ export function createSupervisor({
375
448
  supervisorProfile,
376
449
  agentProfile,
377
450
  profilesDir,
451
+ taskAmend,
378
452
  }) {
379
453
  const resolvedProfilesDir =
380
454
  profilesDir ?? resolve(supervisorCwd, ".claude/agents");
@@ -388,23 +462,19 @@ export function createSupervisor({
388
462
  : { type: "preset", preset: "claude_code", append: trailer };
389
463
  };
390
464
  let supervisor;
391
- let supervisorRunner;
392
465
 
393
466
  const ctx = createOrchestrationContext();
467
+ const messageBus = createMessageBus({
468
+ participants: ["supervisor", "agent"],
469
+ });
470
+ ctx.messageBus = messageBus;
471
+ ctx.participants = [
472
+ { name: "supervisor", role: "supervisor" },
473
+ { name: "agent", role: "agent" },
474
+ ];
394
475
 
395
476
  const supervisorServer = createSupervisorToolServer(ctx);
396
- const agentServer = createSupervisedAgentToolServer(ctx, {
397
- onAsk: async (question) => {
398
- supervisor.currentSource = "supervisor";
399
- supervisor.emitOrchestratorEvent({ type: "ask_received" });
400
- await supervisorRunner.resume(
401
- `The agent asks: "${question}"\n\nAnswer the question directly.`,
402
- );
403
- supervisor.currentSource = "agent";
404
- supervisor.emitOrchestratorEvent({ type: "ask_answered" });
405
- return supervisor.extractLastText(supervisorRunner, "No answer.");
406
- },
407
- });
477
+ const agentServer = createSupervisedAgentToolServer(ctx);
408
478
 
409
479
  const onLine = (line) => supervisor.emitLine(line);
410
480
 
@@ -426,7 +496,7 @@ export function createSupervisor({
426
496
  ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
427
497
  : defaultDisallowed;
428
498
 
429
- supervisorRunner = createAgentRunner({
499
+ const supervisorRunner = createAgentRunner({
430
500
  cwd: supervisorCwd,
431
501
  query,
432
502
  output: devNull,
@@ -453,6 +523,8 @@ export function createSupervisor({
453
523
  output,
454
524
  maxTurns,
455
525
  ctx,
526
+ messageBus,
527
+ taskAmend,
456
528
  });
457
529
  return supervisor;
458
530
  }