@forwardimpact/libeval 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -3
- package/src/agent-runner.js +5 -1
- package/src/commands/facilitate.js +3 -2
- package/src/commands/run.js +4 -2
- package/src/commands/supervise.js +3 -2
- package/src/facilitator.js +78 -135
- package/src/message-bus.js +78 -13
- package/src/orchestration-toolkit.js +211 -63
- package/src/orchestrator-helpers.js +58 -0
- package/src/render/tool-hints.js +3 -3
- package/src/supervisor.js +110 -38
package/src/supervisor.js
CHANGED
|
@@ -4,8 +4,11 @@
|
|
|
4
4
|
* introduces itself, and delegates work to the agent. The loop then alternates:
|
|
5
5
|
* agent → supervisor → agent.
|
|
6
6
|
*
|
|
7
|
-
* Signaling uses orchestration tools (
|
|
8
|
-
* MCP servers.
|
|
7
|
+
* Signaling uses orchestration tools (Ask / Answer / Announce / Redirect /
|
|
8
|
+
* Conclude) via in-process MCP servers. The Ask/Answer contract is enforced
|
|
9
|
+
* at turn boundaries: an unanswered Ask triggers one synthetic reminder and
|
|
10
|
+
* then a `protocol_violation` trace event plus a null-answer injection so the
|
|
11
|
+
* session advances without silent deadlock.
|
|
9
12
|
*
|
|
10
13
|
* Follows OO+DI: constructor injection, factory function, tests bypass factory.
|
|
11
14
|
*/
|
|
@@ -16,27 +19,30 @@ import { createAgentRunner } from "./agent-runner.js";
|
|
|
16
19
|
import { composeProfilePrompt } from "./profile-prompt.js";
|
|
17
20
|
import { TraceCollector } from "./trace-collector.js";
|
|
18
21
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
22
|
+
import { createMessageBus } from "./message-bus.js";
|
|
19
23
|
import {
|
|
20
24
|
createOrchestrationContext,
|
|
21
25
|
createSupervisorToolServer,
|
|
22
26
|
createSupervisedAgentToolServer,
|
|
27
|
+
checkPendingAsk,
|
|
23
28
|
} from "./orchestration-toolkit.js";
|
|
29
|
+
import { formatMessages } from "./orchestrator-helpers.js";
|
|
24
30
|
|
|
25
31
|
/** System prompt appended for the supervisor runner in supervise mode. */
|
|
26
32
|
export const SUPERVISOR_SYSTEM_PROMPT =
|
|
27
|
-
"You
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
+
"You supervise one agent. " +
|
|
34
|
+
"Ask sends a question to the agent; the reply arrives via Answer. " +
|
|
35
|
+
"Answer replies to an ask the agent addressed to you. " +
|
|
36
|
+
"Announce sends a message with no reply obligation. " +
|
|
37
|
+
"Redirect interrupts the agent with replacement instructions. " +
|
|
38
|
+
"Conclude ends the session with a summary.";
|
|
33
39
|
|
|
34
40
|
/** System prompt appended for the agent runner in supervise mode. */
|
|
35
41
|
export const AGENT_SYSTEM_PROMPT =
|
|
36
|
-
"A supervisor watches your work
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
42
|
+
"A supervisor watches your work. " +
|
|
43
|
+
"Answer replies to an ask addressed to you. " +
|
|
44
|
+
"Ask sends a question to the supervisor; the reply arrives via Answer. " +
|
|
45
|
+
"Announce sends a message with no reply expected.";
|
|
40
46
|
|
|
41
47
|
/**
|
|
42
48
|
* Maximum number of mid-turn interventions allowed within a single agent turn.
|
|
@@ -54,8 +60,18 @@ export class Supervisor {
|
|
|
54
60
|
* @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
|
|
55
61
|
* @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
|
|
56
62
|
* @param {object} [deps.ctx] - Orchestration context (injected by factory)
|
|
63
|
+
* @param {import("./message-bus.js").MessageBus} [deps.messageBus] - Two-participant message bus ("supervisor" / "agent")
|
|
64
|
+
* @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
|
|
57
65
|
*/
|
|
58
|
-
constructor({
|
|
66
|
+
constructor({
|
|
67
|
+
agentRunner,
|
|
68
|
+
supervisorRunner,
|
|
69
|
+
output,
|
|
70
|
+
maxTurns,
|
|
71
|
+
ctx,
|
|
72
|
+
messageBus,
|
|
73
|
+
taskAmend,
|
|
74
|
+
}) {
|
|
59
75
|
if (!agentRunner) throw new Error("agentRunner is required");
|
|
60
76
|
if (!supervisorRunner) throw new Error("supervisorRunner is required");
|
|
61
77
|
if (!output) throw new Error("output is required");
|
|
@@ -64,7 +80,11 @@ export class Supervisor {
|
|
|
64
80
|
this.output = output;
|
|
65
81
|
this.maxTurns = maxTurns ?? 100;
|
|
66
82
|
this.ctx = ctx ?? createOrchestrationContext();
|
|
83
|
+
this.messageBus =
|
|
84
|
+
messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
|
|
85
|
+
if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
|
|
67
86
|
this.counter = new SequenceCounter();
|
|
87
|
+
this.taskAmend = taskAmend ?? null;
|
|
68
88
|
/** @type {"agent"|"supervisor"} */
|
|
69
89
|
this.currentSource = "agent";
|
|
70
90
|
/** @type {number} */
|
|
@@ -77,9 +97,10 @@ export class Supervisor {
|
|
|
77
97
|
* @returns {Promise<{success: boolean, turns: number}>}
|
|
78
98
|
*/
|
|
79
99
|
async run(task) {
|
|
100
|
+
const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
|
|
80
101
|
this.currentSource = "supervisor";
|
|
81
102
|
this.currentTurn = 0;
|
|
82
|
-
let supervisorResult = await this.supervisorRunner.run(
|
|
103
|
+
let supervisorResult = await this.supervisorRunner.run(initialTask);
|
|
83
104
|
|
|
84
105
|
if (supervisorResult.error) {
|
|
85
106
|
this.emitSummary({ success: false, turns: 0 });
|
|
@@ -95,8 +116,7 @@ export class Supervisor {
|
|
|
95
116
|
const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
|
|
96
117
|
for (let turn = 1; turn <= turnLimit; turn++) {
|
|
97
118
|
const relay =
|
|
98
|
-
pendingRelay ??
|
|
99
|
-
this.extractLastText(this.supervisorRunner, supervisorResult.text);
|
|
119
|
+
pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
|
|
100
120
|
|
|
101
121
|
const turnOutcome = await this.#runAgentTurn(turn, relay);
|
|
102
122
|
if (turnOutcome.exit) return turnOutcome.exit;
|
|
@@ -111,6 +131,22 @@ export class Supervisor {
|
|
|
111
131
|
return { success: false, turns: this.maxTurns };
|
|
112
132
|
}
|
|
113
133
|
|
|
134
|
+
#buildInitialRelay(fallbackText) {
|
|
135
|
+
const queued = this.messageBus.drain("agent");
|
|
136
|
+
if (queued.length > 0) return formatMessages(queued);
|
|
137
|
+
return this.extractLastText(this.supervisorRunner, fallbackText);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
#checkAsk(name) {
|
|
141
|
+
return checkPendingAsk({
|
|
142
|
+
ctx: this.ctx,
|
|
143
|
+
messageBus: this.messageBus,
|
|
144
|
+
addresseeName: name,
|
|
145
|
+
mode: "supervised",
|
|
146
|
+
emitViolation: (e) => this.emitOrchestratorEvent(e),
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
|
|
114
150
|
/**
|
|
115
151
|
* Drive the agent through one turn, allowing the supervisor to interrupt
|
|
116
152
|
* via the Redirect tool. Returns either an `exit` outcome (the loop should
|
|
@@ -122,6 +158,7 @@ export class Supervisor {
|
|
|
122
158
|
async #runAgentTurn(turn, initialRelay) {
|
|
123
159
|
let relay = initialRelay;
|
|
124
160
|
let interventions = 0;
|
|
161
|
+
let agentCalled = this.agentRunner.sessionId !== null;
|
|
125
162
|
|
|
126
163
|
this.agentRunner.onBatch = (batchLines, ctx) =>
|
|
127
164
|
this.#midTurnReview(turn, batchLines, ctx);
|
|
@@ -130,10 +167,10 @@ export class Supervisor {
|
|
|
130
167
|
while (true) {
|
|
131
168
|
this.currentSource = "agent";
|
|
132
169
|
this.currentTurn = turn;
|
|
133
|
-
const
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
170
|
+
const agentResult = agentCalled
|
|
171
|
+
? await this.agentRunner.resume(relay)
|
|
172
|
+
: await this.agentRunner.run(relay);
|
|
173
|
+
agentCalled = true;
|
|
137
174
|
|
|
138
175
|
if (agentResult.error && !agentResult.aborted) {
|
|
139
176
|
this.emitSummary({ success: false, turns: turn });
|
|
@@ -162,6 +199,14 @@ export class Supervisor {
|
|
|
162
199
|
continue;
|
|
163
200
|
}
|
|
164
201
|
|
|
202
|
+
if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
|
|
203
|
+
const reminders = this.messageBus.drain("agent");
|
|
204
|
+
if (reminders.length > 0) {
|
|
205
|
+
relay = formatMessages(reminders);
|
|
206
|
+
continue;
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
165
210
|
return { exit: null };
|
|
166
211
|
}
|
|
167
212
|
} finally {
|
|
@@ -209,14 +254,20 @@ export class Supervisor {
|
|
|
209
254
|
* @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
|
|
210
255
|
*/
|
|
211
256
|
async #endOfTurnReview(turn) {
|
|
257
|
+
const queuedForSupervisor = this.messageBus.drain("supervisor");
|
|
212
258
|
const agentTranscript = this.extractTranscript(this.agentRunner);
|
|
213
259
|
this.currentSource = "supervisor";
|
|
214
260
|
this.currentTurn = turn;
|
|
215
261
|
this.ctx.redirect = null;
|
|
216
262
|
|
|
217
|
-
const
|
|
218
|
-
|
|
219
|
-
|
|
263
|
+
const reviewPrompt =
|
|
264
|
+
queuedForSupervisor.length > 0
|
|
265
|
+
? `The agent reported:\n\n${agentTranscript}\n\n` +
|
|
266
|
+
`Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
|
|
267
|
+
`Review and decide how to proceed.`
|
|
268
|
+
: `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
|
|
269
|
+
|
|
270
|
+
let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
|
|
220
271
|
|
|
221
272
|
if (supervisorResult.error) {
|
|
222
273
|
this.emitSummary({ success: false, turns: turn });
|
|
@@ -232,13 +283,34 @@ export class Supervisor {
|
|
|
232
283
|
return { exit: { success: true, turns: turn } };
|
|
233
284
|
}
|
|
234
285
|
|
|
286
|
+
if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
|
|
287
|
+
const reminders = this.messageBus.drain("supervisor");
|
|
288
|
+
if (reminders.length > 0) {
|
|
289
|
+
supervisorResult = await this.supervisorRunner.resume(
|
|
290
|
+
formatMessages(reminders),
|
|
291
|
+
);
|
|
292
|
+
if (this.ctx.concluded) {
|
|
293
|
+
this.emitSummary({
|
|
294
|
+
success: true,
|
|
295
|
+
turns: turn,
|
|
296
|
+
summary: this.ctx.summary,
|
|
297
|
+
});
|
|
298
|
+
return { exit: { success: true, turns: turn } };
|
|
299
|
+
}
|
|
300
|
+
this.#checkAsk("supervisor");
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
235
304
|
if (this.ctx.redirect) {
|
|
236
305
|
const redirect = this.ctx.redirect;
|
|
237
306
|
this.ctx.redirect = null;
|
|
238
307
|
return { exit: null, supervisorResult, relay: redirect.message };
|
|
239
308
|
}
|
|
240
309
|
|
|
241
|
-
|
|
310
|
+
const queuedForAgent = this.messageBus.drain("agent");
|
|
311
|
+
const relay =
|
|
312
|
+
queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
|
|
313
|
+
return { exit: null, supervisorResult, relay };
|
|
242
314
|
}
|
|
243
315
|
|
|
244
316
|
/**
|
|
@@ -360,6 +432,7 @@ const devNull = new Writable({
|
|
|
360
432
|
* @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
361
433
|
* @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
362
434
|
* @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
|
|
435
|
+
* @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
|
|
363
436
|
* @returns {Supervisor}
|
|
364
437
|
*/
|
|
365
438
|
export function createSupervisor({
|
|
@@ -375,6 +448,7 @@ export function createSupervisor({
|
|
|
375
448
|
supervisorProfile,
|
|
376
449
|
agentProfile,
|
|
377
450
|
profilesDir,
|
|
451
|
+
taskAmend,
|
|
378
452
|
}) {
|
|
379
453
|
const resolvedProfilesDir =
|
|
380
454
|
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
@@ -388,23 +462,19 @@ export function createSupervisor({
|
|
|
388
462
|
: { type: "preset", preset: "claude_code", append: trailer };
|
|
389
463
|
};
|
|
390
464
|
let supervisor;
|
|
391
|
-
let supervisorRunner;
|
|
392
465
|
|
|
393
466
|
const ctx = createOrchestrationContext();
|
|
467
|
+
const messageBus = createMessageBus({
|
|
468
|
+
participants: ["supervisor", "agent"],
|
|
469
|
+
});
|
|
470
|
+
ctx.messageBus = messageBus;
|
|
471
|
+
ctx.participants = [
|
|
472
|
+
{ name: "supervisor", role: "supervisor" },
|
|
473
|
+
{ name: "agent", role: "agent" },
|
|
474
|
+
];
|
|
394
475
|
|
|
395
476
|
const supervisorServer = createSupervisorToolServer(ctx);
|
|
396
|
-
const agentServer = createSupervisedAgentToolServer(ctx
|
|
397
|
-
onAsk: async (question) => {
|
|
398
|
-
supervisor.currentSource = "supervisor";
|
|
399
|
-
supervisor.emitOrchestratorEvent({ type: "ask_received" });
|
|
400
|
-
await supervisorRunner.resume(
|
|
401
|
-
`The agent asks: "${question}"\n\nAnswer the question directly.`,
|
|
402
|
-
);
|
|
403
|
-
supervisor.currentSource = "agent";
|
|
404
|
-
supervisor.emitOrchestratorEvent({ type: "ask_answered" });
|
|
405
|
-
return supervisor.extractLastText(supervisorRunner, "No answer.");
|
|
406
|
-
},
|
|
407
|
-
});
|
|
477
|
+
const agentServer = createSupervisedAgentToolServer(ctx);
|
|
408
478
|
|
|
409
479
|
const onLine = (line) => supervisor.emitLine(line);
|
|
410
480
|
|
|
@@ -426,7 +496,7 @@ export function createSupervisor({
|
|
|
426
496
|
? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
|
|
427
497
|
: defaultDisallowed;
|
|
428
498
|
|
|
429
|
-
supervisorRunner = createAgentRunner({
|
|
499
|
+
const supervisorRunner = createAgentRunner({
|
|
430
500
|
cwd: supervisorCwd,
|
|
431
501
|
query,
|
|
432
502
|
output: devNull,
|
|
@@ -453,6 +523,8 @@ export function createSupervisor({
|
|
|
453
523
|
output,
|
|
454
524
|
maxTurns,
|
|
455
525
|
ctx,
|
|
526
|
+
messageBus,
|
|
527
|
+
taskAmend,
|
|
456
528
|
});
|
|
457
529
|
return supervisor;
|
|
458
530
|
}
|