@forwardimpact/libeval 0.1.44 → 0.1.46
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +186 -21
- package/bin/fit-selfedit.js +162 -0
- package/package.json +7 -3
- package/src/agent-runner.js +45 -181
- package/src/benchmark/runner.js +2 -2
- package/src/commands/supervise.js +3 -1
- package/src/discuss-tools.js +72 -140
- package/src/discusser.js +18 -35
- package/src/facilitator.js +26 -43
- package/src/index.js +0 -2
- package/src/judge.js +1 -1
- package/src/message-bus.js +27 -81
- package/src/orchestration-loop.js +176 -229
- package/src/orchestration-toolkit.js +272 -303
- package/src/orchestrator-helpers.js +9 -45
- package/src/redaction.js +2 -0
- package/src/render/orchestrator-filter.js +1 -9
- package/src/supervisor.js +79 -465
package/src/supervisor.js
CHANGED
|
@@ -1,478 +1,99 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Supervisor —
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
* agent
|
|
2
|
+
* Supervisor — supervise-mode wrapper around `OrchestrationLoop`. One
|
|
3
|
+
* named participant (`"agent"`) coordinated by a lead participant
|
|
4
|
+
* (`"supervisor"`). Structurally the same as `Facilitator` with a
|
|
5
|
+
* single agent; differs only in role names, prompts, and pass-through
|
|
6
|
+
* accessors.
|
|
6
7
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* session advances without silent deadlock.
|
|
8
|
+
* Ask is async (same contract as facilitate / discuss): returns
|
|
9
|
+
* `{askIds:[N]}` immediately; the agent's reply arrives on the
|
|
10
|
+
* supervisor's next turn as `[answer#N] agent: <text>`. The supervisor
|
|
11
|
+
* sees the agent at each Ask boundary, plans the next step, and
|
|
12
|
+
* eventually calls Conclude.
|
|
13
13
|
*
|
|
14
|
-
*
|
|
14
|
+
* For tighter feedback loops, size the agent's per-turn budget down
|
|
15
|
+
* (smaller `maxTurns` on the agent runner) so each Ask returns sooner.
|
|
15
16
|
*/
|
|
16
17
|
|
|
17
18
|
import { Writable } from "node:stream";
|
|
18
19
|
import { resolve } from "node:path";
|
|
19
20
|
import { createAgentRunner } from "./agent-runner.js";
|
|
20
21
|
import { composeProfilePrompt } from "./profile-prompt.js";
|
|
21
|
-
import { TraceCollector } from "./trace-collector.js";
|
|
22
|
-
import { SequenceCounter } from "./sequence-counter.js";
|
|
23
22
|
import { createMessageBus } from "./message-bus.js";
|
|
24
23
|
import {
|
|
25
24
|
createOrchestrationContext,
|
|
26
|
-
createSupervisorToolServer,
|
|
27
25
|
createSupervisedAgentToolServer,
|
|
28
|
-
|
|
26
|
+
createSupervisorToolServer,
|
|
29
27
|
} from "./orchestration-toolkit.js";
|
|
30
|
-
import {
|
|
28
|
+
import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
31
29
|
|
|
32
30
|
/** System prompt appended for the supervisor runner in supervise mode. */
|
|
33
31
|
export const SUPERVISOR_SYSTEM_PROMPT =
|
|
34
|
-
"You supervise one agent
|
|
35
|
-
"Ask sends a question
|
|
36
|
-
"Answer replies to an ask the agent addressed to you. " +
|
|
37
|
-
"Announce
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"the
|
|
32
|
+
"You supervise one agent named `agent`. " +
|
|
33
|
+
"Ask sends a question and returns immediately with {askIds:[N]}. The reply arrives on a later turn as `[answer#N] agent: <text>` in your inbox — between turns you can plan and reflect while the agent works. End your turn with text after asking; the orchestrator wakes you when the agent replies. " +
|
|
34
|
+
"Answer replies to an ask the agent addressed to you (you'll see it tagged `[ask#N] agent: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
|
|
35
|
+
"Announce delivers a message with no reply obligation. " +
|
|
36
|
+
"Conclude ends the session with a verdict ('success' or 'failure') and a summary; the verdict reflects whether the agent's work meets the criteria stated in the task. " +
|
|
37
|
+
"You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
|
|
38
|
+
"If the agent goes off-track, course-correct by issuing a new Ask with corrected instructions; each Ask carries a fresh askId, so a follow-up never collides with an earlier one.";
|
|
41
39
|
|
|
42
40
|
/** System prompt appended for the agent runner in supervise mode. */
|
|
43
41
|
export const AGENT_SYSTEM_PROMPT =
|
|
44
42
|
"A supervisor watches your work. " +
|
|
45
|
-
"
|
|
46
|
-
"
|
|
47
|
-
"
|
|
43
|
+
"Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
|
|
44
|
+
"Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
|
|
45
|
+
"Ask sends a question to the supervisor and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] supervisor: <text>` in your inbox. " +
|
|
46
|
+
"Announce sends a message with no reply expected — use this for unsolicited remarks or to reply to an Announce.";
|
|
48
47
|
|
|
49
48
|
/**
|
|
50
|
-
*
|
|
51
|
-
*
|
|
52
|
-
* leaves headroom for legitimate "intervene, observe, intervene again" patterns.
|
|
53
|
-
* The outer exchange budget still bounds overall runtime.
|
|
49
|
+
* Supervise-mode wrapper around `OrchestrationLoop`. The lead is
|
|
50
|
+
* `"supervisor"`, one participant is `"agent"`, mode tag is `"supervised"`.
|
|
54
51
|
*/
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* Default cap on supervisor↔agent exchanges in a single run. Not exposed via
|
|
59
|
-
* CLI — `--max-turns` governs the per-runner invocation budget instead. When
|
|
60
|
-
* a `--max-exchanges` flag is added this becomes the default for that flag.
|
|
61
|
-
*/
|
|
62
|
-
const DEFAULT_MAX_EXCHANGES = 100;
|
|
63
|
-
|
|
64
|
-
/** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
|
|
65
|
-
export class Supervisor {
|
|
52
|
+
export class Supervisor extends OrchestrationLoop {
|
|
66
53
|
/**
|
|
67
54
|
* @param {object} deps
|
|
68
|
-
* @param {import("./agent-runner.js").AgentRunner} deps.
|
|
69
|
-
* @param {import("./agent-runner.js").AgentRunner} deps.
|
|
70
|
-
* @param {import("
|
|
71
|
-
* @param {
|
|
72
|
-
* @param {object}
|
|
73
|
-
* @param {
|
|
74
|
-
* @param {string} [deps.taskAmend]
|
|
55
|
+
* @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner
|
|
56
|
+
* @param {import("./agent-runner.js").AgentRunner} deps.agentRunner
|
|
57
|
+
* @param {import("./message-bus.js").MessageBus} deps.messageBus
|
|
58
|
+
* @param {import("stream").Writable} deps.output
|
|
59
|
+
* @param {object} deps.ctx
|
|
60
|
+
* @param {object} deps.redactor
|
|
61
|
+
* @param {string} [deps.taskAmend]
|
|
75
62
|
*/
|
|
76
63
|
constructor({
|
|
77
|
-
agentRunner,
|
|
78
64
|
supervisorRunner,
|
|
65
|
+
agentRunner,
|
|
66
|
+
messageBus,
|
|
79
67
|
output,
|
|
80
|
-
maxTurns,
|
|
81
68
|
ctx,
|
|
82
|
-
messageBus,
|
|
83
69
|
taskAmend,
|
|
84
70
|
redactor,
|
|
85
71
|
}) {
|
|
86
72
|
if (!agentRunner) throw new Error("agentRunner is required");
|
|
87
73
|
if (!supervisorRunner) throw new Error("supervisorRunner is required");
|
|
88
74
|
if (!output) throw new Error("output is required");
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
this.ctx = ctx ?? createOrchestrationContext();
|
|
96
|
-
this.messageBus =
|
|
97
|
-
messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
|
|
98
|
-
if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
|
|
99
|
-
this.counter = new SequenceCounter();
|
|
100
|
-
this.taskAmend = taskAmend ?? null;
|
|
101
|
-
/** @type {"agent"|"supervisor"} */
|
|
102
|
-
this.currentSource = "agent";
|
|
103
|
-
/** @type {number} */
|
|
104
|
-
this.currentTurn = 0;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
/**
|
|
108
|
-
* Run the supervisor ↔ agent relay loop.
|
|
109
|
-
* @param {string} task - The initial task for the supervisor
|
|
110
|
-
* @returns {Promise<{success: boolean, turns: number, concluded: boolean}>}
|
|
111
|
-
*/
|
|
112
|
-
async run(task) {
|
|
113
|
-
const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
|
|
114
|
-
this.currentSource = "supervisor";
|
|
115
|
-
this.currentTurn = 0;
|
|
116
|
-
let supervisorResult = await this.supervisorRunner.run(initialTask);
|
|
117
|
-
|
|
118
|
-
if (supervisorResult.error) {
|
|
119
|
-
this.emitSummary({ success: false, turns: 0 });
|
|
120
|
-
return { success: false, turns: 0, concluded: false };
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
if (this.ctx.concluded) {
|
|
124
|
-
const success = this.ctx.verdict === "success";
|
|
125
|
-
this.emitSummary({
|
|
126
|
-
success,
|
|
127
|
-
verdict: this.ctx.verdict,
|
|
128
|
-
turns: 0,
|
|
129
|
-
summary: this.ctx.summary,
|
|
130
|
-
});
|
|
131
|
-
return { success, turns: 0, concluded: true };
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
let pendingRelay = null;
|
|
135
|
-
const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
|
|
136
|
-
for (let turn = 1; turn <= turnLimit; turn++) {
|
|
137
|
-
const relay =
|
|
138
|
-
pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
|
|
139
|
-
|
|
140
|
-
const turnOutcome = await this.#runAgentTurn(turn, relay);
|
|
141
|
-
if (turnOutcome.exit) {
|
|
142
|
-
return { ...turnOutcome.exit, concluded: this.ctx.concluded };
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
const reviewOutcome = await this.#endOfTurnReview(turn);
|
|
146
|
-
if (reviewOutcome.exit) {
|
|
147
|
-
return { ...reviewOutcome.exit, concluded: this.ctx.concluded };
|
|
148
|
-
}
|
|
149
|
-
supervisorResult = reviewOutcome.supervisorResult;
|
|
150
|
-
pendingRelay = reviewOutcome.relay ?? null;
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
this.emitSummary({ success: false, turns: this.maxTurns });
|
|
154
|
-
return { success: false, turns: this.maxTurns, concluded: false };
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
#buildInitialRelay(fallbackText) {
|
|
158
|
-
const queued = this.messageBus.drain("agent");
|
|
159
|
-
if (queued.length > 0) return formatMessages(queued);
|
|
160
|
-
return this.extractLastText(this.supervisorRunner, fallbackText);
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
#checkAsk(name) {
|
|
164
|
-
return checkPendingAsk({
|
|
165
|
-
ctx: this.ctx,
|
|
166
|
-
messageBus: this.messageBus,
|
|
167
|
-
addresseeName: name,
|
|
75
|
+
super({
|
|
76
|
+
leadRunner: supervisorRunner,
|
|
77
|
+
agents: [{ name: "agent", role: "agent", runner: agentRunner }],
|
|
78
|
+
messageBus,
|
|
79
|
+
output,
|
|
80
|
+
leadName: "supervisor",
|
|
168
81
|
mode: "supervised",
|
|
169
|
-
|
|
82
|
+
ctx,
|
|
83
|
+
taskAmend,
|
|
84
|
+
redactor,
|
|
170
85
|
});
|
|
171
86
|
}
|
|
172
87
|
|
|
173
|
-
/**
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
* @param {number} turn
|
|
178
|
-
* @param {string} initialRelay
|
|
179
|
-
* @returns {Promise<{exit: {success: boolean, turns: number}|null}>}
|
|
180
|
-
*/
|
|
181
|
-
async #runAgentTurn(turn, initialRelay) {
|
|
182
|
-
let relay = initialRelay;
|
|
183
|
-
let interventions = 0;
|
|
184
|
-
let agentCalled = this.agentRunner.sessionId !== null;
|
|
185
|
-
|
|
186
|
-
this.agentRunner.onBatch = (batchLines, ctx) =>
|
|
187
|
-
this.#midTurnReview(turn, batchLines, ctx);
|
|
188
|
-
|
|
189
|
-
try {
|
|
190
|
-
while (true) {
|
|
191
|
-
this.currentSource = "agent";
|
|
192
|
-
this.currentTurn = turn;
|
|
193
|
-
const agentResult = agentCalled
|
|
194
|
-
? await this.agentRunner.resume(relay)
|
|
195
|
-
: await this.agentRunner.run(relay);
|
|
196
|
-
agentCalled = true;
|
|
197
|
-
|
|
198
|
-
const outcome = this.#classifyAgentOutcome(
|
|
199
|
-
agentResult,
|
|
200
|
-
turn,
|
|
201
|
-
interventions,
|
|
202
|
-
);
|
|
203
|
-
|
|
204
|
-
if (outcome.type === "exit") return { exit: outcome.exit };
|
|
205
|
-
if (outcome.type === "intervention_limit") return { exit: null };
|
|
206
|
-
|
|
207
|
-
if (outcome.type === "redirect") {
|
|
208
|
-
interventions++;
|
|
209
|
-
relay = outcome.relay;
|
|
210
|
-
this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
|
|
211
|
-
continue;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
const askRelay = this.#drainAgentAskRelay();
|
|
215
|
-
if (askRelay) {
|
|
216
|
-
relay = askRelay;
|
|
217
|
-
continue;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
return { exit: null };
|
|
221
|
-
}
|
|
222
|
-
} finally {
|
|
223
|
-
this.agentRunner.onBatch = null;
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
/**
|
|
228
|
-
* Classify the outcome of a single agent execution within #runAgentTurn.
|
|
229
|
-
* @returns {{type: string, exit?: object|null, relay?: string}}
|
|
230
|
-
*/
|
|
231
|
-
#classifyAgentOutcome(agentResult, turn, interventions) {
|
|
232
|
-
if (agentResult.error && !agentResult.aborted) {
|
|
233
|
-
this.emitSummary({ success: false, turns: turn });
|
|
234
|
-
return { type: "exit", exit: { success: false, turns: turn } };
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
if (this.ctx.concluded) {
|
|
238
|
-
const success = this.ctx.verdict === "success";
|
|
239
|
-
this.emitSummary({
|
|
240
|
-
success,
|
|
241
|
-
verdict: this.ctx.verdict,
|
|
242
|
-
turns: turn,
|
|
243
|
-
summary: this.ctx.summary,
|
|
244
|
-
});
|
|
245
|
-
return { type: "exit", exit: { success, turns: turn } };
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
if (agentResult.aborted && this.ctx.redirect) {
|
|
249
|
-
const redirect = this.ctx.redirect;
|
|
250
|
-
this.ctx.redirect = null;
|
|
251
|
-
if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
|
|
252
|
-
this.emitOrchestratorEvent({ type: "intervention_limit", turn });
|
|
253
|
-
return { type: "intervention_limit" };
|
|
254
|
-
}
|
|
255
|
-
return { type: "redirect", relay: redirect.message };
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
return { type: "continue" };
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
/**
|
|
262
|
-
* If the agent has an unanswered ask, drain reminders and return a
|
|
263
|
-
* formatted relay string. Returns null when no relay is needed.
|
|
264
|
-
* @returns {string|null}
|
|
265
|
-
*/
|
|
266
|
-
#drainAgentAskRelay() {
|
|
267
|
-
if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
|
|
268
|
-
return null;
|
|
269
|
-
const reminders = this.messageBus.drain("agent");
|
|
270
|
-
return reminders.length > 0 ? formatMessages(reminders) : null;
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
/**
|
|
274
|
-
* Mid-turn supervisor review fired from inside the agent's onBatch hook.
|
|
275
|
-
* Runs the supervisor's LLM against the batch and aborts the agent if
|
|
276
|
-
* the supervisor calls Redirect or Conclude.
|
|
277
|
-
* @param {number} turn
|
|
278
|
-
* @param {string[]} batchLines
|
|
279
|
-
* @param {{abort: () => void}} ctx
|
|
280
|
-
*/
|
|
281
|
-
async #midTurnReview(turn, batchLines, { abort }) {
|
|
282
|
-
const batchTranscript = this.renderBatch(batchLines);
|
|
283
|
-
this.emitOrchestratorEvent({ type: "mid_turn_review", turn });
|
|
284
|
-
|
|
285
|
-
this.currentSource = "supervisor";
|
|
286
|
-
this.ctx.redirect = null;
|
|
287
|
-
|
|
288
|
-
await this.supervisorRunner.resume(
|
|
289
|
-
`The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
|
|
290
|
-
`Review and use your tools if action is needed.`,
|
|
291
|
-
);
|
|
292
|
-
this.currentSource = "agent";
|
|
293
|
-
|
|
294
|
-
if (this.ctx.redirect) {
|
|
295
|
-
this.emitOrchestratorEvent({ type: "intervention_requested", turn });
|
|
296
|
-
abort();
|
|
297
|
-
return;
|
|
298
|
-
}
|
|
299
|
-
if (this.ctx.concluded) {
|
|
300
|
-
this.emitOrchestratorEvent({ type: "complete_requested", turn });
|
|
301
|
-
abort();
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
/**
|
|
306
|
-
* End-of-turn supervisor review. Returns either an exit outcome (error or
|
|
307
|
-
* completion) or the supervisor result so the outer loop can build the
|
|
308
|
-
* next turn's relay.
|
|
309
|
-
* @param {number} turn
|
|
310
|
-
* @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
|
|
311
|
-
*/
|
|
312
|
-
async #endOfTurnReview(turn) {
|
|
313
|
-
const queuedForSupervisor = this.messageBus.drain("supervisor");
|
|
314
|
-
const agentTranscript = this.extractTranscript(this.agentRunner);
|
|
315
|
-
this.currentSource = "supervisor";
|
|
316
|
-
this.currentTurn = turn;
|
|
317
|
-
this.ctx.redirect = null;
|
|
318
|
-
|
|
319
|
-
const reviewPrompt =
|
|
320
|
-
queuedForSupervisor.length > 0
|
|
321
|
-
? `The agent reported:\n\n${agentTranscript}\n\n` +
|
|
322
|
-
`Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
|
|
323
|
-
`Review and decide how to proceed.`
|
|
324
|
-
: `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
|
|
325
|
-
|
|
326
|
-
let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
|
|
327
|
-
|
|
328
|
-
if (supervisorResult.error) {
|
|
329
|
-
this.emitSummary({ success: false, turns: turn });
|
|
330
|
-
return { exit: { success: false, turns: turn } };
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
if (this.ctx.concluded) {
|
|
334
|
-
const success = this.ctx.verdict === "success";
|
|
335
|
-
this.emitSummary({
|
|
336
|
-
success,
|
|
337
|
-
verdict: this.ctx.verdict,
|
|
338
|
-
turns: turn,
|
|
339
|
-
summary: this.ctx.summary,
|
|
340
|
-
});
|
|
341
|
-
return { exit: { success, turns: turn } };
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
|
|
345
|
-
const reminders = this.messageBus.drain("supervisor");
|
|
346
|
-
if (reminders.length > 0) {
|
|
347
|
-
supervisorResult = await this.supervisorRunner.resume(
|
|
348
|
-
formatMessages(reminders),
|
|
349
|
-
);
|
|
350
|
-
if (this.ctx.concluded) {
|
|
351
|
-
const success = this.ctx.verdict === "success";
|
|
352
|
-
this.emitSummary({
|
|
353
|
-
success,
|
|
354
|
-
verdict: this.ctx.verdict,
|
|
355
|
-
turns: turn,
|
|
356
|
-
summary: this.ctx.summary,
|
|
357
|
-
});
|
|
358
|
-
return { exit: { success, turns: turn } };
|
|
359
|
-
}
|
|
360
|
-
this.#checkAsk("supervisor");
|
|
361
|
-
}
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
if (this.ctx.redirect) {
|
|
365
|
-
const redirect = this.ctx.redirect;
|
|
366
|
-
this.ctx.redirect = null;
|
|
367
|
-
return { exit: null, supervisorResult, relay: redirect.message };
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
const queuedForAgent = this.messageBus.drain("agent");
|
|
371
|
-
const relay =
|
|
372
|
-
queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
|
|
373
|
-
return { exit: null, supervisorResult, relay };
|
|
88
|
+
// Readability shims for tests that read the runners by their domain names.
|
|
89
|
+
/** Readability shim — exposes the lead runner under its mode-specific name. */
|
|
90
|
+
get supervisorRunner() {
|
|
91
|
+
return this.leadRunner;
|
|
374
92
|
}
|
|
375
93
|
|
|
376
|
-
/**
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
* @returns {string}
|
|
380
|
-
*/
|
|
381
|
-
extractTranscript(runner) {
|
|
382
|
-
const lines = runner.drainOutput();
|
|
383
|
-
const collector = new TraceCollector();
|
|
384
|
-
for (const line of lines) {
|
|
385
|
-
collector.addLine(line);
|
|
386
|
-
}
|
|
387
|
-
return collector.toText() || "[The agent produced no output.]";
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
/**
|
|
391
|
-
* Extract only the last assistant text block from an AgentRunner's buffer.
|
|
392
|
-
* @param {import("./agent-runner.js").AgentRunner} runner
|
|
393
|
-
* @param {string} fallback
|
|
394
|
-
* @returns {string}
|
|
395
|
-
*/
|
|
396
|
-
extractLastText(runner, fallback) {
|
|
397
|
-
const lines = runner.buffer;
|
|
398
|
-
for (let i = lines.length - 1; i >= 0; i--) {
|
|
399
|
-
const event = JSON.parse(lines[i]);
|
|
400
|
-
if (event.type !== "assistant") continue;
|
|
401
|
-
const content = event.message?.content ?? event.content;
|
|
402
|
-
if (!Array.isArray(content)) continue;
|
|
403
|
-
for (let j = content.length - 1; j >= 0; j--) {
|
|
404
|
-
if (content[j].type === "text" && content[j].text) {
|
|
405
|
-
return content[j].text;
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
return fallback;
|
|
410
|
-
}
|
|
411
|
-
|
|
412
|
-
/**
|
|
413
|
-
* Emit a single NDJSON line tagged with the current source and seq.
|
|
414
|
-
* @param {string} line - Raw NDJSON line from the runner
|
|
415
|
-
*/
|
|
416
|
-
emitLine(line) {
|
|
417
|
-
const event = JSON.parse(line);
|
|
418
|
-
const tagged = {
|
|
419
|
-
source: this.currentSource,
|
|
420
|
-
seq: this.counter.next(),
|
|
421
|
-
event,
|
|
422
|
-
};
|
|
423
|
-
this.output.write(JSON.stringify(this.redactor.redactValue(tagged)) + "\n");
|
|
424
|
-
}
|
|
425
|
-
|
|
426
|
-
/**
|
|
427
|
-
* Render a batch of buffered NDJSON lines as human-readable text.
|
|
428
|
-
* @param {string[]} batchLines
|
|
429
|
-
* @returns {string}
|
|
430
|
-
*/
|
|
431
|
-
renderBatch(batchLines) {
|
|
432
|
-
if (batchLines.length === 0) return "[empty]";
|
|
433
|
-
const collector = new TraceCollector();
|
|
434
|
-
for (const line of batchLines) {
|
|
435
|
-
collector.addLine(line);
|
|
436
|
-
}
|
|
437
|
-
return collector.toText() || "[empty]";
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
/**
|
|
441
|
-
* Emit an orchestrator-source NDJSON line.
|
|
442
|
-
* @param {{type: string, turn?: number}} event
|
|
443
|
-
*/
|
|
444
|
-
emitOrchestratorEvent(event) {
|
|
445
|
-
this.output.write(
|
|
446
|
-
JSON.stringify(
|
|
447
|
-
this.redactor.redactValue({
|
|
448
|
-
source: "orchestrator",
|
|
449
|
-
seq: this.counter.next(),
|
|
450
|
-
event,
|
|
451
|
-
}),
|
|
452
|
-
) + "\n",
|
|
453
|
-
);
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
/**
|
|
457
|
-
* Emit a final orchestrator summary line, wrapped in the universal envelope.
|
|
458
|
-
* @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
|
|
459
|
-
*/
|
|
460
|
-
emitSummary(result) {
|
|
461
|
-
this.output.write(
|
|
462
|
-
JSON.stringify(
|
|
463
|
-
this.redactor.redactValue({
|
|
464
|
-
source: "orchestrator",
|
|
465
|
-
seq: this.counter.next(),
|
|
466
|
-
event: {
|
|
467
|
-
type: "summary",
|
|
468
|
-
success: result.success,
|
|
469
|
-
...(result.verdict && { verdict: result.verdict }),
|
|
470
|
-
turns: result.turns,
|
|
471
|
-
...(result.summary && { summary: result.summary }),
|
|
472
|
-
},
|
|
473
|
-
}),
|
|
474
|
-
) + "\n",
|
|
475
|
-
);
|
|
94
|
+
/** Readability shim — exposes the single agent runner directly. */
|
|
95
|
+
get agentRunner() {
|
|
96
|
+
return this.agents[0].runner;
|
|
476
97
|
}
|
|
477
98
|
}
|
|
478
99
|
|
|
@@ -483,24 +104,26 @@ const devNull = new Writable({
|
|
|
483
104
|
});
|
|
484
105
|
|
|
485
106
|
/**
|
|
486
|
-
* Factory
|
|
107
|
+
* Factory — wires the supervisor + agent runners and the orchestration
|
|
108
|
+
* context. Mirrors the facilitator factory in shape.
|
|
109
|
+
*
|
|
487
110
|
* @param {object} deps
|
|
488
111
|
* @param {string} deps.supervisorCwd
|
|
489
112
|
* @param {string} deps.agentCwd
|
|
490
113
|
* @param {function} deps.query
|
|
491
114
|
* @param {import("stream").Writable} deps.output
|
|
492
|
-
* @param {string} [deps.model]
|
|
493
|
-
* @param {string} [deps.agentModel]
|
|
494
|
-
* @param {string} [deps.supervisorModel]
|
|
495
|
-
* @param {number} [deps.maxTurns] - Per-runner
|
|
115
|
+
* @param {string} [deps.model]
|
|
116
|
+
* @param {string} [deps.agentModel]
|
|
117
|
+
* @param {string} [deps.supervisorModel]
|
|
118
|
+
* @param {number} [deps.maxTurns] - Per-runner SDK turn budget (default 200).
|
|
496
119
|
* @param {string[]} [deps.allowedTools]
|
|
497
120
|
* @param {string[]} [deps.supervisorAllowedTools]
|
|
498
121
|
* @param {string[]} [deps.supervisorDisallowedTools]
|
|
499
|
-
* @param {string} [deps.supervisorProfile]
|
|
500
|
-
* @param {string} [deps.agentProfile]
|
|
501
|
-
* @param {string} [deps.profilesDir]
|
|
502
|
-
* @param {string} [deps.taskAmend]
|
|
503
|
-
* @param {Record<string, object>} [deps.agentMcpServers]
|
|
122
|
+
* @param {string} [deps.supervisorProfile]
|
|
123
|
+
* @param {string} [deps.agentProfile]
|
|
124
|
+
* @param {string} [deps.profilesDir]
|
|
125
|
+
* @param {string} [deps.taskAmend]
|
|
126
|
+
* @param {Record<string, object>} [deps.agentMcpServers]
|
|
504
127
|
* @returns {Supervisor}
|
|
505
128
|
*/
|
|
506
129
|
export function createSupervisor({
|
|
@@ -513,8 +136,8 @@ export function createSupervisor({
|
|
|
513
136
|
supervisorModel,
|
|
514
137
|
maxTurns,
|
|
515
138
|
allowedTools,
|
|
516
|
-
supervisorDisallowedTools,
|
|
517
139
|
supervisorAllowedTools,
|
|
140
|
+
supervisorDisallowedTools,
|
|
518
141
|
supervisorProfile,
|
|
519
142
|
agentProfile,
|
|
520
143
|
profilesDir,
|
|
@@ -534,7 +157,6 @@ export function createSupervisor({
|
|
|
534
157
|
})
|
|
535
158
|
: { type: "preset", preset: "claude_code", append: trailer };
|
|
536
159
|
};
|
|
537
|
-
let supervisor;
|
|
538
160
|
|
|
539
161
|
const ctx = createOrchestrationContext();
|
|
540
162
|
const messageBus = createMessageBus({
|
|
@@ -546,36 +168,29 @@ export function createSupervisor({
|
|
|
546
168
|
{ name: "agent", role: "agent" },
|
|
547
169
|
];
|
|
548
170
|
|
|
549
|
-
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
const onLine = (line) => supervisor.emitLine(line);
|
|
171
|
+
let supervisor;
|
|
172
|
+
const perRunBudget = maxTurns ?? 200;
|
|
553
173
|
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
// bounded separately by `DEFAULT_MAX_EXCHANGES`; when --max-exchanges is
|
|
557
|
-
// added it will become a parameter. `maxTurns === 0` propagates through
|
|
558
|
-
// to mean unlimited on both axes.
|
|
559
|
-
const perInvocationTurns = maxTurns ?? 200;
|
|
560
|
-
const exchangeBudget = maxTurns === 0 ? 0 : DEFAULT_MAX_EXCHANGES;
|
|
174
|
+
const agentServer = createSupervisedAgentToolServer(ctx);
|
|
175
|
+
const supervisorServer = createSupervisorToolServer(ctx);
|
|
561
176
|
|
|
562
177
|
const agentRunner = createAgentRunner({
|
|
563
178
|
cwd: agentCwd,
|
|
564
179
|
query,
|
|
565
180
|
output: devNull,
|
|
566
181
|
model: agentModel ?? model,
|
|
567
|
-
maxTurns:
|
|
182
|
+
maxTurns: perRunBudget,
|
|
568
183
|
allowedTools,
|
|
569
|
-
onLine,
|
|
184
|
+
onLine: (line) => supervisor.emitLine("agent", line),
|
|
570
185
|
settingSources: ["project"],
|
|
571
186
|
systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
|
|
572
187
|
mcpServers: { orchestration: agentServer, ...agentMcpServers },
|
|
573
188
|
redactor,
|
|
574
189
|
});
|
|
575
190
|
|
|
576
|
-
// Block the SDK's sub-agent spawn tools on the supervisor:
|
|
577
|
-
// coordinate the agent through
|
|
578
|
-
//
|
|
191
|
+
// Block the SDK's sub-agent spawn tools on the supervisor: it should
|
|
192
|
+
// coordinate the agent through orchestration tools, not fan work out
|
|
193
|
+
// to ad-hoc Claude Code sub-agents.
|
|
579
194
|
const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
|
|
580
195
|
const disallowedTools = supervisorDisallowedTools
|
|
581
196
|
? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
|
|
@@ -586,7 +201,7 @@ export function createSupervisor({
|
|
|
586
201
|
query,
|
|
587
202
|
output: devNull,
|
|
588
203
|
model: supervisorModel ?? model,
|
|
589
|
-
maxTurns:
|
|
204
|
+
maxTurns: perRunBudget,
|
|
590
205
|
allowedTools: supervisorAllowedTools ?? [
|
|
591
206
|
"Bash",
|
|
592
207
|
"Read",
|
|
@@ -596,7 +211,7 @@ export function createSupervisor({
|
|
|
596
211
|
"Edit",
|
|
597
212
|
],
|
|
598
213
|
disallowedTools,
|
|
599
|
-
onLine,
|
|
214
|
+
onLine: (line) => supervisor.emitLine("supervisor", line),
|
|
600
215
|
settingSources: ["project"],
|
|
601
216
|
systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
|
|
602
217
|
mcpServers: { orchestration: supervisorServer },
|
|
@@ -604,12 +219,11 @@ export function createSupervisor({
|
|
|
604
219
|
});
|
|
605
220
|
|
|
606
221
|
supervisor = new Supervisor({
|
|
607
|
-
agentRunner,
|
|
608
222
|
supervisorRunner,
|
|
223
|
+
agentRunner,
|
|
224
|
+
messageBus,
|
|
609
225
|
output,
|
|
610
|
-
maxTurns: exchangeBudget,
|
|
611
226
|
ctx,
|
|
612
|
-
messageBus,
|
|
613
227
|
taskAmend,
|
|
614
228
|
redactor,
|
|
615
229
|
});
|