@forwardimpact/libeval 0.1.6 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +2 -2
- package/index.js +2 -0
- package/package.json +1 -1
- package/src/agent-runner.js +178 -43
- package/src/commands/run.js +43 -18
- package/src/commands/supervise.js +59 -37
- package/src/supervisor.js +298 -59
- package/test/agent-runner-batching.test.js +271 -0
- package/test/mock-runner.js +113 -0
- package/test/supervisor-batching.test.js +175 -0
- package/test/supervisor-intervention.test.js +365 -0
- package/test/{supervisor.test.js → supervisor-output.test.js} +121 -306
- package/test/supervisor-run.test.js +310 -0
|
@@ -3,317 +3,23 @@ import assert from "node:assert";
|
|
|
3
3
|
import { PassThrough } from "node:stream";
|
|
4
4
|
|
|
5
5
|
import {
|
|
6
|
-
AgentRunner,
|
|
7
6
|
Supervisor,
|
|
8
7
|
createSupervisor,
|
|
9
8
|
SUPERVISOR_SYSTEM_PROMPT,
|
|
10
9
|
AGENT_SYSTEM_PROMPT,
|
|
11
10
|
} from "@forwardimpact/libeval";
|
|
12
|
-
import {
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* Create a mock AgentRunner that yields pre-scripted responses.
|
|
16
|
-
* Each call to run() or resume() pops the next response from the array.
|
|
17
|
-
* @param {object[]} responses - Array of {text, success} objects
|
|
18
|
-
* @param {object[]} [messages] - Messages to buffer per turn
|
|
19
|
-
* @returns {AgentRunner}
|
|
20
|
-
*/
|
|
21
|
-
function createMockRunner(responses, messages) {
|
|
22
|
-
const output = new PassThrough();
|
|
23
|
-
let callIndex = 0;
|
|
24
|
-
|
|
25
|
-
const runner = new AgentRunner({
|
|
26
|
-
cwd: "/tmp",
|
|
27
|
-
query: async function* () {},
|
|
28
|
-
output,
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
// Override run and resume to return scripted responses
|
|
32
|
-
runner.run = async (_task) => {
|
|
33
|
-
const resp = responses[callIndex++];
|
|
34
|
-
const msgs = messages?.[callIndex - 1] ?? [
|
|
35
|
-
{ type: "assistant", content: resp.text },
|
|
36
|
-
];
|
|
37
|
-
for (const m of msgs) {
|
|
38
|
-
const line = JSON.stringify(m);
|
|
39
|
-
runner.buffer.push(line);
|
|
40
|
-
if (runner.onLine) runner.onLine(line);
|
|
41
|
-
}
|
|
42
|
-
runner.sessionId = "mock-session";
|
|
43
|
-
return {
|
|
44
|
-
success: resp.success ?? true,
|
|
45
|
-
text: resp.text,
|
|
46
|
-
sessionId: "mock-session",
|
|
47
|
-
};
|
|
48
|
-
};
|
|
49
|
-
|
|
50
|
-
runner.resume = async (_prompt) => {
|
|
51
|
-
const resp = responses[callIndex++];
|
|
52
|
-
const msgs = messages?.[callIndex - 1] ?? [
|
|
53
|
-
{ type: "assistant", content: resp.text },
|
|
54
|
-
];
|
|
55
|
-
for (const m of msgs) {
|
|
56
|
-
const line = JSON.stringify(m);
|
|
57
|
-
runner.buffer.push(line);
|
|
58
|
-
if (runner.onLine) runner.onLine(line);
|
|
59
|
-
}
|
|
60
|
-
return { success: resp.success ?? true, text: resp.text };
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
return runner;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
describe("isSuccessful", () => {
|
|
67
|
-
test("detects EVALUATION_SUCCESSFUL on its own line", () => {
|
|
68
|
-
assert.strictEqual(isSuccessful("EVALUATION_SUCCESSFUL"), true);
|
|
69
|
-
assert.strictEqual(
|
|
70
|
-
isSuccessful("Some text\nEVALUATION_SUCCESSFUL\nMore text"),
|
|
71
|
-
true,
|
|
72
|
-
);
|
|
73
|
-
assert.strictEqual(isSuccessful("Done.\n\nEVALUATION_SUCCESSFUL"), true);
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
test("tolerates markdown formatting around the signal", () => {
|
|
77
|
-
assert.strictEqual(isSuccessful("**EVALUATION_SUCCESSFUL**"), true);
|
|
78
|
-
assert.strictEqual(isSuccessful("*EVALUATION_SUCCESSFUL*"), true);
|
|
79
|
-
assert.strictEqual(isSuccessful("__EVALUATION_SUCCESSFUL__"), true);
|
|
80
|
-
assert.strictEqual(isSuccessful("_EVALUATION_SUCCESSFUL_"), true);
|
|
81
|
-
assert.strictEqual(isSuccessful("`EVALUATION_SUCCESSFUL`"), true);
|
|
82
|
-
assert.strictEqual(
|
|
83
|
-
isSuccessful(
|
|
84
|
-
"Good work.\n\n**EVALUATION_SUCCESSFUL**\n\nNow filing issues.",
|
|
85
|
-
),
|
|
86
|
-
true,
|
|
87
|
-
);
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
test("matches EVALUATION_SUCCESSFUL anywhere in text", () => {
|
|
91
|
-
assert.strictEqual(isSuccessful("not EVALUATION_SUCCESSFUL yet"), true);
|
|
92
|
-
assert.strictEqual(
|
|
93
|
-
isSuccessful("The agent is EVALUATION_SUCCESSFUL done"),
|
|
94
|
-
true,
|
|
95
|
-
);
|
|
96
|
-
assert.strictEqual(
|
|
97
|
-
isSuccessful("Great work! EVALUATION_SUCCESSFUL. Now filing issues."),
|
|
98
|
-
true,
|
|
99
|
-
);
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
test("does not match empty or unrelated text", () => {
|
|
103
|
-
assert.strictEqual(isSuccessful(""), false);
|
|
104
|
-
assert.strictEqual(isSuccessful("All done!"), false);
|
|
105
|
-
assert.strictEqual(isSuccessful("DONE"), false);
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
test("does not match old EVALUATION_COMPLETE signal", () => {
|
|
109
|
-
assert.strictEqual(isSuccessful("EVALUATION_COMPLETE"), false);
|
|
110
|
-
});
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
describe("Supervisor", () => {
|
|
114
|
-
test("constructor throws on missing agentRunner", () => {
|
|
115
|
-
assert.throws(
|
|
116
|
-
() =>
|
|
117
|
-
new Supervisor({
|
|
118
|
-
supervisorRunner: createMockRunner([]),
|
|
119
|
-
output: new PassThrough(),
|
|
120
|
-
}),
|
|
121
|
-
/agentRunner is required/,
|
|
122
|
-
);
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
test("constructor throws on missing supervisorRunner", () => {
|
|
126
|
-
assert.throws(
|
|
127
|
-
() =>
|
|
128
|
-
new Supervisor({
|
|
129
|
-
agentRunner: createMockRunner([]),
|
|
130
|
-
output: new PassThrough(),
|
|
131
|
-
}),
|
|
132
|
-
/supervisorRunner is required/,
|
|
133
|
-
);
|
|
134
|
-
});
|
|
135
|
-
|
|
136
|
-
test("constructor throws on missing output", () => {
|
|
137
|
-
assert.throws(
|
|
138
|
-
() =>
|
|
139
|
-
new Supervisor({
|
|
140
|
-
agentRunner: createMockRunner([]),
|
|
141
|
-
supervisorRunner: createMockRunner([]),
|
|
142
|
-
}),
|
|
143
|
-
/output is required/,
|
|
144
|
-
);
|
|
145
|
-
});
|
|
146
|
-
|
|
147
|
-
test("completes on EVALUATION_SUCCESSFUL from supervisor at turn 0", async () => {
|
|
148
|
-
const agentRunner = createMockRunner([]);
|
|
149
|
-
|
|
150
|
-
const supervisorRunner = createMockRunner([
|
|
151
|
-
{ text: "EVALUATION_SUCCESSFUL" },
|
|
152
|
-
]);
|
|
153
|
-
|
|
154
|
-
const output = new PassThrough();
|
|
155
|
-
const supervisor = new Supervisor({
|
|
156
|
-
agentRunner,
|
|
157
|
-
supervisorRunner,
|
|
158
|
-
output,
|
|
159
|
-
maxTurns: 10,
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
const result = await supervisor.run("Install stuff");
|
|
163
|
-
|
|
164
|
-
assert.strictEqual(result.success, true);
|
|
165
|
-
assert.strictEqual(result.turns, 0);
|
|
166
|
-
});
|
|
167
|
-
|
|
168
|
-
test("completes after one agent turn", async () => {
|
|
169
|
-
const agentRunner = createMockRunner([
|
|
170
|
-
{ text: "I installed the packages." },
|
|
171
|
-
]);
|
|
172
|
-
|
|
173
|
-
const supervisorRunner = createMockRunner([
|
|
174
|
-
{ text: "Welcome! Please install the packages." },
|
|
175
|
-
{ text: "Good work.\n\nEVALUATION_SUCCESSFUL" },
|
|
176
|
-
]);
|
|
177
|
-
|
|
178
|
-
const output = new PassThrough();
|
|
179
|
-
const supervisor = new Supervisor({
|
|
180
|
-
agentRunner,
|
|
181
|
-
supervisorRunner,
|
|
182
|
-
output,
|
|
183
|
-
maxTurns: 10,
|
|
184
|
-
});
|
|
185
|
-
|
|
186
|
-
const result = await supervisor.run("Install stuff");
|
|
187
|
-
|
|
188
|
-
assert.strictEqual(result.success, true);
|
|
189
|
-
assert.strictEqual(result.turns, 1);
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
test("detects EVALUATION_SUCCESSFUL in streamed messages when result text differs", async () => {
|
|
193
|
-
// Simulates the real failure: supervisor writes EVALUATION_SUCCESSFUL in
|
|
194
|
-
// an early message, then continues with follow-up work (e.g. filing issues).
|
|
195
|
-
// The SDK result text reflects only the final message, which does NOT
|
|
196
|
-
// contain the signal.
|
|
197
|
-
const agentRunner = createMockRunner([
|
|
198
|
-
{ text: "I installed the packages." },
|
|
199
|
-
]);
|
|
200
|
-
|
|
201
|
-
// The supervisor's result text is the Summary (no signal), but messages
|
|
202
|
-
// include one with EVALUATION_SUCCESSFUL.
|
|
203
|
-
const supervisorMessages = [
|
|
204
|
-
undefined, // turn 0: use default
|
|
205
|
-
[
|
|
206
|
-
{
|
|
207
|
-
type: "assistant",
|
|
208
|
-
message: {
|
|
209
|
-
content: [
|
|
210
|
-
{
|
|
211
|
-
type: "text",
|
|
212
|
-
text: "Good work.\n\nEVALUATION_SUCCESSFUL\n\nNow filing issues.",
|
|
213
|
-
},
|
|
214
|
-
],
|
|
215
|
-
},
|
|
216
|
-
},
|
|
217
|
-
{
|
|
218
|
-
type: "assistant",
|
|
219
|
-
message: {
|
|
220
|
-
content: [
|
|
221
|
-
{ type: "text", text: "## Summary\n\nAll issues filed." },
|
|
222
|
-
],
|
|
223
|
-
},
|
|
224
|
-
},
|
|
225
|
-
],
|
|
226
|
-
];
|
|
227
|
-
|
|
228
|
-
const supervisorRunner = createMockRunner(
|
|
229
|
-
[
|
|
230
|
-
{ text: "Welcome! Please install the packages." },
|
|
231
|
-
// Result text is the final message — does NOT contain the signal
|
|
232
|
-
{ text: "## Summary\n\nAll issues filed." },
|
|
233
|
-
],
|
|
234
|
-
supervisorMessages,
|
|
235
|
-
);
|
|
236
|
-
|
|
237
|
-
const output = new PassThrough();
|
|
238
|
-
const supervisor = new Supervisor({
|
|
239
|
-
agentRunner,
|
|
240
|
-
supervisorRunner,
|
|
241
|
-
output,
|
|
242
|
-
maxTurns: 10,
|
|
243
|
-
});
|
|
244
|
-
agentRunner.onLine = (line) => supervisor.emitLine(line);
|
|
245
|
-
supervisorRunner.onLine = (line) => supervisor.emitLine(line);
|
|
246
|
-
|
|
247
|
-
const result = await supervisor.run("Install stuff");
|
|
248
|
-
|
|
249
|
-
assert.strictEqual(result.success, true);
|
|
250
|
-
assert.strictEqual(result.turns, 1);
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
test("runs multiple turns before completion", async () => {
|
|
254
|
-
const agentRunner = createMockRunner([
|
|
255
|
-
{ text: "Started working." },
|
|
256
|
-
{ text: "Made progress." },
|
|
257
|
-
{ text: "Finished everything." },
|
|
258
|
-
]);
|
|
259
|
-
|
|
260
|
-
const supervisorRunner = createMockRunner([
|
|
261
|
-
{ text: "Here is your task. Do the work." },
|
|
262
|
-
{ text: "Keep going, you need to do more." },
|
|
263
|
-
{ text: "Almost there, continue." },
|
|
264
|
-
{ text: "EVALUATION_SUCCESSFUL" },
|
|
265
|
-
]);
|
|
266
|
-
|
|
267
|
-
const output = new PassThrough();
|
|
268
|
-
const supervisor = new Supervisor({
|
|
269
|
-
agentRunner,
|
|
270
|
-
supervisorRunner,
|
|
271
|
-
output,
|
|
272
|
-
maxTurns: 10,
|
|
273
|
-
});
|
|
274
|
-
|
|
275
|
-
const result = await supervisor.run("Do the work");
|
|
276
|
-
|
|
277
|
-
assert.strictEqual(result.success, true);
|
|
278
|
-
assert.strictEqual(result.turns, 3);
|
|
279
|
-
});
|
|
280
|
-
|
|
281
|
-
test("enforces maxTurns limit", async () => {
|
|
282
|
-
// Supervisor starts, agent responds each turn, supervisor never says done
|
|
283
|
-
const agentRunner = createMockRunner([
|
|
284
|
-
{ text: "Turn 1" },
|
|
285
|
-
{ text: "Turn 2" },
|
|
286
|
-
]);
|
|
287
|
-
|
|
288
|
-
const supervisorRunner = createMockRunner([
|
|
289
|
-
{ text: "Start working." },
|
|
290
|
-
{ text: "Continue." },
|
|
291
|
-
{ text: "Continue." },
|
|
292
|
-
]);
|
|
293
|
-
|
|
294
|
-
const output = new PassThrough();
|
|
295
|
-
const supervisor = new Supervisor({
|
|
296
|
-
agentRunner,
|
|
297
|
-
supervisorRunner,
|
|
298
|
-
output,
|
|
299
|
-
maxTurns: 2,
|
|
300
|
-
});
|
|
301
|
-
|
|
302
|
-
const result = await supervisor.run("Endless task");
|
|
303
|
-
|
|
304
|
-
assert.strictEqual(result.success, false);
|
|
305
|
-
assert.strictEqual(result.turns, 2);
|
|
306
|
-
});
|
|
11
|
+
import { createMockRunner } from "./mock-runner.js";
|
|
307
12
|
|
|
13
|
+
describe("Supervisor - output and events", () => {
|
|
308
14
|
test("output contains tagged lines with correct source and turn", async () => {
|
|
309
15
|
const supervisorMessages = [
|
|
310
16
|
[{ type: "assistant", content: "Go ahead" }],
|
|
311
|
-
[{ type: "assistant", content: "EVALUATION_SUCCESSFUL" }],
|
|
17
|
+
[{ type: "assistant", content: "EVALUATION_COMPLETE" }],
|
|
312
18
|
];
|
|
313
19
|
const agentMessages = [[{ type: "assistant", content: "Working" }]];
|
|
314
20
|
|
|
315
21
|
const supervisorRunner = createMockRunner(
|
|
316
|
-
[{ text: "Go ahead" }, { text: "EVALUATION_SUCCESSFUL" }],
|
|
22
|
+
[{ text: "Go ahead" }, { text: "EVALUATION_COMPLETE" }],
|
|
317
23
|
supervisorMessages,
|
|
318
24
|
);
|
|
319
25
|
const agentRunner = createMockRunner([{ text: "Working" }], agentMessages);
|
|
@@ -362,7 +68,7 @@ describe("Supervisor", () => {
|
|
|
362
68
|
content: "test",
|
|
363
69
|
};
|
|
364
70
|
const supervisorRunner = createMockRunner(
|
|
365
|
-
[{ text: "Go" }, { text: "EVALUATION_SUCCESSFUL" }],
|
|
71
|
+
[{ text: "Go" }, { text: "EVALUATION_COMPLETE" }],
|
|
366
72
|
[
|
|
367
73
|
[{ type: "assistant", content: "Go" }],
|
|
368
74
|
[{ type: "assistant", content: "ok" }],
|
|
@@ -390,11 +96,118 @@ describe("Supervisor", () => {
|
|
|
390
96
|
|
|
391
97
|
// First line is supervisor turn 0, second is agent turn 1
|
|
392
98
|
const tagged = JSON.parse(lines[1]);
|
|
393
|
-
// The original event's `source` field is preserved inside `event`
|
|
394
99
|
assert.strictEqual(tagged.source, "agent");
|
|
395
100
|
assert.strictEqual(tagged.event.source, "sdk-internal");
|
|
396
101
|
});
|
|
397
102
|
|
|
103
|
+
test("mid-turn intervention emits orchestrator events and shares the agent's turn id", async () => {
|
|
104
|
+
// Agent emits one structured assistant text block on its first call —
|
|
105
|
+
// supervisor intervenes mid-turn. Resume then completes naturally and
|
|
106
|
+
// the end-of-turn review signals EVALUATION_COMPLETE.
|
|
107
|
+
const agentMessages = [
|
|
108
|
+
[
|
|
109
|
+
{
|
|
110
|
+
type: "assistant",
|
|
111
|
+
message: {
|
|
112
|
+
content: [{ type: "text", text: "Trying the wrong thing." }],
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
[
|
|
117
|
+
{
|
|
118
|
+
type: "assistant",
|
|
119
|
+
message: {
|
|
120
|
+
content: [{ type: "text", text: "Switching to the right thing." }],
|
|
121
|
+
},
|
|
122
|
+
},
|
|
123
|
+
],
|
|
124
|
+
];
|
|
125
|
+
|
|
126
|
+
const supervisorMessages = [
|
|
127
|
+
undefined,
|
|
128
|
+
[
|
|
129
|
+
{
|
|
130
|
+
type: "assistant",
|
|
131
|
+
message: {
|
|
132
|
+
content: [
|
|
133
|
+
{
|
|
134
|
+
type: "text",
|
|
135
|
+
text: "EVALUATION_INTERVENTION Switch to the right path.",
|
|
136
|
+
},
|
|
137
|
+
],
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
],
|
|
141
|
+
undefined,
|
|
142
|
+
undefined,
|
|
143
|
+
];
|
|
144
|
+
|
|
145
|
+
const agentRunner = createMockRunner(
|
|
146
|
+
[{ text: "Trying the wrong thing." }, { text: "Switching." }],
|
|
147
|
+
agentMessages,
|
|
148
|
+
);
|
|
149
|
+
agentRunner.batchSize = 1;
|
|
150
|
+
const supervisorRunner = createMockRunner(
|
|
151
|
+
[
|
|
152
|
+
{ text: "Welcome." },
|
|
153
|
+
{ text: "EVALUATION_INTERVENTION Switch to the right path." },
|
|
154
|
+
{ text: "Keep going." },
|
|
155
|
+
{ text: "Done. EVALUATION_COMPLETE" },
|
|
156
|
+
],
|
|
157
|
+
supervisorMessages,
|
|
158
|
+
);
|
|
159
|
+
|
|
160
|
+
const output = new PassThrough();
|
|
161
|
+
const supervisor = new Supervisor({
|
|
162
|
+
agentRunner,
|
|
163
|
+
supervisorRunner,
|
|
164
|
+
output,
|
|
165
|
+
maxTurns: 10,
|
|
166
|
+
});
|
|
167
|
+
agentRunner.onLine = (line) => supervisor.emitLine(line);
|
|
168
|
+
supervisorRunner.onLine = (line) => supervisor.emitLine(line);
|
|
169
|
+
|
|
170
|
+
const result = await supervisor.run("Task");
|
|
171
|
+
assert.strictEqual(result.success, true);
|
|
172
|
+
|
|
173
|
+
const lines = (output.read()?.toString() ?? "")
|
|
174
|
+
.trim()
|
|
175
|
+
.split("\n")
|
|
176
|
+
.filter((l) => l.length > 0)
|
|
177
|
+
.map((l) => JSON.parse(l));
|
|
178
|
+
|
|
179
|
+
// (1) Orchestrator event with intervention_requested.
|
|
180
|
+
const interventionRequested = lines.find(
|
|
181
|
+
(l) =>
|
|
182
|
+
l.source === "orchestrator" &&
|
|
183
|
+
l.event?.type === "intervention_requested",
|
|
184
|
+
);
|
|
185
|
+
assert.ok(
|
|
186
|
+
interventionRequested,
|
|
187
|
+
"Trace must contain intervention_requested orchestrator event",
|
|
188
|
+
);
|
|
189
|
+
|
|
190
|
+
// (2) At least one agent line and one supervisor line share a turn id —
|
|
191
|
+
// mid-turn supervisor activity is tagged with the agent's turn.
|
|
192
|
+
const agentTurns = new Set(
|
|
193
|
+
lines.filter((l) => l.source === "agent").map((l) => l.turn),
|
|
194
|
+
);
|
|
195
|
+
const supervisorTurns = new Set(
|
|
196
|
+
lines.filter((l) => l.source === "supervisor").map((l) => l.turn),
|
|
197
|
+
);
|
|
198
|
+
const sharedTurns = [...agentTurns].filter((t) => supervisorTurns.has(t));
|
|
199
|
+
assert.ok(
|
|
200
|
+
sharedTurns.length > 0,
|
|
201
|
+
"At least one turn id must appear on both agent and supervisor lines",
|
|
202
|
+
);
|
|
203
|
+
|
|
204
|
+
// (3) Final summary line still emitted.
|
|
205
|
+
const summary = lines[lines.length - 1];
|
|
206
|
+
assert.strictEqual(summary.source, "orchestrator");
|
|
207
|
+
assert.strictEqual(summary.type, "summary");
|
|
208
|
+
assert.strictEqual(summary.success, true);
|
|
209
|
+
});
|
|
210
|
+
|
|
398
211
|
test("emits supervisor output and summary when supervisor errors on turn 0", async () => {
|
|
399
212
|
const supervisorMessages = [
|
|
400
213
|
[{ type: "assistant", content: "Starting..." }],
|
|
@@ -404,7 +217,6 @@ describe("Supervisor", () => {
|
|
|
404
217
|
supervisorMessages,
|
|
405
218
|
);
|
|
406
219
|
|
|
407
|
-
// Override run to simulate an error return
|
|
408
220
|
const origRun = supervisorRunner.run;
|
|
409
221
|
supervisorRunner.run = async (task) => {
|
|
410
222
|
const result = await origRun.call(supervisorRunner, task);
|
|
@@ -428,7 +240,6 @@ describe("Supervisor", () => {
|
|
|
428
240
|
assert.strictEqual(result.success, false);
|
|
429
241
|
assert.strictEqual(result.turns, 0);
|
|
430
242
|
|
|
431
|
-
// Output should still contain the supervisor's buffered lines + summary
|
|
432
243
|
const data = output.read()?.toString() ?? "";
|
|
433
244
|
const lines = data
|
|
434
245
|
.trim()
|
|
@@ -446,7 +257,9 @@ describe("Supervisor", () => {
|
|
|
446
257
|
assert.strictEqual(summaryLine.success, false);
|
|
447
258
|
assert.strictEqual(summaryLine.turns, 0);
|
|
448
259
|
});
|
|
260
|
+
});
|
|
449
261
|
|
|
262
|
+
describe("Supervisor - createSupervisor factory", () => {
|
|
450
263
|
test("createSupervisor factory returns a Supervisor instance", () => {
|
|
451
264
|
const supervisor = createSupervisor({
|
|
452
265
|
supervisorCwd: "/tmp/sup",
|
|
@@ -509,7 +322,7 @@ describe("Supervisor", () => {
|
|
|
509
322
|
});
|
|
510
323
|
});
|
|
511
324
|
|
|
512
|
-
test("createSupervisor blocks
|
|
325
|
+
test("createSupervisor blocks sub-agent spawn tools on supervisor by default", () => {
|
|
513
326
|
const supervisor = createSupervisor({
|
|
514
327
|
supervisorCwd: "/tmp/sup",
|
|
515
328
|
agentCwd: "/tmp/agent",
|
|
@@ -517,10 +330,11 @@ describe("Supervisor", () => {
|
|
|
517
330
|
output: new PassThrough(),
|
|
518
331
|
});
|
|
519
332
|
assert.deepStrictEqual(supervisor.supervisorRunner.disallowedTools, [
|
|
333
|
+
"Agent",
|
|
520
334
|
"Task",
|
|
521
335
|
"TaskOutput",
|
|
336
|
+
"TaskStop",
|
|
522
337
|
]);
|
|
523
|
-
// Agent should not have disallowed tools
|
|
524
338
|
assert.deepStrictEqual(supervisor.agentRunner.disallowedTools, []);
|
|
525
339
|
});
|
|
526
340
|
|
|
@@ -533,10 +347,11 @@ describe("Supervisor", () => {
|
|
|
533
347
|
supervisorDisallowedTools: ["WebSearch", "Task"],
|
|
534
348
|
});
|
|
535
349
|
const disallowed = supervisor.supervisorRunner.disallowedTools;
|
|
350
|
+
assert.ok(disallowed.includes("Agent"));
|
|
536
351
|
assert.ok(disallowed.includes("Task"));
|
|
537
352
|
assert.ok(disallowed.includes("TaskOutput"));
|
|
353
|
+
assert.ok(disallowed.includes("TaskStop"));
|
|
538
354
|
assert.ok(disallowed.includes("WebSearch"));
|
|
539
|
-
// No duplicates
|
|
540
355
|
assert.strictEqual(disallowed.length, new Set(disallowed).size);
|
|
541
356
|
});
|
|
542
357
|
|
|
@@ -549,6 +364,6 @@ describe("Supervisor", () => {
|
|
|
549
364
|
|
|
550
365
|
test("SUPERVISOR_SYSTEM_PROMPT explains relay mechanism", () => {
|
|
551
366
|
assert.ok(SUPERVISOR_SYSTEM_PROMPT.includes("relay"));
|
|
552
|
-
assert.ok(SUPERVISOR_SYSTEM_PROMPT.includes("EVALUATION_SUCCESSFUL"));
|
|
367
|
+
assert.ok(SUPERVISOR_SYSTEM_PROMPT.includes("EVALUATION_COMPLETE"));
|
|
553
368
|
});
|
|
554
369
|
});
|