@forwardimpact/libeval 0.1.5 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +2 -2
- package/index.js +2 -0
- package/package.json +1 -1
- package/src/agent-runner.js +97 -39
- package/src/commands/run.js +43 -18
- package/src/commands/supervise.js +59 -37
- package/src/supervisor.js +320 -48
- package/src/trace-collector.js +7 -0
- package/test/mock-runner.js +101 -0
- package/test/supervisor-intervention.test.js +359 -0
- package/test/{supervisor.test.js → supervisor-output.test.js} +120 -245
- package/test/supervisor-run.test.js +310 -0
- package/test/trace-collector.test.js +96 -0
|
@@ -3,256 +3,23 @@ import assert from "node:assert";
|
|
|
3
3
|
import { PassThrough } from "node:stream";
|
|
4
4
|
|
|
5
5
|
import {
|
|
6
|
-
AgentRunner,
|
|
7
6
|
Supervisor,
|
|
8
7
|
createSupervisor,
|
|
9
8
|
SUPERVISOR_SYSTEM_PROMPT,
|
|
10
9
|
AGENT_SYSTEM_PROMPT,
|
|
11
10
|
} from "@forwardimpact/libeval";
|
|
12
|
-
import {
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* Create a mock AgentRunner that yields pre-scripted responses.
|
|
16
|
-
* Each call to run() or resume() pops the next response from the array.
|
|
17
|
-
* @param {object[]} responses - Array of {text, success} objects
|
|
18
|
-
* @param {object[]} [messages] - Messages to buffer per turn
|
|
19
|
-
* @returns {AgentRunner}
|
|
20
|
-
*/
|
|
21
|
-
function createMockRunner(responses, messages) {
|
|
22
|
-
const output = new PassThrough();
|
|
23
|
-
let callIndex = 0;
|
|
24
|
-
|
|
25
|
-
const runner = new AgentRunner({
|
|
26
|
-
cwd: "/tmp",
|
|
27
|
-
query: async function* () {},
|
|
28
|
-
output,
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
// Override run and resume to return scripted responses
|
|
32
|
-
runner.run = async (_task) => {
|
|
33
|
-
const resp = responses[callIndex++];
|
|
34
|
-
const msgs = messages?.[callIndex - 1] ?? [
|
|
35
|
-
{ type: "assistant", content: resp.text },
|
|
36
|
-
];
|
|
37
|
-
for (const m of msgs) {
|
|
38
|
-
const line = JSON.stringify(m);
|
|
39
|
-
runner.buffer.push(line);
|
|
40
|
-
if (runner.onLine) runner.onLine(line);
|
|
41
|
-
}
|
|
42
|
-
runner.sessionId = "mock-session";
|
|
43
|
-
return {
|
|
44
|
-
success: resp.success ?? true,
|
|
45
|
-
text: resp.text,
|
|
46
|
-
sessionId: "mock-session",
|
|
47
|
-
};
|
|
48
|
-
};
|
|
49
|
-
|
|
50
|
-
runner.resume = async (_prompt) => {
|
|
51
|
-
const resp = responses[callIndex++];
|
|
52
|
-
const msgs = messages?.[callIndex - 1] ?? [
|
|
53
|
-
{ type: "assistant", content: resp.text },
|
|
54
|
-
];
|
|
55
|
-
for (const m of msgs) {
|
|
56
|
-
const line = JSON.stringify(m);
|
|
57
|
-
runner.buffer.push(line);
|
|
58
|
-
if (runner.onLine) runner.onLine(line);
|
|
59
|
-
}
|
|
60
|
-
return { success: resp.success ?? true, text: resp.text };
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
return runner;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
describe("isSuccessful", () => {
|
|
67
|
-
test("detects EVALUATION_SUCCESSFUL on its own line", () => {
|
|
68
|
-
assert.strictEqual(isSuccessful("EVALUATION_SUCCESSFUL"), true);
|
|
69
|
-
assert.strictEqual(
|
|
70
|
-
isSuccessful("Some text\nEVALUATION_SUCCESSFUL\nMore text"),
|
|
71
|
-
true,
|
|
72
|
-
);
|
|
73
|
-
assert.strictEqual(isSuccessful("Done.\n\nEVALUATION_SUCCESSFUL"), true);
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
test("tolerates markdown formatting around the signal", () => {
|
|
77
|
-
assert.strictEqual(isSuccessful("**EVALUATION_SUCCESSFUL**"), true);
|
|
78
|
-
assert.strictEqual(isSuccessful("*EVALUATION_SUCCESSFUL*"), true);
|
|
79
|
-
assert.strictEqual(isSuccessful("__EVALUATION_SUCCESSFUL__"), true);
|
|
80
|
-
assert.strictEqual(isSuccessful("_EVALUATION_SUCCESSFUL_"), true);
|
|
81
|
-
assert.strictEqual(isSuccessful("`EVALUATION_SUCCESSFUL`"), true);
|
|
82
|
-
assert.strictEqual(
|
|
83
|
-
isSuccessful(
|
|
84
|
-
"Good work.\n\n**EVALUATION_SUCCESSFUL**\n\nNow filing issues.",
|
|
85
|
-
),
|
|
86
|
-
true,
|
|
87
|
-
);
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
test("matches EVALUATION_SUCCESSFUL anywhere in text", () => {
|
|
91
|
-
assert.strictEqual(isSuccessful("not EVALUATION_SUCCESSFUL yet"), true);
|
|
92
|
-
assert.strictEqual(
|
|
93
|
-
isSuccessful("The agent is EVALUATION_SUCCESSFUL done"),
|
|
94
|
-
true,
|
|
95
|
-
);
|
|
96
|
-
assert.strictEqual(
|
|
97
|
-
isSuccessful("Great work! EVALUATION_SUCCESSFUL. Now filing issues."),
|
|
98
|
-
true,
|
|
99
|
-
);
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
test("does not match empty or unrelated text", () => {
|
|
103
|
-
assert.strictEqual(isSuccessful(""), false);
|
|
104
|
-
assert.strictEqual(isSuccessful("All done!"), false);
|
|
105
|
-
assert.strictEqual(isSuccessful("DONE"), false);
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
test("does not match old EVALUATION_COMPLETE signal", () => {
|
|
109
|
-
assert.strictEqual(isSuccessful("EVALUATION_COMPLETE"), false);
|
|
110
|
-
});
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
describe("Supervisor", () => {
|
|
114
|
-
test("constructor throws on missing agentRunner", () => {
|
|
115
|
-
assert.throws(
|
|
116
|
-
() =>
|
|
117
|
-
new Supervisor({
|
|
118
|
-
supervisorRunner: createMockRunner([]),
|
|
119
|
-
output: new PassThrough(),
|
|
120
|
-
}),
|
|
121
|
-
/agentRunner is required/,
|
|
122
|
-
);
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
test("constructor throws on missing supervisorRunner", () => {
|
|
126
|
-
assert.throws(
|
|
127
|
-
() =>
|
|
128
|
-
new Supervisor({
|
|
129
|
-
agentRunner: createMockRunner([]),
|
|
130
|
-
output: new PassThrough(),
|
|
131
|
-
}),
|
|
132
|
-
/supervisorRunner is required/,
|
|
133
|
-
);
|
|
134
|
-
});
|
|
135
|
-
|
|
136
|
-
test("constructor throws on missing output", () => {
|
|
137
|
-
assert.throws(
|
|
138
|
-
() =>
|
|
139
|
-
new Supervisor({
|
|
140
|
-
agentRunner: createMockRunner([]),
|
|
141
|
-
supervisorRunner: createMockRunner([]),
|
|
142
|
-
}),
|
|
143
|
-
/output is required/,
|
|
144
|
-
);
|
|
145
|
-
});
|
|
146
|
-
|
|
147
|
-
test("completes on EVALUATION_SUCCESSFUL from supervisor at turn 0", async () => {
|
|
148
|
-
const agentRunner = createMockRunner([]);
|
|
149
|
-
|
|
150
|
-
const supervisorRunner = createMockRunner([
|
|
151
|
-
{ text: "EVALUATION_SUCCESSFUL" },
|
|
152
|
-
]);
|
|
153
|
-
|
|
154
|
-
const output = new PassThrough();
|
|
155
|
-
const supervisor = new Supervisor({
|
|
156
|
-
agentRunner,
|
|
157
|
-
supervisorRunner,
|
|
158
|
-
output,
|
|
159
|
-
maxTurns: 10,
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
const result = await supervisor.run("Install stuff");
|
|
163
|
-
|
|
164
|
-
assert.strictEqual(result.success, true);
|
|
165
|
-
assert.strictEqual(result.turns, 0);
|
|
166
|
-
});
|
|
167
|
-
|
|
168
|
-
test("completes after one agent turn", async () => {
|
|
169
|
-
const agentRunner = createMockRunner([
|
|
170
|
-
{ text: "I installed the packages." },
|
|
171
|
-
]);
|
|
172
|
-
|
|
173
|
-
const supervisorRunner = createMockRunner([
|
|
174
|
-
{ text: "Welcome! Please install the packages." },
|
|
175
|
-
{ text: "Good work.\n\nEVALUATION_SUCCESSFUL" },
|
|
176
|
-
]);
|
|
177
|
-
|
|
178
|
-
const output = new PassThrough();
|
|
179
|
-
const supervisor = new Supervisor({
|
|
180
|
-
agentRunner,
|
|
181
|
-
supervisorRunner,
|
|
182
|
-
output,
|
|
183
|
-
maxTurns: 10,
|
|
184
|
-
});
|
|
185
|
-
|
|
186
|
-
const result = await supervisor.run("Install stuff");
|
|
187
|
-
|
|
188
|
-
assert.strictEqual(result.success, true);
|
|
189
|
-
assert.strictEqual(result.turns, 1);
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
test("runs multiple turns before completion", async () => {
|
|
193
|
-
const agentRunner = createMockRunner([
|
|
194
|
-
{ text: "Started working." },
|
|
195
|
-
{ text: "Made progress." },
|
|
196
|
-
{ text: "Finished everything." },
|
|
197
|
-
]);
|
|
198
|
-
|
|
199
|
-
const supervisorRunner = createMockRunner([
|
|
200
|
-
{ text: "Here is your task. Do the work." },
|
|
201
|
-
{ text: "Keep going, you need to do more." },
|
|
202
|
-
{ text: "Almost there, continue." },
|
|
203
|
-
{ text: "EVALUATION_SUCCESSFUL" },
|
|
204
|
-
]);
|
|
205
|
-
|
|
206
|
-
const output = new PassThrough();
|
|
207
|
-
const supervisor = new Supervisor({
|
|
208
|
-
agentRunner,
|
|
209
|
-
supervisorRunner,
|
|
210
|
-
output,
|
|
211
|
-
maxTurns: 10,
|
|
212
|
-
});
|
|
213
|
-
|
|
214
|
-
const result = await supervisor.run("Do the work");
|
|
215
|
-
|
|
216
|
-
assert.strictEqual(result.success, true);
|
|
217
|
-
assert.strictEqual(result.turns, 3);
|
|
218
|
-
});
|
|
219
|
-
|
|
220
|
-
test("enforces maxTurns limit", async () => {
|
|
221
|
-
// Supervisor starts, agent responds each turn, supervisor never says done
|
|
222
|
-
const agentRunner = createMockRunner([
|
|
223
|
-
{ text: "Turn 1" },
|
|
224
|
-
{ text: "Turn 2" },
|
|
225
|
-
]);
|
|
226
|
-
|
|
227
|
-
const supervisorRunner = createMockRunner([
|
|
228
|
-
{ text: "Start working." },
|
|
229
|
-
{ text: "Continue." },
|
|
230
|
-
{ text: "Continue." },
|
|
231
|
-
]);
|
|
232
|
-
|
|
233
|
-
const output = new PassThrough();
|
|
234
|
-
const supervisor = new Supervisor({
|
|
235
|
-
agentRunner,
|
|
236
|
-
supervisorRunner,
|
|
237
|
-
output,
|
|
238
|
-
maxTurns: 2,
|
|
239
|
-
});
|
|
240
|
-
|
|
241
|
-
const result = await supervisor.run("Endless task");
|
|
242
|
-
|
|
243
|
-
assert.strictEqual(result.success, false);
|
|
244
|
-
assert.strictEqual(result.turns, 2);
|
|
245
|
-
});
|
|
11
|
+
import { createMockRunner } from "./mock-runner.js";
|
|
246
12
|
|
|
13
|
+
describe("Supervisor - output and events", () => {
|
|
247
14
|
test("output contains tagged lines with correct source and turn", async () => {
|
|
248
15
|
const supervisorMessages = [
|
|
249
16
|
[{ type: "assistant", content: "Go ahead" }],
|
|
250
|
-
[{ type: "assistant", content: "EVALUATION_SUCCESSFUL" }],
|
|
17
|
+
[{ type: "assistant", content: "EVALUATION_COMPLETE" }],
|
|
251
18
|
];
|
|
252
19
|
const agentMessages = [[{ type: "assistant", content: "Working" }]];
|
|
253
20
|
|
|
254
21
|
const supervisorRunner = createMockRunner(
|
|
255
|
-
[{ text: "Go ahead" }, { text: "EVALUATION_SUCCESSFUL" }],
|
|
22
|
+
[{ text: "Go ahead" }, { text: "EVALUATION_COMPLETE" }],
|
|
256
23
|
supervisorMessages,
|
|
257
24
|
);
|
|
258
25
|
const agentRunner = createMockRunner([{ text: "Working" }], agentMessages);
|
|
@@ -301,7 +68,7 @@ describe("Supervisor", () => {
|
|
|
301
68
|
content: "test",
|
|
302
69
|
};
|
|
303
70
|
const supervisorRunner = createMockRunner(
|
|
304
|
-
[{ text: "Go" }, { text: "EVALUATION_SUCCESSFUL" }],
|
|
71
|
+
[{ text: "Go" }, { text: "EVALUATION_COMPLETE" }],
|
|
305
72
|
[
|
|
306
73
|
[{ type: "assistant", content: "Go" }],
|
|
307
74
|
[{ type: "assistant", content: "ok" }],
|
|
@@ -329,11 +96,117 @@ describe("Supervisor", () => {
|
|
|
329
96
|
|
|
330
97
|
// First line is supervisor turn 0, second is agent turn 1
|
|
331
98
|
const tagged = JSON.parse(lines[1]);
|
|
332
|
-
// The original event's `source` field is preserved inside `event`
|
|
333
99
|
assert.strictEqual(tagged.source, "agent");
|
|
334
100
|
assert.strictEqual(tagged.event.source, "sdk-internal");
|
|
335
101
|
});
|
|
336
102
|
|
|
103
|
+
test("mid-turn intervention emits orchestrator events and shares the agent's turn id", async () => {
|
|
104
|
+
// Agent emits one structured assistant text block on its first call —
|
|
105
|
+
// supervisor intervenes mid-turn. Resume then completes naturally and
|
|
106
|
+
// the end-of-turn review signals EVALUATION_COMPLETE.
|
|
107
|
+
const agentMessages = [
|
|
108
|
+
[
|
|
109
|
+
{
|
|
110
|
+
type: "assistant",
|
|
111
|
+
message: {
|
|
112
|
+
content: [{ type: "text", text: "Trying the wrong thing." }],
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
[
|
|
117
|
+
{
|
|
118
|
+
type: "assistant",
|
|
119
|
+
message: {
|
|
120
|
+
content: [{ type: "text", text: "Switching to the right thing." }],
|
|
121
|
+
},
|
|
122
|
+
},
|
|
123
|
+
],
|
|
124
|
+
];
|
|
125
|
+
|
|
126
|
+
const supervisorMessages = [
|
|
127
|
+
undefined,
|
|
128
|
+
[
|
|
129
|
+
{
|
|
130
|
+
type: "assistant",
|
|
131
|
+
message: {
|
|
132
|
+
content: [
|
|
133
|
+
{
|
|
134
|
+
type: "text",
|
|
135
|
+
text: "EVALUATION_INTERVENTION Switch to the right path.",
|
|
136
|
+
},
|
|
137
|
+
],
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
],
|
|
141
|
+
undefined,
|
|
142
|
+
undefined,
|
|
143
|
+
];
|
|
144
|
+
|
|
145
|
+
const agentRunner = createMockRunner(
|
|
146
|
+
[{ text: "Trying the wrong thing." }, { text: "Switching." }],
|
|
147
|
+
agentMessages,
|
|
148
|
+
);
|
|
149
|
+
const supervisorRunner = createMockRunner(
|
|
150
|
+
[
|
|
151
|
+
{ text: "Welcome." },
|
|
152
|
+
{ text: "EVALUATION_INTERVENTION Switch to the right path." },
|
|
153
|
+
{ text: "Keep going." },
|
|
154
|
+
{ text: "Done. EVALUATION_COMPLETE" },
|
|
155
|
+
],
|
|
156
|
+
supervisorMessages,
|
|
157
|
+
);
|
|
158
|
+
|
|
159
|
+
const output = new PassThrough();
|
|
160
|
+
const supervisor = new Supervisor({
|
|
161
|
+
agentRunner,
|
|
162
|
+
supervisorRunner,
|
|
163
|
+
output,
|
|
164
|
+
maxTurns: 10,
|
|
165
|
+
});
|
|
166
|
+
agentRunner.onLine = (line) => supervisor.emitLine(line);
|
|
167
|
+
supervisorRunner.onLine = (line) => supervisor.emitLine(line);
|
|
168
|
+
|
|
169
|
+
const result = await supervisor.run("Task");
|
|
170
|
+
assert.strictEqual(result.success, true);
|
|
171
|
+
|
|
172
|
+
const lines = (output.read()?.toString() ?? "")
|
|
173
|
+
.trim()
|
|
174
|
+
.split("\n")
|
|
175
|
+
.filter((l) => l.length > 0)
|
|
176
|
+
.map((l) => JSON.parse(l));
|
|
177
|
+
|
|
178
|
+
// (1) Orchestrator event with intervention_requested.
|
|
179
|
+
const interventionRequested = lines.find(
|
|
180
|
+
(l) =>
|
|
181
|
+
l.source === "orchestrator" &&
|
|
182
|
+
l.event?.type === "intervention_requested",
|
|
183
|
+
);
|
|
184
|
+
assert.ok(
|
|
185
|
+
interventionRequested,
|
|
186
|
+
"Trace must contain intervention_requested orchestrator event",
|
|
187
|
+
);
|
|
188
|
+
|
|
189
|
+
// (2) At least one agent line and one supervisor line share a turn id —
|
|
190
|
+
// mid-turn supervisor activity is tagged with the agent's turn.
|
|
191
|
+
const agentTurns = new Set(
|
|
192
|
+
lines.filter((l) => l.source === "agent").map((l) => l.turn),
|
|
193
|
+
);
|
|
194
|
+
const supervisorTurns = new Set(
|
|
195
|
+
lines.filter((l) => l.source === "supervisor").map((l) => l.turn),
|
|
196
|
+
);
|
|
197
|
+
const sharedTurns = [...agentTurns].filter((t) => supervisorTurns.has(t));
|
|
198
|
+
assert.ok(
|
|
199
|
+
sharedTurns.length > 0,
|
|
200
|
+
"At least one turn id must appear on both agent and supervisor lines",
|
|
201
|
+
);
|
|
202
|
+
|
|
203
|
+
// (3) Final summary line still emitted.
|
|
204
|
+
const summary = lines[lines.length - 1];
|
|
205
|
+
assert.strictEqual(summary.source, "orchestrator");
|
|
206
|
+
assert.strictEqual(summary.type, "summary");
|
|
207
|
+
assert.strictEqual(summary.success, true);
|
|
208
|
+
});
|
|
209
|
+
|
|
337
210
|
test("emits supervisor output and summary when supervisor errors on turn 0", async () => {
|
|
338
211
|
const supervisorMessages = [
|
|
339
212
|
[{ type: "assistant", content: "Starting..." }],
|
|
@@ -343,7 +216,6 @@ describe("Supervisor", () => {
|
|
|
343
216
|
supervisorMessages,
|
|
344
217
|
);
|
|
345
218
|
|
|
346
|
-
// Override run to simulate an error return
|
|
347
219
|
const origRun = supervisorRunner.run;
|
|
348
220
|
supervisorRunner.run = async (task) => {
|
|
349
221
|
const result = await origRun.call(supervisorRunner, task);
|
|
@@ -367,7 +239,6 @@ describe("Supervisor", () => {
|
|
|
367
239
|
assert.strictEqual(result.success, false);
|
|
368
240
|
assert.strictEqual(result.turns, 0);
|
|
369
241
|
|
|
370
|
-
// Output should still contain the supervisor's buffered lines + summary
|
|
371
242
|
const data = output.read()?.toString() ?? "";
|
|
372
243
|
const lines = data
|
|
373
244
|
.trim()
|
|
@@ -385,7 +256,9 @@ describe("Supervisor", () => {
|
|
|
385
256
|
assert.strictEqual(summaryLine.success, false);
|
|
386
257
|
assert.strictEqual(summaryLine.turns, 0);
|
|
387
258
|
});
|
|
259
|
+
});
|
|
388
260
|
|
|
261
|
+
describe("Supervisor - createSupervisor factory", () => {
|
|
389
262
|
test("createSupervisor factory returns a Supervisor instance", () => {
|
|
390
263
|
const supervisor = createSupervisor({
|
|
391
264
|
supervisorCwd: "/tmp/sup",
|
|
@@ -448,7 +321,7 @@ describe("Supervisor", () => {
|
|
|
448
321
|
});
|
|
449
322
|
});
|
|
450
323
|
|
|
451
|
-
test("createSupervisor blocks
|
|
324
|
+
test("createSupervisor blocks sub-agent spawn tools on supervisor by default", () => {
|
|
452
325
|
const supervisor = createSupervisor({
|
|
453
326
|
supervisorCwd: "/tmp/sup",
|
|
454
327
|
agentCwd: "/tmp/agent",
|
|
@@ -456,10 +329,11 @@ describe("Supervisor", () => {
|
|
|
456
329
|
output: new PassThrough(),
|
|
457
330
|
});
|
|
458
331
|
assert.deepStrictEqual(supervisor.supervisorRunner.disallowedTools, [
|
|
332
|
+
"Agent",
|
|
459
333
|
"Task",
|
|
460
334
|
"TaskOutput",
|
|
335
|
+
"TaskStop",
|
|
461
336
|
]);
|
|
462
|
-
// Agent should not have disallowed tools
|
|
463
337
|
assert.deepStrictEqual(supervisor.agentRunner.disallowedTools, []);
|
|
464
338
|
});
|
|
465
339
|
|
|
@@ -472,10 +346,11 @@ describe("Supervisor", () => {
|
|
|
472
346
|
supervisorDisallowedTools: ["WebSearch", "Task"],
|
|
473
347
|
});
|
|
474
348
|
const disallowed = supervisor.supervisorRunner.disallowedTools;
|
|
349
|
+
assert.ok(disallowed.includes("Agent"));
|
|
475
350
|
assert.ok(disallowed.includes("Task"));
|
|
476
351
|
assert.ok(disallowed.includes("TaskOutput"));
|
|
352
|
+
assert.ok(disallowed.includes("TaskStop"));
|
|
477
353
|
assert.ok(disallowed.includes("WebSearch"));
|
|
478
|
-
// No duplicates
|
|
479
354
|
assert.strictEqual(disallowed.length, new Set(disallowed).size);
|
|
480
355
|
});
|
|
481
356
|
|
|
@@ -488,6 +363,6 @@ describe("Supervisor", () => {
|
|
|
488
363
|
|
|
489
364
|
test("SUPERVISOR_SYSTEM_PROMPT explains relay mechanism", () => {
|
|
490
365
|
assert.ok(SUPERVISOR_SYSTEM_PROMPT.includes("relay"));
|
|
491
|
-
assert.ok(SUPERVISOR_SYSTEM_PROMPT.includes("EVALUATION_SUCCESSFUL"));
|
|
366
|
+
assert.ok(SUPERVISOR_SYSTEM_PROMPT.includes("EVALUATION_COMPLETE"));
|
|
492
367
|
});
|
|
493
368
|
});
|