@forwardimpact/libeval 0.1.39 → 0.1.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent-runner.js +33 -21
- package/src/benchmark/apm-installer.js +104 -69
- package/src/benchmark/runner.js +9 -5
- package/src/facilitator.js +21 -0
- package/src/supervisor.js +6 -25
package/package.json
CHANGED
package/src/agent-runner.js
CHANGED
|
@@ -77,22 +77,7 @@ export class AgentRunner {
|
|
|
77
77
|
try {
|
|
78
78
|
const iterator = this.query({
|
|
79
79
|
prompt: effectiveTask,
|
|
80
|
-
options:
|
|
81
|
-
cwd: this.cwd,
|
|
82
|
-
allowedTools: this.allowedTools,
|
|
83
|
-
maxTurns:
|
|
84
|
-
this.maxTurns === 0 ? Number.MAX_SAFE_INTEGER : this.maxTurns,
|
|
85
|
-
model: this.model,
|
|
86
|
-
permissionMode: PERMISSION_MODE,
|
|
87
|
-
allowDangerouslySkipPermissions: true,
|
|
88
|
-
settingSources: this.settingSources,
|
|
89
|
-
abortController,
|
|
90
|
-
...(this.disallowedTools.length > 0 && {
|
|
91
|
-
disallowedTools: this.disallowedTools,
|
|
92
|
-
}),
|
|
93
|
-
...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
|
|
94
|
-
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
|
95
|
-
},
|
|
80
|
+
options: this.#callOptions(abortController),
|
|
96
81
|
});
|
|
97
82
|
return await this.#consumeQuery(iterator);
|
|
98
83
|
} finally {
|
|
@@ -112,12 +97,8 @@ export class AgentRunner {
|
|
|
112
97
|
const iterator = this.query({
|
|
113
98
|
prompt,
|
|
114
99
|
options: {
|
|
100
|
+
...this.#callOptions(abortController),
|
|
115
101
|
resume: this.sessionId,
|
|
116
|
-
model: this.model,
|
|
117
|
-
permissionMode: PERMISSION_MODE,
|
|
118
|
-
allowDangerouslySkipPermissions: true,
|
|
119
|
-
abortController,
|
|
120
|
-
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
|
121
102
|
},
|
|
122
103
|
});
|
|
123
104
|
return await this.#consumeQuery(iterator);
|
|
@@ -126,6 +107,37 @@ export class AgentRunner {
|
|
|
126
107
|
}
|
|
127
108
|
}
|
|
128
109
|
|
|
110
|
+
/**
|
|
111
|
+
* Build the options passed to every SDK query() call. Shared by run() and
|
|
112
|
+
* resume() so the agent's configuration — cwd, tools, prompt, setting
|
|
113
|
+
* sources, turn budget — is identical across the session's lifetime. Only
|
|
114
|
+
* resume() layers `resume: this.sessionId` on top.
|
|
115
|
+
*
|
|
116
|
+
* SDK options are call-attached, not session-attached: the resumed call
|
|
117
|
+
* loads the prior conversation but otherwise uses whatever options this
|
|
118
|
+
* call passes. Omitting tool/prompt/setting options on resume causes the
|
|
119
|
+
* agent to silently lose its restrictions and persona between turns.
|
|
120
|
+
* @param {AbortController} abortController
|
|
121
|
+
* @returns {object}
|
|
122
|
+
*/
|
|
123
|
+
#callOptions(abortController) {
|
|
124
|
+
return {
|
|
125
|
+
cwd: this.cwd,
|
|
126
|
+
allowedTools: this.allowedTools,
|
|
127
|
+
maxTurns: this.maxTurns === 0 ? Number.MAX_SAFE_INTEGER : this.maxTurns,
|
|
128
|
+
model: this.model,
|
|
129
|
+
permissionMode: PERMISSION_MODE,
|
|
130
|
+
allowDangerouslySkipPermissions: true,
|
|
131
|
+
settingSources: this.settingSources,
|
|
132
|
+
abortController,
|
|
133
|
+
...(this.disallowedTools.length > 0 && {
|
|
134
|
+
disallowedTools: this.disallowedTools,
|
|
135
|
+
}),
|
|
136
|
+
...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
|
|
137
|
+
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
|
|
129
141
|
/**
|
|
130
142
|
* Shared consumer for both `run()` and `resume()`. Iterates the SDK query
|
|
131
143
|
* iterator, mirroring every line to the output stream / buffer / onLine
|
|
@@ -3,71 +3,109 @@
|
|
|
3
3
|
* materialise skills and agents, copies the resulting `.claude/` into a
|
|
4
4
|
* staging directory, and computes the manifest fingerprint from the lockfile.
|
|
5
5
|
* Per-task copy happens later in WorkdirManager.
|
|
6
|
+
*
|
|
7
|
+
* The class takes a `spawn` seam so tests can substitute a fake child process
|
|
8
|
+
* without ever shelling out to a real `apm` binary. See `createApmInstaller`
|
|
9
|
+
* for the real-dependency wiring; `installApm` is a thin free-function wrapper
|
|
10
|
+
* for callers that don't need to inject anything.
|
|
6
11
|
*/
|
|
7
12
|
|
|
8
|
-
import { spawn } from "node:child_process";
|
|
13
|
+
import { spawn as nodeSpawn } from "node:child_process";
|
|
9
14
|
import { createHash } from "node:crypto";
|
|
10
15
|
import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
|
|
11
16
|
import { join } from "node:path";
|
|
12
17
|
|
|
13
|
-
/**
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
/** Installs apm and stages `.claude/` for a task family. */
export class ApmInstaller {
  /**
   * @param {object} [deps]
   * @param {typeof nodeSpawn} [deps.spawn] - Spawn seam (defaults to
   *   `node:child_process` spawn). Tests inject a fake to avoid shelling out.
   */
  constructor({ spawn } = {}) {
    this.spawn = spawn ?? nodeSpawn;
  }

  /**
   * Run `apm install` when the family has an `apm.yml`, copy the family's
   * `.claude/` into `<outputDir>/.apm-staging/.claude` (or create it empty),
   * stage an optional `judge.md`, and fingerprint `apm.lock.yaml`.
   *
   * @param {import("./task-family.js").TaskFamily} family
   * @param {string} outputDir - The benchmark run's output directory.
   * @returns {Promise<{stagingDir: string, skillSetHash: string, judgeProfilesDir: string}>}
   *   `skillSetHash` is `"sha256:<hex>"` of the LF-normalised lockfile, or `""`
   *   when the family has no lockfile.
   * @throws {Error} When `apm install` fails, when it leaves no `.claude/`
   *   behind, or when staging or lockfile reading fails for a reason other
   *   than the optional file being absent.
   */
  async install(family, outputDir) {
    const stagingDir = join(outputDir, ".apm-staging");
    const stagedClaude = join(stagingDir, ".claude");
    const sourceClaude = join(family.rootPath, ".claude");
    const apmYml = join(family.rootPath, "apm.yml");

    const hasApm = await access(apmYml)
      .then(() => true)
      .catch(() => false);

    if (hasApm) {
      await this.#runApmInstall(family.rootPath);
      try {
        await access(sourceClaude);
      } catch {
        throw new Error(
          `apm install did not produce .claude/ at ${sourceClaude}; check the family's apm.yml`,
        );
      }
    }

    // Start from a clean staging directory on every install.
    await rm(stagingDir, { recursive: true, force: true });
    const hasClaudeDir = await access(sourceClaude)
      .then(() => true)
      .catch(() => false);
    if (hasClaudeDir) {
      await cp(sourceClaude, stagedClaude, { recursive: true });
    } else {
      await mkdir(stagedClaude, { recursive: true });
    }

    // Stage the family-local judge profile outside .claude/ so it is available
    // to the judge but never copied into the agent-under-test's CWD.
    const judgeSource = join(family.rootPath, "judge.md");
    const judgeProfilesDir = join(stagingDir, "judge-profiles");
    let hasJudge = true;
    try {
      await access(judgeSource);
    } catch (err) {
      // Only a missing judge.md is optional; anything else is a real failure.
      if (err?.code !== "ENOENT") throw err;
      hasJudge = false;
    }
    if (hasJudge) {
      // Copy failures are surfaced rather than silently skipping the judge.
      await mkdir(judgeProfilesDir, { recursive: true });
      await cp(judgeSource, join(judgeProfilesDir, "judge.md"));
    }

    const lockPath = join(family.rootPath, "apm.lock.yaml");
    let skillSetHash = "";
    try {
      const lockBytes = await readFile(lockPath);
      skillSetHash =
        "sha256:" +
        createHash("sha256").update(normalizeLf(lockBytes)).digest("hex");
    } catch (err) {
      // No lockfile — family doesn't use skill packs. Any other failure must
      // not be reported as an empty fingerprint.
      if (err?.code !== "ENOENT") throw err;
    }

    return { stagingDir, skillSetHash, judgeProfilesDir };
  }

  /**
   * Spawn `apm install --target claude` in `cwd` through the injected spawn
   * seam and settle once the child closes.
   *
   * @param {string} cwd - Directory to run the install in.
   * @returns {Promise<void>} Resolves on exit code 0; rejects on a spawn
   *   error or a non-zero exit (the message includes collected stderr).
   */
  #runApmInstall(cwd) {
    return new Promise((res, rej) => {
      const child = this.spawn("apm", ["install", "--target", "claude"], {
        cwd,
        stdio: ["ignore", "pipe", "pipe"],
      });
      let stderr = "";
      // stdout is piped but unused; consume it so the stream keeps flowing.
      child.stdout.on("data", () => {});
      child.stderr.on("data", (d) => {
        stderr += d.toString();
      });
      child.on("error", (e) => {
        // Keep the original error reachable for callers that inspect it.
        rej(new Error(`failed to spawn apm: ${e.message}`, { cause: e }));
      });
      child.on("close", (code) => {
        if (code === 0) res();
        else rej(new Error(`apm install exited ${code}: ${stderr}`));
      });
    });
  }
}
|
|
72
110
|
|
|
73
111
|
function normalizeLf(buf) {
|
|
@@ -79,23 +117,20 @@ function normalizeLf(buf) {
|
|
|
79
117
|
return Buffer.from(out);
|
|
80
118
|
}
|
|
81
119
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
else rej(new Error(`apm install exited ${code}: ${stderr}`));
|
|
99
|
-
});
|
|
100
|
-
});
|
|
120
|
+
/**
 * Factory for `ApmInstaller`. Forwards `deps` to the constructor unchanged,
 * so omitting it leaves the constructor's own defaults in effect.
 * @param {ConstructorParameters<typeof ApmInstaller>[0]} [deps]
 * @returns {ApmInstaller}
 */
export function createApmInstaller(deps) {
  const installer = new ApmInstaller(deps);
  return installer;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Convenience wrapper: builds an `ApmInstaller` with no injected
 * dependencies and runs a single `install(family, outputDir)` on it.
 * @param {import("./task-family.js").TaskFamily} family
 * @param {string} outputDir
 * @returns {ReturnType<ApmInstaller["install"]>} Whatever `install` returns.
 */
export function installApm(family, outputDir) {
  const installer = new ApmInstaller();
  return installer.install(family, outputDir);
}
|
package/src/benchmark/runner.js
CHANGED
|
@@ -21,7 +21,7 @@ import { join, resolve as resolvePath } from "node:path";
|
|
|
21
21
|
|
|
22
22
|
import { DEFAULT_ENV_ALLOWLIST, createRedactor } from "../redaction.js";
|
|
23
23
|
import { createSupervisor } from "../supervisor.js";
|
|
24
|
-
import { installApm } from "./apm-installer.js";
|
|
24
|
+
import { installApm as defaultInstallApm } from "./apm-installer.js";
|
|
25
25
|
import { runJudge } from "./judge.js";
|
|
26
26
|
import { validateResultRecord } from "./result.js";
|
|
27
27
|
import { runScoring } from "./scorer.js";
|
|
@@ -64,6 +64,10 @@ export class BenchmarkRunner {
|
|
|
64
64
|
* @param {Function} [opts.runJudge] - Test seam: replaces `runJudge`. Same
|
|
65
65
|
* contract as `runJudge(task, workdir, scoring, deps)`. Internal testing
|
|
66
66
|
* only.
|
|
67
|
+
* @param {Function} [opts.installApm] - Test seam: replaces `installApm`.
|
|
68
|
+
* Same contract as `installApm(family, outputDir)`. Lets tests inject a
|
|
69
|
+
* fake `apm` spawn (or skip the install entirely) so the suite never
|
|
70
|
+
* shells out to a real `apm` binary. Internal testing only.
|
|
67
71
|
*/
|
|
68
72
|
constructor({
|
|
69
73
|
family,
|
|
@@ -81,6 +85,7 @@ export class BenchmarkRunner {
|
|
|
81
85
|
runAgent,
|
|
82
86
|
runScoring: runScoringHook,
|
|
83
87
|
runJudge: runJudgeHook,
|
|
88
|
+
installApm: installApmHook,
|
|
84
89
|
}) {
|
|
85
90
|
if (!family) throw new Error("family is required");
|
|
86
91
|
if (!Number.isInteger(runs) || runs < 1)
|
|
@@ -105,6 +110,7 @@ export class BenchmarkRunner {
|
|
|
105
110
|
this._runAgentHook = runAgent ?? null;
|
|
106
111
|
this._runScoringHook = runScoringHook ?? runScoring;
|
|
107
112
|
this._runJudgeHook = runJudgeHook ?? runJudge;
|
|
113
|
+
this._installApmHook = installApmHook ?? defaultInstallApm;
|
|
108
114
|
}
|
|
109
115
|
|
|
110
116
|
/**
|
|
@@ -118,10 +124,8 @@ export class BenchmarkRunner {
|
|
|
118
124
|
: this.familyInput;
|
|
119
125
|
|
|
120
126
|
await mkdir(this.output, { recursive: true });
|
|
121
|
-
const { stagingDir, skillSetHash, judgeProfilesDir } =
|
|
122
|
-
family,
|
|
123
|
-
this.output,
|
|
124
|
-
);
|
|
127
|
+
const { stagingDir, skillSetHash, judgeProfilesDir } =
|
|
128
|
+
await this._installApmHook(family, this.output);
|
|
125
129
|
|
|
126
130
|
const tasks = family.tasks();
|
|
127
131
|
if (this.profiles.judge) {
|
package/src/facilitator.js
CHANGED
|
@@ -394,6 +394,8 @@ const devNull = new Writable({
|
|
|
394
394
|
* @param {string} [deps.agentModel] - Agent model override (falls back to `model`).
|
|
395
395
|
* @param {string} [deps.facilitatorModel] - Facilitator model override (falls back to `model`).
|
|
396
396
|
* @param {number} [deps.maxTurns]
|
|
397
|
+
* @param {string[]} [deps.facilitatorAllowedTools] - Tools the facilitator may use; defaults to `Bash`, `Read`, `Glob`, `Grep`, `Write` and `Edit`.
|
|
398
|
+
* @param {string[]} [deps.facilitatorDisallowedTools] - Additional tools to block on the facilitator; merged with the sub-agent spawn defaults (Agent/Task/TaskOutput/TaskStop).
|
|
397
399
|
* @param {string} [deps.facilitatorProfile] - Facilitator profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
|
|
398
400
|
* @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<facilitatorCwd>/.claude/agents`. Resolved once from the facilitator's cwd so profiles travel with the project, not with per-agent sandboxes.
|
|
399
401
|
* @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
|
|
@@ -408,6 +410,8 @@ export function createFacilitator({
|
|
|
408
410
|
agentModel,
|
|
409
411
|
facilitatorModel,
|
|
410
412
|
maxTurns,
|
|
413
|
+
facilitatorAllowedTools,
|
|
414
|
+
facilitatorDisallowedTools,
|
|
411
415
|
facilitatorProfile,
|
|
412
416
|
profilesDir,
|
|
413
417
|
taskAmend,
|
|
@@ -467,12 +471,29 @@ export function createFacilitator({
|
|
|
467
471
|
return { name: config.name, role: config.role, runner };
|
|
468
472
|
});
|
|
469
473
|
|
|
474
|
+
// Block the SDK's sub-agent spawn tools on the facilitator: its job is to
|
|
475
|
+
// coordinate participants through the libeval orchestration harness, not
|
|
476
|
+
// to fan work out to ad-hoc Claude Code sub-agents. Mirrors the supervisor.
|
|
477
|
+
const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
|
|
478
|
+
const disallowedTools = facilitatorDisallowedTools
|
|
479
|
+
? [...new Set([...defaultDisallowed, ...facilitatorDisallowedTools])]
|
|
480
|
+
: defaultDisallowed;
|
|
481
|
+
|
|
470
482
|
const facilitatorRunner = createAgentRunner({
|
|
471
483
|
cwd: facilitatorCwd,
|
|
472
484
|
query,
|
|
473
485
|
output: devNull,
|
|
474
486
|
model: facilitatorModel ?? model,
|
|
475
487
|
maxTurns: maxTurns ?? 20,
|
|
488
|
+
allowedTools: facilitatorAllowedTools ?? [
|
|
489
|
+
"Bash",
|
|
490
|
+
"Read",
|
|
491
|
+
"Glob",
|
|
492
|
+
"Grep",
|
|
493
|
+
"Write",
|
|
494
|
+
"Edit",
|
|
495
|
+
],
|
|
496
|
+
disallowedTools,
|
|
476
497
|
onLine: (line) => facilitator.emitLine("facilitator", line),
|
|
477
498
|
mcpServers: { orchestration: facilitatorServer },
|
|
478
499
|
settingSources: ["project"],
|
package/src/supervisor.js
CHANGED
|
@@ -104,7 +104,6 @@ export class Supervisor {
|
|
|
104
104
|
*/
|
|
105
105
|
async run(task) {
|
|
106
106
|
const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
|
|
107
|
-
this.taskContext = initialTask;
|
|
108
107
|
this.currentSource = "supervisor";
|
|
109
108
|
this.currentTurn = 0;
|
|
110
109
|
let supervisorResult = await this.supervisorRunner.run(initialTask);
|
|
@@ -252,22 +251,6 @@ export class Supervisor {
|
|
|
252
251
|
return { type: "continue" };
|
|
253
252
|
}
|
|
254
253
|
|
|
255
|
-
/**
|
|
256
|
-
* Resume the supervisor runner, falling back to a fresh session when the
|
|
257
|
-
* SDK reports that the conversation no longer exists (e.g. session GC'd
|
|
258
|
-
* while the agent was running). The fresh session includes the original
|
|
259
|
-
* task context so the supervisor can still evaluate the agent's work.
|
|
260
|
-
* @param {string} prompt
|
|
261
|
-
* @returns {Promise<object>}
|
|
262
|
-
*/
|
|
263
|
-
async #resumeSupervisor(prompt) {
|
|
264
|
-
const result = await this.supervisorRunner.resume(prompt);
|
|
265
|
-
if (result.error && isSessionNotFound(result.error)) {
|
|
266
|
-
return this.supervisorRunner.run(`${this.taskContext}\n\n${prompt}`);
|
|
267
|
-
}
|
|
268
|
-
return result;
|
|
269
|
-
}
|
|
270
|
-
|
|
271
254
|
/**
|
|
272
255
|
* If the agent has an unanswered ask, drain reminders and return a
|
|
273
256
|
* formatted relay string. Returns null when no relay is needed.
|
|
@@ -295,7 +278,7 @@ export class Supervisor {
|
|
|
295
278
|
this.currentSource = "supervisor";
|
|
296
279
|
this.ctx.redirect = null;
|
|
297
280
|
|
|
298
|
-
await this
|
|
281
|
+
await this.supervisorRunner.resume(
|
|
299
282
|
`The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
|
|
300
283
|
`Review and use your tools if action is needed.`,
|
|
301
284
|
);
|
|
@@ -333,7 +316,7 @@ export class Supervisor {
|
|
|
333
316
|
`Review and decide how to proceed.`
|
|
334
317
|
: `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
|
|
335
318
|
|
|
336
|
-
let supervisorResult = await this
|
|
319
|
+
let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
|
|
337
320
|
|
|
338
321
|
if (supervisorResult.error) {
|
|
339
322
|
this.emitSummary({ success: false, turns: turn });
|
|
@@ -354,7 +337,7 @@ export class Supervisor {
|
|
|
354
337
|
if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
|
|
355
338
|
const reminders = this.messageBus.drain("supervisor");
|
|
356
339
|
if (reminders.length > 0) {
|
|
357
|
-
supervisorResult = await this
|
|
340
|
+
supervisorResult = await this.supervisorRunner.resume(
|
|
358
341
|
formatMessages(reminders),
|
|
359
342
|
);
|
|
360
343
|
if (this.ctx.concluded) {
|
|
@@ -578,6 +561,9 @@ export function createSupervisor({
|
|
|
578
561
|
redactor,
|
|
579
562
|
});
|
|
580
563
|
|
|
564
|
+
// Block the SDK's sub-agent spawn tools on the supervisor: its job is to
|
|
565
|
+
// coordinate the agent through the libeval orchestration harness, not to
|
|
566
|
+
// fan work out to ad-hoc Claude Code sub-agents. Mirrors the facilitator.
|
|
581
567
|
const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
|
|
582
568
|
const disallowedTools = supervisorDisallowedTools
|
|
583
569
|
? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
|
|
@@ -617,8 +603,3 @@ export function createSupervisor({
|
|
|
617
603
|
});
|
|
618
604
|
return supervisor;
|
|
619
605
|
}
|
|
620
|
-
|
|
621
|
-
function isSessionNotFound(error) {
|
|
622
|
-
const msg = error?.message ?? String(error);
|
|
623
|
-
return msg.includes("No conversation found with session ID");
|
|
624
|
-
}
|