@forwardimpact/libeval 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +92 -44
- package/package.json +3 -2
- package/src/agent-runner.js +29 -40
- package/src/commands/facilitate.js +109 -0
- package/src/commands/run.js +17 -1
- package/src/facilitator.js +492 -0
- package/src/index.js +15 -2
- package/src/message-bus.js +100 -0
- package/src/orchestration-toolkit.js +209 -0
- package/src/sequence-counter.js +17 -0
- package/src/supervisor.js +128 -210
- package/src/tee-writer.js +20 -26
package/bin/fit-eval.js
CHANGED
|
@@ -8,6 +8,7 @@ import { runOutputCommand } from "../src/commands/output.js";
|
|
|
8
8
|
import { runTeeCommand } from "../src/commands/tee.js";
|
|
9
9
|
import { runRunCommand } from "../src/commands/run.js";
|
|
10
10
|
import { runSuperviseCommand } from "../src/commands/supervise.js";
|
|
11
|
+
import { runFacilitateCommand } from "../src/commands/facilitate.js";
|
|
11
12
|
|
|
12
13
|
const { version: VERSION } = JSON.parse(
|
|
13
14
|
readFileSync(new URL("../package.json", import.meta.url), "utf8"),
|
|
@@ -20,7 +21,7 @@ const definition = {
|
|
|
20
21
|
commands: [
|
|
21
22
|
{
|
|
22
23
|
name: "output",
|
|
23
|
-
args: "
|
|
24
|
+
args: "",
|
|
24
25
|
description: "Process trace and output formatted result",
|
|
25
26
|
},
|
|
26
27
|
{
|
|
@@ -30,65 +31,111 @@ const definition = {
|
|
|
30
31
|
},
|
|
31
32
|
{
|
|
32
33
|
name: "run",
|
|
33
|
-
args: "
|
|
34
|
+
args: "",
|
|
34
35
|
description: "Run a single agent via the Claude Agent SDK",
|
|
36
|
+
options: {
|
|
37
|
+
"task-file": { type: "string", description: "Path to task file" },
|
|
38
|
+
"task-text": { type: "string", description: "Inline task text" },
|
|
39
|
+
"task-amend": {
|
|
40
|
+
type: "string",
|
|
41
|
+
description: "Additional text appended to task",
|
|
42
|
+
},
|
|
43
|
+
model: { type: "string", description: "Claude model (default: opus)" },
|
|
44
|
+
"max-turns": {
|
|
45
|
+
type: "string",
|
|
46
|
+
description: "Max agentic turns (default: 50)",
|
|
47
|
+
},
|
|
48
|
+
output: { type: "string", description: "Write NDJSON trace to file" },
|
|
49
|
+
cwd: { type: "string", description: "Working directory" },
|
|
50
|
+
"agent-profile": { type: "string", description: "Agent profile name" },
|
|
51
|
+
"allowed-tools": {
|
|
52
|
+
type: "string",
|
|
53
|
+
description: "Comma-separated tool list",
|
|
54
|
+
},
|
|
55
|
+
},
|
|
35
56
|
},
|
|
36
57
|
{
|
|
37
58
|
name: "supervise",
|
|
38
|
-
args: "
|
|
59
|
+
args: "",
|
|
39
60
|
description: "Run a supervised agent-supervisor relay loop",
|
|
61
|
+
options: {
|
|
62
|
+
"task-file": { type: "string", description: "Path to task file" },
|
|
63
|
+
"task-text": { type: "string", description: "Inline task text" },
|
|
64
|
+
"task-amend": {
|
|
65
|
+
type: "string",
|
|
66
|
+
description: "Additional text appended to task",
|
|
67
|
+
},
|
|
68
|
+
model: { type: "string", description: "Claude model (default: opus)" },
|
|
69
|
+
"max-turns": {
|
|
70
|
+
type: "string",
|
|
71
|
+
description: "Max agentic turns (default: 50)",
|
|
72
|
+
},
|
|
73
|
+
output: { type: "string", description: "Write NDJSON trace to file" },
|
|
74
|
+
cwd: { type: "string", description: "Working directory" },
|
|
75
|
+
"agent-profile": { type: "string", description: "Agent profile name" },
|
|
76
|
+
"allowed-tools": {
|
|
77
|
+
type: "string",
|
|
78
|
+
description: "Comma-separated tool list",
|
|
79
|
+
},
|
|
80
|
+
"supervisor-cwd": {
|
|
81
|
+
type: "string",
|
|
82
|
+
description: "Supervisor working directory",
|
|
83
|
+
},
|
|
84
|
+
"agent-cwd": { type: "string", description: "Agent working directory" },
|
|
85
|
+
"supervisor-profile": {
|
|
86
|
+
type: "string",
|
|
87
|
+
description: "Supervisor profile name",
|
|
88
|
+
},
|
|
89
|
+
"supervisor-allowed-tools": {
|
|
90
|
+
type: "string",
|
|
91
|
+
description: "Supervisor tool list",
|
|
92
|
+
},
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
name: "facilitate",
|
|
97
|
+
args: "",
|
|
98
|
+
description: "Run a facilitated multi-agent session",
|
|
99
|
+
options: {
|
|
100
|
+
"task-file": { type: "string", description: "Path to task file" },
|
|
101
|
+
"task-text": { type: "string", description: "Inline task text" },
|
|
102
|
+
"task-amend": {
|
|
103
|
+
type: "string",
|
|
104
|
+
description: "Additional text appended to task",
|
|
105
|
+
},
|
|
106
|
+
model: { type: "string", description: "Claude model (default: opus)" },
|
|
107
|
+
"max-turns": {
|
|
108
|
+
type: "string",
|
|
109
|
+
description: "Max facilitator LLM turns (default: 20)",
|
|
110
|
+
},
|
|
111
|
+
output: { type: "string", description: "Write NDJSON trace to file" },
|
|
112
|
+
"facilitator-cwd": {
|
|
113
|
+
type: "string",
|
|
114
|
+
description: "Facilitator working directory",
|
|
115
|
+
},
|
|
116
|
+
"facilitator-profile": {
|
|
117
|
+
type: "string",
|
|
118
|
+
description: "Facilitator profile name",
|
|
119
|
+
},
|
|
120
|
+
agents: {
|
|
121
|
+
type: "string",
|
|
122
|
+
description:
|
|
123
|
+
"Agent configs: name1:cwd=/tmp/a:role=explorer,name2:cwd=/tmp/b:role=tester",
|
|
124
|
+
},
|
|
125
|
+
},
|
|
40
126
|
},
|
|
41
127
|
],
|
|
42
|
-
|
|
128
|
+
globalOptions: {
|
|
43
129
|
format: { type: "string", description: "Output format (json|text)" },
|
|
44
130
|
help: { type: "boolean", short: "h", description: "Show this help" },
|
|
45
131
|
version: { type: "boolean", description: "Show version" },
|
|
46
132
|
json: { type: "boolean", description: "Output help as JSON" },
|
|
47
|
-
"task-file": { type: "string", description: "Path to task file" },
|
|
48
|
-
"task-text": { type: "string", description: "Inline task text" },
|
|
49
|
-
"task-amend": {
|
|
50
|
-
type: "string",
|
|
51
|
-
description: "Additional text appended to task",
|
|
52
|
-
},
|
|
53
|
-
model: {
|
|
54
|
-
type: "string",
|
|
55
|
-
description: "Claude model (default: opus)",
|
|
56
|
-
},
|
|
57
|
-
"max-turns": {
|
|
58
|
-
type: "string",
|
|
59
|
-
description: "Max agentic turns (default: 50)",
|
|
60
|
-
},
|
|
61
|
-
output: { type: "string", description: "Write NDJSON trace to file" },
|
|
62
|
-
cwd: { type: "string", description: "Working directory" },
|
|
63
|
-
"agent-profile": {
|
|
64
|
-
type: "string",
|
|
65
|
-
description: "Agent profile name",
|
|
66
|
-
},
|
|
67
|
-
"allowed-tools": {
|
|
68
|
-
type: "string",
|
|
69
|
-
description: "Comma-separated tool list",
|
|
70
|
-
},
|
|
71
|
-
"supervisor-cwd": {
|
|
72
|
-
type: "string",
|
|
73
|
-
description: "Supervisor working directory",
|
|
74
|
-
},
|
|
75
|
-
"agent-cwd": {
|
|
76
|
-
type: "string",
|
|
77
|
-
description: "Agent working directory",
|
|
78
|
-
},
|
|
79
|
-
"supervisor-profile": {
|
|
80
|
-
type: "string",
|
|
81
|
-
description: "Supervisor profile name",
|
|
82
|
-
},
|
|
83
|
-
"supervisor-allowed-tools": {
|
|
84
|
-
type: "string",
|
|
85
|
-
description: "Supervisor tool list",
|
|
86
|
-
},
|
|
87
133
|
},
|
|
88
134
|
examples: [
|
|
89
135
|
"fit-eval output --format=text < trace.ndjson",
|
|
90
136
|
"fit-eval run --task-file=task.md --model=opus",
|
|
91
137
|
"fit-eval supervise --task-file=task.md --supervisor-cwd=.",
|
|
138
|
+
'fit-eval facilitate --task-file=task.md --agents "explorer:cwd=/tmp/a,tester:cwd=/tmp/b"',
|
|
92
139
|
],
|
|
93
140
|
};
|
|
94
141
|
|
|
@@ -100,6 +147,7 @@ const COMMANDS = {
|
|
|
100
147
|
tee: runTeeCommand,
|
|
101
148
|
run: runRunCommand,
|
|
102
149
|
supervise: runSuperviseCommand,
|
|
150
|
+
facilitate: runFacilitateCommand,
|
|
103
151
|
};
|
|
104
152
|
|
|
105
153
|
async function main() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libeval",
|
|
3
|
-
"version": "0.1.13",
|
|
3
|
+
"version": "0.1.15",
|
|
4
4
|
"description": "Process Claude Code stream-json output into structured traces",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "D. Olsson <hi@senzilla.io>",
|
|
@@ -28,7 +28,8 @@
|
|
|
28
28
|
"dependencies": {
|
|
29
29
|
"@anthropic-ai/claude-agent-sdk": "^0.2.98",
|
|
30
30
|
"@forwardimpact/libcli": "^0.1.0",
|
|
31
|
-
"@forwardimpact/libtelemetry": "^0.1.22"
|
|
31
|
+
"@forwardimpact/libtelemetry": "^0.1.22",
|
|
32
|
+
"zod": "^3.23.0"
|
|
32
33
|
},
|
|
33
34
|
"publishConfig": {
|
|
34
35
|
"access": "public"
|
package/src/agent-runner.js
CHANGED
|
@@ -6,6 +6,28 @@
|
|
|
6
6
|
* Follows OO+DI: constructor injection, factory function, tests bypass factory.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
+
const DEFAULT_ALLOWED_TOOLS = ["Bash", "Read", "Glob", "Grep", "Write", "Edit"];

/**
 * Build the complete set of AgentRunner fields from caller-supplied deps,
 * filling every optional field with its default.
 *
 * Defaults are applied with `??`, so only `null`/`undefined` trigger them;
 * explicit falsy values such as `maxTurns: 0` are preserved.
 *
 * @param {object} deps - Dependencies/options passed to the AgentRunner constructor
 * @returns {object} Plain object with every runner field populated
 */
function applyDefaults(deps) {
  return {
    cwd: deps.cwd,
    query: deps.query,
    output: deps.output,
    model: deps.model ?? "opus",
    maxTurns: deps.maxTurns ?? 50, // 0 means unlimited (omit from SDK)
    // Copy the default list so each runner owns its own array. Handing out the
    // module-level array would let a mutation through one runner silently
    // change the defaults of every runner created afterwards.
    allowedTools: deps.allowedTools ?? [...DEFAULT_ALLOWED_TOOLS],
    permissionMode: deps.permissionMode ?? "bypassPermissions",
    onLine: deps.onLine ?? null,
    onBatch: deps.onBatch ?? null,
    batchSize: deps.batchSize ?? 3,
    settingSources: deps.settingSources ?? [],
    agentProfile: deps.agentProfile ?? null,
    systemPrompt: deps.systemPrompt ?? null,
    disallowedTools: deps.disallowedTools ?? [],
    mcpServers: deps.mcpServers ?? null,
  };
}
|
|
30
|
+
|
|
9
31
|
export class AgentRunner {
|
|
10
32
|
/**
|
|
11
33
|
* @param {object} deps
|
|
@@ -23,47 +45,13 @@ export class AgentRunner {
|
|
|
23
45
|
* @param {string} [deps.agentProfile] - Agent profile name to pass as --agent to the Claude CLI
|
|
24
46
|
* @param {string|object} [deps.systemPrompt] - SDK system prompt (string replaces default; {type:'preset', preset:'claude_code', append} appends)
|
|
25
47
|
* @param {string[]} [deps.disallowedTools] - Tools to explicitly remove from the model's context
|
|
48
|
+
* @param {Record<string, object>} [deps.mcpServers] - MCP server configs to pass to the SDK query
|
|
26
49
|
*/
|
|
27
|
-
constructor({
|
|
28
|
-
cwd,
|
|
29
|
-
query,
|
|
30
|
-
output,
|
|
31
|
-
model,
|
32
|
-
maxTurns,
|
|
33
|
-
allowedTools,
|
|
34
|
-
permissionMode,
|
|
35
|
-
onLine,
|
|
36
|
-
onBatch,
|
|
37
|
-
batchSize,
|
|
38
|
-
settingSources,
|
|
39
|
-
agentProfile,
|
|
40
|
-
systemPrompt,
|
|
41
|
-
disallowedTools,
|
|
42
|
-
}) {
|
|
43
|
-
if (!cwd) throw new Error("cwd is required");
|
|
44
|
-
if (!query) throw new Error("query is required");
|
|
45
|
-
if (!output) throw new Error("output is required");
|
|
46
|
-
this.cwd = cwd;
|
|
47
|
-
this.query = query;
|
|
48
|
-
this.output = output;
|
|
49
|
-
this.model = model ?? "opus";
|
|
50
|
-
this.maxTurns = maxTurns ?? 50; // 0 means unlimited (omit from SDK)
|
|
51
|
-
this.allowedTools = allowedTools ?? [
|
|
52
|
-
"Bash",
|
|
53
|
-
"Read",
|
|
54
|
-
"Glob",
|
|
55
|
-
"Grep",
|
|
56
|
-
"Write",
|
|
57
|
-
"Edit",
|
|
58
|
-
];
|
|
59
|
-
this.permissionMode = permissionMode ?? "bypassPermissions";
|
|
60
|
-
this.onLine = onLine ?? null;
|
|
61
|
-
this.onBatch = onBatch ?? null;
|
|
62
|
-
this.batchSize = batchSize ?? 3;
|
|
63
|
-
this.settingSources = settingSources ?? [];
|
|
64
|
-
this.agentProfile = agentProfile ?? null;
|
|
65
|
-
this.systemPrompt = systemPrompt ?? null;
|
|
66
|
-
this.disallowedTools = disallowedTools ?? [];
|
|
50
|
+
constructor(deps) {
|
|
51
|
+
if (!deps.cwd) throw new Error("cwd is required");
|
|
52
|
+
if (!deps.query) throw new Error("query is required");
|
|
53
|
+
if (!deps.output) throw new Error("output is required");
|
|
54
|
+
Object.assign(this, applyDefaults(deps));
|
|
67
55
|
this.sessionId = null;
|
|
68
56
|
this.buffer = [];
|
|
69
57
|
/** @type {AbortController|null} */
|
|
@@ -95,6 +83,7 @@ export class AgentRunner {
|
|
|
95
83
|
}),
|
|
96
84
|
...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
|
|
97
85
|
...(this.agentProfile && { extraArgs: { agent: this.agentProfile } }),
|
|
86
|
+
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
|
98
87
|
},
|
|
99
88
|
});
|
|
100
89
|
return await this.#consumeQuery(iterator);
|
|
package/src/commands/facilitate.js
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
|
|
2
|
+
import { resolve, join } from "node:path";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { createFacilitator } from "../facilitator.js";
|
|
5
|
+
import { createTeeWriter } from "../tee-writer.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Parse agent config string into structured configs.
 *
 * Format: "name1:key=val:key=val,name2:key=val"
 *
 * Recognized keys are `cwd`, `role` and `maxTurns`; any other key is ignored.
 * `role` defaults to the agent name. An agent without `cwd` gets a fresh
 * temporary directory named `fit-eval-<name>-XXXXXX` under the OS temp dir.
 *
 * Because "," separates agents and ":" separates fields, values cannot contain
 * either character. Values may contain "=": only the first "=" in a field
 * splits key from value.
 *
 * Empty specs (empty input, doubled or trailing commas) are skipped, so the
 * result may be an empty array.
 *
 * @param {string} raw
 * @returns {Array<{name: string, role: string, cwd: string, maxTurns?: number}>}
 * @throws {Error} When a spec has no name, a recognized key has no value, or
 *   `maxTurns` is not a non-negative integer
 */
function parseAgentConfigs(raw) {
  const configs = [];

  for (const rawSpec of raw.split(",")) {
    const spec = rawSpec.trim();
    if (spec === "") continue;

    const [rawName, ...fields] = spec.split(":");
    const name = rawName.trim();
    if (name === "") {
      throw new Error(`--agents: missing agent name in "${spec}"`);
    }

    const config = { name, role: name };
    for (const field of fields) {
      // Split on the first "=" only so values such as "/tmp/a=b" stay intact.
      const eq = field.indexOf("=");
      const key = (eq === -1 ? field : field.slice(0, eq)).trim();
      const val = eq === -1 ? undefined : field.slice(eq + 1);

      if (key !== "cwd" && key !== "role" && key !== "maxTurns") continue;
      if (val === undefined) {
        throw new Error(
          `--agents: "${key}" for agent "${name}" requires a value (${key}=...)`,
        );
      }

      if (key === "cwd") {
        config.cwd = resolve(val);
      } else if (key === "role") {
        config.role = val;
      } else {
        if (!/^\d+$/.test(val)) {
          throw new Error(
            `--agents: maxTurns for agent "${name}" must be a non-negative integer, got "${val}"`,
          );
        }
        config.maxTurns = Number.parseInt(val, 10);
      }
    }
    configs.push(config);
  }

  // Create temp directories only after every spec has been validated, so a
  // malformed later spec does not leave stray directories behind.
  for (const config of configs) {
    if (!config.cwd) {
      config.cwd = mkdtempSync(join(tmpdir(), `fit-eval-${config.name}-`));
    }
  }

  return configs;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Parse and validate facilitate command options.
 *
 * Exactly one of `--task-file` / `--task-text` must be given; `--task-amend`,
 * when present, is appended to the task after a blank line. `--agents` is
 * required. `--max-turns` defaults to 20 and must be a non-negative integer.
 *
 * @param {object} values - Parsed option values
 * @returns {object} Parsed options: taskContent, agentConfigs, facilitatorCwd,
 *   model, maxTurns, outputPath, facilitatorProfile
 * @throws {Error} On conflicting, missing or malformed options; file-system
 *   errors from reading `--task-file` propagate unchanged
 */
function parseFacilitateOptions(values) {
  const taskFile = values["task-file"];
  const taskText = values["task-text"];
  if (taskFile && taskText)
    throw new Error("--task-file and --task-text are mutually exclusive");
  if (!taskFile && !taskText)
    throw new Error("--task-file or --task-text is required");

  // Validate the turn limit up front: a non-numeric value used to flow through
  // as NaN, and checking it here rejects bad input before the agent parse
  // below, which may create temporary directories.
  const maxTurnsRaw = values["max-turns"] ?? "20";
  if (!/^\d+$/.test(maxTurnsRaw)) {
    throw new Error(
      `--max-turns must be a non-negative integer, got "${maxTurnsRaw}"`,
    );
  }
  const maxTurns = Number.parseInt(maxTurnsRaw, 10);

  const taskAmend = values["task-amend"];
  let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
  if (taskAmend) taskContent += `\n\n${taskAmend}`;

  const agentsRaw = values.agents;
  if (!agentsRaw) throw new Error("--agents is required");

  const agentConfigs = parseAgentConfigs(agentsRaw);
  if (agentConfigs.length < 1)
    throw new Error("--agents must specify at least one agent");

  return {
    taskContent,
    agentConfigs,
    facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
    model: values.model ?? "opus",
    maxTurns,
    outputPath: values.output,
    facilitatorProfile: values["facilitator-profile"] ?? undefined,
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Facilitate command — run a facilitated multi-agent session.
 *
 * Usage: fit-eval facilitate [options]
 *
 * Output goes to stdout, or through a tee writer (file + stdout) when
 * `--output` is given. The process exits with 0 when the facilitator result
 * reports success and 1 otherwise. If the session throws, the output streams
 * are still ended before the error propagates to the caller.
 *
 * @param {object} values - Parsed option values from cli.parse()
 * @param {string[]} _args - Positional arguments
 */
export async function runFacilitateCommand(values, _args) {
  const opts = parseFacilitateOptions(values);

  const fileStream = opts.outputPath
    ? createWriteStream(opts.outputPath)
    : null;
  const output = fileStream
    ? createTeeWriter({
        fileStream,
        textStream: process.stdout,
        mode: "supervised",
      })
    : process.stdout;

  let result;
  try {
    const { query } = await import("@anthropic-ai/claude-agent-sdk");
    const facilitator = createFacilitator({
      facilitatorCwd: opts.facilitatorCwd,
      agentConfigs: opts.agentConfigs,
      query,
      output,
      model: opts.model,
      maxTurns: opts.maxTurns,
      facilitatorProfile: opts.facilitatorProfile,
    });

    result = await facilitator.run(opts.taskContent);
  } finally {
    // End the streams even when the session fails, so whatever trace was
    // produced is flushed to the output file instead of being dropped.
    if (fileStream) {
      await new Promise((r) => output.end(r));
      await new Promise((r) => fileStream.end(r));
    }
  }

  process.exit(result.success ? 0 : 1);
}
|
package/src/commands/run.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { readFileSync, createWriteStream } from "node:fs";
|
|
2
|
+
import { Writable } from "node:stream";
|
|
2
3
|
import { resolve } from "node:path";
|
|
3
4
|
import { createAgentRunner } from "../agent-runner.js";
|
|
4
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
6
|
+
import { SequenceCounter } from "../sequence-counter.js";
|
|
5
7
|
|
|
6
8
|
/**
|
|
7
9
|
* Parse and validate run command options from parsed values.
|
|
@@ -61,14 +63,28 @@ export async function runRunCommand(values, _args) {
|
|
|
61
63
|
? createTeeWriter({ fileStream, textStream: process.stdout, mode: "raw" })
|
|
62
64
|
: process.stdout;
|
|
63
65
|
|
|
66
|
+
const counter = new SequenceCounter();
|
|
67
|
+
const devNull = new Writable({
|
|
68
|
+
write(_chunk, _enc, cb) {
|
|
69
|
+
cb();
|
|
70
|
+
},
|
|
71
|
+
});
|
|
72
|
+
const onLine = (line) => {
|
|
73
|
+
const event = JSON.parse(line);
|
|
74
|
+
output.write(
|
|
75
|
+
JSON.stringify({ source: "agent", seq: counter.next(), event }) + "\n",
|
|
76
|
+
);
|
|
77
|
+
};
|
|
78
|
+
|
|
64
79
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
65
80
|
const runner = createAgentRunner({
|
|
66
81
|
cwd,
|
|
67
82
|
query,
|
|
68
|
-
output,
|
|
83
|
+
output: devNull,
|
|
69
84
|
model,
|
|
70
85
|
maxTurns,
|
|
71
86
|
allowedTools,
|
|
87
|
+
onLine,
|
|
72
88
|
settingSources: ["project"],
|
|
73
89
|
agentProfile,
|
|
74
90
|
});
|