@forwardimpact/libeval 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-eval.js CHANGED
@@ -8,6 +8,7 @@ import { runOutputCommand } from "../src/commands/output.js";
8
8
  import { runTeeCommand } from "../src/commands/tee.js";
9
9
  import { runRunCommand } from "../src/commands/run.js";
10
10
  import { runSuperviseCommand } from "../src/commands/supervise.js";
11
+ import { runFacilitateCommand } from "../src/commands/facilitate.js";
11
12
 
12
13
  const { version: VERSION } = JSON.parse(
13
14
  readFileSync(new URL("../package.json", import.meta.url), "utf8"),
@@ -20,7 +21,7 @@ const definition = {
20
21
  commands: [
21
22
  {
22
23
  name: "output",
23
- args: "[--format=FORMAT]",
24
+ args: "",
24
25
  description: "Process trace and output formatted result",
25
26
  },
26
27
  {
@@ -30,65 +31,111 @@ const definition = {
30
31
  },
31
32
  {
32
33
  name: "run",
33
- args: "[options]",
34
+ args: "",
34
35
  description: "Run a single agent via the Claude Agent SDK",
36
+ options: {
37
+ "task-file": { type: "string", description: "Path to task file" },
38
+ "task-text": { type: "string", description: "Inline task text" },
39
+ "task-amend": {
40
+ type: "string",
41
+ description: "Additional text appended to task",
42
+ },
43
+ model: { type: "string", description: "Claude model (default: opus)" },
44
+ "max-turns": {
45
+ type: "string",
46
+ description: "Max agentic turns (default: 50)",
47
+ },
48
+ output: { type: "string", description: "Write NDJSON trace to file" },
49
+ cwd: { type: "string", description: "Working directory" },
50
+ "agent-profile": { type: "string", description: "Agent profile name" },
51
+ "allowed-tools": {
52
+ type: "string",
53
+ description: "Comma-separated tool list",
54
+ },
55
+ },
35
56
  },
36
57
  {
37
58
  name: "supervise",
38
- args: "[options]",
59
+ args: "",
39
60
  description: "Run a supervised agent-supervisor relay loop",
61
+ options: {
62
+ "task-file": { type: "string", description: "Path to task file" },
63
+ "task-text": { type: "string", description: "Inline task text" },
64
+ "task-amend": {
65
+ type: "string",
66
+ description: "Additional text appended to task",
67
+ },
68
+ model: { type: "string", description: "Claude model (default: opus)" },
69
+ "max-turns": {
70
+ type: "string",
71
+ description: "Max agentic turns (default: 50)",
72
+ },
73
+ output: { type: "string", description: "Write NDJSON trace to file" },
74
+ cwd: { type: "string", description: "Working directory" },
75
+ "agent-profile": { type: "string", description: "Agent profile name" },
76
+ "allowed-tools": {
77
+ type: "string",
78
+ description: "Comma-separated tool list",
79
+ },
80
+ "supervisor-cwd": {
81
+ type: "string",
82
+ description: "Supervisor working directory",
83
+ },
84
+ "agent-cwd": { type: "string", description: "Agent working directory" },
85
+ "supervisor-profile": {
86
+ type: "string",
87
+ description: "Supervisor profile name",
88
+ },
89
+ "supervisor-allowed-tools": {
90
+ type: "string",
91
+ description: "Supervisor tool list",
92
+ },
93
+ },
94
+ },
95
+ {
96
+ name: "facilitate",
97
+ args: "",
98
+ description: "Run a facilitated multi-agent session",
99
+ options: {
100
+ "task-file": { type: "string", description: "Path to task file" },
101
+ "task-text": { type: "string", description: "Inline task text" },
102
+ "task-amend": {
103
+ type: "string",
104
+ description: "Additional text appended to task",
105
+ },
106
+ model: { type: "string", description: "Claude model (default: opus)" },
107
+ "max-turns": {
108
+ type: "string",
109
+ description: "Max facilitator LLM turns (default: 20)",
110
+ },
111
+ output: { type: "string", description: "Write NDJSON trace to file" },
112
+ "facilitator-cwd": {
113
+ type: "string",
114
+ description: "Facilitator working directory",
115
+ },
116
+ "facilitator-profile": {
117
+ type: "string",
118
+ description: "Facilitator profile name",
119
+ },
120
+ agents: {
121
+ type: "string",
122
+ description:
123
+ "Agent configs: name1:cwd=/tmp/a:role=explorer,name2:cwd=/tmp/b:role=tester",
124
+ },
125
+ },
40
126
  },
41
127
  ],
42
- options: {
128
+ globalOptions: {
43
129
  format: { type: "string", description: "Output format (json|text)" },
44
130
  help: { type: "boolean", short: "h", description: "Show this help" },
45
131
  version: { type: "boolean", description: "Show version" },
46
132
  json: { type: "boolean", description: "Output help as JSON" },
47
- "task-file": { type: "string", description: "Path to task file" },
48
- "task-text": { type: "string", description: "Inline task text" },
49
- "task-amend": {
50
- type: "string",
51
- description: "Additional text appended to task",
52
- },
53
- model: {
54
- type: "string",
55
- description: "Claude model (default: opus)",
56
- },
57
- "max-turns": {
58
- type: "string",
59
- description: "Max agentic turns (default: 50)",
60
- },
61
- output: { type: "string", description: "Write NDJSON trace to file" },
62
- cwd: { type: "string", description: "Working directory" },
63
- "agent-profile": {
64
- type: "string",
65
- description: "Agent profile name",
66
- },
67
- "allowed-tools": {
68
- type: "string",
69
- description: "Comma-separated tool list",
70
- },
71
- "supervisor-cwd": {
72
- type: "string",
73
- description: "Supervisor working directory",
74
- },
75
- "agent-cwd": {
76
- type: "string",
77
- description: "Agent working directory",
78
- },
79
- "supervisor-profile": {
80
- type: "string",
81
- description: "Supervisor profile name",
82
- },
83
- "supervisor-allowed-tools": {
84
- type: "string",
85
- description: "Supervisor tool list",
86
- },
87
133
  },
88
134
  examples: [
89
135
  "fit-eval output --format=text < trace.ndjson",
90
136
  "fit-eval run --task-file=task.md --model=opus",
91
137
  "fit-eval supervise --task-file=task.md --supervisor-cwd=.",
138
+ 'fit-eval facilitate --task-file=task.md --agents "explorer:cwd=/tmp/a,tester:cwd=/tmp/b"',
92
139
  ],
93
140
  };
94
141
 
@@ -100,6 +147,7 @@ const COMMANDS = {
100
147
  tee: runTeeCommand,
101
148
  run: runRunCommand,
102
149
  supervise: runSuperviseCommand,
150
+ facilitate: runFacilitateCommand,
103
151
  };
104
152
 
105
153
  async function main() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.13",
3
+ "version": "0.1.15",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -28,7 +28,8 @@
28
28
  "dependencies": {
29
29
  "@anthropic-ai/claude-agent-sdk": "^0.2.98",
30
30
  "@forwardimpact/libcli": "^0.1.0",
31
- "@forwardimpact/libtelemetry": "^0.1.22"
31
+ "@forwardimpact/libtelemetry": "^0.1.22",
32
+ "zod": "^3.23.0"
32
33
  },
33
34
  "publishConfig": {
34
35
  "access": "public"
@@ -6,6 +6,28 @@
6
6
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
7
7
  */
8
8
 
9
/**
 * Tool allow-list used when the caller does not supply `allowedTools`.
 * Frozen so the shared constant cannot be mutated through any consumer.
 */
const DEFAULT_ALLOWED_TOOLS = Object.freeze([
  "Bash",
  "Read",
  "Glob",
  "Grep",
  "Write",
  "Edit",
]);

/**
 * Build the complete settings object for a runner from caller-supplied deps.
 *
 * `cwd`, `query` and `output` are passed through untouched; every other
 * field falls back to a default when the caller's value is null/undefined.
 * `??` is used (not `||`) so explicit falsy values such as `maxTurns: 0`
 * are preserved.
 *
 * @param {object} deps - Caller-supplied dependencies and options
 * @returns {object} Settings with all optional fields populated
 */
function applyDefaults(deps) {
  return {
    cwd: deps.cwd,
    query: deps.query,
    output: deps.output,
    model: deps.model ?? "opus",
    maxTurns: deps.maxTurns ?? 50,
    // Copy the default list so each result owns its own array; sharing the
    // module-level array would let one consumer's mutation leak into others.
    allowedTools: deps.allowedTools ?? [...DEFAULT_ALLOWED_TOOLS],
    permissionMode: deps.permissionMode ?? "bypassPermissions",
    onLine: deps.onLine ?? null,
    onBatch: deps.onBatch ?? null,
    batchSize: deps.batchSize ?? 3,
    settingSources: deps.settingSources ?? [],
    agentProfile: deps.agentProfile ?? null,
    systemPrompt: deps.systemPrompt ?? null,
    disallowedTools: deps.disallowedTools ?? [],
    mcpServers: deps.mcpServers ?? null,
  };
}
30
+
9
31
  export class AgentRunner {
10
32
  /**
11
33
  * @param {object} deps
@@ -23,47 +45,13 @@ export class AgentRunner {
23
45
  * @param {string} [deps.agentProfile] - Agent profile name to pass as --agent to the Claude CLI
24
46
  * @param {string|object} [deps.systemPrompt] - SDK system prompt (string replaces default; {type:'preset', preset:'claude_code', append} appends)
25
47
  * @param {string[]} [deps.disallowedTools] - Tools to explicitly remove from the model's context
48
+ * @param {Record<string, object>} [deps.mcpServers] - MCP server configs to pass to the SDK query
26
49
  */
27
- constructor({
28
- cwd,
29
- query,
30
- output,
31
- model,
32
- maxTurns,
33
- allowedTools,
34
- permissionMode,
35
- onLine,
36
- onBatch,
37
- batchSize,
38
- settingSources,
39
- agentProfile,
40
- systemPrompt,
41
- disallowedTools,
42
- }) {
43
- if (!cwd) throw new Error("cwd is required");
44
- if (!query) throw new Error("query is required");
45
- if (!output) throw new Error("output is required");
46
- this.cwd = cwd;
47
- this.query = query;
48
- this.output = output;
49
- this.model = model ?? "opus";
50
- this.maxTurns = maxTurns ?? 50; // 0 means unlimited (omit from SDK)
51
- this.allowedTools = allowedTools ?? [
52
- "Bash",
53
- "Read",
54
- "Glob",
55
- "Grep",
56
- "Write",
57
- "Edit",
58
- ];
59
- this.permissionMode = permissionMode ?? "bypassPermissions";
60
- this.onLine = onLine ?? null;
61
- this.onBatch = onBatch ?? null;
62
- this.batchSize = batchSize ?? 3;
63
- this.settingSources = settingSources ?? [];
64
- this.agentProfile = agentProfile ?? null;
65
- this.systemPrompt = systemPrompt ?? null;
66
- this.disallowedTools = disallowedTools ?? [];
50
+ constructor(deps) {
51
+ if (!deps.cwd) throw new Error("cwd is required");
52
+ if (!deps.query) throw new Error("query is required");
53
+ if (!deps.output) throw new Error("output is required");
54
+ Object.assign(this, applyDefaults(deps));
67
55
  this.sessionId = null;
68
56
  this.buffer = [];
69
57
  /** @type {AbortController|null} */
@@ -95,6 +83,7 @@ export class AgentRunner {
95
83
  }),
96
84
  ...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
97
85
  ...(this.agentProfile && { extraArgs: { agent: this.agentProfile } }),
86
+ ...(this.mcpServers && { mcpServers: this.mcpServers }),
98
87
  },
99
88
  });
100
89
  return await this.#consumeQuery(iterator);
@@ -0,0 +1,109 @@
1
+ import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
2
+ import { resolve, join } from "node:path";
3
+ import { tmpdir } from "node:os";
4
+ import { createFacilitator } from "../facilitator.js";
5
+ import { createTeeWriter } from "../tee-writer.js";
6
+
7
/**
 * Parse the agent config string into structured configs.
 *
 * Format: "name1:key=val:key=val,name2:key=val"
 * Recognised keys are `cwd`, `role` and `maxTurns`; any other key is ignored.
 * Because entries are split on "," and settings on ":", values cannot
 * contain either character. A value may contain "=" (only the first "=" in
 * a setting separates key from value).
 *
 * Blank entries (e.g. from a trailing comma) are skipped and whitespace
 * around each entry is trimmed. `role` defaults to the agent name. When no
 * `cwd` is given, a fresh temporary directory is created for the agent.
 *
 * @param {string} raw
 * @returns {Array<{name: string, role: string, cwd: string, maxTurns?: number}>}
 * @throws {Error} If an entry has no name, a recognised key has no "=value",
 *   or `maxTurns` is not a number
 */
function parseAgentConfigs(raw) {
  const configs = [];
  for (const rawSpec of raw.split(",")) {
    const spec = rawSpec.trim();
    if (spec === "") continue;

    const [name, ...settings] = spec.split(":");
    if (name === "") {
      throw new Error(`--agents entry "${spec}" is missing an agent name`);
    }

    const config = { name, role: name };
    for (const setting of settings) {
      // Split on the first "=" only so values like "a=b" survive intact.
      const eq = setting.indexOf("=");
      const key = eq === -1 ? setting : setting.slice(0, eq);
      if (key !== "cwd" && key !== "role" && key !== "maxTurns") continue;
      if (eq === -1) {
        throw new Error(
          `--agents entry "${spec}": "${key}" requires a value (${key}=...)`,
        );
      }
      const val = setting.slice(eq + 1);

      if (key === "cwd") {
        config.cwd = resolve(val);
      } else if (key === "role") {
        config.role = val;
      } else {
        const turns = Number.parseInt(val, 10);
        if (Number.isNaN(turns)) {
          throw new Error(
            `--agents entry "${spec}": maxTurns must be a number, got "${val}"`,
          );
        }
        config.maxTurns = turns;
      }
    }

    if (!config.cwd) {
      config.cwd = mkdtempSync(join(tmpdir(), `fit-eval-${name}-`));
    }
    configs.push(config);
  }
  return configs;
}
30
+
31
/**
 * Parse and validate facilitate command options.
 *
 * Exactly one of `--task-file` / `--task-text` must be given; `--task-amend`
 * is appended to the task after a blank line. `--agents` is required.
 * `--max-turns` defaults to "20" and must parse to a non-negative integer.
 *
 * Cheap validation (task, agents presence, max-turns) runs before the agent
 * string is parsed, because parsing agents may create temporary directories.
 *
 * @param {object} values - Parsed option values
 * @returns {{taskContent: string, agentConfigs: object[], facilitatorCwd: string,
 *   model: string, maxTurns: number, outputPath: (string|undefined),
 *   facilitatorProfile: (string|undefined)}} Parsed options
 * @throws {Error} If a required option is missing, options conflict, or
 *   `--max-turns` is not a non-negative integer
 */
function parseFacilitateOptions(values) {
  const taskFile = values["task-file"];
  const taskText = values["task-text"];
  if (taskFile && taskText) {
    throw new Error("--task-file and --task-text are mutually exclusive");
  }
  if (!taskFile && !taskText) {
    throw new Error("--task-file or --task-text is required");
  }

  const agentsRaw = values.agents;
  if (!agentsRaw) throw new Error("--agents is required");

  const maxTurnsRaw = values["max-turns"] ?? "20";
  const maxTurns = Number.parseInt(maxTurnsRaw, 10);
  if (Number.isNaN(maxTurns) || maxTurns < 0) {
    throw new Error(
      `--max-turns must be a non-negative integer, got "${maxTurnsRaw}"`,
    );
  }

  const taskAmend = values["task-amend"] ?? undefined;
  let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
  if (taskAmend) taskContent += `\n\n${taskAmend}`;

  const agentConfigs = parseAgentConfigs(agentsRaw);
  if (agentConfigs.length < 1) {
    throw new Error("--agents must specify at least one agent");
  }

  return {
    taskContent,
    agentConfigs,
    facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
    model: values.model ?? "opus",
    maxTurns,
    outputPath: values.output,
    facilitatorProfile: values["facilitator-profile"] ?? undefined,
  };
}
67
+
68
/**
 * Facilitate command — run a facilitated multi-agent session.
 *
 * Usage: fit-eval facilitate [options]
 *
 * When `--output` is given, output goes through a tee writer that wraps a
 * file stream for that path alongside process.stdout; otherwise output goes
 * straight to process.stdout. The process exits with code 0 when the
 * facilitator result reports success and 1 otherwise.
 *
 * @param {object} values - Parsed option values from cli.parse()
 * @param {string[]} _args - Positional arguments
 */
export async function runFacilitateCommand(values, _args) {
  const opts = parseFacilitateOptions(values);

  const fileStream = opts.outputPath
    ? createWriteStream(opts.outputPath)
    : null;
  const output = fileStream
    ? createTeeWriter({
        fileStream,
        textStream: process.stdout,
        mode: "supervised",
      })
    : process.stdout;

  let result;
  try {
    const { query } = await import("@anthropic-ai/claude-agent-sdk");
    const facilitator = createFacilitator({
      facilitatorCwd: opts.facilitatorCwd,
      agentConfigs: opts.agentConfigs,
      query,
      output,
      model: opts.model,
      maxTurns: opts.maxTurns,
      facilitatorProfile: opts.facilitatorProfile,
    });

    result = await facilitator.run(opts.taskContent);
  } finally {
    // End the streams even when the session throws, so whatever trace was
    // written so far is flushed before the error propagates.
    if (fileStream) {
      await new Promise((r) => output.end(r));
      await new Promise((r) => fileStream.end(r));
    }
  }

  process.exit(result.success ? 0 : 1);
}
@@ -1,7 +1,9 @@
1
1
  import { readFileSync, createWriteStream } from "node:fs";
2
+ import { Writable } from "node:stream";
2
3
  import { resolve } from "node:path";
3
4
  import { createAgentRunner } from "../agent-runner.js";
4
5
  import { createTeeWriter } from "../tee-writer.js";
6
+ import { SequenceCounter } from "../sequence-counter.js";
5
7
 
6
8
  /**
7
9
  * Parse and validate run command options from parsed values.
@@ -61,14 +63,28 @@ export async function runRunCommand(values, _args) {
61
63
  ? createTeeWriter({ fileStream, textStream: process.stdout, mode: "raw" })
62
64
  : process.stdout;
63
65
 
66
+ const counter = new SequenceCounter();
67
+ const devNull = new Writable({
68
+ write(_chunk, _enc, cb) {
69
+ cb();
70
+ },
71
+ });
72
+ const onLine = (line) => {
73
+ const event = JSON.parse(line);
74
+ output.write(
75
+ JSON.stringify({ source: "agent", seq: counter.next(), event }) + "\n",
76
+ );
77
+ };
78
+
64
79
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
65
80
  const runner = createAgentRunner({
66
81
  cwd,
67
82
  query,
68
- output,
83
+ output: devNull,
69
84
  model,
70
85
  maxTurns,
71
86
  allowedTools,
87
+ onLine,
72
88
  settingSources: ["project"],
73
89
  agentProfile,
74
90
  });