@forwardimpact/libeval 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-eval.js CHANGED
@@ -1,11 +1,15 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env bun
2
2
 
3
3
  import { runOutputCommand } from "../src/commands/output.js";
4
4
  import { runTeeCommand } from "../src/commands/tee.js";
5
+ import { runRunCommand } from "../src/commands/run.js";
6
+ import { runSuperviseCommand } from "../src/commands/supervise.js";
5
7
 
6
8
  const COMMANDS = {
7
9
  output: runOutputCommand,
8
10
  tee: runTeeCommand,
11
+ run: runRunCommand,
12
+ supervise: runSuperviseCommand,
9
13
  };
10
14
 
11
15
  const HELP_TEXT = `
@@ -17,6 +21,25 @@ Usage:
17
21
  Commands:
18
22
  output [--format=json|text] Process trace and output formatted result
19
23
  tee [output.ndjson] Stream text to stdout, optionally save raw NDJSON
24
+ run [options] Run a single agent via the Claude Agent SDK
25
+ supervise [options] Run a supervised agent ↔ supervisor relay loop
26
+
27
+ Run options:
28
+ --task=PATH Path to task file (required)
29
+ --cwd=DIR Agent working directory (default: .)
30
+ --model=MODEL Claude model to use (default: opus)
31
+ --max-turns=N Maximum agentic turns (default: 50)
32
+ --output=PATH Write NDJSON trace to file (default: stdout)
33
+ --allowed-tools=LIST Comma-separated tools (default: Bash,Read,Glob,Grep,Write,Edit)
34
+
35
+ Supervise options:
36
+ --task=PATH Path to task file (required)
37
+ --supervisor-cwd=DIR Supervisor working directory (default: .)
38
+ --agent-cwd=DIR Agent working directory (default: temp directory)
39
+ --model=MODEL Claude model to use (default: opus)
40
+ --max-turns=N Maximum supervisor ↔ agent exchanges (default: 20)
41
+ --output=PATH Write NDJSON trace to file (default: stdout)
42
+ --allowed-tools=LIST Comma-separated tools for agent (default: Bash,Read,Glob,Grep,Write,Edit)
20
43
 
21
44
  Options:
22
45
  --help Show this help message
@@ -27,6 +50,8 @@ Examples:
27
50
  fit-eval output --format=json < trace.ndjson
28
51
  fit-eval tee < trace.ndjson
29
52
  fit-eval tee output.ndjson < trace.ndjson
53
+ fit-eval run --task=.github/tasks/security-audit.md --model=opus
54
+ fit-eval supervise --task=scenarios/guide-setup/task.md --supervisor-cwd=.
30
55
  `.trim();
31
56
 
32
57
  async function main() {
package/index.js CHANGED
@@ -1 +1,4 @@
1
1
  export { TraceCollector, createTraceCollector } from "./src/trace-collector.js";
2
+ export { AgentRunner, createAgentRunner } from "./src/agent-runner.js";
3
+ export { Supervisor, createSupervisor } from "./src/supervisor.js";
4
+ export { TeeWriter, createTeeWriter } from "./src/tee-writer.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -10,10 +10,13 @@
10
10
  "fit-eval": "./bin/fit-eval.js"
11
11
  },
12
12
  "engines": {
13
- "node": ">=22.0.0"
13
+ "bun": ">=1.2.0"
14
14
  },
15
15
  "scripts": {
16
- "test": "node --test test/*.test.js"
16
+ "test": "bun run node --test test/*.test.js"
17
+ },
18
+ "dependencies": {
19
+ "@anthropic-ai/claude-agent-sdk": "^0.1.0"
17
20
  },
18
21
  "publishConfig": {
19
22
  "access": "public"
@@ -0,0 +1,154 @@
1
+ /**
2
+ * AgentRunner — runs a single Claude Agent SDK session and emits raw NDJSON
3
+ * events to an output stream. Building block for both `fit-eval run` and
4
+ * `fit-eval supervise`.
5
+ *
6
+ * Follows OO+DI: constructor injection, factory function, tests bypass factory.
7
+ */
8
+
9
/**
 * Build the SDK options that every query of a runner must share, so that a
 * resumed turn runs with the same working directory, model, tools and
 * permission settings as the turn that created the session.
 * @param {AgentRunner} runner
 * @returns {object} Options object for the SDK `query` call
 */
function buildSessionOptions(runner) {
  return {
    cwd: runner.cwd,
    allowedTools: runner.allowedTools,
    maxTurns: runner.maxTurns,
    model: runner.model,
    permissionMode: runner.permissionMode,
    allowDangerouslySkipPermissions: true,
    settingSources: runner.settingSources,
  };
}

/**
 * Consume one SDK query: forward every message as an NDJSON line to the
 * runner's output, buffer and `onLine` callback, and capture the final result.
 * Errors thrown by the stream (or by `onLine`) are captured, never rethrown.
 * @param {AgentRunner} runner
 * @param {{prompt: string, options: object}} request - Argument for `query`
 * @returns {Promise<{success: boolean, text: string, error: Error|null}>}
 */
async function relayMessages(runner, request) {
  let text = "";
  let stopReason = null;
  let error = null;

  try {
    for await (const message of runner.query(request)) {
      const line = JSON.stringify(message);
      runner.output.write(`${line}\n`);
      runner.buffer.push(line);
      if (runner.onLine) runner.onLine(line);

      if (message.type === "system" && message.subtype === "init") {
        // Keep the previous id if an init message carries none, so a later
        // resume() still has a session to point at.
        runner.sessionId = message.session_id ?? runner.sessionId;
      }
      if (message.type === "result") {
        text = message.result ?? "";
        stopReason = message.subtype;
      }
    }
  } catch (err) {
    error = err;
  }

  // A "success" result that was already received is honoured even when the
  // stream throws afterwards (e.g. "Credit balance is too low" during
  // cleanup); the error is still returned so callers can inspect it.
  return { success: stopReason === "success", text, error };
}

/**
 * Runs Claude Agent SDK sessions and mirrors every SDK message as NDJSON.
 */
export class AgentRunner {
  /**
   * @param {object} deps
   * @param {string} deps.cwd - Agent working directory
   * @param {function} deps.query - SDK query function (injected for testing)
   * @param {import("stream").Writable} deps.output - Stream to emit NDJSON to
   * @param {string} [deps.model] - Claude model identifier (default: "opus")
   * @param {number} [deps.maxTurns] - Maximum agentic turns (default: 50)
   * @param {string[]} [deps.allowedTools] - Tools the agent may use
   * @param {string} [deps.permissionMode] - SDK permission mode (default: "bypassPermissions")
   * @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
   * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
   * @throws {Error} When cwd, query or output is missing
   */
  constructor({
    cwd,
    query,
    output,
    model,
    maxTurns,
    allowedTools,
    permissionMode,
    onLine,
    settingSources,
  }) {
    if (!cwd) throw new Error("cwd is required");
    if (!query) throw new Error("query is required");
    if (!output) throw new Error("output is required");
    this.cwd = cwd;
    this.query = query;
    this.output = output;
    this.model = model ?? "opus";
    this.maxTurns = maxTurns ?? 50;
    this.allowedTools = allowedTools ?? [
      "Bash",
      "Read",
      "Glob",
      "Grep",
      "Write",
      "Edit",
    ];
    this.permissionMode = permissionMode ?? "bypassPermissions";
    this.onLine = onLine ?? null;
    this.settingSources = settingSources ?? [];
    this.sessionId = null;
    this.buffer = [];
  }

  /**
   * Run a new agent session with the given task.
   * @param {string} task - The task prompt
   * @returns {Promise<{success: boolean, text: string, sessionId: string|null, error: Error|null}>}
   */
  async run(task) {
    const { success, text, error } = await relayMessages(this, {
      prompt: task,
      options: buildSessionOptions(this),
    });
    return { success, text, sessionId: this.sessionId, error };
  }

  /**
   * Resume the session started by run() with a follow-up prompt. The same
   * session options are passed again so the resumed turn does not fall back
   * to SDK defaults for cwd, model, tools or permissions.
   * @param {string} prompt - The follow-up prompt
   * @returns {Promise<{success: boolean, text: string, error: Error|null}>}
   */
  async resume(prompt) {
    return relayMessages(this, {
      prompt,
      options: { ...buildSessionOptions(this), resume: this.sessionId },
    });
  }

  /**
   * Drain buffered output lines. Used by Supervisor to tag and re-emit lines.
   * @returns {string[]} The lines buffered since the previous drain
   */
  drainOutput() {
    const lines = [...this.buffer];
    this.buffer = [];
    return lines;
  }
}
146
+
147
/**
 * Production entry point for building an AgentRunner. Tests construct the
 * class directly; everything else goes through this factory.
 * @param {object} deps - Forwarded unchanged to the AgentRunner constructor
 * @returns {AgentRunner} A newly constructed runner
 */
export function createAgentRunner(deps) {
  const runner = new AgentRunner(deps);
  return runner;
}
@@ -0,0 +1,76 @@
1
+ import { readFileSync, createWriteStream } from "node:fs";
2
+ import { resolve } from "node:path";
3
+ import { createAgentRunner } from "../agent-runner.js";
4
+ import { createTeeWriter } from "../tee-writer.js";
5
+
6
/**
 * Look up a command-line flag given either as `--name=value` or as
 * `--name value`. The first argument that matches either form wins.
 * @param {string[]} args - Raw command arguments
 * @param {string} name - Flag name without the leading `--`
 * @returns {string|undefined} The flag value, or undefined when absent
 */
function parseFlag(args, name) {
  const bare = `--${name}`;
  const inline = `${bare}=`;
  for (const [index, arg] of args.entries()) {
    if (arg.startsWith(inline)) return arg.slice(inline.length);
    // A bare flag only counts when another argument follows it.
    const hasFollower = index + 1 < args.length;
    if (hasFollower && arg === bare) return args[index + 1];
  }
  return undefined;
}
20
+
21
/**
 * Run command — execute a single agent via the Claude Agent SDK.
 *
 * Usage: fit-eval run [options]
 *
 * Options:
 *   --task=PATH          Path to task file (required)
 *   --cwd=DIR            Agent working directory (default: .)
 *   --model=MODEL        Claude model to use (default: opus)
 *   --max-turns=N        Maximum agentic turns (default: 50)
 *   --output=PATH        Write NDJSON trace to file (default: stdout)
 *   --allowed-tools=LIST Comma-separated tools (default: Bash,Read,Glob,Grep,Write,Edit)
 *
 * Terminates the process with exit code 0 when the agent reports success and
 * 1 otherwise; an error captured by the runner is reported on stderr first.
 *
 * @param {string[]} args - Command arguments
 * @throws {Error} When --task is missing, --max-turns is not a positive
 *   integer, or --allowed-tools names no tool
 */
export async function runRunCommand(args) {
  const task = parseFlag(args, "task");
  if (!task) throw new Error("--task is required");

  const cwd = resolve(parseFlag(args, "cwd") ?? ".");
  const model = parseFlag(args, "model") ?? "opus";

  // Reject NaN and non-positive values instead of forwarding them to the SDK.
  const maxTurns = Number.parseInt(parseFlag(args, "max-turns") ?? "50", 10);
  if (!Number.isInteger(maxTurns) || maxTurns < 1) {
    throw new Error("--max-turns must be a positive integer");
  }

  const outputPath = parseFlag(args, "output");

  // Trim entries so "Bash, Read" is not passed on as the tool name " Read".
  const allowedTools = (
    parseFlag(args, "allowed-tools") ?? "Bash,Read,Glob,Grep,Write,Edit"
  )
    .split(",")
    .map((tool) => tool.trim())
    .filter((tool) => tool.length > 0);
  if (allowedTools.length === 0) {
    throw new Error("--allowed-tools must list at least one tool");
  }

  const taskContent = readFileSync(task, "utf8");

  // When --output is specified, stream text to stdout while writing NDJSON to file.
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
  const fileStream = outputPath ? createWriteStream(outputPath) : null;
  const output = fileStream
    ? createTeeWriter({ fileStream, textStream: process.stdout, mode: "raw" })
    : process.stdout;

  const { query } = await import("@anthropic-ai/claude-agent-sdk");
  const runner = createAgentRunner({
    cwd,
    query,
    output,
    model,
    maxTurns,
    allowedTools,
    settingSources: ["project"],
  });

  const result = await runner.run(taskContent);

  // Flush the tee and the trace file before exiting.
  if (fileStream) {
    await new Promise((r) => output.end(r));
    await new Promise((r) => fileStream.end(r));
  }

  // Surface the captured error; otherwise a failed run exits 1 silently.
  if (!result.success && result.error) {
    const detail = result.error.message ?? String(result.error);
    process.stderr.write(`fit-eval run: ${detail}\n`);
  }

  process.exit(result.success ? 0 : 1);
}
@@ -0,0 +1,86 @@
1
+ import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
2
+ import { resolve, join } from "node:path";
3
+ import { tmpdir } from "node:os";
4
+ import { createSupervisor } from "../supervisor.js";
5
+ import { createTeeWriter } from "../tee-writer.js";
6
+
7
/**
 * Read one flag from the argument list. Both `--name=value` and
 * `--name value` spellings are accepted; the earliest match is used.
 * @param {string[]} args - Raw command arguments
 * @param {string} name - Flag name without the leading `--`
 * @returns {string|undefined} The flag value, or undefined when not given
 */
function parseFlag(args, name) {
  const flag = `--${name}`;
  const assigned = `${flag}=`;
  // Locate the first argument that carries the flag in either spelling; a
  // bare flag in last position has no value and therefore does not match.
  const position = args.findIndex(
    (arg, i) =>
      arg.startsWith(assigned) || (arg === flag && i + 1 < args.length),
  );
  if (position === -1) return undefined;

  const hit = args[position];
  return hit.startsWith(assigned)
    ? hit.slice(assigned.length)
    : args[position + 1];
}
21
+
22
/**
 * Supervise command — run two agents in a relay loop via the Claude Agent SDK.
 *
 * Usage: fit-eval supervise [options]
 *
 * Options:
 *   --task=PATH            Path to task file (required)
 *   --supervisor-cwd=DIR   Supervisor working directory (default: .)
 *   --agent-cwd=DIR        Agent working directory (default: temp directory)
 *   --model=MODEL          Claude model to use (default: opus)
 *   --max-turns=N          Maximum supervisor ↔ agent exchanges (default: 20)
 *   --output=PATH          Write NDJSON trace to file (default: stdout)
 *   --allowed-tools=LIST   Comma-separated tools for the agent (default: Bash,Read,Glob,Grep,Write,Edit)
 *
 * Terminates the process with exit code 0 when the supervisor run reports
 * success and 1 otherwise.
 *
 * @param {string[]} args - Command arguments
 * @throws {Error} When --task is missing, --max-turns is not a positive
 *   integer, or --allowed-tools names no tool
 */
export async function runSuperviseCommand(args) {
  const task = parseFlag(args, "task");
  if (!task) throw new Error("--task is required");

  const model = parseFlag(args, "model") ?? "opus";

  // A NaN limit would make the relay loop never execute, so reject bad
  // values up front instead of failing silently after the first agent turn.
  const maxTurns = Number.parseInt(parseFlag(args, "max-turns") ?? "20", 10);
  if (!Number.isInteger(maxTurns) || maxTurns < 1) {
    throw new Error("--max-turns must be a positive integer");
  }

  const outputPath = parseFlag(args, "output");

  // Trim entries so "Bash, Read" is not passed on as the tool name " Read".
  const allowedTools = (
    parseFlag(args, "allowed-tools") ?? "Bash,Read,Glob,Grep,Write,Edit"
  )
    .split(",")
    .map((tool) => tool.trim())
    .filter((tool) => tool.length > 0);
  if (allowedTools.length === 0) {
    throw new Error("--allowed-tools must list at least one tool");
  }

  const taskContent = readFileSync(task, "utf8");

  // Resolve directories only after the inputs are validated, so a bad flag
  // or unreadable task file does not leave a stray temp directory behind.
  const supervisorCwd = resolve(parseFlag(args, "supervisor-cwd") ?? ".");
  const agentCwd = resolve(
    parseFlag(args, "agent-cwd") ??
      mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
  );

  // When --output is specified, stream text to stdout while writing NDJSON to file.
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
  const fileStream = outputPath ? createWriteStream(outputPath) : null;
  const output = fileStream
    ? createTeeWriter({
        fileStream,
        textStream: process.stdout,
        mode: "supervised",
      })
    : process.stdout;

  const { query } = await import("@anthropic-ai/claude-agent-sdk");
  const supervisor = createSupervisor({
    supervisorCwd,
    agentCwd,
    query,
    output,
    model,
    maxTurns,
    allowedTools,
  });

  const result = await supervisor.run(taskContent);

  // Flush the tee and the trace file before exiting.
  if (fileStream) {
    await new Promise((r) => output.end(r));
    await new Promise((r) => fileStream.end(r));
  }

  process.exit(result.success ? 0 : 1);
}
@@ -1,5 +1,7 @@
1
1
  import { createWriteStream } from "fs";
2
- import { createTraceCollector } from "@forwardimpact/libeval";
2
+ import { PassThrough } from "node:stream";
3
+ import { pipeline } from "node:stream/promises";
4
+ import { createTeeWriter } from "../tee-writer.js";
3
5
 
4
6
  /**
5
7
  * Tee command — stream text output to stdout while optionally saving the raw
@@ -12,46 +14,18 @@ import { createTraceCollector } from "@forwardimpact/libeval";
12
14
  export async function runTeeCommand(args) {
13
15
  const outputPath = args.find((a) => !a.startsWith("-")) ?? null;
14
16
  const fileStream = outputPath ? createWriteStream(outputPath) : null;
15
- const collector = createTraceCollector();
16
- const turnsEmitted = { count: 0 };
17
17
 
18
- try {
19
- let buffer = "";
20
-
21
- for await (const chunk of process.stdin) {
22
- buffer += chunk.toString("utf8");
23
-
24
- let newlineIdx;
25
- while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
26
- const line = buffer.slice(0, newlineIdx);
27
- buffer = buffer.slice(newlineIdx + 1);
28
-
29
- if (fileStream) {
30
- fileStream.write(line + "\n");
31
- }
32
-
33
- collector.addLine(line);
34
- flushNewTurns(collector, turnsEmitted);
35
- }
36
- }
18
+ // TeeWriter requires a fileStream; when no output file is specified,
19
+ // use a PassThrough as a no-op sink (NDJSON is not saved).
20
+ const sink = fileStream ?? new PassThrough();
21
+ const tee = createTeeWriter({
22
+ fileStream: sink,
23
+ textStream: process.stdout,
24
+ mode: "raw",
25
+ });
37
26
 
38
- // Process any remaining data without a trailing newline
39
- if (buffer.trim()) {
40
- if (fileStream) {
41
- fileStream.write(buffer + "\n");
42
- }
43
- collector.addLine(buffer);
44
- flushNewTurns(collector, turnsEmitted);
45
- }
46
-
47
- // Emit the result summary at the end
48
- if (collector.result) {
49
- const text = collector.toText();
50
- const lastNewline = text.lastIndexOf("\n---");
51
- if (lastNewline !== -1) {
52
- process.stdout.write(text.slice(lastNewline) + "\n");
53
- }
54
- }
27
+ try {
28
+ await pipeline(process.stdin, tee);
55
29
  } finally {
56
30
  if (fileStream) {
57
31
  await new Promise((resolve, reject) => {
@@ -61,39 +35,3 @@ export async function runTeeCommand(args) {
61
35
  }
62
36
  }
63
37
  }
64
-
65
- /**
66
- * Write text for any new turns that haven't been emitted yet.
67
- * @param {import("@forwardimpact/libeval").TraceCollector} collector
68
- * @param {{ count: number }} turnsEmitted
69
- */
70
- function flushNewTurns(collector, turnsEmitted) {
71
- const turns = collector.turns;
72
- while (turnsEmitted.count < turns.length) {
73
- const turn = turns[turnsEmitted.count];
74
- turnsEmitted.count++;
75
-
76
- if (turn.role === "assistant") {
77
- for (const block of turn.content) {
78
- if (block.type === "text") {
79
- process.stdout.write(block.text + "\n");
80
- } else if (block.type === "tool_use") {
81
- const inputSummary = summarizeInput(block.input);
82
- process.stdout.write(`> Tool: ${block.name} ${inputSummary}\n`);
83
- }
84
- }
85
- }
86
- }
87
- }
88
-
89
- /**
90
- * Summarize tool input for text display, truncated to keep logs readable.
91
- * @param {object} input - Tool input object
92
- * @returns {string} Truncated summary
93
- */
94
- function summarizeInput(input) {
95
- if (!input || typeof input !== "object") return "";
96
- const json = JSON.stringify(input);
97
- if (json.length <= 200) return json;
98
- return json.slice(0, 197) + "...";
99
- }
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Supervisor — orchestrates a relay loop between an agent and a supervisor,
3
+ * both running as AgentRunner instances. The agent works on a task while the
4
+ * supervisor observes and decides when the evaluation is complete.
5
+ *
6
+ * Follows OO+DI: constructor injection, factory function, tests bypass factory.
7
+ */
8
+
9
+ import { PassThrough } from "node:stream";
10
+ import { createAgentRunner } from "./agent-runner.js";
11
+
12
/**
 * Check if the supervisor's response signals evaluation completion.
 * Uses a structured signal — `EVALUATION_COMPLETE` on its own line —
 * to avoid false positives from natural language.
 * @param {string} text - Supervisor response text
 * @returns {boolean} True when some line is exactly `EVALUATION_COMPLETE`
 */
export function isDone(text) {
  return /^EVALUATION_COMPLETE$/m.test(text);
}

/**
 * Decide whether a runner result must abort the relay loop. An error is only
 * fatal when the runner did not also report success: a result that was
 * received before the stream threw is honoured.
 * @param {{success?: boolean, error?: unknown}} result
 * @returns {boolean}
 */
function isFatal(result) {
  return Boolean(result.error) && !result.success;
}

export class Supervisor {
  /**
   * @param {object} deps
   * @param {import("./agent-runner.js").AgentRunner} deps.agentRunner - Runs the agent sessions
   * @param {import("./agent-runner.js").AgentRunner} deps.supervisorRunner - Runs the supervisor sessions
   * @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
   * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges (default: 20)
   * @throws {Error} When agentRunner, supervisorRunner or output is missing
   */
  constructor({ agentRunner, supervisorRunner, output, maxTurns }) {
    if (!agentRunner) throw new Error("agentRunner is required");
    if (!supervisorRunner) throw new Error("supervisorRunner is required");
    if (!output) throw new Error("output is required");
    this.agentRunner = agentRunner;
    this.supervisorRunner = supervisorRunner;
    this.output = output;
    this.maxTurns = maxTurns ?? 20;
    /** @type {"agent"|"supervisor"} */
    this.currentSource = "agent";
    /** @type {number} */
    this.currentTurn = 0;
  }

  /**
   * Run the supervisor ↔ agent relay loop. The run succeeds only when the
   * supervisor emits EVALUATION_COMPLETE within `maxTurns` exchanges; a
   * summary line is written to the output before returning.
   * @param {string} task - The initial task for the agent
   * @returns {Promise<{success: boolean, turns: number}>}
   */
  async run(task) {
    // Every exit path emits the summary and returns the same object shape.
    const finish = (success, turns) => {
      const result = { success, turns };
      this.emitSummary(result);
      return result;
    };

    // Turn 0: Agent receives the task and starts working
    this.currentSource = "agent";
    this.currentTurn = 0;
    let agentResult = await this.agentRunner.run(task);
    if (isFatal(agentResult)) return finish(false, 0);

    for (let turn = 1; turn <= this.maxTurns; turn++) {
      // Supervisor observes the agent's output
      const supervisorPrompt =
        `The agent reported:\n\n${agentResult.text}\n\n` +
        `Decide: provide guidance, answer a question, or say EVALUATION_COMPLETE on its own line.`;

      this.currentSource = "supervisor";
      this.currentTurn = turn;
      // The first exchange starts the supervisor session; later ones resume it.
      const supervisorResult =
        turn === 1
          ? await this.supervisorRunner.run(supervisorPrompt)
          : await this.supervisorRunner.resume(supervisorPrompt);
      if (isFatal(supervisorResult)) return finish(false, turn);

      if (isDone(supervisorResult.text)) return finish(true, turn);

      // Supervisor's response becomes the agent's next input
      this.currentSource = "agent";
      this.currentTurn = turn;
      agentResult = await this.agentRunner.resume(supervisorResult.text);
      if (isFatal(agentResult)) return finish(false, turn);
    }

    // Turn budget exhausted without a completion signal.
    return finish(false, this.maxTurns);
  }

  /**
   * Emit a single NDJSON line tagged with the current source and turn.
   * Called in real-time via the AgentRunner onLine callback.
   * @param {string} line - Raw NDJSON line from the runner
   * @throws {SyntaxError} When `line` is not valid JSON
   */
  emitLine(line) {
    const event = JSON.parse(line);
    const tagged = {
      source: this.currentSource,
      turn: this.currentTurn,
      event,
    };
    this.output.write(JSON.stringify(tagged) + "\n");
  }

  /**
   * Emit a final orchestrator summary line.
   * @param {{success: boolean, turns: number}} result
   */
  emitSummary(result) {
    const summary = {
      source: "orchestrator",
      type: "summary",
      success: result.success,
      turns: result.turns,
    };
    this.output.write(JSON.stringify(summary) + "\n");
  }
}
130
+
131
/**
 * Factory function — wires both AgentRunners with their respective configs.
 *
 * Both runners report their NDJSON lines through `onLine`, which tags them
 * and writes them to `output`. The runners' own output streams are
 * throwaway sinks.
 *
 * @param {object} deps
 * @param {string} deps.supervisorCwd - Supervisor working directory
 * @param {string} deps.agentCwd - Agent working directory
 * @param {function} deps.query - SDK query function
 * @param {import("stream").Writable} deps.output - Final output stream
 * @param {string} [deps.model] - Claude model identifier
 * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
 * @param {string[]} [deps.allowedTools] - Tools the agent may use
 * @returns {Supervisor}
 */
export function createSupervisor({
  supervisorCwd,
  agentCwd,
  query,
  output,
  model,
  maxTurns,
  allowedTools,
}) {
  // Forward-reference: onLine captures `supervisor` before construction completes.
  // This is safe because onLine is only called during run(), after construction.
  let supervisor;
  const onLine = (line) => supervisor.emitLine(line);

  // AgentRunner requires an output stream, but in supervised mode the lines
  // are delivered via onLine. Put the sink in flowing mode so written data is
  // discarded instead of piling up in an unread PassThrough for the whole run.
  const discardSink = () => {
    const sink = new PassThrough();
    sink.resume();
    return sink;
  };

  const agentRunner = createAgentRunner({
    cwd: agentCwd,
    query,
    output: discardSink(),
    model,
    // Per-session agentic turn limit; distinct from the relay `maxTurns`.
    maxTurns: 50,
    allowedTools,
    onLine,
    settingSources: ["project"],
  });

  const supervisorRunner = createAgentRunner({
    cwd: supervisorCwd,
    query,
    output: discardSink(),
    model,
    maxTurns: 10,
    // The supervisor only gets the file-inspection tools Read, Glob and Grep.
    allowedTools: ["Read", "Glob", "Grep"],
    onLine,
    settingSources: ["project"],
  });

  supervisor = new Supervisor({
    agentRunner,
    supervisorRunner,
    output,
    maxTurns,
  });
  return supervisor;
}