@forwardimpact/libeval 0.1.6 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-eval.js CHANGED
@@ -29,7 +29,7 @@ Run options:
29
29
  --task-text=STRING Inline task text (mutually exclusive with --task-file)
30
30
  --cwd=DIR Agent working directory (default: .)
31
31
  --model=MODEL Claude model to use (default: opus)
32
- --max-turns=N Maximum agentic turns (default: 50)
32
+ --max-turns=N Maximum agentic turns (default: 50, 0 = unlimited)
33
33
  --output=PATH Write NDJSON trace to file (default: stdout)
34
34
  --allowed-tools=LIST Comma-separated tools (default: Bash,Read,Glob,Grep,Write,Edit)
35
35
  --agent-profile=NAME Agent profile name (passed as --agent to Claude CLI)
@@ -40,7 +40,7 @@ Supervise options:
40
40
  --supervisor-cwd=DIR Supervisor working directory (default: .)
41
41
  --agent-cwd=DIR Agent working directory (default: temp directory)
42
42
  --model=MODEL Claude model to use (default: opus)
43
- --max-turns=N Maximum supervisor ↔ agent exchanges (default: 20)
43
+ --max-turns=N Maximum supervisor ↔ agent exchanges (default: 20, 0 = unlimited)
44
44
  --output=PATH Write NDJSON trace to file (default: stdout)
45
45
  --allowed-tools=LIST Comma-separated tools for agent (default: Bash,Read,Glob,Grep,Write,Edit)
46
46
  --supervisor-allowed-tools=LIST
package/index.js CHANGED
@@ -5,5 +5,7 @@ export {
5
5
  createSupervisor,
6
6
  SUPERVISOR_SYSTEM_PROMPT,
7
7
  AGENT_SYSTEM_PROMPT,
8
+ isComplete,
9
+ isIntervention,
8
10
  } from "./src/supervisor.js";
9
11
  export { TeeWriter, createTeeWriter } from "./src/tee-writer.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.6",
3
+ "version": "0.1.9",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -17,6 +17,8 @@ export class AgentRunner {
17
17
  * @param {string[]} [deps.allowedTools] - Tools the agent may use
18
18
  * @param {string} [deps.permissionMode] - SDK permission mode
19
19
  * @param {function} [deps.onLine] - Callback invoked with each NDJSON line as it's produced
20
+ * @param {function} [deps.onBatch] - Async callback invoked with a batch of NDJSON lines at flush boundaries: every `batchSize` assistant text blocks, the terminal `result` message, and — on iterator crash/abort — once more in a final flush carrying any lines that never reached a boundary. Receives `(lines, { abort })` where calling `abort()` stops the in-flight SDK session via the AbortController. Optional; assignable at runtime so the Supervisor can swap it per turn.
21
+ * @param {number} [deps.batchSize] - Assistant text-block messages to accumulate before firing onBatch. Tool-only assistant messages ride along without counting. Default 3: the supervisor reviews the agent every three text turns instead of every turn. The terminal `result` always flushes regardless of count.
20
22
  * @param {string[]} [deps.settingSources] - SDK setting sources (e.g. ['project'] to load CLAUDE.md)
21
23
  * @param {string} [deps.agentProfile] - Agent profile name to pass as --agent to the Claude CLI
22
24
  * @param {string|object} [deps.systemPrompt] - SDK system prompt (string replaces default; {type:'preset', preset:'claude_code', append} appends)
@@ -31,6 +33,8 @@ export class AgentRunner {
31
33
  allowedTools,
32
34
  permissionMode,
33
35
  onLine,
36
+ onBatch,
37
+ batchSize,
34
38
  settingSources,
35
39
  agentProfile,
36
40
  systemPrompt,
@@ -43,7 +47,7 @@ export class AgentRunner {
43
47
  this.query = query;
44
48
  this.output = output;
45
49
  this.model = model ?? "opus";
46
- this.maxTurns = maxTurns ?? 50;
50
+ this.maxTurns = maxTurns ?? 50; // 0 means unlimited (omit from SDK)
47
51
  this.allowedTools = allowedTools ?? [
48
52
  "Bash",
49
53
  "Read",
@@ -54,101 +58,214 @@ export class AgentRunner {
54
58
  ];
55
59
  this.permissionMode = permissionMode ?? "bypassPermissions";
56
60
  this.onLine = onLine ?? null;
61
+ this.onBatch = onBatch ?? null;
62
+ this.batchSize = batchSize ?? 3;
57
63
  this.settingSources = settingSources ?? [];
58
64
  this.agentProfile = agentProfile ?? null;
59
65
  this.systemPrompt = systemPrompt ?? null;
60
66
  this.disallowedTools = disallowedTools ?? [];
61
67
  this.sessionId = null;
62
68
  this.buffer = [];
69
+ /** @type {AbortController|null} */
70
+ this.currentAbortController = null;
63
71
  }
64
72
 
65
73
  /**
66
74
  * Run a new agent session with the given task.
67
75
  * @param {string} task - The task prompt
68
- * @returns {Promise<{success: boolean, text: string, sessionId: string|null}>}
76
+ * @returns {Promise<{success: boolean, text: string, sessionId: string|null, error: Error|null, aborted: boolean}>}
69
77
  */
70
78
  async run(task) {
71
- let text = "";
72
- let stopReason = null;
73
- let error = null;
74
-
79
+ const abortController = new AbortController();
80
+ this.currentAbortController = abortController;
75
81
  try {
76
- for await (const message of this.query({
82
+ const iterator = this.query({
77
83
  prompt: task,
78
84
  options: {
79
85
  cwd: this.cwd,
80
86
  allowedTools: this.allowedTools,
81
- maxTurns: this.maxTurns,
87
+ ...(this.maxTurns > 0 && { maxTurns: this.maxTurns }),
82
88
  model: this.model,
83
89
  permissionMode: this.permissionMode,
84
90
  allowDangerouslySkipPermissions: true,
85
91
  settingSources: this.settingSources,
92
+ abortController,
86
93
  ...(this.disallowedTools.length > 0 && {
87
94
  disallowedTools: this.disallowedTools,
88
95
  }),
89
96
  ...(this.systemPrompt && { systemPrompt: this.systemPrompt }),
90
97
  ...(this.agentProfile && { extraArgs: { agent: this.agentProfile } }),
91
98
  },
92
- })) {
93
- const line = JSON.stringify(message);
94
- this.output.write(line + "\n");
95
- this.buffer.push(line);
96
- if (this.onLine) this.onLine(line);
97
-
98
- if (message.type === "system" && message.subtype === "init") {
99
- this.sessionId = message.session_id;
100
- }
101
- if (message.type === "result") {
102
- text = message.result ?? "";
103
- stopReason = message.subtype;
104
- }
105
- }
106
- } catch (err) {
107
- error = err;
99
+ });
100
+ return await this.#consumeQuery(iterator);
101
+ } finally {
102
+ this.currentAbortController = null;
108
103
  }
109
-
110
- // If the SDK already emitted a successful result, honour it even when the
111
- // stream throws afterwards (e.g. "Credit balance is too low" during
112
- // cleanup). Only treat errors as fatal when no result was received yet.
113
- const success = stopReason === "success";
114
- return { success, text, sessionId: this.sessionId, error };
115
104
  }
116
105
 
117
106
  /**
118
107
  * Resume an existing session with a follow-up prompt.
119
108
  * @param {string} prompt - The follow-up prompt
120
- * @returns {Promise<{success: boolean, text: string}>}
109
+ * @returns {Promise<{success: boolean, text: string, sessionId: string|null, error: Error|null, aborted: boolean}>}
121
110
  */
122
111
  async resume(prompt) {
123
- let text = "";
124
- let stopReason = null;
125
- let error = null;
126
-
112
+ const abortController = new AbortController();
113
+ this.currentAbortController = abortController;
127
114
  try {
128
- for await (const message of this.query({
115
+ const iterator = this.query({
129
116
  prompt,
130
117
  options: {
131
118
  resume: this.sessionId,
132
119
  permissionMode: this.permissionMode,
133
120
  allowDangerouslySkipPermissions: true,
121
+ abortController,
134
122
  },
135
- })) {
136
- const line = JSON.stringify(message);
137
- this.output.write(line + "\n");
138
- this.buffer.push(line);
139
- if (this.onLine) this.onLine(line);
123
+ });
124
+ return await this.#consumeQuery(iterator);
125
+ } finally {
126
+ this.currentAbortController = null;
127
+ }
128
+ }
129
+
130
+ /**
131
+ * Shared consumer for both `run()` and `resume()`. Iterates the SDK query
132
+ * iterator, mirroring every line to the output stream / buffer / onLine
133
+ * callback, and — when `onBatch` is set — flushes accumulated lines to it
134
+ * at coarse boundaries: every `batchSize` assistant text-block messages,
135
+ * and the terminal `result` message. Tool-only assistant messages still
136
+ * accumulate in the pending batch and ride along in the next flush, so
137
+ * the supervisor always sees the tool calls that led up to each text
138
+ * block. Raising `batchSize` above 1 is the knob that makes the mid-turn
139
+ * supervisor review less chatty — with the default of 3, the supervisor
140
+ * sees the agent in chunks of three text turns instead of every turn.
141
+ *
142
+ * Corollary: a turn that is *entirely* tool_use with no text blocks and
143
+ * then hits `result` produces exactly one flush at `result` regardless
144
+ * of how many tools ran. That is deliberate — the supervisor only needs
145
+ * to weigh in when the agent surfaces something text-like to react to.
146
+ *
147
+ * INVARIANT: the `await this.onBatch(...)` inside `#maybeFlushBatch` is the ONLY
148
+ * suspension point in this loop. While it is pending, no further lines
149
+ * are pulled from the SDK generator. The Supervisor relies on this — its
150
+ * onBatch callback flips `currentSource` to "supervisor" for the duration
151
+ * of its mid-turn LLM call, and the invariant guarantees no agent line
152
+ * can arrive concurrently and be mis-tagged.
153
+ *
154
+ * If the supervisor calls `abort()` from inside the callback, the next
155
+ * iteration of the for-await loop will throw. We catch the throw, check
156
+ * `currentAbortController.signal.aborted` (avoiding fragility around
157
+ * AbortError vs DOMException shapes), and report `aborted: true` so the
158
+ * caller can distinguish "supervisor asked us to stop" from a real error.
159
+ *
160
+ * If the iterator throws before a flush boundary, any lines still in the
161
+ * pending batch would otherwise vanish without the supervisor seeing
162
+ * them. After the loop, `#terminalFlush` emits a terminal batch so the supervisor can
163
+ * observe the partial state (e.g. note a crash or react to an external
164
+ * abort). A throw from that final flush becomes the returned `error`
165
+ * only if no earlier error was captured — the original failure wins.
166
+ * @param {AsyncIterable<object>} iterator
167
+ * @returns {Promise<{success: boolean, text: string, sessionId: string|null, error: Error|null, aborted: boolean}>}
168
+ */
169
+ async #consumeQuery(iterator) {
170
+ let text = "";
171
+ let stopReason = null;
172
+ let error = null;
173
+ let aborted = false;
174
+ const state = { pendingBatch: [], assistantTextCount: 0 };
140
175
 
176
+ try {
177
+ for await (const message of iterator) {
178
+ this.#recordLine(message, state);
141
179
  if (message.type === "result") {
142
180
  text = message.result ?? "";
143
181
  stopReason = message.subtype;
144
182
  }
183
+ await this.#maybeFlushBatch(message, state);
145
184
  }
146
185
  } catch (err) {
147
- error = err;
186
+ if (this.currentAbortController?.signal.aborted) {
187
+ aborted = true;
188
+ } else {
189
+ error = err;
190
+ }
148
191
  }
149
192
 
193
+ const flushErr = await this.#terminalFlush(state, { error, aborted });
194
+ if (flushErr && !error) error = flushErr;
195
+
150
196
  const success = stopReason === "success";
151
- return { success, text, error };
197
+ return { success, text, sessionId: this.sessionId, error, aborted };
198
+ }
199
+
200
+ /**
201
+ * Mirror a single SDK message to the output stream, buffer, onLine
202
+ * callback, and (when set) the pending-batch state. Also handles
203
+ * session id capture and text-block counting so `#consumeQuery` can
204
+ * stay within the complexity budget.
205
+ * @param {object} message
206
+ * @param {{pendingBatch: string[], assistantTextCount: number}} state
207
+ */
208
+ #recordLine(message, state) {
209
+ const line = JSON.stringify(message);
210
+ this.output.write(line + "\n");
211
+ this.buffer.push(line);
212
+ if (this.onLine) this.onLine(line);
213
+ if (this.onBatch) state.pendingBatch.push(line);
214
+
215
+ if (message.type === "system" && message.subtype === "init") {
216
+ this.sessionId = message.session_id;
217
+ }
218
+ if (message.type === "assistant" && hasTextBlock(message)) {
219
+ state.assistantTextCount++;
220
+ }
221
+ }
222
+
223
+ /**
224
+ * Terminal flush — only fires on the abnormal-end paths (iterator
225
+ * threw or was aborted mid-stream). Delivers any pending lines so the
226
+ * supervisor sees the partial state instead of losing the tail of
227
+ * the run. A natural-end iterator that simply ran out of messages
228
+ * without a `result` marker is treated as an incomplete stub (the
229
+ * real SDK always terminates with `result`) and its pending batch is
230
+ * not re-flushed. Returns an error thrown by the flush callback, or
231
+ * `null` if the flush succeeded or did not fire.
232
+ * @param {{pendingBatch: string[], assistantTextCount: number}} state
233
+ * @param {{error: Error|null, aborted: boolean}} outcome
234
+ * @returns {Promise<Error|null>}
235
+ */
236
+ async #terminalFlush(state, { error, aborted }) {
237
+ const loopEndedAbnormally = Boolean(error || aborted);
238
+ if (!loopEndedAbnormally) return null;
239
+ if (!this.onBatch || state.pendingBatch.length === 0) return null;
240
+ try {
241
+ const batchLines = state.pendingBatch.splice(0);
242
+ await this.onBatch(batchLines, {
243
+ abort: () => this.currentAbortController?.abort(),
244
+ });
245
+ return null;
246
+ } catch (flushErr) {
247
+ return flushErr;
248
+ }
249
+ }
250
+
251
+ /**
252
+ * Flush the pending batch to `onBatch` if either the batchSize threshold
253
+ * has been reached or the current message is the terminal `result`.
254
+ * Extracted so that `#consumeQuery` stays within the project's complexity
255
+ * budget — the flush is one cohesive unit of logic in its own right.
256
+ * @param {object} message
257
+ * @param {{pendingBatch: string[], assistantTextCount: number}} state
258
+ */
259
+ async #maybeFlushBatch(message, state) {
260
+ if (!this.onBatch) return;
261
+ const shouldFlush =
262
+ message.type === "result" || state.assistantTextCount >= this.batchSize;
263
+ if (!shouldFlush) return;
264
+ state.assistantTextCount = 0;
265
+ const batchLines = state.pendingBatch.splice(0);
266
+ await this.onBatch(batchLines, {
267
+ abort: () => this.currentAbortController?.abort(),
268
+ });
152
269
  }
153
270
 
154
271
  /**
@@ -162,6 +279,24 @@ export class AgentRunner {
162
279
  }
163
280
  }
164
281
 
282
+ /**
283
+ * Whether an SDK assistant message contains at least one text block.
284
+ * Only text-block messages count toward the `batchSize` threshold — tool-only
285
+ * assistant messages accumulate silently into the pending batch and ride along
286
+ * in the next flush, keeping supervisor LLM cost bounded. Exported so the mock
287
+ * runner can mirror the real flush predicate without duplicating the logic.
288
+ * @param {object} message
289
+ * @returns {boolean}
290
+ */
291
+ export function hasTextBlock(message) {
292
+ const content = message.message?.content ?? message.content;
293
+ if (!Array.isArray(content)) return false;
294
+ for (const block of content) {
295
+ if (block.type === "text" && block.text) return true;
296
+ }
297
+ return false;
298
+ }
299
+
165
300
  /**
166
301
  * Factory function — wires real dependencies.
167
302
  * @param {object} deps - Same as AgentRunner constructor
@@ -18,6 +18,38 @@ function parseFlag(args, name) {
18
18
  return undefined;
19
19
  }
20
20
 
21
+ /**
22
+ * Parse and validate run command options from args.
23
+ * @param {string[]} args
24
+ * @returns {{ taskContent: string, cwd: string, model: string, maxTurns: number, outputPath: string|undefined, agentProfile: string|undefined, allowedTools: string[] }}
25
+ */
26
+ function parseRunOptions(args) {
27
+ const taskFile = parseFlag(args, "task-file");
28
+ const taskText = parseFlag(args, "task-text");
29
+ if (taskFile && taskText)
30
+ throw new Error("--task-file and --task-text are mutually exclusive");
31
+ if (!taskFile && !taskText)
32
+ throw new Error("--task-file or --task-text is required");
33
+
34
+ const maxTurnsRaw = parseFlag(args, "max-turns") ?? "50";
35
+ const taskAmend = parseFlag(args, "task-amend") ?? undefined;
36
+ let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
37
+ if (taskAmend) taskContent += `\n\n${taskAmend}`;
38
+
39
+ return {
40
+ taskContent,
41
+ cwd: resolve(parseFlag(args, "cwd") ?? "."),
42
+ model: parseFlag(args, "model") ?? "opus",
43
+ maxTurns: maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10),
44
+ outputPath: parseFlag(args, "output"),
45
+ agentProfile: parseFlag(args, "agent-profile") ?? undefined,
46
+ allowedTools: (
47
+ parseFlag(args, "allowed-tools") ??
48
+ "Bash,Read,Glob,Grep,Write,Edit,Agent,TodoWrite"
49
+ ).split(","),
50
+ };
51
+ }
52
+
21
53
  /**
22
54
  * Run command — execute a single agent via the Claude Agent SDK.
23
55
  *
@@ -28,31 +60,24 @@ function parseFlag(args, name) {
28
60
  * --task-text=STRING Inline task text (mutually exclusive with --task-file)
29
61
  * --cwd=DIR Agent working directory (default: .)
30
62
  * --model=MODEL Claude model to use (default: opus)
31
- * --max-turns=N Maximum agentic turns (default: 50)
63
+ * --max-turns=N Maximum agentic turns (default: 50, 0 = unlimited)
32
64
  * --output=PATH Write NDJSON trace to file (default: stdout)
33
65
  * --allowed-tools=LIST Comma-separated tools (default: Bash,Read,Glob,Grep,Write,Edit,Agent,TodoWrite)
34
66
  * --agent-profile=NAME Agent profile name (passed as --agent to Claude CLI)
67
+ * --task-amend=TEXT Additional text appended to the task prompt
35
68
  *
36
69
  * @param {string[]} args - Command arguments
37
70
  */
38
71
  export async function runRunCommand(args) {
39
- const taskFile = parseFlag(args, "task-file");
40
- const taskText = parseFlag(args, "task-text");
41
- if (taskFile && taskText)
42
- throw new Error("--task-file and --task-text are mutually exclusive");
43
- if (!taskFile && !taskText)
44
- throw new Error("--task-file or --task-text is required");
45
-
46
- const cwd = resolve(parseFlag(args, "cwd") ?? ".");
47
- const model = parseFlag(args, "model") ?? "opus";
48
- const maxTurns = parseInt(parseFlag(args, "max-turns") ?? "50", 10);
49
- const outputPath = parseFlag(args, "output");
50
- const agentProfile = parseFlag(args, "agent-profile") ?? undefined;
51
- const allowedTools = (
52
- parseFlag(args, "allowed-tools") ?? "Bash,Read,Glob,Grep,Write,Edit"
53
- ).split(",");
54
-
55
- const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
72
+ const {
73
+ taskContent,
74
+ cwd,
75
+ model,
76
+ maxTurns,
77
+ outputPath,
78
+ agentProfile,
79
+ allowedTools,
80
+ } = parseRunOptions(args);
56
81
 
57
82
  // When --output is specified, stream text to stdout while writing NDJSON to file.
58
83
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
@@ -19,6 +19,50 @@ function parseFlag(args, name) {
19
19
  return undefined;
20
20
  }
21
21
 
22
+ /**
23
+ * Parse all supervise flags from args into an options object.
24
+ * @param {string[]} args
25
+ * @returns {object}
26
+ */
27
+ function parseSuperviseOptions(args) {
28
+ const taskFile = parseFlag(args, "task-file");
29
+ const taskText = parseFlag(args, "task-text");
30
+ if (taskFile && taskText)
31
+ throw new Error("--task-file and --task-text are mutually exclusive");
32
+ if (!taskFile && !taskText)
33
+ throw new Error("--task-file or --task-text is required");
34
+
35
+ const supervisorAllowedToolsRaw = parseFlag(args, "supervisor-allowed-tools");
36
+
37
+ const taskAmend = parseFlag(args, "task-amend") ?? undefined;
38
+ let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
39
+ if (taskAmend) taskContent += `\n\n${taskAmend}`;
40
+
41
+ return {
42
+ taskContent,
43
+ supervisorCwd: resolve(parseFlag(args, "supervisor-cwd") ?? "."),
44
+ agentCwd: resolve(
45
+ parseFlag(args, "agent-cwd") ??
46
+ mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
47
+ ),
48
+ model: parseFlag(args, "model") ?? "opus",
49
+ maxTurns: (() => {
50
+ const raw = parseFlag(args, "max-turns") ?? "20";
51
+ return raw === "0" ? 0 : parseInt(raw, 10);
52
+ })(),
53
+ outputPath: parseFlag(args, "output"),
54
+ supervisorProfile: parseFlag(args, "supervisor-profile") ?? undefined,
55
+ agentProfile: parseFlag(args, "agent-profile") ?? undefined,
56
+ allowedTools: (
57
+ parseFlag(args, "allowed-tools") ??
58
+ "Bash,Read,Glob,Grep,Write,Edit,Agent,TodoWrite"
59
+ ).split(","),
60
+ supervisorAllowedTools: supervisorAllowedToolsRaw
61
+ ? supervisorAllowedToolsRaw.split(",")
62
+ : undefined,
63
+ };
64
+ }
65
+
22
66
  /**
23
67
  * Supervise command — run two agents in a relay loop via the Claude Agent SDK.
24
68
  *
@@ -30,45 +74,23 @@ function parseFlag(args, name) {
30
74
  * --supervisor-cwd=DIR Supervisor working directory (default: .)
31
75
  * --agent-cwd=DIR Agent working directory (default: temp directory)
32
76
  * --model=MODEL Claude model to use (default: opus)
33
- * --max-turns=N Maximum supervisor agent exchanges (default: 20)
77
+ * --max-turns=N Maximum supervisor / agent exchanges (default: 20, 0 = unlimited)
34
78
  * --output=PATH Write NDJSON trace to file (default: stdout)
35
79
  * --allowed-tools=LIST Comma-separated tools for the agent (default: Bash,Read,Glob,Grep,Write,Edit,Agent,TodoWrite)
36
80
  * --supervisor-profile=NAME Supervisor agent profile name (passed as --agent to Claude CLI)
37
81
  * --agent-profile=NAME Agent profile name (passed as --agent to Claude CLI)
82
+ * --task-amend=TEXT Additional text appended to the task prompt
38
83
  *
39
84
  * @param {string[]} args - Command arguments
40
85
  */
41
86
  export async function runSuperviseCommand(args) {
42
- const taskFile = parseFlag(args, "task-file");
43
- const taskText = parseFlag(args, "task-text");
44
- if (taskFile && taskText)
45
- throw new Error("--task-file and --task-text are mutually exclusive");
46
- if (!taskFile && !taskText)
47
- throw new Error("--task-file or --task-text is required");
48
-
49
- const supervisorCwd = resolve(parseFlag(args, "supervisor-cwd") ?? ".");
50
- const agentCwd = resolve(
51
- parseFlag(args, "agent-cwd") ??
52
- mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
53
- );
54
- const model = parseFlag(args, "model") ?? "opus";
55
- const maxTurns = parseInt(parseFlag(args, "max-turns") ?? "20", 10);
56
- const outputPath = parseFlag(args, "output");
57
- const supervisorProfile = parseFlag(args, "supervisor-profile") ?? undefined;
58
- const agentProfile = parseFlag(args, "agent-profile") ?? undefined;
59
- const allowedTools = (
60
- parseFlag(args, "allowed-tools") ?? "Bash,Read,Glob,Grep,Write,Edit"
61
- ).split(",");
62
- const supervisorAllowedToolsRaw = parseFlag(args, "supervisor-allowed-tools");
63
- const supervisorAllowedTools = supervisorAllowedToolsRaw
64
- ? supervisorAllowedToolsRaw.split(",")
65
- : undefined;
66
-
67
- const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
87
+ const opts = parseSuperviseOptions(args);
68
88
 
69
89
  // When --output is specified, stream text to stdout while writing NDJSON to file.
70
90
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
71
- const fileStream = outputPath ? createWriteStream(outputPath) : null;
91
+ const fileStream = opts.outputPath
92
+ ? createWriteStream(opts.outputPath)
93
+ : null;
72
94
  const output = fileStream
73
95
  ? createTeeWriter({
74
96
  fileStream,
@@ -79,19 +101,19 @@ export async function runSuperviseCommand(args) {
79
101
 
80
102
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
81
103
  const supervisor = createSupervisor({
82
- supervisorCwd,
83
- agentCwd,
104
+ supervisorCwd: opts.supervisorCwd,
105
+ agentCwd: opts.agentCwd,
84
106
  query,
85
107
  output,
86
- model,
87
- maxTurns,
88
- allowedTools,
89
- supervisorAllowedTools,
90
- supervisorProfile,
91
- agentProfile,
108
+ model: opts.model,
109
+ maxTurns: opts.maxTurns,
110
+ allowedTools: opts.allowedTools,
111
+ supervisorAllowedTools: opts.supervisorAllowedTools,
112
+ supervisorProfile: opts.supervisorProfile,
113
+ agentProfile: opts.agentProfile,
92
114
  });
93
115
 
94
- const result = await supervisor.run(taskContent);
116
+ const result = await supervisor.run(opts.taskContent);
95
117
 
96
118
  if (fileStream) {
97
119
  await new Promise((r) => output.end(r));