@forwardimpact/libeval 0.1.43 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,10 +46,10 @@ export const definition = {
46
46
  description:
47
47
  "Claude model for the agent-under-test (default: claude-sonnet-4-6)",
48
48
  },
49
- "supervisor-model": {
49
+ "lead-model": {
50
50
  type: "string",
51
51
  description:
52
- "Claude model for the supervisor (default: claude-opus-4-7)",
52
+ "Claude model for the lead role (default: claude-opus-4-7)",
53
53
  },
54
54
  "judge-model": {
55
55
  type: "string",
package/bin/fit-eval.js CHANGED
@@ -9,6 +9,8 @@ import { runTeeCommand } from "../src/commands/tee.js";
9
9
  import { runRunCommand } from "../src/commands/run.js";
10
10
  import { runSuperviseCommand } from "../src/commands/supervise.js";
11
11
  import { runFacilitateCommand } from "../src/commands/facilitate.js";
12
+ import { runDiscussCommand } from "../src/commands/discuss.js";
13
+ import { runCallbackCommand } from "../src/commands/callback.js";
12
14
 
13
15
  // `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
14
16
  // the readFileSync branch in the compiled binary (which would ENOENT against
@@ -18,6 +20,18 @@ const VERSION =
18
20
  JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
19
21
  .version;
20
22
 
23
/**
 * Option definitions for the lead role (`--lead-profile`, `--lead-model`).
 * Spread into the `options` map of each subcommand that accepts them, so
 * the flag names and help text stay identical across those subcommands.
 */
const LEAD_OPTIONS = Object.fromEntries(
  [
    [
      "lead-profile",
      "Lead role profile name (supervisor / facilitator / chair)",
    ],
    [
      "lead-model",
      "Claude model for the lead role (default: claude-opus-4-7[1m])",
    ],
  ].map(([flag, description]) => [flag, { type: "string", description }]),
);
34
+
21
35
  const definition = {
22
36
  name: "fit-eval",
23
37
  version: VERSION,
@@ -93,11 +107,7 @@ const definition = {
93
107
  description:
94
108
  "Claude model for the agent (default: claude-opus-4-7[1m])",
95
109
  },
96
- "supervisor-model": {
97
- type: "string",
98
- description:
99
- "Claude model for the supervisor (default: claude-opus-4-7[1m])",
100
- },
110
+ ...LEAD_OPTIONS,
101
111
  "max-turns": {
102
112
  type: "string",
103
113
  description:
@@ -117,10 +127,6 @@ const definition = {
117
127
  description: "Supervisor working directory",
118
128
  },
119
129
  "agent-cwd": { type: "string", description: "Agent working directory" },
120
- "supervisor-profile": {
121
- type: "string",
122
- description: "Supervisor (judge) profile name",
123
- },
124
130
  "supervisor-allowed-tools": {
125
131
  type: "string",
126
132
  description: "Supervisor tool allowlist",
@@ -154,11 +160,7 @@ const definition = {
154
160
  type: "string",
155
161
  description: "Claude model for agents (default: claude-opus-4-7[1m])",
156
162
  },
157
- "facilitator-model": {
158
- type: "string",
159
- description:
160
- "Claude model for the facilitator (default: claude-opus-4-7[1m])",
161
- },
163
+ ...LEAD_OPTIONS,
162
164
  "max-turns": {
163
165
  type: "string",
164
166
  description: "Max agentic turns (default: 20, 0 = unlimited)",
@@ -171,10 +173,6 @@ const definition = {
171
173
  type: "string",
172
174
  description: "Facilitator working directory",
173
175
  },
174
- "facilitator-profile": {
175
- type: "string",
176
- description: "Facilitator profile name",
177
- },
178
176
  "agent-profiles": {
179
177
  type: "string",
180
178
  description:
@@ -186,6 +184,56 @@ const definition = {
186
184
  },
187
185
  },
188
186
  },
187
+ {
188
+ name: "discuss",
189
+ args: "",
190
+ description:
191
+ "Run an async, suspendable discussion — Chair + N participants + bridge callback",
192
+ options: {
193
+ "task-file": {
194
+ type: "string",
195
+ description: "Path to a markdown task file",
196
+ },
197
+ "task-text": {
198
+ type: "string",
199
+ description: "Inline task text (alternative to --task-file)",
200
+ },
201
+ "task-amend": {
202
+ type: "string",
203
+ description: "Additional text appended to the task",
204
+ },
205
+ "agent-model": {
206
+ type: "string",
207
+ description: "Claude model for agents (default: claude-opus-4-7[1m])",
208
+ },
209
+ ...LEAD_OPTIONS,
210
+ "max-turns": {
211
+ type: "string",
212
+ description: "Max agentic turns (default: 40, 0 = unlimited)",
213
+ },
214
+ output: {
215
+ type: "string",
216
+ description: "Write the NDJSON trace to a file",
217
+ },
218
+ "agent-profiles": {
219
+ type: "string",
220
+ description: "Comma-separated participant profile names (optional)",
221
+ },
222
+ "agent-cwd": {
223
+ type: "string",
224
+ description: "Working directory shared by participants (default: .)",
225
+ },
226
+ "discussion-id": {
227
+ type: "string",
228
+ description:
229
+ "Stable id for the threaded conversation; carried through traces for linking",
230
+ },
231
+ "resume-context": {
232
+ type: "string",
233
+ description: "JSON-serialized prior state for a resumed run",
234
+ },
235
+ },
236
+ },
189
237
  {
190
238
  name: "output",
191
239
  args: "",
@@ -198,6 +246,35 @@ const definition = {
198
246
  description:
199
247
  "Stream readable text to stdout while saving raw NDJSON to a file",
200
248
  },
249
+ {
250
+ name: "callback",
251
+ args: "",
252
+ description:
253
+ "Extract the terminal summary from an NDJSON trace and POST it to a callback URL",
254
+ options: {
255
+ "trace-file": {
256
+ type: "string",
257
+ description: "Path to the NDJSON trace file",
258
+ },
259
+ "callback-url": {
260
+ type: "string",
261
+ description: "URL to POST the summary to",
262
+ },
263
+ "correlation-id": {
264
+ type: "string",
265
+ description: "Correlation ID to include in the payload",
266
+ },
267
+ "run-url": {
268
+ type: "string",
269
+ description: "GitHub Actions run URL (optional)",
270
+ },
271
+ "discussion-id": {
272
+ type: "string",
273
+ description:
274
+ "Discussion id (fallback when the trace lacks a meta event)",
275
+ },
276
+ },
277
+ },
201
278
  ],
202
279
  globalOptions: {
203
280
  format: { type: "string", description: "Output format (json|text)" },
@@ -207,8 +284,9 @@ const definition = {
207
284
  },
208
285
  examples: [
209
286
  "fit-eval run --task-file=task.md --output=trace.ndjson",
210
- "fit-eval supervise --task-file=task.md --supervisor-profile=judge --agent-profile=coder --output=trace.ndjson",
211
- 'fit-eval facilitate --task-file=task.md --facilitator-profile=lead --agent-profiles="security-engineer,technical-writer" --output=trace.ndjson',
287
+ "fit-eval supervise --task-file=task.md --lead-profile=judge --agent-profile=coder --output=trace.ndjson",
288
+ 'fit-eval facilitate --task-file=task.md --lead-profile=lead --agent-profiles="security-engineer,technical-writer" --output=trace.ndjson',
289
+ 'fit-eval discuss --task-file=task.md --lead-profile=release-engineer --agent-profiles="staff-engineer,security-engineer" --discussion-id=GD_kw...',
212
290
  "fit-eval output --format=text < trace.ndjson",
213
291
  ],
214
292
  documentation: [
@@ -234,7 +312,7 @@ const definition = {
234
312
  title: "Agent Teams",
235
313
  url: "https://www.forwardimpact.team/docs/products/agent-teams/index.md",
236
314
  description:
237
- "How to author the agent, supervisor, and facilitator profiles consumed by --agent-profile, --supervisor-profile, --facilitator-profile, and --agent-profiles.",
315
+ "How to author the profiles consumed by --agent-profile, --lead-profile, and --agent-profiles.",
238
316
  },
239
317
  ],
240
318
  };
@@ -248,6 +326,8 @@ const COMMANDS = {
248
326
  run: runRunCommand,
249
327
  supervise: runSuperviseCommand,
250
328
  facilitate: runFacilitateCommand,
329
+ discuss: runDiscussCommand,
330
+ callback: runCallbackCommand,
251
331
  };
252
332
 
253
333
  async function main() {
package/bin/fit-trace.js CHANGED
@@ -26,6 +26,7 @@ import {
26
26
  runSplitCommand,
27
27
  } from "../src/commands/trace.js";
28
28
  import { runAssertCommand } from "../src/commands/assert.js";
29
+ import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
29
30
 
30
31
  // `bun build --compile` injects FIT_TRACE_VERSION via --define, eliminating
31
32
  // the readFileSync branch in the compiled binary (which would ENOENT against
@@ -160,6 +161,18 @@ const definition = {
160
161
  args: "<file> <index>",
161
162
  description: "Single turn by index",
162
163
  },
164
+ {
165
+ name: "by-discussion",
166
+ args: "<discussion-id> [trace-dir]",
167
+ description:
168
+ "List trace files whose meta header carries the given discussion_id, ordered by first-event timestamp",
169
+ options: {
170
+ "trace-dir": {
171
+ type: "string",
172
+ description: "Directory to scan (default: traces)",
173
+ },
174
+ },
175
+ },
163
176
  {
164
177
  name: "filter",
165
178
  args: "<file>",
@@ -307,6 +320,7 @@ const COMMANDS = {
307
320
  filter: runFilterCommand,
308
321
  split: runSplitCommand,
309
322
  assert: runAssertCommand,
323
+ "by-discussion": runByDiscussionCommand,
310
324
  };
311
325
 
312
326
  async function main() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.43",
3
+ "version": "0.1.44",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -40,7 +40,7 @@ function parseRunOptions(values) {
40
40
  runs,
41
41
  output: resolve(output),
42
42
  agentModel: values["agent-model"] ?? "claude-sonnet-4-6",
43
- supervisorModel: values["supervisor-model"] ?? "claude-opus-4-7",
43
+ supervisorModel: values["lead-model"] ?? "claude-opus-4-7",
44
44
  judgeModel: values["judge-model"] ?? "claude-opus-4-7",
45
45
  profiles: {
46
46
  agent: values["agent-profile"] ?? null,
@@ -0,0 +1,84 @@
1
+ import { readdirSync, statSync, openSync, readSync, closeSync } from "node:fs";
2
+ import { join } from "node:path";
3
+
4
/**
 * Read the first line of a file, without its terminating newline.
 *
 * At most 64 KiB are read. If no newline occurs within that window (or the
 * file has no newline at all), everything read so far is returned.
 *
 * The file is read in a loop because a single `readSync` call may return
 * fewer bytes than requested; stopping after one short read could truncate
 * the line. The newline is searched for on the raw bytes so only the line
 * itself is decoded.
 *
 * @param {string} path - File to read.
 * @returns {string} The first line decoded as UTF-8.
 * @throws {Error} If the file cannot be opened or read.
 */
function readFirstLine(path) {
  const MAX_BYTES = 65536;
  const NEWLINE = 0x0a;
  const fd = openSync(path, "r");
  try {
    const buf = Buffer.alloc(MAX_BYTES);
    let filled = 0;
    while (filled < MAX_BYTES) {
      const count = readSync(fd, buf, filled, MAX_BYTES - filled, filled);
      if (count === 0) break; // end of file
      const nl = buf.subarray(filled, filled + count).indexOf(NEWLINE);
      if (nl !== -1) return buf.toString("utf8", 0, filled + nl);
      filled += count;
    }
    return buf.toString("utf8", 0, filled);
  } finally {
    // Always release the descriptor, including when readSync throws.
    closeSync(fd);
  }
}
23
+
24
/**
 * Scan a directory for `.ndjson` files whose first line is a `meta` event
 * carrying the given discussion_id. Only the first line of each file is
 * read, which keeps the lookup cheap.
 *
 * The first line may be either the bare event or an envelope with the event
 * under an `event` key. Files that cannot be read, whose first line is not
 * JSON, or whose first line is not a matching `meta` event (e.g. legacy
 * supervise/facilitate traces) are skipped silently — they are not errors.
 * A file that disappears between the read and the stat is skipped as well.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @param {string} discussionId - Value to match against `discussion_id`.
 * @returns {Array<{path: string, mtimeMs: number}>} Matches ordered by file
 *   modification time, oldest first. Empty when `dir` cannot be listed.
 */
export function findTracesByDiscussion(dir, discussionId) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return [];
  }

  const matches = [];
  for (const entry of entries) {
    if (!entry.endsWith(".ndjson")) continue;
    const path = join(dir, entry);

    let parsed;
    try {
      parsed = JSON.parse(readFirstLine(path));
    } catch {
      // Unreadable file or non-JSON header: best-effort scan, skip it.
      continue;
    }

    // JSON.parse can legitimately return null or a primitive; optional
    // chaining keeps such a header from aborting the whole scan.
    const event = parsed?.event ?? parsed;
    if (event?.type !== "meta") continue;
    if (event.discussion_id !== discussionId) continue;

    let mtimeMs;
    try {
      mtimeMs = statSync(path).mtimeMs;
    } catch {
      // The file vanished after it was read; treat it as absent.
      continue;
    }
    matches.push({ path, mtimeMs });
  }

  matches.sort((a, b) => a.mtimeMs - b.mtimeMs);
  return matches;
}
66
+
67
/**
 * `fit-trace by-discussion <discussion-id> [trace-dir]` — print the path of
 * every trace file whose meta header carries the given discussion_id, one
 * per line, in the order returned by `findTracesByDiscussion` (file mtime
 * ascending). The output can be piped to `xargs cat` to merge the traces
 * in that order.
 *
 * The directory is taken from the second positional argument, then from
 * `--trace-dir`, and finally defaults to `traces`.
 *
 * @param {object} values - Parsed option values.
 * @param {string[]} args - Positional arguments.
 * @throws {Error} If no discussion id is given.
 */
export async function runByDiscussionCommand(values, args) {
  const discussionId = args[0];
  const positionalDir = args[1];
  if (!discussionId) {
    throw new Error("<discussion-id> is required");
  }

  const traceDir = positionalDir ?? values["trace-dir"] ?? "traces";
  const found = findTracesByDiscussion(traceDir, discussionId);
  found.forEach((match) => {
    process.stdout.write(`${match.path}\n`);
  });
}
@@ -0,0 +1,104 @@
1
+ import { readFileSync } from "node:fs";
2
+
3
/**
 * Scan an NDJSON trace and return the last orchestrator `summary` event,
 * together with any structured replies it carries.
 *
 * Only records whose `source` is "orchestrator" are considered. Blank
 * lines, lines that are not valid JSON, and lines that parse to `null` or
 * a primitive are skipped. When the summary itself has no `discussion_id`,
 * the first non-empty `discussion_id` seen on an orchestrator `meta` event
 * is used instead.
 *
 * The runner is verdict-agnostic — the verdict is passed through verbatim
 * ("success"/"failure" from supervise/facilitate; "adjourned"/"recessed"/
 * "failed" from discuss), defaulting to "failed" when the summary event has
 * none. The bridge layer maps it to its channel semantics.
 *
 * @param {string} traceFile - Path to the NDJSON trace.
 * @returns {{verdict: string, summary: string, replies: object[], trigger?: object, discussionId?: string} | null}
 *   The extracted summary, or null when the trace has no orchestrator
 *   summary event.
 * @throws {Error} If the trace file cannot be read.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: NDJSON scan with malformed-line tolerance + meta/summary dual extraction
function readTraceSummary(traceFile) {
  let summary = null;
  let metaDiscussionId = null;

  for (const line of readFileSync(traceFile, "utf8").split("\n")) {
    if (!line.trim()) continue;

    let record;
    try {
      record = JSON.parse(line);
    } catch {
      continue;
    }

    // `null` is valid JSON; optional chaining treats it as a malformed
    // line instead of throwing on the property access.
    if (record?.source !== "orchestrator") continue;

    const event = record.event;
    if (event?.type === "meta" && !metaDiscussionId) {
      metaDiscussionId = event.discussion_id ?? null;
    }
    if (event?.type === "summary") {
      // Later summaries replace earlier ones: the terminal one wins.
      summary = {
        verdict: event.verdict ?? "failed",
        summary: event.summary ?? "",
        replies: Array.isArray(event.replies) ? event.replies : [],
        ...(event.trigger && { trigger: event.trigger }),
        ...(event.discussion_id && { discussionId: event.discussion_id }),
      };
    }
  }

  if (summary && !summary.discussionId && metaDiscussionId) {
    summary.discussionId = metaDiscussionId;
  }
  return summary;
}
51
+
52
/**
 * Callback command — read an NDJSON trace, extract the terminal
 * orchestrator summary, and POST it as JSON to the given URL.
 *
 * When the trace has no summary, a "failed" verdict with a fixed
 * explanatory message is sent instead. The discussion id found in the
 * trace takes precedence; `--discussion-id` is used only when the trace
 * provides none.
 *
 * Request body (same shape for every mode):
 *
 * ```
 * {
 *   correlation_id, verdict, summary, run_url,
 *   replies: [], discussion_id?, trigger?
 * }
 * ```
 *
 * @param {object} values - Parsed option values from cli.parse()
 * @param {string[]} _args - Positional arguments (unused)
 * @throws {Error} If `--trace-file` or `--callback-url` is missing, or the
 *   endpoint answers with a non-2xx status.
 */
export async function runCallbackCommand(values, _args) {
  const {
    "trace-file": traceFile,
    "callback-url": callbackUrl,
    "correlation-id": correlationId,
  } = values;

  if (!traceFile) {
    throw new Error("--trace-file is required");
  }
  if (!callbackUrl) {
    throw new Error("--callback-url is required");
  }

  const runUrl = values["run-url"] ?? "";
  const fallbackDiscussionId = values["discussion-id"] ?? null;

  let found = readTraceSummary(traceFile);
  if (found === null || found === undefined) {
    found = {
      verdict: "failed",
      summary: "Run ended without producing a summary.",
      replies: [],
    };
  }

  const payload = {
    correlation_id: correlationId,
    verdict: found.verdict,
    summary: found.summary,
    run_url: runUrl,
    replies: found.replies,
  };
  // Optional keys are appended only when they carry a value.
  const discussionId = found.discussionId ?? fallbackDiscussionId ?? null;
  if (discussionId) {
    payload.discussion_id = discussionId;
  }
  if (found.trigger) {
    payload.trigger = found.trigger;
  }

  const response = await fetch(callbackUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    throw new Error(`Callback POST failed: ${response.status}`);
  }
}
@@ -0,0 +1,116 @@
1
+ import { readFileSync, createWriteStream } from "node:fs";
2
+ import { resolve } from "node:path";
3
+ import { createDiscusser } from "../discusser.js";
4
+ import { createRedactor } from "../redaction.js";
5
+ import { createTeeWriter } from "../tee-writer.js";
6
+
7
/**
 * Turn a comma-separated list of profile names into participant configs.
 *
 * Names are trimmed, and empty entries (from a trailing comma, a doubled
 * comma, or a whitespace-only segment) are dropped so that no participant
 * is created with an empty name.
 *
 * @param {string | undefined} raw - Comma-separated profile names.
 * @param {string} cwd - Working directory assigned to every participant.
 * @param {number} maxTurns - Turn limit assigned to every participant.
 * @returns {Array<{name: string, role: string, cwd: string, agentProfile: string, maxTurns: number}>}
 *   One config per name, in input order; empty when `raw` is empty or unset.
 */
function parseAgentProfiles(raw, cwd, maxTurns) {
  if (!raw) return [];
  return raw
    .split(",")
    .map((entry) => entry.trim())
    .filter((name) => name.length > 0)
    .map((name) => ({ name, role: name, cwd, agentProfile: name, maxTurns }));
}
14
+
15
/**
 * Parse and validate discuss command options. Exported so tests can verify
 * defaults and the legacy-flag clean break.
 *
 * Exactly one of `--task-file` / `--task-text` must be given. `--max-turns`
 * defaults to 40 and must parse to a non-negative integer (0 = unlimited);
 * anything else is rejected here rather than passed on as NaN or a negative
 * limit. `--resume-context`, when present, must be valid JSON.
 *
 * @param {object} values - Parsed option values
 * @returns {object} Normalised options: taskContent, taskAmend,
 *   agentConfigs, leadProfile, leadModel, agentModel, maxTurns, outputPath,
 *   discussionId, resumeContext.
 * @throws {Error} On a missing or conflicting task source, an invalid
 *   `--max-turns`, invalid `--resume-context` JSON, or an unreadable task
 *   file.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: CLI option validation
export function parseDiscussOptions(values) {
  const taskFile = values["task-file"];
  const taskText = values["task-text"];
  if (taskFile && taskText)
    throw new Error("--task-file and --task-text are mutually exclusive");
  if (!taskFile && !taskText)
    throw new Error("--task-file or --task-text is required");

  const taskAmend = values["task-amend"] ?? undefined;
  const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;

  const profilesRaw = values["agent-profiles"];
  const agentCwd = resolve(values["agent-cwd"] ?? ".");

  const maxTurnsRaw = values["max-turns"] ?? "40";
  const maxTurns = Number.parseInt(maxTurnsRaw, 10);
  if (!Number.isInteger(maxTurns) || maxTurns < 0) {
    throw new Error(
      `--max-turns must be a non-negative integer, got "${maxTurnsRaw}"`,
    );
  }

  // Participants are optional; without --agent-profiles there are none.
  const agentConfigs = profilesRaw
    ? parseAgentProfiles(profilesRaw, agentCwd, maxTurns)
    : [];

  const resumeContextRaw = values["resume-context"];
  let resumeContext = null;
  if (resumeContextRaw) {
    try {
      resumeContext = JSON.parse(resumeContextRaw);
    } catch (err) {
      // Keep the original SyntaxError reachable for debugging.
      throw new Error(`--resume-context is not valid JSON: ${err.message}`, {
        cause: err,
      });
    }
  }

  return {
    taskContent,
    taskAmend,
    agentConfigs,
    leadProfile: values["lead-profile"] ?? "release-engineer",
    leadModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
    agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
    maxTurns,
    outputPath: values.output,
    discussionId: values["discussion-id"] ?? null,
    resumeContext,
  };
}
64
+
65
/**
 * Discuss command — build a discusser from the parsed options, run it on
 * the task content, and exit the process with 0 when the result reports
 * success and 1 otherwise.
 *
 * With `--output`, the trace goes to that file through a tee writer that
 * also writes to stdout; without it, output goes to stdout directly. The
 * lead profile is exported as `LIBEVAL_AGENT_PROFILE` before the agent SDK
 * is loaded.
 *
 * @param {object} values - Parsed option values
 * @param {string[]} _args - Positional arguments (unused)
 */
export async function runDiscussCommand(values, _args) {
  const opts = parseDiscussOptions(values);
  const {
    leadProfile,
    leadModel,
    agentModel,
    agentConfigs,
    discussionId,
    resumeContext,
    maxTurns,
    taskAmend,
  } = opts;

  const redactor = createRedactor();

  let fileStream = null;
  let output = process.stdout;
  if (opts.outputPath) {
    fileStream = createWriteStream(opts.outputPath);
    output = createTeeWriter({
      fileStream,
      textStream: process.stdout,
      mode: "supervised",
    });
  }

  if (leadProfile) {
    process.env.LIBEVAL_AGENT_PROFILE = leadProfile;
  }

  // Loaded lazily, after the environment variable above has been set.
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
  const discusser = createDiscusser({
    leadProfile,
    leadModel,
    agentModel,
    agentConfigs,
    discussionId,
    resumeContext,
    query: sdk.query,
    output,
    maxTurns,
    taskAmend,
    redactor,
  });

  const result = await discusser.run(opts.taskContent);

  if (fileStream) {
    // End the tee writer first, then the underlying file, one at a time.
    for (const stream of [output, fileStream]) {
      await new Promise((done) => stream.end(done));
    }
  }

  const exitCode = result.success ? 0 : 1;
  process.exit(exitCode);
}
@@ -54,10 +54,10 @@ export function parseFacilitateOptions(values) {
54
54
  agentConfigs,
55
55
  facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
56
56
  agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
57
- facilitatorModel: values["facilitator-model"] ?? "claude-opus-4-7[1m]",
57
+ facilitatorModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
58
58
  maxTurns,
59
59
  outputPath: values.output,
60
- facilitatorProfile: values["facilitator-profile"] ?? undefined,
60
+ facilitatorProfile: values["lead-profile"] ?? undefined,
61
61
  };
62
62
  }
63
63
 
@@ -12,7 +12,7 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
12
12
  * @returns {object}
13
13
  */
14
14
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: CLI option validation
15
- function parseSuperviseOptions(values) {
15
+ export function parseSuperviseOptions(values) {
16
16
  const taskFile = values["task-file"];
17
17
  const taskText = values["task-text"];
18
18
  if (taskFile && taskText)
@@ -33,13 +33,13 @@ function parseSuperviseOptions(values) {
33
33
  values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
34
34
  ),
35
35
  agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
36
- supervisorModel: values["supervisor-model"] ?? "claude-opus-4-7[1m]",
36
+ supervisorModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
37
37
  maxTurns: (() => {
38
38
  const raw = values["max-turns"] ?? "200";
39
39
  return raw === "0" ? 0 : parseInt(raw, 10);
40
40
  })(),
41
41
  outputPath: values.output,
42
- supervisorProfile: values["supervisor-profile"] ?? undefined,
42
+ supervisorProfile: values["lead-profile"] ?? undefined,
43
43
  agentProfile: values["agent-profile"] ?? undefined,
44
44
  allowedTools: (
45
45
  values["allowed-tools"] ??