@forwardimpact/libeval 0.1.43 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-benchmark.js +2 -2
- package/bin/fit-eval.js +101 -21
- package/bin/fit-trace.js +14 -0
- package/package.json +1 -1
- package/src/commands/benchmark-run.js +1 -1
- package/src/commands/by-discussion.js +84 -0
- package/src/commands/callback.js +104 -0
- package/src/commands/discuss.js +116 -0
- package/src/commands/facilitate.js +2 -2
- package/src/commands/supervise.js +3 -3
- package/src/discuss-tools.js +203 -0
- package/src/discusser.js +332 -0
- package/src/facilitator.js +39 -333
- package/src/index.js +14 -0
- package/src/orchestration-loop.js +369 -0
- package/src/redaction.js +10 -0
- package/src/render/orchestrator-filter.js +1 -0
- package/src/trace-collector.js +4 -0
package/bin/fit-benchmark.js
CHANGED
|
@@ -46,10 +46,10 @@ export const definition = {
|
|
|
46
46
|
description:
|
|
47
47
|
"Claude model for the agent-under-test (default: claude-sonnet-4-6)",
|
|
48
48
|
},
|
|
49
|
-
"supervisor-model": {
|
|
49
|
+
"lead-model": {
|
|
50
50
|
type: "string",
|
|
51
51
|
description:
|
|
52
|
-
"Claude model for the supervisor (default: claude-opus-4-7)",
|
|
52
|
+
"Claude model for the lead role (default: claude-opus-4-7)",
|
|
53
53
|
},
|
|
54
54
|
"judge-model": {
|
|
55
55
|
type: "string",
|
package/bin/fit-eval.js
CHANGED
|
@@ -9,6 +9,8 @@ import { runTeeCommand } from "../src/commands/tee.js";
|
|
|
9
9
|
import { runRunCommand } from "../src/commands/run.js";
|
|
10
10
|
import { runSuperviseCommand } from "../src/commands/supervise.js";
|
|
11
11
|
import { runFacilitateCommand } from "../src/commands/facilitate.js";
|
|
12
|
+
import { runDiscussCommand } from "../src/commands/discuss.js";
|
|
13
|
+
import { runCallbackCommand } from "../src/commands/callback.js";
|
|
12
14
|
|
|
13
15
|
// `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
|
|
14
16
|
// the readFileSync branch in the compiled binary (which would ENOENT against
|
|
@@ -18,6 +20,18 @@ const VERSION =
|
|
|
18
20
|
JSON.parse(readFileSync(new URL("../package.json", import.meta.url), "utf8"))
|
|
19
21
|
.version;
|
|
20
22
|
|
|
23
|
+
const LEAD_OPTIONS = {
|
|
24
|
+
"lead-profile": {
|
|
25
|
+
type: "string",
|
|
26
|
+
description: "Lead role profile name (supervisor / facilitator / chair)",
|
|
27
|
+
},
|
|
28
|
+
"lead-model": {
|
|
29
|
+
type: "string",
|
|
30
|
+
description:
|
|
31
|
+
"Claude model for the lead role (default: claude-opus-4-7[1m])",
|
|
32
|
+
},
|
|
33
|
+
};
|
|
34
|
+
|
|
21
35
|
const definition = {
|
|
22
36
|
name: "fit-eval",
|
|
23
37
|
version: VERSION,
|
|
@@ -93,11 +107,7 @@ const definition = {
|
|
|
93
107
|
description:
|
|
94
108
|
"Claude model for the agent (default: claude-opus-4-7[1m])",
|
|
95
109
|
},
|
|
96
|
-
"supervisor-model": {
|
|
97
|
-
type: "string",
|
|
98
|
-
description:
|
|
99
|
-
"Claude model for the supervisor (default: claude-opus-4-7[1m])",
|
|
100
|
-
},
|
|
110
|
+
...LEAD_OPTIONS,
|
|
101
111
|
"max-turns": {
|
|
102
112
|
type: "string",
|
|
103
113
|
description:
|
|
@@ -117,10 +127,6 @@ const definition = {
|
|
|
117
127
|
description: "Supervisor working directory",
|
|
118
128
|
},
|
|
119
129
|
"agent-cwd": { type: "string", description: "Agent working directory" },
|
|
120
|
-
"supervisor-profile": {
|
|
121
|
-
type: "string",
|
|
122
|
-
description: "Supervisor (judge) profile name",
|
|
123
|
-
},
|
|
124
130
|
"supervisor-allowed-tools": {
|
|
125
131
|
type: "string",
|
|
126
132
|
description: "Supervisor tool allowlist",
|
|
@@ -154,11 +160,7 @@ const definition = {
|
|
|
154
160
|
type: "string",
|
|
155
161
|
description: "Claude model for agents (default: claude-opus-4-7[1m])",
|
|
156
162
|
},
|
|
157
|
-
"facilitator-model": {
|
|
158
|
-
type: "string",
|
|
159
|
-
description:
|
|
160
|
-
"Claude model for the facilitator (default: claude-opus-4-7[1m])",
|
|
161
|
-
},
|
|
163
|
+
...LEAD_OPTIONS,
|
|
162
164
|
"max-turns": {
|
|
163
165
|
type: "string",
|
|
164
166
|
description: "Max agentic turns (default: 20, 0 = unlimited)",
|
|
@@ -171,10 +173,6 @@ const definition = {
|
|
|
171
173
|
type: "string",
|
|
172
174
|
description: "Facilitator working directory",
|
|
173
175
|
},
|
|
174
|
-
"facilitator-profile": {
|
|
175
|
-
type: "string",
|
|
176
|
-
description: "Facilitator profile name",
|
|
177
|
-
},
|
|
178
176
|
"agent-profiles": {
|
|
179
177
|
type: "string",
|
|
180
178
|
description:
|
|
@@ -186,6 +184,56 @@ const definition = {
|
|
|
186
184
|
},
|
|
187
185
|
},
|
|
188
186
|
},
|
|
187
|
+
{
|
|
188
|
+
name: "discuss",
|
|
189
|
+
args: "",
|
|
190
|
+
description:
|
|
191
|
+
"Run an async, suspendable discussion — Chair + N participants + bridge callback",
|
|
192
|
+
options: {
|
|
193
|
+
"task-file": {
|
|
194
|
+
type: "string",
|
|
195
|
+
description: "Path to a markdown task file",
|
|
196
|
+
},
|
|
197
|
+
"task-text": {
|
|
198
|
+
type: "string",
|
|
199
|
+
description: "Inline task text (alternative to --task-file)",
|
|
200
|
+
},
|
|
201
|
+
"task-amend": {
|
|
202
|
+
type: "string",
|
|
203
|
+
description: "Additional text appended to the task",
|
|
204
|
+
},
|
|
205
|
+
"agent-model": {
|
|
206
|
+
type: "string",
|
|
207
|
+
description: "Claude model for agents (default: claude-opus-4-7[1m])",
|
|
208
|
+
},
|
|
209
|
+
...LEAD_OPTIONS,
|
|
210
|
+
"max-turns": {
|
|
211
|
+
type: "string",
|
|
212
|
+
description: "Max agentic turns (default: 40, 0 = unlimited)",
|
|
213
|
+
},
|
|
214
|
+
output: {
|
|
215
|
+
type: "string",
|
|
216
|
+
description: "Write the NDJSON trace to a file",
|
|
217
|
+
},
|
|
218
|
+
"agent-profiles": {
|
|
219
|
+
type: "string",
|
|
220
|
+
description: "Comma-separated participant profile names (optional)",
|
|
221
|
+
},
|
|
222
|
+
"agent-cwd": {
|
|
223
|
+
type: "string",
|
|
224
|
+
description: "Working directory shared by participants (default: .)",
|
|
225
|
+
},
|
|
226
|
+
"discussion-id": {
|
|
227
|
+
type: "string",
|
|
228
|
+
description:
|
|
229
|
+
"Stable id for the threaded conversation; carried through traces for linking",
|
|
230
|
+
},
|
|
231
|
+
"resume-context": {
|
|
232
|
+
type: "string",
|
|
233
|
+
description: "JSON-serialized prior state for a resumed run",
|
|
234
|
+
},
|
|
235
|
+
},
|
|
236
|
+
},
|
|
189
237
|
{
|
|
190
238
|
name: "output",
|
|
191
239
|
args: "",
|
|
@@ -198,6 +246,35 @@ const definition = {
|
|
|
198
246
|
description:
|
|
199
247
|
"Stream readable text to stdout while saving raw NDJSON to a file",
|
|
200
248
|
},
|
|
249
|
+
{
|
|
250
|
+
name: "callback",
|
|
251
|
+
args: "",
|
|
252
|
+
description:
|
|
253
|
+
"Extract the terminal summary from an NDJSON trace and POST it to a callback URL",
|
|
254
|
+
options: {
|
|
255
|
+
"trace-file": {
|
|
256
|
+
type: "string",
|
|
257
|
+
description: "Path to the NDJSON trace file",
|
|
258
|
+
},
|
|
259
|
+
"callback-url": {
|
|
260
|
+
type: "string",
|
|
261
|
+
description: "URL to POST the summary to",
|
|
262
|
+
},
|
|
263
|
+
"correlation-id": {
|
|
264
|
+
type: "string",
|
|
265
|
+
description: "Correlation ID to include in the payload",
|
|
266
|
+
},
|
|
267
|
+
"run-url": {
|
|
268
|
+
type: "string",
|
|
269
|
+
description: "GitHub Actions run URL (optional)",
|
|
270
|
+
},
|
|
271
|
+
"discussion-id": {
|
|
272
|
+
type: "string",
|
|
273
|
+
description:
|
|
274
|
+
"Discussion id (fallback when the trace lacks a meta event)",
|
|
275
|
+
},
|
|
276
|
+
},
|
|
277
|
+
},
|
|
201
278
|
],
|
|
202
279
|
globalOptions: {
|
|
203
280
|
format: { type: "string", description: "Output format (json|text)" },
|
|
@@ -207,8 +284,9 @@ const definition = {
|
|
|
207
284
|
},
|
|
208
285
|
examples: [
|
|
209
286
|
"fit-eval run --task-file=task.md --output=trace.ndjson",
|
|
210
|
-
"fit-eval supervise --task-file=task.md --supervisor-profile=judge --agent-profile=coder --output=trace.ndjson",
|
|
211
|
-
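'fit-eval facilitate --task-file=task.md --facilitator-profile=lead --agent-profiles="security-engineer,technical-writer" --output=trace.ndjson',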
|
|
287
|
+
"fit-eval supervise --task-file=task.md --lead-profile=judge --agent-profile=coder --output=trace.ndjson",
|
|
288
|
+
'fit-eval facilitate --task-file=task.md --lead-profile=lead --agent-profiles="security-engineer,technical-writer" --output=trace.ndjson',
|
|
289
|
+
'fit-eval discuss --task-file=task.md --lead-profile=release-engineer --agent-profiles="staff-engineer,security-engineer" --discussion-id=GD_kw...',
|
|
212
290
|
"fit-eval output --format=text < trace.ndjson",
|
|
213
291
|
],
|
|
214
292
|
documentation: [
|
|
@@ -234,7 +312,7 @@ const definition = {
|
|
|
234
312
|
title: "Agent Teams",
|
|
235
313
|
url: "https://www.forwardimpact.team/docs/products/agent-teams/index.md",
|
|
236
314
|
description:
|
|
237
|
-
"How to author the
|
|
315
|
+
"How to author the profiles consumed by --agent-profile, --lead-profile, and --agent-profiles.",
|
|
238
316
|
},
|
|
239
317
|
],
|
|
240
318
|
};
|
|
@@ -248,6 +326,8 @@ const COMMANDS = {
|
|
|
248
326
|
run: runRunCommand,
|
|
249
327
|
supervise: runSuperviseCommand,
|
|
250
328
|
facilitate: runFacilitateCommand,
|
|
329
|
+
discuss: runDiscussCommand,
|
|
330
|
+
callback: runCallbackCommand,
|
|
251
331
|
};
|
|
252
332
|
|
|
253
333
|
async function main() {
|
package/bin/fit-trace.js
CHANGED
|
@@ -26,6 +26,7 @@ import {
|
|
|
26
26
|
runSplitCommand,
|
|
27
27
|
} from "../src/commands/trace.js";
|
|
28
28
|
import { runAssertCommand } from "../src/commands/assert.js";
|
|
29
|
+
import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
|
|
29
30
|
|
|
30
31
|
// `bun build --compile` injects FIT_TRACE_VERSION via --define, eliminating
|
|
31
32
|
// the readFileSync branch in the compiled binary (which would ENOENT against
|
|
@@ -160,6 +161,18 @@ const definition = {
|
|
|
160
161
|
args: "<file> <index>",
|
|
161
162
|
description: "Single turn by index",
|
|
162
163
|
},
|
|
164
|
+
{
|
|
165
|
+
name: "by-discussion",
|
|
166
|
+
args: "<discussion-id> [trace-dir]",
|
|
167
|
+
description:
|
|
168
|
+
"List trace files whose meta header carries the given discussion_id, ordered by first-event timestamp",
|
|
169
|
+
options: {
|
|
170
|
+
"trace-dir": {
|
|
171
|
+
type: "string",
|
|
172
|
+
description: "Directory to scan (default: traces)",
|
|
173
|
+
},
|
|
174
|
+
},
|
|
175
|
+
},
|
|
163
176
|
{
|
|
164
177
|
name: "filter",
|
|
165
178
|
args: "<file>",
|
|
@@ -307,6 +320,7 @@ const COMMANDS = {
|
|
|
307
320
|
filter: runFilterCommand,
|
|
308
321
|
split: runSplitCommand,
|
|
309
322
|
assert: runAssertCommand,
|
|
323
|
+
"by-discussion": runByDiscussionCommand,
|
|
310
324
|
};
|
|
311
325
|
|
|
312
326
|
async function main() {
|
package/package.json
CHANGED
|
package/src/commands/benchmark-run.js
CHANGED
|
@@ -40,7 +40,7 @@ function parseRunOptions(values) {
|
|
|
40
40
|
runs,
|
|
41
41
|
output: resolve(output),
|
|
42
42
|
agentModel: values["agent-model"] ?? "claude-sonnet-4-6",
|
|
43
|
-
supervisorModel: values["supervisor-model"] ?? "claude-opus-4-7",
|
|
43
|
+
supervisorModel: values["lead-model"] ?? "claude-opus-4-7",
|
|
44
44
|
judgeModel: values["judge-model"] ?? "claude-opus-4-7",
|
|
45
45
|
profiles: {
|
|
46
46
|
agent: values["agent-profile"] ?? null,
|
|
package/src/commands/by-discussion.js
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { readdirSync, statSync, openSync, readSync, closeSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Read the first newline-terminated line of a file. Bounded to 64 KiB
|
|
6
|
+
* which is well above any orchestrator envelope.
|
|
7
|
+
*
|
|
8
|
+
* @param {string} path
|
|
9
|
+
* @returns {string}
|
|
10
|
+
*/
|
|
11
|
+
function readFirstLine(path) {
|
|
12
|
+
const fd = openSync(path, "r");
|
|
13
|
+
try {
|
|
14
|
+
const buf = Buffer.alloc(65536);
|
|
15
|
+
const bytes = readSync(fd, buf, 0, buf.length, 0);
|
|
16
|
+
const slice = buf.slice(0, bytes).toString("utf8");
|
|
17
|
+
const nl = slice.indexOf("\n");
|
|
18
|
+
return nl === -1 ? slice : slice.slice(0, nl);
|
|
19
|
+
} finally {
|
|
20
|
+
closeSync(fd);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Scan a directory for `.ndjson` files whose meta header carries the
|
|
26
|
+
* given discussion_id. The Step 2.6 first-line guarantee makes the
|
|
27
|
+
* lookup cheap: we read only the first line per file. Files without a
|
|
28
|
+
* meta header (e.g. legacy supervise/facilitate traces) are skipped
|
|
29
|
+
* silently — not erroneous.
|
|
30
|
+
*
|
|
31
|
+
* @param {string} dir
|
|
32
|
+
* @param {string} discussionId
|
|
33
|
+
* @returns {Array<{path: string, mtimeMs: number}>}
|
|
34
|
+
*/
|
|
35
|
+
export function findTracesByDiscussion(dir, discussionId) {
|
|
36
|
+
const matches = [];
|
|
37
|
+
let entries;
|
|
38
|
+
try {
|
|
39
|
+
entries = readdirSync(dir);
|
|
40
|
+
} catch {
|
|
41
|
+
return [];
|
|
42
|
+
}
|
|
43
|
+
for (const entry of entries) {
|
|
44
|
+
if (!entry.endsWith(".ndjson")) continue;
|
|
45
|
+
const path = join(dir, entry);
|
|
46
|
+
let firstLine;
|
|
47
|
+
try {
|
|
48
|
+
firstLine = readFirstLine(path);
|
|
49
|
+
} catch {
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
let parsed;
|
|
53
|
+
try {
|
|
54
|
+
parsed = JSON.parse(firstLine);
|
|
55
|
+
} catch {
|
|
56
|
+
continue;
|
|
57
|
+
}
|
|
58
|
+
const event = parsed.event ?? parsed;
|
|
59
|
+
if (event?.type !== "meta") continue;
|
|
60
|
+
if (event.discussion_id !== discussionId) continue;
|
|
61
|
+
matches.push({ path, mtimeMs: statSync(path).mtimeMs });
|
|
62
|
+
}
|
|
63
|
+
matches.sort((a, b) => a.mtimeMs - b.mtimeMs);
|
|
64
|
+
return matches;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* `fit-trace by-discussion <discussion-id> [trace-dir]` — list trace
|
|
69
|
+
* files whose meta header carries the given discussion_id, one per
|
|
70
|
+
* line, ordered by first-event timestamp (file mtime ascending). The
|
|
71
|
+
* result is usable with `xargs cat` for a chronological merge.
|
|
72
|
+
*
|
|
73
|
+
* @param {object} values
|
|
74
|
+
* @param {string[]} args
|
|
75
|
+
*/
|
|
76
|
+
export async function runByDiscussionCommand(values, args) {
|
|
77
|
+
const [discussionId, traceDirArg] = args;
|
|
78
|
+
if (!discussionId) throw new Error("<discussion-id> is required");
|
|
79
|
+
const dir = traceDirArg ?? values["trace-dir"] ?? "traces";
|
|
80
|
+
const matches = findTracesByDiscussion(dir, discussionId);
|
|
81
|
+
for (const { path } of matches) {
|
|
82
|
+
process.stdout.write(`${path}\n`);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
package/src/commands/callback.js
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Scan an NDJSON trace and return the last orchestrator summary event,
|
|
5
|
+
* the first `meta` event's `discussion_id`, and any structured replies
|
|
6
|
+
* collected by the discusser. Skips malformed lines.
|
|
7
|
+
*
|
|
8
|
+
* The runner is verdict-agnostic — verbatim passthrough of whatever the
|
|
9
|
+
* trace carries ("success"/"failure" from supervise/facilitate; canonical
|
|
10
|
+
* "adjourned"/"recessed"/"failed" from discuss). The bridge layer maps to
|
|
11
|
+
* its channel semantics.
|
|
12
|
+
*
|
|
13
|
+
* @param {string} traceFile
|
|
14
|
+
* @returns {{verdict: string, summary: string, replies: object[], trigger?: object, discussionId?: string} | null}
|
|
15
|
+
*/
|
|
16
|
+
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: NDJSON scan with malformed-line tolerance + meta/summary dual extraction
|
|
17
|
+
function readTraceSummary(traceFile) {
|
|
18
|
+
let summary = null;
|
|
19
|
+
let metaDiscussionId = null;
|
|
20
|
+
for (const line of readFileSync(traceFile, "utf8").split("\n")) {
|
|
21
|
+
if (!line.trim()) continue;
|
|
22
|
+
let record;
|
|
23
|
+
try {
|
|
24
|
+
record = JSON.parse(line);
|
|
25
|
+
} catch {
|
|
26
|
+
continue;
|
|
27
|
+
}
|
|
28
|
+
if (record.source !== "orchestrator") continue;
|
|
29
|
+
if (record.event?.type === "meta" && !metaDiscussionId) {
|
|
30
|
+
metaDiscussionId = record.event.discussion_id ?? null;
|
|
31
|
+
}
|
|
32
|
+
if (record.event?.type === "summary") {
|
|
33
|
+
summary = {
|
|
34
|
+
verdict: record.event.verdict ?? "failed",
|
|
35
|
+
summary: record.event.summary ?? "",
|
|
36
|
+
replies: Array.isArray(record.event.replies)
|
|
37
|
+
? record.event.replies
|
|
38
|
+
: [],
|
|
39
|
+
...(record.event.trigger && { trigger: record.event.trigger }),
|
|
40
|
+
...(record.event.discussion_id && {
|
|
41
|
+
discussionId: record.event.discussion_id,
|
|
42
|
+
}),
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
if (summary && !summary.discussionId && metaDiscussionId) {
|
|
47
|
+
summary.discussionId = metaDiscussionId;
|
|
48
|
+
}
|
|
49
|
+
return summary;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Callback command — read an NDJSON trace, extract the terminal
|
|
54
|
+
* orchestrator summary, and POST a canonical callback body to the
|
|
55
|
+
* configured URL. Used by `kata-dispatch.yml` to deliver the lead's
|
|
56
|
+
* conclusion to the bridge that dispatched the run.
|
|
57
|
+
*
|
|
58
|
+
* Wire shape (single shape across modes):
|
|
59
|
+
*
|
|
60
|
+
* ```
|
|
61
|
+
* {
|
|
62
|
+
* correlation_id, verdict, summary, run_url,
|
|
63
|
+
* discussion_id?, replies: [], trigger?
|
|
64
|
+
* }
|
|
65
|
+
* ```
|
|
66
|
+
*
|
|
67
|
+
* @param {object} values - Parsed option values from cli.parse()
|
|
68
|
+
* @param {string[]} _args - Positional arguments
|
|
69
|
+
*/
|
|
70
|
+
export async function runCallbackCommand(values, _args) {
|
|
71
|
+
const traceFile = values["trace-file"];
|
|
72
|
+
const callbackUrl = values["callback-url"];
|
|
73
|
+
const correlationId = values["correlation-id"];
|
|
74
|
+
const runUrl = values["run-url"] ?? "";
|
|
75
|
+
const discussionIdOverride = values["discussion-id"] ?? null;
|
|
76
|
+
|
|
77
|
+
if (!traceFile) throw new Error("--trace-file is required");
|
|
78
|
+
if (!callbackUrl) throw new Error("--callback-url is required");
|
|
79
|
+
|
|
80
|
+
const found = readTraceSummary(traceFile) ?? {
|
|
81
|
+
verdict: "failed",
|
|
82
|
+
summary: "Run ended without producing a summary.",
|
|
83
|
+
replies: [],
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
const discussionId = found.discussionId ?? discussionIdOverride ?? null;
|
|
87
|
+
const payload = {
|
|
88
|
+
correlation_id: correlationId,
|
|
89
|
+
verdict: found.verdict,
|
|
90
|
+
summary: found.summary,
|
|
91
|
+
run_url: runUrl,
|
|
92
|
+
replies: found.replies,
|
|
93
|
+
...(discussionId && { discussion_id: discussionId }),
|
|
94
|
+
...(found.trigger && { trigger: found.trigger }),
|
|
95
|
+
};
|
|
96
|
+
const res = await fetch(callbackUrl, {
|
|
97
|
+
method: "POST",
|
|
98
|
+
headers: { "Content-Type": "application/json" },
|
|
99
|
+
body: JSON.stringify(payload),
|
|
100
|
+
});
|
|
101
|
+
if (!res.ok) {
|
|
102
|
+
throw new Error(`Callback POST failed: ${res.status}`);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
package/src/commands/discuss.js
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { readFileSync, createWriteStream } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { createDiscusser } from "../discusser.js";
|
|
4
|
+
import { createRedactor } from "../redaction.js";
|
|
5
|
+
import { createTeeWriter } from "../tee-writer.js";
|
|
6
|
+
|
|
7
|
+
function parseAgentProfiles(raw, cwd, maxTurns) {
|
|
8
|
+
if (!raw) return [];
|
|
9
|
+
return raw.split(",").map((entry) => {
|
|
10
|
+
const name = entry.trim();
|
|
11
|
+
return { name, role: name, cwd, agentProfile: name, maxTurns };
|
|
12
|
+
});
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Parse and validate discuss command options. Exported so tests can verify
|
|
17
|
+
* defaults and the legacy-flag clean break.
|
|
18
|
+
* @param {object} values - Parsed option values
|
|
19
|
+
* @returns {object}
|
|
20
|
+
*/
|
|
21
|
+
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: CLI option validation
|
|
22
|
+
export function parseDiscussOptions(values) {
|
|
23
|
+
const taskFile = values["task-file"];
|
|
24
|
+
const taskText = values["task-text"];
|
|
25
|
+
if (taskFile && taskText)
|
|
26
|
+
throw new Error("--task-file and --task-text are mutually exclusive");
|
|
27
|
+
if (!taskFile && !taskText)
|
|
28
|
+
throw new Error("--task-file or --task-text is required");
|
|
29
|
+
|
|
30
|
+
const taskAmend = values["task-amend"] ?? undefined;
|
|
31
|
+
const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
|
32
|
+
|
|
33
|
+
const profilesRaw = values["agent-profiles"];
|
|
34
|
+
const agentCwd = resolve(values["agent-cwd"] ?? ".");
|
|
35
|
+
|
|
36
|
+
const maxTurnsRaw = values["max-turns"] ?? "40";
|
|
37
|
+
const maxTurns = maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10);
|
|
38
|
+
|
|
39
|
+
const agentConfigs = parseAgentProfiles(profilesRaw, agentCwd, maxTurns);
|
|
40
|
+
|
|
41
|
+
const resumeContextRaw = values["resume-context"];
|
|
42
|
+
let resumeContext = null;
|
|
43
|
+
if (resumeContextRaw) {
|
|
44
|
+
try {
|
|
45
|
+
resumeContext = JSON.parse(resumeContextRaw);
|
|
46
|
+
} catch (err) {
|
|
47
|
+
throw new Error(`--resume-context is not valid JSON: ${err.message}`);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
return {
|
|
52
|
+
taskContent,
|
|
53
|
+
taskAmend,
|
|
54
|
+
agentConfigs,
|
|
55
|
+
leadProfile: values["lead-profile"] ?? "release-engineer",
|
|
56
|
+
leadModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
|
|
57
|
+
agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
|
|
58
|
+
maxTurns,
|
|
59
|
+
outputPath: values.output,
|
|
60
|
+
discussionId: values["discussion-id"] ?? null,
|
|
61
|
+
resumeContext,
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Discuss command — run a discusser-led session with suspend/resume
|
|
67
|
+
* semantics, threading `discussion_id` through the trace so multi-run
|
|
68
|
+
* conversations are queryable as one.
|
|
69
|
+
*
|
|
70
|
+
* @param {object} values - Parsed option values
|
|
71
|
+
* @param {string[]} _args - Positional arguments
|
|
72
|
+
*/
|
|
73
|
+
export async function runDiscussCommand(values, _args) {
|
|
74
|
+
const opts = parseDiscussOptions(values);
|
|
75
|
+
|
|
76
|
+
const redactor = createRedactor();
|
|
77
|
+
|
|
78
|
+
const fileStream = opts.outputPath
|
|
79
|
+
? createWriteStream(opts.outputPath)
|
|
80
|
+
: null;
|
|
81
|
+
const output = fileStream
|
|
82
|
+
? createTeeWriter({
|
|
83
|
+
fileStream,
|
|
84
|
+
textStream: process.stdout,
|
|
85
|
+
mode: "supervised",
|
|
86
|
+
})
|
|
87
|
+
: process.stdout;
|
|
88
|
+
|
|
89
|
+
if (opts.leadProfile) {
|
|
90
|
+
process.env.LIBEVAL_AGENT_PROFILE = opts.leadProfile;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
94
|
+
const discusser = createDiscusser({
|
|
95
|
+
leadProfile: opts.leadProfile,
|
|
96
|
+
leadModel: opts.leadModel,
|
|
97
|
+
agentModel: opts.agentModel,
|
|
98
|
+
agentConfigs: opts.agentConfigs,
|
|
99
|
+
discussionId: opts.discussionId,
|
|
100
|
+
resumeContext: opts.resumeContext,
|
|
101
|
+
query,
|
|
102
|
+
output,
|
|
103
|
+
maxTurns: opts.maxTurns,
|
|
104
|
+
taskAmend: opts.taskAmend,
|
|
105
|
+
redactor,
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
const result = await discusser.run(opts.taskContent);
|
|
109
|
+
|
|
110
|
+
if (fileStream) {
|
|
111
|
+
await new Promise((r) => output.end(r));
|
|
112
|
+
await new Promise((r) => fileStream.end(r));
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
process.exit(result.success ? 0 : 1);
|
|
116
|
+
}
|
|
package/src/commands/facilitate.js
CHANGED
|
@@ -54,10 +54,10 @@ export function parseFacilitateOptions(values) {
|
|
|
54
54
|
agentConfigs,
|
|
55
55
|
facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
|
|
56
56
|
agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
|
|
57
|
-
facilitatorModel: values["facilitator-model"] ?? "claude-opus-4-7[1m]",
|
|
57
|
+
facilitatorModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
|
|
58
58
|
maxTurns,
|
|
59
59
|
outputPath: values.output,
|
|
60
|
-
facilitatorProfile: values["facilitator-profile"] ?? undefined,
|
|
60
|
+
facilitatorProfile: values["lead-profile"] ?? undefined,
|
|
61
61
|
};
|
|
62
62
|
}
|
|
63
63
|
|
|
package/src/commands/supervise.js
CHANGED
|
@@ -12,7 +12,7 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
|
|
|
12
12
|
* @returns {object}
|
|
13
13
|
*/
|
|
14
14
|
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: CLI option validation
|
|
15
|
-
function parseSuperviseOptions(values) {
|
|
15
|
+
export function parseSuperviseOptions(values) {
|
|
16
16
|
const taskFile = values["task-file"];
|
|
17
17
|
const taskText = values["task-text"];
|
|
18
18
|
if (taskFile && taskText)
|
|
@@ -33,13 +33,13 @@ function parseSuperviseOptions(values) {
|
|
|
33
33
|
values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
|
|
34
34
|
),
|
|
35
35
|
agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
|
|
36
|
-
supervisorModel: values["supervisor-model"] ?? "claude-opus-4-7[1m]",
|
|
36
|
+
supervisorModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
|
|
37
37
|
maxTurns: (() => {
|
|
38
38
|
const raw = values["max-turns"] ?? "200";
|
|
39
39
|
return raw === "0" ? 0 : parseInt(raw, 10);
|
|
40
40
|
})(),
|
|
41
41
|
outputPath: values.output,
|
|
42
|
-
supervisorProfile: values["supervisor-profile"] ?? undefined,
|
|
42
|
+
supervisorProfile: values["lead-profile"] ?? undefined,
|
|
43
43
|
agentProfile: values["agent-profile"] ?? undefined,
|
|
44
44
|
allowedTools: (
|
|
45
45
|
values["allowed-tools"] ??
|