@forwardimpact/libeval 0.1.50 → 0.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +11 -8
  2. package/bin/fit-benchmark.js +26 -27
  3. package/bin/fit-eval.js +49 -30
  4. package/bin/fit-trace.js +83 -57
  5. package/package.json +1 -1
  6. package/src/agent-runner.js +20 -12
  7. package/src/benchmark/env-loader.js +35 -23
  8. package/src/benchmark/{scorer.js → invariants.js} +14 -12
  9. package/src/benchmark/judge.js +5 -8
  10. package/src/benchmark/report.js +15 -15
  11. package/src/benchmark/result.js +11 -11
  12. package/src/benchmark/runner.js +11 -11
  13. package/src/benchmark/task-family.js +6 -4
  14. package/src/benchmark/workdir.js +18 -3
  15. package/src/commands/assert.js +30 -22
  16. package/src/commands/benchmark-invariants.js +74 -0
  17. package/src/commands/benchmark-report.js +23 -15
  18. package/src/commands/benchmark-run.js +15 -8
  19. package/src/commands/by-discussion.js +29 -18
  20. package/src/commands/callback.js +20 -11
  21. package/src/commands/discuss.js +28 -11
  22. package/src/commands/facilitate.js +18 -12
  23. package/src/commands/output.js +11 -12
  24. package/src/commands/run.js +22 -12
  25. package/src/commands/supervise.js +27 -18
  26. package/src/commands/task-input.js +10 -5
  27. package/src/commands/trace.js +174 -97
  28. package/src/discuss-tools.js +48 -2
  29. package/src/discusser.js +49 -2
  30. package/src/events/github.js +27 -5
  31. package/src/inbox-poller.js +84 -0
  32. package/src/judge.js +1 -1
  33. package/src/message-bus.js +6 -0
  34. package/src/orchestration-loop.js +14 -4
  35. package/src/orchestration-toolkit.js +14 -0
  36. package/src/redaction.js +31 -9
  37. package/src/reply-emitter.js +47 -0
  38. package/src/commands/benchmark-score.js +0 -68
@@ -6,29 +6,28 @@ import { createTraceCollector } from "@forwardimpact/libeval";
6
6
  *
7
7
  * Usage: fit-eval output [--format=json|text] < trace.ndjson
8
8
  *
9
- * @param {object} values - Parsed option values from cli.parse()
10
- * @param {string[]} args - Positional arguments
9
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
10
+ * @returns {Promise<{ok: true}>}
11
11
  */
12
- export async function runOutputCommand(values, _args) {
12
+ export async function runOutputCommand(ctx) {
13
+ const values = ctx.options;
14
+ const runtime = ctx.deps.runtime;
13
15
  const format =
14
16
  values.format === "text" || values.format === "json"
15
17
  ? values.format
16
18
  : "json";
17
19
  const collector = createTraceCollector();
18
20
 
19
- const chunks = [];
20
- for await (const chunk of process.stdin) {
21
- chunks.push(chunk);
22
- }
23
- const input = Buffer.concat(chunks).toString("utf8");
24
-
25
- for (const line of input.split("\n")) {
21
+ // `runtime.proc.stdin` is an AsyncIterable of UTF-8 lines (newline-split by
22
+ // the runtime), so each yielded value is exactly one NDJSON record.
23
+ for await (const line of runtime.proc.stdin) {
26
24
  collector.addLine(line);
27
25
  }
28
26
 
29
27
  if (format === "text") {
30
- process.stdout.write(collector.toText() + "\n");
28
+ runtime.proc.stdout.write(collector.toText() + "\n");
31
29
  } else {
32
- process.stdout.write(JSON.stringify(collector.toJSON()) + "\n");
30
+ runtime.proc.stdout.write(JSON.stringify(collector.toJSON()) + "\n");
33
31
  }
32
+ return { ok: true };
34
33
  }
@@ -12,10 +12,14 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
12
12
  /**
13
13
  * Parse and validate run command options from parsed values.
14
14
  * @param {object} values - Parsed option values from cli.parse()
15
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
15
16
  * @returns {{ taskContent: string, cwd: string, model: string, maxTurns: number, outputPath: string|undefined, agentProfile: string|undefined, allowedTools: string[] }}
16
17
  */
17
- function parseRunOptions(values) {
18
- const { task: taskContent, amend: taskAmend } = resolveTaskContent(values);
18
+ function parseRunOptions(values, runtime) {
19
+ const { task: taskContent, amend: taskAmend } = resolveTaskContent(
20
+ values,
21
+ runtime,
22
+ );
19
23
  const maxTurnsRaw = values["max-turns"] ?? "50";
20
24
 
21
25
  return {
@@ -39,10 +43,11 @@ function parseRunOptions(values) {
39
43
  *
40
44
  * Usage: fit-eval run [options]
41
45
  *
42
- * @param {object} values - Parsed option values from cli.parse()
43
- * @param {string[]} args - Positional arguments
46
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
47
+ * @returns {Promise<{ok: boolean, code?: number, error?: string}>}
44
48
  */
45
- export async function runRunCommand(values, _args) {
49
+ export async function runRunCommand(ctx) {
50
+ const runtime = ctx.deps.runtime;
46
51
  const {
47
52
  taskContent,
48
53
  taskAmend,
@@ -53,19 +58,23 @@ export async function runRunCommand(values, _args) {
53
58
  agentProfile,
54
59
  allowedTools,
55
60
  mcpServer,
56
- } = parseRunOptions(values);
61
+ } = parseRunOptions(ctx.options, runtime);
57
62
 
58
63
  // Build the redactor as the first observable side-effect after option
59
64
  // parsing — the env snapshot must freeze BEFORE any in-process
60
- // process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
61
- const redactor = createRedactor();
65
+ // env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
66
+ const redactor = createRedactor({ runtime });
62
67
 
63
68
  // When --output is specified, stream text to stdout while writing NDJSON to file.
64
69
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
65
70
  const fileStream = outputPath ? createWriteStream(outputPath) : null;
66
71
  const output = fileStream
67
- ? createTeeWriter({ fileStream, textStream: process.stdout, mode: "raw" })
68
- : process.stdout;
72
+ ? createTeeWriter({
73
+ fileStream,
74
+ textStream: runtime.proc.stdout,
75
+ mode: "raw",
76
+ })
77
+ : runtime.proc.stdout;
69
78
 
70
79
  const counter = new SequenceCounter();
71
80
  const devNull = new Writable({
@@ -93,7 +102,7 @@ export async function runRunCommand(values, _args) {
93
102
  }
94
103
 
95
104
  if (agentProfile) {
96
- process.env.LIBEVAL_AGENT_PROFILE = agentProfile;
105
+ runtime.proc.env.LIBEVAL_AGENT_PROFILE = agentProfile;
97
106
  }
98
107
 
99
108
  const systemPrompt = agentProfile
@@ -116,6 +125,7 @@ export async function runRunCommand(values, _args) {
116
125
  taskAmend,
117
126
  mcpServers,
118
127
  redactor,
128
+ runtime,
119
129
  });
120
130
 
121
131
  const result = await runner.run(taskContent);
@@ -125,5 +135,5 @@ export async function runRunCommand(values, _args) {
125
135
  await new Promise((r) => fileStream.end(r));
126
136
  }
127
137
 
128
- process.exit(result.success ? 0 : 1);
138
+ return result.success ? { ok: true } : { ok: false, code: 1, error: "" };
129
139
  }
@@ -1,6 +1,5 @@
1
- import { createWriteStream, mkdtempSync } from "node:fs";
1
+ import { createWriteStream } from "node:fs";
2
2
  import { resolve, join } from "node:path";
3
- import { tmpdir } from "node:os";
4
3
  import { createSupervisor } from "../supervisor.js";
5
4
  import { createRedactor } from "../redaction.js";
6
5
  import { createTeeWriter } from "../tee-writer.js";
@@ -10,19 +9,27 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
10
9
  /**
11
10
  * Parse all supervise flags from parsed values into an options object.
12
11
  * @param {object} values - Parsed option values from cli.parse()
13
- * @returns {object}
12
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
13
+ * @returns {Promise<object>}
14
14
  */
15
- export function parseSuperviseOptions(values) {
16
- const { task: taskContent, amend: taskAmend } = resolveTaskContent(values);
15
+ export async function parseSuperviseOptions(values, runtime) {
16
+ const { task: taskContent, amend: taskAmend } = resolveTaskContent(
17
+ values,
18
+ runtime,
19
+ );
17
20
  const supervisorAllowedToolsRaw = values["supervisor-allowed-tools"];
18
21
 
22
+ const tmpRoot = runtime.proc.env.TMPDIR ?? "/tmp";
23
+ const agentCwd = resolve(
24
+ values["agent-cwd"] ??
25
+ (await runtime.fs.mkdtemp(join(tmpRoot, "fit-eval-agent-"))),
26
+ );
27
+
19
28
  return {
20
29
  taskContent,
21
30
  taskAmend,
22
31
  supervisorCwd: resolve(values["supervisor-cwd"] ?? "."),
23
- agentCwd: resolve(
24
- values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
25
- ),
32
+ agentCwd,
26
33
  agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
27
34
  supervisorModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
28
35
  maxTurns: (() => {
@@ -50,16 +57,17 @@ export function parseSuperviseOptions(values) {
50
57
  *
51
58
  * Usage: fit-eval supervise [options]
52
59
  *
53
- * @param {object} values - Parsed option values from cli.parse()
54
- * @param {string[]} args - Positional arguments
60
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
61
+ * @returns {Promise<{ok: boolean, code?: number, error?: string}>}
55
62
  */
56
- export async function runSuperviseCommand(values, _args) {
57
- const opts = parseSuperviseOptions(values);
63
+ export async function runSuperviseCommand(ctx) {
64
+ const runtime = ctx.deps.runtime;
65
+ const opts = await parseSuperviseOptions(ctx.options, runtime);
58
66
 
59
67
  // Build the redactor as the first observable side-effect after option
60
68
  // parsing — the env snapshot must freeze BEFORE any in-process
61
- // process.env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
62
- const redactor = createRedactor();
69
+ // env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
70
+ const redactor = createRedactor({ runtime });
63
71
 
64
72
  // When --output is specified, stream text to stdout while writing NDJSON to file.
65
73
  // Otherwise, write NDJSON directly to stdout (backwards-compatible).
@@ -69,10 +77,10 @@ export async function runSuperviseCommand(values, _args) {
69
77
  const output = fileStream
70
78
  ? createTeeWriter({
71
79
  fileStream,
72
- textStream: process.stdout,
80
+ textStream: runtime.proc.stdout,
73
81
  mode: "supervised",
74
82
  })
75
- : process.stdout;
83
+ : runtime.proc.stdout;
76
84
 
77
85
  let agentMcpServers = null;
78
86
  if (opts.mcpServer) {
@@ -88,7 +96,7 @@ export async function runSuperviseCommand(values, _args) {
88
96
  }
89
97
 
90
98
  if (opts.agentProfile) {
91
- process.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
99
+ runtime.proc.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
92
100
  }
93
101
 
94
102
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
@@ -107,6 +115,7 @@ export async function runSuperviseCommand(values, _args) {
107
115
  taskAmend: opts.taskAmend,
108
116
  agentMcpServers,
109
117
  redactor,
118
+ runtime,
110
119
  });
111
120
 
112
121
  const result = await supervisor.run(opts.taskContent);
@@ -116,5 +125,5 @@ export async function runSuperviseCommand(values, _args) {
116
125
  await new Promise((r) => fileStream.end(r));
117
126
  }
118
127
 
119
- process.exit(result.success ? 0 : 1);
128
+ return result.success ? { ok: true } : { ok: false, code: 1, error: "" };
120
129
  }
@@ -1,4 +1,3 @@
1
- import { readFileSync } from "node:fs";
2
1
  import { composeTaskFromGitHubEvent } from "../events/github.js";
3
2
 
4
3
  /**
@@ -11,9 +10,12 @@ import { composeTaskFromGitHubEvent } from "../events/github.js";
11
10
  * works as before.
12
11
  *
13
12
  * @param {object} values - Parsed option values from cli.parse()
13
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime - Ambient
14
+ * collaborators; `fsSync.readFileSync` loads `--task-file`/`--task-event`
15
+ * and `proc.env` resolves `GITHUB_EVENT_NAME`.
14
16
  * @returns {{ task: string, amend: string | undefined }}
15
17
  */
16
- export function resolveTaskContent(values) {
18
+ export function resolveTaskContent(values, runtime) {
17
19
  const taskFile = values["task-file"];
18
20
  const taskText = values["task-text"];
19
21
  const taskEvent = values["task-event"];
@@ -33,17 +35,20 @@ export function resolveTaskContent(values) {
33
35
  const amendFlag = values["task-amend"] ?? undefined;
34
36
 
35
37
  if (taskFile) {
36
- return { task: readFileSync(taskFile, "utf8"), amend: amendFlag };
38
+ return {
39
+ task: runtime.fsSync.readFileSync(taskFile, "utf8"),
40
+ amend: amendFlag,
41
+ };
37
42
  }
38
43
  if (taskText) {
39
44
  return { task: taskText, amend: amendFlag };
40
45
  }
41
46
 
42
- const eventName = process.env.GITHUB_EVENT_NAME;
47
+ const eventName = runtime.proc.env.GITHUB_EVENT_NAME;
43
48
  if (!eventName) {
44
49
  throw new Error("--task-event requires GITHUB_EVENT_NAME to be set");
45
50
  }
46
- const payload = JSON.parse(readFileSync(taskEvent, "utf8"));
51
+ const payload = JSON.parse(runtime.fsSync.readFileSync(taskEvent, "utf8"));
47
52
  const composed = composeTaskFromGitHubEvent(payload, eventName);
48
53
  return { task: composed.task, amend: amendFlag ?? composed.amend };
49
54
  }
@@ -1,153 +1,227 @@
1
- import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
2
1
  import { join, dirname } from "node:path";
3
2
  import { createTraceCollector } from "@forwardimpact/libeval";
4
3
  import { createTraceQuery } from "../trace-query.js";
5
4
  import { createTraceGitHub } from "../trace-github.js";
6
5
  import { stripSignatures } from "../signature-filter.js";
7
6
 
7
+ // Every handler receives a libcli `InvocationContext`:
8
+ // ctx.options — parsed flag values (`cli.parse().values`)
9
+ // ctx.args — named positionals declared on the subcommand
10
+ // ctx.deps — host-injected collaborators: `{ runtime, config }`
11
+ // Handlers read/write the filesystem and stdout exclusively through
12
+ // `ctx.deps.runtime` and return `{ ok: true }` on success.
13
+
8
14
  // --- GitHub commands ---
9
15
 
10
16
  /**
11
17
  * List recent workflow runs matching a pattern.
12
- * @param {object} values - Parsed option values
13
- * @param {string[]} args - [pattern?]
14
- * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
18
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
15
19
  */
16
- export async function runRunsCommand(values, args, ctx) {
20
+ export async function runRunsCommand(ctx) {
21
+ const { runtime, config } = ctx.deps;
17
22
  const gh = await createTraceGitHub({
18
- token: ctx.config.ghToken(),
19
- repo: values.repo,
23
+ token: config.ghToken(),
24
+ repo: ctx.options.repo,
25
+ runtime,
20
26
  });
21
- const pattern = args[0] ?? "agent";
22
- const lookback = values.lookback ?? "7d";
27
+ const pattern = ctx.args.pattern ?? "agent";
28
+ const lookback = ctx.options.lookback ?? "7d";
23
29
  const runs = await gh.listRuns({ pattern, lookback });
24
- writeJSON(runs, values);
30
+ writeJSON(runtime, runs, ctx.options);
31
+ return { ok: true };
25
32
  }
26
33
 
27
34
  /**
28
35
  * Download a trace artifact and auto-convert to structured JSON.
29
- * @param {object} values - Parsed option values
30
- * @param {string[]} args - [run-id]
31
- * @param {{config: import("@forwardimpact/libconfig").Config}} ctx
36
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
32
37
  */
33
- export async function runDownloadCommand(values, args, ctx) {
38
+ export async function runDownloadCommand(ctx) {
39
+ const { runtime, config } = ctx.deps;
34
40
  const gh = await createTraceGitHub({
35
- token: ctx.config.ghToken(),
36
- repo: values.repo,
41
+ token: config.ghToken(),
42
+ repo: ctx.options.repo,
43
+ runtime,
37
44
  });
38
- const result = await gh.downloadTrace(args[0], {
39
- dir: values.dir,
40
- name: values.artifact,
45
+ const result = await gh.downloadTrace(ctx.args["run-id"], {
46
+ dir: ctx.options.dir,
47
+ name: ctx.options.artifact,
41
48
  });
42
49
 
43
50
  const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
44
51
  if (ndjsonFile) {
45
52
  const ndjsonPath = join(result.dir, ndjsonFile);
46
53
  const collector = createTraceCollector();
47
- for (const line of readFileSync(ndjsonPath, "utf8").split("\n")) {
54
+ for (const line of runtime.fsSync
55
+ .readFileSync(ndjsonPath, "utf8")
56
+ .split("\n")) {
48
57
  collector.addLine(line);
49
58
  }
50
59
  const structuredPath = join(result.dir, "structured.json");
51
- writeFileSync(structuredPath, JSON.stringify(collector.toJSON()) + "\n");
60
+ runtime.fsSync.writeFileSync(
61
+ structuredPath,
62
+ JSON.stringify(collector.toJSON()) + "\n",
63
+ );
52
64
  result.files.push("structured.json");
53
65
  }
54
66
 
55
- writeJSON(result, values);
67
+ writeJSON(runtime, result, ctx.options);
68
+ return { ok: true };
56
69
  }
57
70
 
58
71
  // --- Query commands ---
59
72
 
60
- /** @param {object} values @param {string[]} args - [file] */
61
- export async function runOverviewCommand(values, args) {
62
- writeJSON(loadTrace(args[0]).overview(), values);
73
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
74
+ export async function runOverviewCommand(ctx) {
75
+ const { runtime } = ctx.deps;
76
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).overview(), ctx.options);
77
+ return { ok: true };
63
78
  }
64
79
 
65
- /** @param {object} values @param {string[]} args - [file] */
66
- export async function runCountCommand(values, args) {
67
- process.stdout.write(String(loadTrace(args[0]).count()) + "\n");
80
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
81
+ export async function runCountCommand(ctx) {
82
+ const { runtime } = ctx.deps;
83
+ runtime.proc.stdout.write(
84
+ String(loadTrace(runtime, ctx.args.file).count()) + "\n",
85
+ );
86
+ return { ok: true };
68
87
  }
69
88
 
70
- /** @param {object} values @param {string[]} args - [file, from, to] */
71
- export async function runBatchCommand(values, args) {
89
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
90
+ export async function runBatchCommand(ctx) {
91
+ const { runtime } = ctx.deps;
72
92
  writeJSON(
73
- loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
74
- values,
93
+ runtime,
94
+ loadTrace(runtime, ctx.args.file).batch(
95
+ parseInt(ctx.args.from, 10),
96
+ parseInt(ctx.args.to, 10),
97
+ ),
98
+ ctx.options,
75
99
  );
100
+ return { ok: true };
76
101
  }
77
102
 
78
- /** @param {object} values @param {string[]} args - [file, N?] */
79
- export async function runHeadCommand(values, args) {
80
- const n = args[1] ? parseInt(args[1], 10) : 10;
81
- writeJSON(loadTrace(args[0]).head(n), values);
103
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
104
+ export async function runHeadCommand(ctx) {
105
+ const { runtime } = ctx.deps;
106
+ const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
107
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).head(n), ctx.options);
108
+ return { ok: true };
82
109
  }
83
110
 
84
- /** @param {object} values @param {string[]} args - [file, N?] */
85
- export async function runTailCommand(values, args) {
86
- const n = args[1] ? parseInt(args[1], 10) : 10;
87
- writeJSON(loadTrace(args[0]).tail(n), values);
111
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
112
+ export async function runTailCommand(ctx) {
113
+ const { runtime } = ctx.deps;
114
+ const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
115
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).tail(n), ctx.options);
116
+ return { ok: true };
88
117
  }
89
118
 
90
- /** @param {object} values @param {string[]} args - [file, pattern] */
91
- export async function runSearchCommand(values, args) {
92
- const limit = values.limit ? parseInt(values.limit, 10) : 50;
93
- const context = values.context ? parseInt(values.context, 10) : 0;
94
- const full = values.full ?? false;
119
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
120
+ export async function runSearchCommand(ctx) {
121
+ const { runtime } = ctx.deps;
122
+ const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
123
+ const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
124
+ const full = ctx.options.full ?? false;
95
125
  writeJSON(
96
- loadTrace(args[0]).search(args[1], { limit, context, full }),
97
- values,
126
+ runtime,
127
+ loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
128
+ limit,
129
+ context,
130
+ full,
131
+ }),
132
+ ctx.options,
98
133
  );
134
+ return { ok: true };
99
135
  }
100
136
 
101
- /** @param {object} values @param {string[]} args - [file] */
102
- export async function runToolsCommand(values, args) {
103
- writeJSON(loadTrace(args[0]).toolFrequency(), values);
137
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
138
+ export async function runToolsCommand(ctx) {
139
+ const { runtime } = ctx.deps;
140
+ writeJSON(
141
+ runtime,
142
+ loadTrace(runtime, ctx.args.file).toolFrequency(),
143
+ ctx.options,
144
+ );
145
+ return { ok: true };
104
146
  }
105
147
 
106
- /** @param {object} values @param {string[]} args - [file, name] */
107
- export async function runToolCommand(values, args) {
108
- writeJSON(loadTrace(args[0]).tool(args[1]), values);
148
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
149
+ export async function runToolCommand(ctx) {
150
+ const { runtime } = ctx.deps;
151
+ writeJSON(
152
+ runtime,
153
+ loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
154
+ ctx.options,
155
+ );
156
+ return { ok: true };
109
157
  }
110
158
 
111
- /** @param {object} values @param {string[]} args - [file] */
112
- export async function runErrorsCommand(values, args) {
113
- writeJSON(loadTrace(args[0]).errors(), values);
159
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
160
+ export async function runErrorsCommand(ctx) {
161
+ const { runtime } = ctx.deps;
162
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).errors(), ctx.options);
163
+ return { ok: true };
114
164
  }
115
165
 
116
- /** @param {object} values @param {string[]} args - [file] */
117
- export async function runReasoningCommand(values, args) {
118
- const from = values.from ? parseInt(values.from, 10) : undefined;
119
- const to = values.to ? parseInt(values.to, 10) : undefined;
120
- writeJSON(loadTrace(args[0]).reasoning({ from, to }), values);
166
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
167
+ export async function runReasoningCommand(ctx) {
168
+ const { runtime } = ctx.deps;
169
+ const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
170
+ const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
171
+ writeJSON(
172
+ runtime,
173
+ loadTrace(runtime, ctx.args.file).reasoning({ from, to }),
174
+ ctx.options,
175
+ );
176
+ return { ok: true };
121
177
  }
122
178
 
123
- /** @param {object} values @param {string[]} args - [file] */
124
- export async function runTimelineCommand(values, args) {
125
- const lines = loadTrace(args[0]).timeline();
126
- process.stdout.write(lines.join("\n") + "\n");
179
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
180
+ export async function runTimelineCommand(ctx) {
181
+ const { runtime } = ctx.deps;
182
+ const lines = loadTrace(runtime, ctx.args.file).timeline();
183
+ runtime.proc.stdout.write(lines.join("\n") + "\n");
184
+ return { ok: true };
127
185
  }
128
186
 
129
- /** @param {object} values @param {string[]} args - [file] */
130
- export async function runStatsCommand(values, args) {
131
- writeJSON(loadTrace(args[0]).stats(), values);
187
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
188
+ export async function runStatsCommand(ctx) {
189
+ const { runtime } = ctx.deps;
190
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).stats(), ctx.options);
191
+ return { ok: true };
132
192
  }
133
193
 
134
- /** @param {object} values @param {string[]} args - [file] */
135
- export async function runInitCommand(values, args) {
136
- writeJSON(loadTrace(args[0]).init(), values);
194
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
195
+ export async function runInitCommand(ctx) {
196
+ const { runtime } = ctx.deps;
197
+ writeJSON(runtime, loadTrace(runtime, ctx.args.file).init(), ctx.options);
198
+ return { ok: true };
137
199
  }
138
200
 
139
- /** @param {object} values @param {string[]} args - [file, index] */
140
- export async function runTurnCommand(values, args) {
141
- writeJSON(loadTrace(args[0]).turn(parseInt(args[1], 10)), values);
201
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
202
+ export async function runTurnCommand(ctx) {
203
+ const { runtime } = ctx.deps;
204
+ writeJSON(
205
+ runtime,
206
+ loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
207
+ ctx.options,
208
+ );
209
+ return { ok: true };
142
210
  }
143
211
 
144
- /** @param {object} values @param {string[]} args - [file] */
145
- export async function runFilterCommand(values, args) {
212
+ /** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
213
+ export async function runFilterCommand(ctx) {
214
+ const { runtime } = ctx.deps;
146
215
  const opts = {};
147
- if (values.role) opts.role = values.role;
148
- if (values.tool) opts.toolName = values.tool;
149
- if (values.error) opts.isError = true;
150
- writeJSON(loadTrace(args[0]).filter(opts), values);
216
+ if (ctx.options.role) opts.role = ctx.options.role;
217
+ if (ctx.options.tool) opts.toolName = ctx.options.tool;
218
+ if (ctx.options.error) opts.isError = true;
219
+ writeJSON(
220
+ runtime,
221
+ loadTrace(runtime, ctx.args.file).filter(opts),
222
+ ctx.options,
223
+ );
224
+ return { ok: true };
151
225
  }
152
226
 
153
227
  // --- Split command ---
@@ -168,24 +242,24 @@ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
168
242
  * `staff-engineer`) classify as agents with the profile in the participant
169
243
  * slot. Orchestrator events and invalid source names are dropped.
170
244
  *
171
- * @param {object} values - Parsed option values
172
- * @param {string[]} args - [file]
245
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
173
246
  */
174
- export async function runSplitCommand(values, args) {
175
- const file = args[0];
176
- if (!file) throw new Error("split: missing input file");
247
+ export async function runSplitCommand(ctx) {
248
+ const { runtime } = ctx.deps;
249
+ const file = ctx.args.file;
250
+ if (!file) return { ok: false, code: 1, error: "split: missing input file" };
177
251
 
178
- const mode = values.mode;
179
- if (!mode) throw new Error("split: --mode is required");
252
+ const mode = ctx.options.mode;
253
+ if (!mode) return { ok: false, code: 1, error: "split: --mode is required" };
180
254
  if (!["run", "supervise", "facilitate"].includes(mode)) {
181
- throw new Error(`split: invalid --mode "${mode}"`);
255
+ return { ok: false, code: 1, error: `split: invalid --mode "${mode}"` };
182
256
  }
183
257
 
184
- const caseId = values.case ?? "default";
185
- const outputDir = values["output-dir"] || dirname(file);
186
- mkdirSync(outputDir, { recursive: true });
258
+ const caseId = ctx.options.case ?? "default";
259
+ const outputDir = ctx.options["output-dir"] || dirname(file);
260
+ runtime.fsSync.mkdirSync(outputDir, { recursive: true });
187
261
 
188
- const buckets = parseBuckets(readFileSync(file, "utf8"));
262
+ const buckets = parseBuckets(runtime.fsSync.readFileSync(file, "utf8"));
189
263
 
190
264
  for (const [source, lines] of buckets.entries()) {
191
265
  if (!VALID_SOURCE_NAME.test(source)) continue;
@@ -194,8 +268,9 @@ export async function runSplitCommand(values, args) {
194
268
  outputDir,
195
269
  `trace--${caseId}--${source}.${role}.ndjson`,
196
270
  );
197
- writeFileSync(outPath, lines.join("\n") + "\n");
271
+ runtime.fsSync.writeFileSync(outPath, lines.join("\n") + "\n");
198
272
  }
273
+ return { ok: true };
199
274
  }
200
275
 
201
276
  /**
@@ -234,11 +309,12 @@ function parseBuckets(content) {
234
309
 
235
310
  /**
236
311
  * Load a trace file. Supports structured JSON and raw NDJSON.
312
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
237
313
  * @param {string} file
238
314
  * @returns {import("../trace-query.js").TraceQuery}
239
315
  */
240
- function loadTrace(file) {
241
- const content = readFileSync(file, "utf8");
316
+ function loadTrace(runtime, file) {
317
+ const content = runtime.fsSync.readFileSync(file, "utf8");
242
318
 
243
319
  try {
244
320
  const parsed = JSON.parse(content);
@@ -260,10 +336,11 @@ function loadTrace(file) {
260
336
  * Write JSON output to stdout. By default strips `thinking.signature`
261
337
  * base64 blobs from the payload so they don't dominate terminal output;
262
338
  * pass `--signatures` (surfaced as `values.signatures`) to keep them.
339
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
263
340
  * @param {*} data
264
341
  * @param {object} [values]
265
342
  */
266
- function writeJSON(data, values = {}) {
343
+ function writeJSON(runtime, data, values = {}) {
267
344
  const output = values.signatures ? data : stripSignatures(data);
268
- process.stdout.write(JSON.stringify(output, null, 2) + "\n");
345
+ runtime.proc.stdout.write(JSON.stringify(output, null, 2) + "\n");
269
346
  }