@forwardimpact/libeval 0.1.50 → 0.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -8
- package/bin/fit-benchmark.js +26 -27
- package/bin/fit-eval.js +49 -30
- package/bin/fit-trace.js +83 -57
- package/package.json +1 -1
- package/src/agent-runner.js +20 -12
- package/src/benchmark/env-loader.js +35 -23
- package/src/benchmark/{scorer.js → invariants.js} +14 -12
- package/src/benchmark/judge.js +5 -8
- package/src/benchmark/report.js +15 -15
- package/src/benchmark/result.js +11 -11
- package/src/benchmark/runner.js +11 -11
- package/src/benchmark/task-family.js +6 -4
- package/src/benchmark/workdir.js +18 -3
- package/src/commands/assert.js +30 -22
- package/src/commands/benchmark-invariants.js +74 -0
- package/src/commands/benchmark-report.js +23 -15
- package/src/commands/benchmark-run.js +15 -8
- package/src/commands/by-discussion.js +29 -18
- package/src/commands/callback.js +20 -11
- package/src/commands/discuss.js +28 -11
- package/src/commands/facilitate.js +18 -12
- package/src/commands/output.js +11 -12
- package/src/commands/run.js +22 -12
- package/src/commands/supervise.js +27 -18
- package/src/commands/task-input.js +10 -5
- package/src/commands/trace.js +174 -97
- package/src/discuss-tools.js +48 -2
- package/src/discusser.js +49 -2
- package/src/events/github.js +27 -5
- package/src/inbox-poller.js +84 -0
- package/src/judge.js +1 -1
- package/src/message-bus.js +6 -0
- package/src/orchestration-loop.js +14 -4
- package/src/orchestration-toolkit.js +14 -0
- package/src/redaction.js +31 -9
- package/src/reply-emitter.js +47 -0
- package/src/commands/benchmark-score.js +0 -68
package/src/commands/output.js
CHANGED
|
@@ -6,29 +6,28 @@ import { createTraceCollector } from "@forwardimpact/libeval";
|
|
|
6
6
|
*
|
|
7
7
|
* Usage: fit-eval output [--format=json|text] < trace.ndjson
|
|
8
8
|
*
|
|
9
|
-
* @param {
|
|
10
|
-
* @
|
|
9
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
10
|
+
* @returns {Promise<{ok: true}>}
|
|
11
11
|
*/
|
|
12
|
-
export async function runOutputCommand(
|
|
12
|
+
export async function runOutputCommand(ctx) {
|
|
13
|
+
const values = ctx.options;
|
|
14
|
+
const runtime = ctx.deps.runtime;
|
|
13
15
|
const format =
|
|
14
16
|
values.format === "text" || values.format === "json"
|
|
15
17
|
? values.format
|
|
16
18
|
: "json";
|
|
17
19
|
const collector = createTraceCollector();
|
|
18
20
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
23
|
-
const input = Buffer.concat(chunks).toString("utf8");
|
|
24
|
-
|
|
25
|
-
for (const line of input.split("\n")) {
|
|
21
|
+
// `runtime.proc.stdin` is an AsyncIterable of UTF-8 lines (newline-split by
|
|
22
|
+
// the runtime), so each yielded value is exactly one NDJSON record.
|
|
23
|
+
for await (const line of runtime.proc.stdin) {
|
|
26
24
|
collector.addLine(line);
|
|
27
25
|
}
|
|
28
26
|
|
|
29
27
|
if (format === "text") {
|
|
30
|
-
|
|
28
|
+
runtime.proc.stdout.write(collector.toText() + "\n");
|
|
31
29
|
} else {
|
|
32
|
-
|
|
30
|
+
runtime.proc.stdout.write(JSON.stringify(collector.toJSON()) + "\n");
|
|
33
31
|
}
|
|
32
|
+
return { ok: true };
|
|
34
33
|
}
|
package/src/commands/run.js
CHANGED
|
@@ -12,10 +12,14 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
|
|
|
12
12
|
/**
|
|
13
13
|
* Parse and validate run command options from parsed values.
|
|
14
14
|
* @param {object} values - Parsed option values from cli.parse()
|
|
15
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
15
16
|
* @returns {{ taskContent: string, cwd: string, model: string, maxTurns: number, outputPath: string|undefined, agentProfile: string|undefined, allowedTools: string[] }}
|
|
16
17
|
*/
|
|
17
|
-
function parseRunOptions(values) {
|
|
18
|
-
const { task: taskContent, amend: taskAmend } = resolveTaskContent(
|
|
18
|
+
function parseRunOptions(values, runtime) {
|
|
19
|
+
const { task: taskContent, amend: taskAmend } = resolveTaskContent(
|
|
20
|
+
values,
|
|
21
|
+
runtime,
|
|
22
|
+
);
|
|
19
23
|
const maxTurnsRaw = values["max-turns"] ?? "50";
|
|
20
24
|
|
|
21
25
|
return {
|
|
@@ -39,10 +43,11 @@ function parseRunOptions(values) {
|
|
|
39
43
|
*
|
|
40
44
|
* Usage: fit-eval run [options]
|
|
41
45
|
*
|
|
42
|
-
* @param {
|
|
43
|
-
* @
|
|
46
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
47
|
+
* @returns {Promise<{ok: boolean, code?: number, error?: string}>}
|
|
44
48
|
*/
|
|
45
|
-
export async function runRunCommand(
|
|
49
|
+
export async function runRunCommand(ctx) {
|
|
50
|
+
const runtime = ctx.deps.runtime;
|
|
46
51
|
const {
|
|
47
52
|
taskContent,
|
|
48
53
|
taskAmend,
|
|
@@ -53,19 +58,23 @@ export async function runRunCommand(values, _args) {
|
|
|
53
58
|
agentProfile,
|
|
54
59
|
allowedTools,
|
|
55
60
|
mcpServer,
|
|
56
|
-
} = parseRunOptions(
|
|
61
|
+
} = parseRunOptions(ctx.options, runtime);
|
|
57
62
|
|
|
58
63
|
// Build the redactor as the first observable side-effect after option
|
|
59
64
|
// parsing — the env snapshot must freeze BEFORE any in-process
|
|
60
|
-
//
|
|
61
|
-
const redactor = createRedactor();
|
|
65
|
+
// env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
|
|
66
|
+
const redactor = createRedactor({ runtime });
|
|
62
67
|
|
|
63
68
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
64
69
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
65
70
|
const fileStream = outputPath ? createWriteStream(outputPath) : null;
|
|
66
71
|
const output = fileStream
|
|
67
|
-
? createTeeWriter({
|
|
68
|
-
|
|
72
|
+
? createTeeWriter({
|
|
73
|
+
fileStream,
|
|
74
|
+
textStream: runtime.proc.stdout,
|
|
75
|
+
mode: "raw",
|
|
76
|
+
})
|
|
77
|
+
: runtime.proc.stdout;
|
|
69
78
|
|
|
70
79
|
const counter = new SequenceCounter();
|
|
71
80
|
const devNull = new Writable({
|
|
@@ -93,7 +102,7 @@ export async function runRunCommand(values, _args) {
|
|
|
93
102
|
}
|
|
94
103
|
|
|
95
104
|
if (agentProfile) {
|
|
96
|
-
|
|
105
|
+
runtime.proc.env.LIBEVAL_AGENT_PROFILE = agentProfile;
|
|
97
106
|
}
|
|
98
107
|
|
|
99
108
|
const systemPrompt = agentProfile
|
|
@@ -116,6 +125,7 @@ export async function runRunCommand(values, _args) {
|
|
|
116
125
|
taskAmend,
|
|
117
126
|
mcpServers,
|
|
118
127
|
redactor,
|
|
128
|
+
runtime,
|
|
119
129
|
});
|
|
120
130
|
|
|
121
131
|
const result = await runner.run(taskContent);
|
|
@@ -125,5 +135,5 @@ export async function runRunCommand(values, _args) {
|
|
|
125
135
|
await new Promise((r) => fileStream.end(r));
|
|
126
136
|
}
|
|
127
137
|
|
|
128
|
-
|
|
138
|
+
return result.success ? { ok: true } : { ok: false, code: 1, error: "" };
|
|
129
139
|
}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { createWriteStream
|
|
1
|
+
import { createWriteStream } from "node:fs";
|
|
2
2
|
import { resolve, join } from "node:path";
|
|
3
|
-
import { tmpdir } from "node:os";
|
|
4
3
|
import { createSupervisor } from "../supervisor.js";
|
|
5
4
|
import { createRedactor } from "../redaction.js";
|
|
6
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
@@ -10,19 +9,27 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
|
|
|
10
9
|
/**
|
|
11
10
|
* Parse all supervise flags from parsed values into an options object.
|
|
12
11
|
* @param {object} values - Parsed option values from cli.parse()
|
|
13
|
-
* @
|
|
12
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
13
|
+
* @returns {Promise<object>}
|
|
14
14
|
*/
|
|
15
|
-
export function parseSuperviseOptions(values) {
|
|
16
|
-
const { task: taskContent, amend: taskAmend } = resolveTaskContent(
|
|
15
|
+
export async function parseSuperviseOptions(values, runtime) {
|
|
16
|
+
const { task: taskContent, amend: taskAmend } = resolveTaskContent(
|
|
17
|
+
values,
|
|
18
|
+
runtime,
|
|
19
|
+
);
|
|
17
20
|
const supervisorAllowedToolsRaw = values["supervisor-allowed-tools"];
|
|
18
21
|
|
|
22
|
+
const tmpRoot = runtime.proc.env.TMPDIR ?? "/tmp";
|
|
23
|
+
const agentCwd = resolve(
|
|
24
|
+
values["agent-cwd"] ??
|
|
25
|
+
(await runtime.fs.mkdtemp(join(tmpRoot, "fit-eval-agent-"))),
|
|
26
|
+
);
|
|
27
|
+
|
|
19
28
|
return {
|
|
20
29
|
taskContent,
|
|
21
30
|
taskAmend,
|
|
22
31
|
supervisorCwd: resolve(values["supervisor-cwd"] ?? "."),
|
|
23
|
-
agentCwd
|
|
24
|
-
values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
|
|
25
|
-
),
|
|
32
|
+
agentCwd,
|
|
26
33
|
agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
|
|
27
34
|
supervisorModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
|
|
28
35
|
maxTurns: (() => {
|
|
@@ -50,16 +57,17 @@ export function parseSuperviseOptions(values) {
|
|
|
50
57
|
*
|
|
51
58
|
* Usage: fit-eval supervise [options]
|
|
52
59
|
*
|
|
53
|
-
* @param {
|
|
54
|
-
* @
|
|
60
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
61
|
+
* @returns {Promise<{ok: boolean, code?: number, error?: string}>}
|
|
55
62
|
*/
|
|
56
|
-
export async function runSuperviseCommand(
|
|
57
|
-
const
|
|
63
|
+
export async function runSuperviseCommand(ctx) {
|
|
64
|
+
const runtime = ctx.deps.runtime;
|
|
65
|
+
const opts = await parseSuperviseOptions(ctx.options, runtime);
|
|
58
66
|
|
|
59
67
|
// Build the redactor as the first observable side-effect after option
|
|
60
68
|
// parsing — the env snapshot must freeze BEFORE any in-process
|
|
61
|
-
//
|
|
62
|
-
const redactor = createRedactor();
|
|
69
|
+
// env writes the command performs (e.g. LIBEVAL_AGENT_PROFILE).
|
|
70
|
+
const redactor = createRedactor({ runtime });
|
|
63
71
|
|
|
64
72
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
65
73
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
@@ -69,10 +77,10 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
69
77
|
const output = fileStream
|
|
70
78
|
? createTeeWriter({
|
|
71
79
|
fileStream,
|
|
72
|
-
textStream:
|
|
80
|
+
textStream: runtime.proc.stdout,
|
|
73
81
|
mode: "supervised",
|
|
74
82
|
})
|
|
75
|
-
:
|
|
83
|
+
: runtime.proc.stdout;
|
|
76
84
|
|
|
77
85
|
let agentMcpServers = null;
|
|
78
86
|
if (opts.mcpServer) {
|
|
@@ -88,7 +96,7 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
88
96
|
}
|
|
89
97
|
|
|
90
98
|
if (opts.agentProfile) {
|
|
91
|
-
|
|
99
|
+
runtime.proc.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
|
|
92
100
|
}
|
|
93
101
|
|
|
94
102
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
@@ -107,6 +115,7 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
107
115
|
taskAmend: opts.taskAmend,
|
|
108
116
|
agentMcpServers,
|
|
109
117
|
redactor,
|
|
118
|
+
runtime,
|
|
110
119
|
});
|
|
111
120
|
|
|
112
121
|
const result = await supervisor.run(opts.taskContent);
|
|
@@ -116,5 +125,5 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
116
125
|
await new Promise((r) => fileStream.end(r));
|
|
117
126
|
}
|
|
118
127
|
|
|
119
|
-
|
|
128
|
+
return result.success ? { ok: true } : { ok: false, code: 1, error: "" };
|
|
120
129
|
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { readFileSync } from "node:fs";
|
|
2
1
|
import { composeTaskFromGitHubEvent } from "../events/github.js";
|
|
3
2
|
|
|
4
3
|
/**
|
|
@@ -11,9 +10,12 @@ import { composeTaskFromGitHubEvent } from "../events/github.js";
|
|
|
11
10
|
* works as before.
|
|
12
11
|
*
|
|
13
12
|
* @param {object} values - Parsed option values from cli.parse()
|
|
13
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime - Ambient
|
|
14
|
+
* collaborators; `fsSync.readFileSync` loads `--task-file`/`--task-event`
|
|
15
|
+
* and `proc.env` resolves `GITHUB_EVENT_NAME`.
|
|
14
16
|
* @returns {{ task: string, amend: string | undefined }}
|
|
15
17
|
*/
|
|
16
|
-
export function resolveTaskContent(values) {
|
|
18
|
+
export function resolveTaskContent(values, runtime) {
|
|
17
19
|
const taskFile = values["task-file"];
|
|
18
20
|
const taskText = values["task-text"];
|
|
19
21
|
const taskEvent = values["task-event"];
|
|
@@ -33,17 +35,20 @@ export function resolveTaskContent(values) {
|
|
|
33
35
|
const amendFlag = values["task-amend"] ?? undefined;
|
|
34
36
|
|
|
35
37
|
if (taskFile) {
|
|
36
|
-
return {
|
|
38
|
+
return {
|
|
39
|
+
task: runtime.fsSync.readFileSync(taskFile, "utf8"),
|
|
40
|
+
amend: amendFlag,
|
|
41
|
+
};
|
|
37
42
|
}
|
|
38
43
|
if (taskText) {
|
|
39
44
|
return { task: taskText, amend: amendFlag };
|
|
40
45
|
}
|
|
41
46
|
|
|
42
|
-
const eventName =
|
|
47
|
+
const eventName = runtime.proc.env.GITHUB_EVENT_NAME;
|
|
43
48
|
if (!eventName) {
|
|
44
49
|
throw new Error("--task-event requires GITHUB_EVENT_NAME to be set");
|
|
45
50
|
}
|
|
46
|
-
const payload = JSON.parse(readFileSync(taskEvent, "utf8"));
|
|
51
|
+
const payload = JSON.parse(runtime.fsSync.readFileSync(taskEvent, "utf8"));
|
|
47
52
|
const composed = composeTaskFromGitHubEvent(payload, eventName);
|
|
48
53
|
return { task: composed.task, amend: amendFlag ?? composed.amend };
|
|
49
54
|
}
|
package/src/commands/trace.js
CHANGED
|
@@ -1,153 +1,227 @@
|
|
|
1
|
-
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
2
1
|
import { join, dirname } from "node:path";
|
|
3
2
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
4
3
|
import { createTraceQuery } from "../trace-query.js";
|
|
5
4
|
import { createTraceGitHub } from "../trace-github.js";
|
|
6
5
|
import { stripSignatures } from "../signature-filter.js";
|
|
7
6
|
|
|
7
|
+
// Every handler receives a libcli `InvocationContext`:
|
|
8
|
+
// ctx.options — parsed flag values (`cli.parse().values`)
|
|
9
|
+
// ctx.args — named positionals declared on the subcommand
|
|
10
|
+
// ctx.deps — host-injected collaborators: `{ runtime, config }`
|
|
11
|
+
// Handlers read/write the filesystem and stdout exclusively through
|
|
12
|
+
// `ctx.deps.runtime` and return `{ ok: true }` on success.
|
|
13
|
+
|
|
8
14
|
// --- GitHub commands ---
|
|
9
15
|
|
|
10
16
|
/**
|
|
11
17
|
* List recent workflow runs matching a pattern.
|
|
12
|
-
* @param {
|
|
13
|
-
* @param {string[]} args - [pattern?]
|
|
14
|
-
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
18
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
15
19
|
*/
|
|
16
|
-
export async function runRunsCommand(
|
|
20
|
+
export async function runRunsCommand(ctx) {
|
|
21
|
+
const { runtime, config } = ctx.deps;
|
|
17
22
|
const gh = await createTraceGitHub({
|
|
18
|
-
token:
|
|
19
|
-
repo:
|
|
23
|
+
token: config.ghToken(),
|
|
24
|
+
repo: ctx.options.repo,
|
|
25
|
+
runtime,
|
|
20
26
|
});
|
|
21
|
-
const pattern = args
|
|
22
|
-
const lookback =
|
|
27
|
+
const pattern = ctx.args.pattern ?? "agent";
|
|
28
|
+
const lookback = ctx.options.lookback ?? "7d";
|
|
23
29
|
const runs = await gh.listRuns({ pattern, lookback });
|
|
24
|
-
writeJSON(runs,
|
|
30
|
+
writeJSON(runtime, runs, ctx.options);
|
|
31
|
+
return { ok: true };
|
|
25
32
|
}
|
|
26
33
|
|
|
27
34
|
/**
|
|
28
35
|
* Download a trace artifact and auto-convert to structured JSON.
|
|
29
|
-
* @param {
|
|
30
|
-
* @param {string[]} args - [run-id]
|
|
31
|
-
* @param {{config: import("@forwardimpact/libconfig").Config}} ctx
|
|
36
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
32
37
|
*/
|
|
33
|
-
export async function runDownloadCommand(
|
|
38
|
+
export async function runDownloadCommand(ctx) {
|
|
39
|
+
const { runtime, config } = ctx.deps;
|
|
34
40
|
const gh = await createTraceGitHub({
|
|
35
|
-
token:
|
|
36
|
-
repo:
|
|
41
|
+
token: config.ghToken(),
|
|
42
|
+
repo: ctx.options.repo,
|
|
43
|
+
runtime,
|
|
37
44
|
});
|
|
38
|
-
const result = await gh.downloadTrace(args[
|
|
39
|
-
dir:
|
|
40
|
-
name:
|
|
45
|
+
const result = await gh.downloadTrace(ctx.args["run-id"], {
|
|
46
|
+
dir: ctx.options.dir,
|
|
47
|
+
name: ctx.options.artifact,
|
|
41
48
|
});
|
|
42
49
|
|
|
43
50
|
const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
|
|
44
51
|
if (ndjsonFile) {
|
|
45
52
|
const ndjsonPath = join(result.dir, ndjsonFile);
|
|
46
53
|
const collector = createTraceCollector();
|
|
47
|
-
for (const line of
|
|
54
|
+
for (const line of runtime.fsSync
|
|
55
|
+
.readFileSync(ndjsonPath, "utf8")
|
|
56
|
+
.split("\n")) {
|
|
48
57
|
collector.addLine(line);
|
|
49
58
|
}
|
|
50
59
|
const structuredPath = join(result.dir, "structured.json");
|
|
51
|
-
|
|
60
|
+
runtime.fsSync.writeFileSync(
|
|
61
|
+
structuredPath,
|
|
62
|
+
JSON.stringify(collector.toJSON()) + "\n",
|
|
63
|
+
);
|
|
52
64
|
result.files.push("structured.json");
|
|
53
65
|
}
|
|
54
66
|
|
|
55
|
-
writeJSON(result,
|
|
67
|
+
writeJSON(runtime, result, ctx.options);
|
|
68
|
+
return { ok: true };
|
|
56
69
|
}
|
|
57
70
|
|
|
58
71
|
// --- Query commands ---
|
|
59
72
|
|
|
60
|
-
/** @param {
|
|
61
|
-
export async function runOverviewCommand(
|
|
62
|
-
|
|
73
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
74
|
+
export async function runOverviewCommand(ctx) {
|
|
75
|
+
const { runtime } = ctx.deps;
|
|
76
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).overview(), ctx.options);
|
|
77
|
+
return { ok: true };
|
|
63
78
|
}
|
|
64
79
|
|
|
65
|
-
/** @param {
|
|
66
|
-
export async function runCountCommand(
|
|
67
|
-
|
|
80
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
81
|
+
export async function runCountCommand(ctx) {
|
|
82
|
+
const { runtime } = ctx.deps;
|
|
83
|
+
runtime.proc.stdout.write(
|
|
84
|
+
String(loadTrace(runtime, ctx.args.file).count()) + "\n",
|
|
85
|
+
);
|
|
86
|
+
return { ok: true };
|
|
68
87
|
}
|
|
69
88
|
|
|
70
|
-
/** @param {
|
|
71
|
-
export async function runBatchCommand(
|
|
89
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
90
|
+
export async function runBatchCommand(ctx) {
|
|
91
|
+
const { runtime } = ctx.deps;
|
|
72
92
|
writeJSON(
|
|
73
|
-
|
|
74
|
-
|
|
93
|
+
runtime,
|
|
94
|
+
loadTrace(runtime, ctx.args.file).batch(
|
|
95
|
+
parseInt(ctx.args.from, 10),
|
|
96
|
+
parseInt(ctx.args.to, 10),
|
|
97
|
+
),
|
|
98
|
+
ctx.options,
|
|
75
99
|
);
|
|
100
|
+
return { ok: true };
|
|
76
101
|
}
|
|
77
102
|
|
|
78
|
-
/** @param {
|
|
79
|
-
export async function runHeadCommand(
|
|
80
|
-
const
|
|
81
|
-
|
|
103
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
104
|
+
export async function runHeadCommand(ctx) {
|
|
105
|
+
const { runtime } = ctx.deps;
|
|
106
|
+
const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
|
|
107
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).head(n), ctx.options);
|
|
108
|
+
return { ok: true };
|
|
82
109
|
}
|
|
83
110
|
|
|
84
|
-
/** @param {
|
|
85
|
-
export async function runTailCommand(
|
|
86
|
-
const
|
|
87
|
-
|
|
111
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
112
|
+
export async function runTailCommand(ctx) {
|
|
113
|
+
const { runtime } = ctx.deps;
|
|
114
|
+
const n = ctx.args.n ? parseInt(ctx.args.n, 10) : 10;
|
|
115
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).tail(n), ctx.options);
|
|
116
|
+
return { ok: true };
|
|
88
117
|
}
|
|
89
118
|
|
|
90
|
-
/** @param {
|
|
91
|
-
export async function runSearchCommand(
|
|
92
|
-
const
|
|
93
|
-
const
|
|
94
|
-
const
|
|
119
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
120
|
+
export async function runSearchCommand(ctx) {
|
|
121
|
+
const { runtime } = ctx.deps;
|
|
122
|
+
const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
|
|
123
|
+
const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
|
|
124
|
+
const full = ctx.options.full ?? false;
|
|
95
125
|
writeJSON(
|
|
96
|
-
|
|
97
|
-
|
|
126
|
+
runtime,
|
|
127
|
+
loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
|
|
128
|
+
limit,
|
|
129
|
+
context,
|
|
130
|
+
full,
|
|
131
|
+
}),
|
|
132
|
+
ctx.options,
|
|
98
133
|
);
|
|
134
|
+
return { ok: true };
|
|
99
135
|
}
|
|
100
136
|
|
|
101
|
-
/** @param {
|
|
102
|
-
export async function runToolsCommand(
|
|
103
|
-
|
|
137
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
138
|
+
export async function runToolsCommand(ctx) {
|
|
139
|
+
const { runtime } = ctx.deps;
|
|
140
|
+
writeJSON(
|
|
141
|
+
runtime,
|
|
142
|
+
loadTrace(runtime, ctx.args.file).toolFrequency(),
|
|
143
|
+
ctx.options,
|
|
144
|
+
);
|
|
145
|
+
return { ok: true };
|
|
104
146
|
}
|
|
105
147
|
|
|
106
|
-
/** @param {
|
|
107
|
-
export async function runToolCommand(
|
|
108
|
-
|
|
148
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
149
|
+
export async function runToolCommand(ctx) {
|
|
150
|
+
const { runtime } = ctx.deps;
|
|
151
|
+
writeJSON(
|
|
152
|
+
runtime,
|
|
153
|
+
loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
|
|
154
|
+
ctx.options,
|
|
155
|
+
);
|
|
156
|
+
return { ok: true };
|
|
109
157
|
}
|
|
110
158
|
|
|
111
|
-
/** @param {
|
|
112
|
-
export async function runErrorsCommand(
|
|
113
|
-
|
|
159
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
160
|
+
export async function runErrorsCommand(ctx) {
|
|
161
|
+
const { runtime } = ctx.deps;
|
|
162
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).errors(), ctx.options);
|
|
163
|
+
return { ok: true };
|
|
114
164
|
}
|
|
115
165
|
|
|
116
|
-
/** @param {
|
|
117
|
-
export async function runReasoningCommand(
|
|
118
|
-
const
|
|
119
|
-
const
|
|
120
|
-
|
|
166
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
167
|
+
export async function runReasoningCommand(ctx) {
|
|
168
|
+
const { runtime } = ctx.deps;
|
|
169
|
+
const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
|
|
170
|
+
const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
|
|
171
|
+
writeJSON(
|
|
172
|
+
runtime,
|
|
173
|
+
loadTrace(runtime, ctx.args.file).reasoning({ from, to }),
|
|
174
|
+
ctx.options,
|
|
175
|
+
);
|
|
176
|
+
return { ok: true };
|
|
121
177
|
}
|
|
122
178
|
|
|
123
|
-
/** @param {
|
|
124
|
-
export async function runTimelineCommand(
|
|
125
|
-
const
|
|
126
|
-
|
|
179
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
180
|
+
export async function runTimelineCommand(ctx) {
|
|
181
|
+
const { runtime } = ctx.deps;
|
|
182
|
+
const lines = loadTrace(runtime, ctx.args.file).timeline();
|
|
183
|
+
runtime.proc.stdout.write(lines.join("\n") + "\n");
|
|
184
|
+
return { ok: true };
|
|
127
185
|
}
|
|
128
186
|
|
|
129
|
-
/** @param {
|
|
130
|
-
export async function runStatsCommand(
|
|
131
|
-
|
|
187
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
188
|
+
export async function runStatsCommand(ctx) {
|
|
189
|
+
const { runtime } = ctx.deps;
|
|
190
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).stats(), ctx.options);
|
|
191
|
+
return { ok: true };
|
|
132
192
|
}
|
|
133
193
|
|
|
134
|
-
/** @param {
|
|
135
|
-
export async function runInitCommand(
|
|
136
|
-
|
|
194
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
195
|
+
export async function runInitCommand(ctx) {
|
|
196
|
+
const { runtime } = ctx.deps;
|
|
197
|
+
writeJSON(runtime, loadTrace(runtime, ctx.args.file).init(), ctx.options);
|
|
198
|
+
return { ok: true };
|
|
137
199
|
}
|
|
138
200
|
|
|
139
|
-
/** @param {
|
|
140
|
-
export async function runTurnCommand(
|
|
141
|
-
|
|
201
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
202
|
+
export async function runTurnCommand(ctx) {
|
|
203
|
+
const { runtime } = ctx.deps;
|
|
204
|
+
writeJSON(
|
|
205
|
+
runtime,
|
|
206
|
+
loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
|
|
207
|
+
ctx.options,
|
|
208
|
+
);
|
|
209
|
+
return { ok: true };
|
|
142
210
|
}
|
|
143
211
|
|
|
144
|
-
/** @param {
|
|
145
|
-
export async function runFilterCommand(
|
|
212
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
213
|
+
export async function runFilterCommand(ctx) {
|
|
214
|
+
const { runtime } = ctx.deps;
|
|
146
215
|
const opts = {};
|
|
147
|
-
if (
|
|
148
|
-
if (
|
|
149
|
-
if (
|
|
150
|
-
writeJSON(
|
|
216
|
+
if (ctx.options.role) opts.role = ctx.options.role;
|
|
217
|
+
if (ctx.options.tool) opts.toolName = ctx.options.tool;
|
|
218
|
+
if (ctx.options.error) opts.isError = true;
|
|
219
|
+
writeJSON(
|
|
220
|
+
runtime,
|
|
221
|
+
loadTrace(runtime, ctx.args.file).filter(opts),
|
|
222
|
+
ctx.options,
|
|
223
|
+
);
|
|
224
|
+
return { ok: true };
|
|
151
225
|
}
|
|
152
226
|
|
|
153
227
|
// --- Split command ---
|
|
@@ -168,24 +242,24 @@ const STRUCTURAL_ROLES = new Set(["agent", "supervisor", "facilitator"]);
|
|
|
168
242
|
* `staff-engineer`) classify as agents with the profile in the participant
|
|
169
243
|
* slot. Orchestrator events and invalid source names are dropped.
|
|
170
244
|
*
|
|
171
|
-
* @param {
|
|
172
|
-
* @param {string[]} args - [file]
|
|
245
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
173
246
|
*/
|
|
174
|
-
export async function runSplitCommand(
|
|
175
|
-
const
|
|
176
|
-
|
|
247
|
+
export async function runSplitCommand(ctx) {
|
|
248
|
+
const { runtime } = ctx.deps;
|
|
249
|
+
const file = ctx.args.file;
|
|
250
|
+
if (!file) return { ok: false, code: 1, error: "split: missing input file" };
|
|
177
251
|
|
|
178
|
-
const mode =
|
|
179
|
-
if (!mode)
|
|
252
|
+
const mode = ctx.options.mode;
|
|
253
|
+
if (!mode) return { ok: false, code: 1, error: "split: --mode is required" };
|
|
180
254
|
if (!["run", "supervise", "facilitate"].includes(mode)) {
|
|
181
|
-
|
|
255
|
+
return { ok: false, code: 1, error: `split: invalid --mode "${mode}"` };
|
|
182
256
|
}
|
|
183
257
|
|
|
184
|
-
const caseId =
|
|
185
|
-
const outputDir =
|
|
186
|
-
mkdirSync(outputDir, { recursive: true });
|
|
258
|
+
const caseId = ctx.options.case ?? "default";
|
|
259
|
+
const outputDir = ctx.options["output-dir"] || dirname(file);
|
|
260
|
+
runtime.fsSync.mkdirSync(outputDir, { recursive: true });
|
|
187
261
|
|
|
188
|
-
const buckets = parseBuckets(readFileSync(file, "utf8"));
|
|
262
|
+
const buckets = parseBuckets(runtime.fsSync.readFileSync(file, "utf8"));
|
|
189
263
|
|
|
190
264
|
for (const [source, lines] of buckets.entries()) {
|
|
191
265
|
if (!VALID_SOURCE_NAME.test(source)) continue;
|
|
@@ -194,8 +268,9 @@ export async function runSplitCommand(values, args) {
|
|
|
194
268
|
outputDir,
|
|
195
269
|
`trace--${caseId}--${source}.${role}.ndjson`,
|
|
196
270
|
);
|
|
197
|
-
writeFileSync(outPath, lines.join("\n") + "\n");
|
|
271
|
+
runtime.fsSync.writeFileSync(outPath, lines.join("\n") + "\n");
|
|
198
272
|
}
|
|
273
|
+
return { ok: true };
|
|
199
274
|
}
|
|
200
275
|
|
|
201
276
|
/**
|
|
@@ -234,11 +309,12 @@ function parseBuckets(content) {
|
|
|
234
309
|
|
|
235
310
|
/**
|
|
236
311
|
* Load a trace file. Supports structured JSON and raw NDJSON.
|
|
312
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
237
313
|
* @param {string} file
|
|
238
314
|
* @returns {import("../trace-query.js").TraceQuery}
|
|
239
315
|
*/
|
|
240
|
-
function loadTrace(file) {
|
|
241
|
-
const content = readFileSync(file, "utf8");
|
|
316
|
+
function loadTrace(runtime, file) {
|
|
317
|
+
const content = runtime.fsSync.readFileSync(file, "utf8");
|
|
242
318
|
|
|
243
319
|
try {
|
|
244
320
|
const parsed = JSON.parse(content);
|
|
@@ -260,10 +336,11 @@ function loadTrace(file) {
|
|
|
260
336
|
* Write JSON output to stdout. By default strips `thinking.signature`
|
|
261
337
|
* base64 blobs from the payload so they don't dominate terminal output;
|
|
262
338
|
* pass `--signatures` (surfaced as `values.signatures`) to keep them.
|
|
339
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
263
340
|
* @param {*} data
|
|
264
341
|
* @param {object} [values]
|
|
265
342
|
*/
|
|
266
|
-
function writeJSON(data, values = {}) {
|
|
343
|
+
function writeJSON(runtime, data, values = {}) {
|
|
267
344
|
const output = values.signatures ? data : stripSignatures(data);
|
|
268
|
-
|
|
345
|
+
runtime.proc.stdout.write(JSON.stringify(output, null, 2) + "\n");
|
|
269
346
|
}
|