@forwardimpact/libeval 0.1.51 → 0.1.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +1 -14
- package/package.json +1 -1
- package/src/benchmark/apm-installer.js +48 -44
- package/src/benchmark/invariants.js +51 -63
- package/src/benchmark/judge.js +13 -11
- package/src/benchmark/npm-installer.js +33 -33
- package/src/benchmark/report.js +25 -11
- package/src/benchmark/runner.js +82 -38
- package/src/benchmark/task-family.js +73 -62
- package/src/benchmark/workdir.js +91 -99
- package/src/commands/benchmark-invariants.js +2 -2
- package/src/commands/benchmark-report.js +1 -0
- package/src/commands/benchmark-run.js +1 -1
- package/src/commands/by-discussion.js +10 -11
- package/src/commands/discuss.js +3 -2
- package/src/commands/facilitate.js +3 -2
- package/src/commands/output.js +4 -1
- package/src/commands/run.js +6 -2
- package/src/commands/supervise.js +3 -2
- package/src/commands/tee.js +24 -9
- package/src/commands/trace.js +7 -2
- package/src/discusser.js +4 -0
- package/src/facilitator.js +4 -0
- package/src/judge.js +3 -0
- package/src/profile-prompt.js +22 -9
- package/src/supervisor.js +4 -0
- package/src/tee-writer.js +4 -2
- package/src/trace-collector.js +9 -2
- package/src/trace-github.js +47 -27
package/src/benchmark/workdir.js
CHANGED
|
@@ -5,16 +5,17 @@
|
|
|
5
5
|
*
|
|
6
6
|
* The Workdir handle threads `cwd`, `port`, `pgid`, and trace paths through
|
|
7
7
|
* runAgent → invariants → judge → teardown.
|
|
8
|
+
*
|
|
9
|
+
* Filesystem, subprocess, clock, and process-signal access all route through
|
|
10
|
+
* the injected `runtime` bag. Only raw TCP plumbing (`node:net`) stays direct —
|
|
11
|
+
* it is not an ambient-dependency smell and the runtime bag models no socket
|
|
12
|
+
* surface.
|
|
8
13
|
*/
|
|
9
14
|
|
|
10
|
-
import { spawn } from "node:child_process";
|
|
11
|
-
import { cp, mkdir } from "node:fs/promises";
|
|
12
15
|
import { createServer } from "node:net";
|
|
13
16
|
import { connect } from "node:net";
|
|
14
17
|
import { join } from "node:path";
|
|
15
18
|
|
|
16
|
-
import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
|
|
17
|
-
|
|
18
19
|
import { loadEnv } from "./env-loader.js";
|
|
19
20
|
|
|
20
21
|
const DEFAULT_TERM_GRACE_MS = 5_000;
|
|
@@ -39,6 +40,8 @@ export class WorkdirManager {
|
|
|
39
40
|
* @param {object} deps
|
|
40
41
|
* @param {string} deps.stagingDir - Output of `installApm(...)`.
|
|
41
42
|
* @param {string} deps.runOutputDir - Root run-output directory (parent of `runs/`).
|
|
43
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
|
|
44
|
+
* Ambient collaborators; uses `fs`, `subprocess`, `clock`, `proc`.
|
|
42
45
|
*/
|
|
43
46
|
constructor({
|
|
44
47
|
stagingDir,
|
|
@@ -49,14 +52,12 @@ export class WorkdirManager {
|
|
|
49
52
|
}) {
|
|
50
53
|
if (!stagingDir) throw new Error("stagingDir is required");
|
|
51
54
|
if (!runOutputDir) throw new Error("runOutputDir is required");
|
|
55
|
+
if (!runtime) throw new Error("runtime is required");
|
|
52
56
|
this.stagingDir = stagingDir;
|
|
53
57
|
this.runOutputDir = runOutputDir;
|
|
54
58
|
this.termGraceMs = termGraceMs ?? DEFAULT_TERM_GRACE_MS;
|
|
55
59
|
this.familyRootPath = familyRootPath ?? null;
|
|
56
|
-
|
|
57
|
-
// rest of this manager still uses raw streaming/net/process-group APIs the
|
|
58
|
-
// runtime surface does not yet cover.
|
|
59
|
-
this.runtime = runtime ?? null;
|
|
60
|
+
this.runtime = runtime;
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
/**
|
|
@@ -66,36 +67,39 @@ export class WorkdirManager {
|
|
|
66
67
|
* @returns {Promise<Workdir>}
|
|
67
68
|
*/
|
|
68
69
|
async start(task, runIndex) {
|
|
70
|
+
const fs = this.runtime.fs;
|
|
69
71
|
const slug = task.id.replace("/", "__");
|
|
70
72
|
const runDir = join(this.runOutputDir, "runs", slug, String(runIndex));
|
|
71
73
|
const cwd = join(runDir, "cwd");
|
|
72
|
-
await mkdir(cwd, { recursive: true });
|
|
74
|
+
await fs.mkdir(cwd, { recursive: true });
|
|
73
75
|
|
|
74
|
-
await cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
|
|
75
|
-
if (e.code !== "ENOENT") throw e;
|
|
76
|
-
});
|
|
77
|
-
await cp(task.paths.specs, join(cwd, "specs"), {
|
|
78
|
-
recursive: true,
|
|
79
|
-
}).catch((e) => {
|
|
76
|
+
await fs.cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
|
|
80
77
|
if (e.code !== "ENOENT") throw e;
|
|
81
78
|
});
|
|
82
|
-
await
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
await fs
|
|
80
|
+
.cp(task.paths.specs, join(cwd, "specs"), {
|
|
81
|
+
recursive: true,
|
|
82
|
+
})
|
|
83
|
+
.catch((e) => {
|
|
84
|
+
if (e.code !== "ENOENT") throw e;
|
|
85
|
+
});
|
|
86
|
+
await fs.cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
|
|
86
87
|
recursive: true,
|
|
87
|
-
}).catch((e) => {
|
|
88
|
-
if (e.code !== "ENOENT") throw e;
|
|
89
88
|
});
|
|
89
|
+
await fs
|
|
90
|
+
.cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
|
|
91
|
+
recursive: true,
|
|
92
|
+
})
|
|
93
|
+
.catch((e) => {
|
|
94
|
+
if (e.code !== "ENOENT") throw e;
|
|
95
|
+
});
|
|
90
96
|
|
|
91
97
|
const envDirs = [
|
|
92
98
|
...(this.familyRootPath ? [this.familyRootPath] : []),
|
|
93
99
|
...(task.paths.taskDir ? [task.paths.taskDir] : []),
|
|
94
100
|
];
|
|
95
101
|
const envNames =
|
|
96
|
-
envDirs.length > 0
|
|
97
|
-
? await loadEnv(envDirs, cwd, this.runtime ?? createDefaultRuntime())
|
|
98
|
-
: [];
|
|
102
|
+
envDirs.length > 0 ? await loadEnv(envDirs, cwd, this.runtime) : [];
|
|
99
103
|
|
|
100
104
|
const port = await allocatePort();
|
|
101
105
|
const agentTracePath = join(runDir, "agent.ndjson");
|
|
@@ -103,7 +107,7 @@ export class WorkdirManager {
|
|
|
103
107
|
const judgeTracePath = join(runDir, "judge.ndjson");
|
|
104
108
|
|
|
105
109
|
const preflight = task.paths.preflight
|
|
106
|
-
? await runPreflight(task.paths.preflight, cwd, port)
|
|
110
|
+
? await runPreflight(this.runtime, task.paths.preflight, cwd, port)
|
|
107
111
|
: { pgid: 0 };
|
|
108
112
|
|
|
109
113
|
return {
|
|
@@ -126,81 +130,71 @@ export class WorkdirManager {
|
|
|
126
130
|
* @returns {Promise<{portFree: boolean, descendants: number}>}
|
|
127
131
|
*/
|
|
128
132
|
async teardown(workdir) {
|
|
133
|
+
const { proc, clock } = this.runtime;
|
|
129
134
|
if (workdir.pgid && workdir.pgid > 0) {
|
|
130
135
|
try {
|
|
131
|
-
|
|
136
|
+
proc.kill(-workdir.pgid, "SIGTERM");
|
|
132
137
|
} catch {
|
|
133
138
|
// Process group already gone — fine.
|
|
134
139
|
}
|
|
135
|
-
await sleep(this.termGraceMs);
|
|
140
|
+
await clock.sleep(this.termGraceMs);
|
|
136
141
|
try {
|
|
137
|
-
|
|
142
|
+
proc.kill(-workdir.pgid, "SIGKILL");
|
|
138
143
|
} catch {
|
|
139
144
|
// Already exited.
|
|
140
145
|
}
|
|
141
146
|
// Poll briefly until the process group is empty — SIGKILL returns
|
|
142
147
|
// before the kernel finishes reaping descendants.
|
|
143
148
|
await waitFor(
|
|
144
|
-
|
|
149
|
+
this.runtime,
|
|
150
|
+
async () => (await countDescendants(this.runtime, workdir.pgid)) === 0,
|
|
145
151
|
2_000,
|
|
146
152
|
);
|
|
147
153
|
}
|
|
148
154
|
const portFree = await isPortFree(workdir.port);
|
|
149
|
-
const descendants = await countDescendants(workdir.pgid);
|
|
155
|
+
const descendants = await countDescendants(this.runtime, workdir.pgid);
|
|
150
156
|
return { portFree, descendants };
|
|
151
157
|
}
|
|
152
158
|
}
|
|
153
159
|
|
|
154
160
|
/**
|
|
155
161
|
* Spawn preflight. Stays detached so we can SIGTERM the whole process group.
|
|
162
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
156
163
|
* @param {string} script
|
|
157
164
|
* @param {string} cwd - Agent CWD passed via $WORKDIR.
|
|
158
165
|
* @param {number} port - Free TCP port passed via $PORT.
|
|
159
166
|
* @returns {Promise<{pgid: number, error?: {phase: string, message: string, exitCode: number}}>}
|
|
160
167
|
*/
|
|
161
|
-
function runPreflight(script, cwd, port) {
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
detached: true,
|
|
168
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
169
|
-
});
|
|
170
|
-
if (child.pid === undefined) {
|
|
171
|
-
rej(new Error(`failed to spawn preflight: ${script}`));
|
|
172
|
-
return;
|
|
173
|
-
}
|
|
174
|
-
const pgid = child.pid;
|
|
175
|
-
child.stderr.on("data", (d) => {
|
|
176
|
-
stderr += d.toString();
|
|
177
|
-
});
|
|
178
|
-
child.on("error", (e) => {
|
|
179
|
-
res({
|
|
180
|
-
pgid,
|
|
181
|
-
error: {
|
|
182
|
-
phase: "preflight",
|
|
183
|
-
message: `preflight failed to spawn: ${e.message}`,
|
|
184
|
-
exitCode: -1,
|
|
185
|
-
},
|
|
186
|
-
});
|
|
187
|
-
});
|
|
188
|
-
child.on("exit", (code, signal) => {
|
|
189
|
-
if (code === 0) {
|
|
190
|
-
res({ pgid });
|
|
191
|
-
return;
|
|
192
|
-
}
|
|
193
|
-
const message = stderr.trim() || `preflight exited with signal ${signal}`;
|
|
194
|
-
res({
|
|
195
|
-
pgid,
|
|
196
|
-
error: {
|
|
197
|
-
phase: "preflight",
|
|
198
|
-
message,
|
|
199
|
-
exitCode: typeof code === "number" ? code : -1,
|
|
200
|
-
},
|
|
201
|
-
});
|
|
202
|
-
});
|
|
168
|
+
async function runPreflight(runtime, script, cwd, port) {
|
|
169
|
+
const child = runtime.subprocess.spawn(script, [], {
|
|
170
|
+
cwd,
|
|
171
|
+
env: { ...runtime.proc.env, WORKDIR: cwd, PORT: String(port) },
|
|
172
|
+
detached: true,
|
|
173
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
203
174
|
});
|
|
175
|
+
if (child.pid === undefined) {
|
|
176
|
+
throw new Error(`failed to spawn preflight: ${script}`);
|
|
177
|
+
}
|
|
178
|
+
const pgid = child.pid;
|
|
179
|
+
let stderr = "";
|
|
180
|
+
const drainStdout = (async () => {
|
|
181
|
+
for await (const _chunk of child.stdout) {
|
|
182
|
+
// discard
|
|
183
|
+
}
|
|
184
|
+
})();
|
|
185
|
+
for await (const chunk of child.stderr) stderr += chunk.toString();
|
|
186
|
+
await drainStdout;
|
|
187
|
+
const code = await child.exitCode;
|
|
188
|
+
if (code === 0) return { pgid };
|
|
189
|
+
const message = stderr.trim() || `preflight exited with code ${code}`;
|
|
190
|
+
return {
|
|
191
|
+
pgid,
|
|
192
|
+
error: {
|
|
193
|
+
phase: "preflight",
|
|
194
|
+
message,
|
|
195
|
+
exitCode: typeof code === "number" ? code : -1,
|
|
196
|
+
},
|
|
197
|
+
};
|
|
204
198
|
}
|
|
205
199
|
|
|
206
200
|
function allocatePort() {
|
|
@@ -236,37 +230,35 @@ function isPortFree(port) {
|
|
|
236
230
|
});
|
|
237
231
|
}
|
|
238
232
|
|
|
239
|
-
function countDescendants(pgid) {
|
|
240
|
-
if (!pgid || pgid <= 0) return
|
|
241
|
-
|
|
242
|
-
|
|
233
|
+
async function countDescendants(runtime, pgid) {
|
|
234
|
+
if (!pgid || pgid <= 0) return 0;
|
|
235
|
+
const child = runtime.subprocess.spawn(
|
|
236
|
+
"ps",
|
|
237
|
+
["-o", "pid=", "-g", String(pgid)],
|
|
238
|
+
{
|
|
243
239
|
stdio: ["ignore", "pipe", "ignore"],
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
child.
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
function sleep(ms) {
|
|
262
|
-
return new Promise((r) => setTimeout(r, ms));
|
|
240
|
+
},
|
|
241
|
+
);
|
|
242
|
+
let out = "";
|
|
243
|
+
try {
|
|
244
|
+
for await (const chunk of child.stdout) out += chunk.toString();
|
|
245
|
+
await child.exitCode;
|
|
246
|
+
} catch {
|
|
247
|
+
return 0;
|
|
248
|
+
}
|
|
249
|
+
const pids = out
|
|
250
|
+
.split("\n")
|
|
251
|
+
.map((s) => s.trim())
|
|
252
|
+
.filter(Boolean)
|
|
253
|
+
.filter((s) => Number(s) !== runtime.proc.pid);
|
|
254
|
+
return pids.length;
|
|
263
255
|
}
|
|
264
256
|
|
|
265
|
-
async function waitFor(predicate, timeoutMs) {
|
|
266
|
-
const deadline =
|
|
267
|
-
while (
|
|
257
|
+
async function waitFor(runtime, predicate, timeoutMs) {
|
|
258
|
+
const deadline = runtime.clock.now() + timeoutMs;
|
|
259
|
+
while (runtime.clock.now() < deadline) {
|
|
268
260
|
if (await predicate()) return true;
|
|
269
|
-
await sleep(50);
|
|
261
|
+
await runtime.clock.sleep(50);
|
|
270
262
|
}
|
|
271
263
|
return false;
|
|
272
264
|
}
|
|
@@ -27,7 +27,7 @@ export async function runBenchmarkInvariantsCommand(ctx) {
|
|
|
27
27
|
if (!workdirArg)
|
|
28
28
|
return { ok: false, code: 1, error: "--workdir is required" };
|
|
29
29
|
|
|
30
|
-
const family = await loadTaskFamily(familyInput);
|
|
30
|
+
const family = await loadTaskFamily(familyInput, runtime);
|
|
31
31
|
const task = family.tasks().find((t) => t.id === taskId);
|
|
32
32
|
if (!task)
|
|
33
33
|
return { ok: false, code: 1, error: `task not found in family: ${taskId}` };
|
|
@@ -36,7 +36,7 @@ export async function runBenchmarkInvariantsCommand(ctx) {
|
|
|
36
36
|
const cwd = join(runDir, "cwd");
|
|
37
37
|
const port = await allocatePort();
|
|
38
38
|
|
|
39
|
-
const invariants = await runInvariants(task, { cwd, port, runDir });
|
|
39
|
+
const invariants = await runInvariants(task, { cwd, port, runDir }, runtime);
|
|
40
40
|
const record = {
|
|
41
41
|
taskId: task.id,
|
|
42
42
|
invariants,
|
|
@@ -33,7 +33,7 @@ export async function runBenchmarkRunCommand(ctx) {
|
|
|
33
33
|
delete runtime.proc.env.NODE_EXTRA_CA_CERTS;
|
|
34
34
|
|
|
35
35
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
36
|
-
const runner = createBenchmarkRunner({ ...opts, query });
|
|
36
|
+
const runner = createBenchmarkRunner({ ...opts, query, runtime });
|
|
37
37
|
|
|
38
38
|
let anyFail = false;
|
|
39
39
|
for await (const record of runner.run()) {
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { closeSync, openSync, readSync } from "node:fs";
|
|
2
1
|
import { join } from "node:path";
|
|
3
2
|
|
|
4
3
|
const FIRST_LINE_CAP = 64 * 1024;
|
|
@@ -6,25 +5,25 @@ const FIRST_LINE_CAP = 64 * 1024;
|
|
|
6
5
|
/**
|
|
7
6
|
* Read the first newline-terminated line of a file, bounded to the first
|
|
8
7
|
* {@link FIRST_LINE_CAP} bytes. Trace `.ndjson` files can be many MB; the
|
|
9
|
-
* Step 2.6 meta header is always small, so a bounded
|
|
10
|
-
* loading whole files into memory just to inspect the header.
|
|
11
|
-
* `
|
|
12
|
-
*
|
|
13
|
-
* `import:fs` in `check-ambient-deps.deny.yml` until that seam exists.
|
|
8
|
+
* Step 2.6 meta header is always small, so a bounded positional read avoids
|
|
9
|
+
* loading whole files into memory just to inspect the header. The positional
|
|
10
|
+
* `openSync`/`readSync`/`closeSync` trio is read off the injected
|
|
11
|
+
* `runtime.fsSync` surface.
|
|
14
12
|
*
|
|
13
|
+
* @param {object} fsSync - Sync filesystem surface (`runtime.fsSync`).
|
|
15
14
|
* @param {string} path
|
|
16
15
|
* @returns {string}
|
|
17
16
|
*/
|
|
18
|
-
function readFirstLine(path) {
|
|
19
|
-
const fd = openSync(path, "r");
|
|
17
|
+
function readFirstLine(fsSync, path) {
|
|
18
|
+
const fd = fsSync.openSync(path, "r");
|
|
20
19
|
try {
|
|
21
20
|
const buf = Buffer.alloc(FIRST_LINE_CAP);
|
|
22
|
-
const bytes = readSync(fd, buf, 0, buf.length, 0);
|
|
21
|
+
const bytes = fsSync.readSync(fd, buf, 0, buf.length, 0);
|
|
23
22
|
const text = buf.toString("utf8", 0, bytes);
|
|
24
23
|
const nl = text.indexOf("\n");
|
|
25
24
|
return nl === -1 ? text : text.slice(0, nl);
|
|
26
25
|
} finally {
|
|
27
|
-
closeSync(fd);
|
|
26
|
+
fsSync.closeSync(fd);
|
|
28
27
|
}
|
|
29
28
|
}
|
|
30
29
|
|
|
@@ -53,7 +52,7 @@ export function findTracesByDiscussion(dir, discussionId, fsSync) {
|
|
|
53
52
|
const path = join(dir, entry);
|
|
54
53
|
let firstLine;
|
|
55
54
|
try {
|
|
56
|
-
firstLine = readFirstLine(path);
|
|
55
|
+
firstLine = readFirstLine(fsSync, path);
|
|
57
56
|
} catch {
|
|
58
57
|
continue;
|
|
59
58
|
}
|
package/src/commands/discuss.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { resolve } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
3
|
import { createDiscusser } from "../discusser.js";
|
|
4
4
|
import { createRedactor } from "../redaction.js";
|
|
5
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
@@ -80,13 +80,14 @@ export async function runDiscussCommand(ctx) {
|
|
|
80
80
|
const redactor = createRedactor({ runtime });
|
|
81
81
|
|
|
82
82
|
const fileStream = opts.outputPath
|
|
83
|
-
? createWriteStream(opts.outputPath)
|
|
83
|
+
? runtime.fs.createWriteStream(opts.outputPath)
|
|
84
84
|
: null;
|
|
85
85
|
const output = fileStream
|
|
86
86
|
? createTeeWriter({
|
|
87
87
|
fileStream,
|
|
88
88
|
textStream: runtime.proc.stdout,
|
|
89
89
|
mode: "supervised",
|
|
90
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
90
91
|
})
|
|
91
92
|
: runtime.proc.stdout;
|
|
92
93
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { resolve } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
3
|
import { createFacilitator } from "../facilitator.js";
|
|
4
4
|
import { createRedactor } from "../redaction.js";
|
|
5
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
@@ -76,13 +76,14 @@ export async function runFacilitateCommand(ctx) {
|
|
|
76
76
|
const redactor = createRedactor({ runtime });
|
|
77
77
|
|
|
78
78
|
const fileStream = opts.outputPath
|
|
79
|
-
? createWriteStream(opts.outputPath)
|
|
79
|
+
? runtime.fs.createWriteStream(opts.outputPath)
|
|
80
80
|
: null;
|
|
81
81
|
const output = fileStream
|
|
82
82
|
? createTeeWriter({
|
|
83
83
|
fileStream,
|
|
84
84
|
textStream: runtime.proc.stdout,
|
|
85
85
|
mode: "supervised",
|
|
86
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
86
87
|
})
|
|
87
88
|
: runtime.proc.stdout;
|
|
88
89
|
|
package/src/commands/output.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
1
2
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
2
3
|
|
|
3
4
|
/**
|
|
@@ -16,7 +17,9 @@ export async function runOutputCommand(ctx) {
|
|
|
16
17
|
values.format === "text" || values.format === "json"
|
|
17
18
|
? values.format
|
|
18
19
|
: "json";
|
|
19
|
-
const collector = createTraceCollector(
|
|
20
|
+
const collector = createTraceCollector({
|
|
21
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
22
|
+
});
|
|
20
23
|
|
|
21
24
|
// `runtime.proc.stdin` is an AsyncIterable of UTF-8 lines (newline-split by
|
|
22
25
|
// the runtime), so each yielded value is exactly one NDJSON record.
|
package/src/commands/run.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { Writable } from "node:stream";
|
|
3
2
|
import { resolve } from "node:path";
|
|
3
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
4
4
|
import { createAgentRunner } from "../agent-runner.js";
|
|
5
5
|
import { composeProfilePrompt } from "../profile-prompt.js";
|
|
6
6
|
import { createRedactor } from "../redaction.js";
|
|
@@ -67,12 +67,15 @@ export async function runRunCommand(ctx) {
|
|
|
67
67
|
|
|
68
68
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
69
69
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
70
|
-
const fileStream = outputPath
|
|
70
|
+
const fileStream = outputPath
|
|
71
|
+
? runtime.fs.createWriteStream(outputPath)
|
|
72
|
+
: null;
|
|
71
73
|
const output = fileStream
|
|
72
74
|
? createTeeWriter({
|
|
73
75
|
fileStream,
|
|
74
76
|
textStream: runtime.proc.stdout,
|
|
75
77
|
mode: "raw",
|
|
78
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
76
79
|
})
|
|
77
80
|
: runtime.proc.stdout;
|
|
78
81
|
|
|
@@ -108,6 +111,7 @@ export async function runRunCommand(ctx) {
|
|
|
108
111
|
const systemPrompt = agentProfile
|
|
109
112
|
? composeProfilePrompt(agentProfile, {
|
|
110
113
|
profilesDir: resolve(cwd, ".claude/agents"),
|
|
114
|
+
runtime,
|
|
111
115
|
})
|
|
112
116
|
: undefined;
|
|
113
117
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { createWriteStream } from "node:fs";
|
|
2
1
|
import { resolve, join } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
3
|
import { createSupervisor } from "../supervisor.js";
|
|
4
4
|
import { createRedactor } from "../redaction.js";
|
|
5
5
|
import { createTeeWriter } from "../tee-writer.js";
|
|
@@ -72,13 +72,14 @@ export async function runSuperviseCommand(ctx) {
|
|
|
72
72
|
// When --output is specified, stream text to stdout while writing NDJSON to file.
|
|
73
73
|
// Otherwise, write NDJSON directly to stdout (backwards-compatible).
|
|
74
74
|
const fileStream = opts.outputPath
|
|
75
|
-
? createWriteStream(opts.outputPath)
|
|
75
|
+
? runtime.fs.createWriteStream(opts.outputPath)
|
|
76
76
|
: null;
|
|
77
77
|
const output = fileStream
|
|
78
78
|
? createTeeWriter({
|
|
79
79
|
fileStream,
|
|
80
80
|
textStream: runtime.proc.stdout,
|
|
81
81
|
mode: "supervised",
|
|
82
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
82
83
|
})
|
|
83
84
|
: runtime.proc.stdout;
|
|
84
85
|
|
package/src/commands/tee.js
CHANGED
|
@@ -1,32 +1,47 @@
|
|
|
1
|
-
import { createWriteStream } from "fs";
|
|
2
1
|
import { PassThrough } from "node:stream";
|
|
3
2
|
import { pipeline } from "node:stream/promises";
|
|
3
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
4
4
|
import { createTeeWriter } from "../tee-writer.js";
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Tee command — stream text output to stdout while optionally saving the raw
|
|
8
|
-
* NDJSON to a file.
|
|
8
|
+
* NDJSON to a file. Reads stdin line-by-line through the injected runtime and
|
|
9
|
+
* re-delimits each record with a newline so the TeeWriter's line splitter sees
|
|
10
|
+
* the same framing the raw byte stream produced.
|
|
9
11
|
*
|
|
10
12
|
* Usage: fit-eval tee [output.ndjson] < trace.ndjson
|
|
11
13
|
*
|
|
12
|
-
* @param {
|
|
13
|
-
* @
|
|
14
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
15
|
+
* @returns {Promise<{ok: boolean, code?: number, error?: string}>}
|
|
14
16
|
*/
|
|
15
|
-
export async function runTeeCommand(
|
|
16
|
-
const
|
|
17
|
-
const
|
|
17
|
+
export async function runTeeCommand(ctx) {
|
|
18
|
+
const runtime = ctx.deps.runtime;
|
|
19
|
+
const outputPath = ctx.args.output ?? null;
|
|
20
|
+
const fileStream = outputPath
|
|
21
|
+
? runtime.fs.createWriteStream(outputPath)
|
|
22
|
+
: null;
|
|
18
23
|
|
|
19
24
|
// TeeWriter requires a fileStream; when no output file is specified,
|
|
20
25
|
// use a PassThrough as a no-op sink (NDJSON is not saved).
|
|
21
26
|
const sink = fileStream ?? new PassThrough();
|
|
22
27
|
const tee = createTeeWriter({
|
|
23
28
|
fileStream: sink,
|
|
24
|
-
textStream:
|
|
29
|
+
textStream: runtime.proc.stdout,
|
|
25
30
|
mode: "raw",
|
|
31
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
26
32
|
});
|
|
27
33
|
|
|
28
34
|
try {
|
|
29
|
-
|
|
35
|
+
// `runtime.proc.stdin` yields newline-stripped lines; re-append `\n` so the
|
|
36
|
+
// TeeWriter's `_write` line splitter frames records exactly as it did when
|
|
37
|
+
// piped the raw byte stream.
|
|
38
|
+
const lines = (async function* () {
|
|
39
|
+
for await (const line of runtime.proc.stdin) yield `${line}\n`;
|
|
40
|
+
})();
|
|
41
|
+
await pipeline(lines, tee);
|
|
42
|
+
return { ok: true };
|
|
43
|
+
} catch (error) {
|
|
44
|
+
return { ok: false, code: 1, error: error.message };
|
|
30
45
|
} finally {
|
|
31
46
|
if (fileStream) {
|
|
32
47
|
await new Promise((resolve, reject) => {
|
package/src/commands/trace.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { join, dirname } from "node:path";
|
|
2
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
2
3
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
3
4
|
import { createTraceQuery } from "../trace-query.js";
|
|
4
5
|
import { createTraceGitHub } from "../trace-github.js";
|
|
@@ -50,7 +51,9 @@ export async function runDownloadCommand(ctx) {
|
|
|
50
51
|
const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
|
|
51
52
|
if (ndjsonFile) {
|
|
52
53
|
const ndjsonPath = join(result.dir, ndjsonFile);
|
|
53
|
-
const collector = createTraceCollector(
|
|
54
|
+
const collector = createTraceCollector({
|
|
55
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
56
|
+
});
|
|
54
57
|
for (const line of runtime.fsSync
|
|
55
58
|
.readFileSync(ndjsonPath, "utf8")
|
|
56
59
|
.split("\n")) {
|
|
@@ -325,7 +328,9 @@ function loadTrace(runtime, file) {
|
|
|
325
328
|
// Not valid JSON — fall through to NDJSON.
|
|
326
329
|
}
|
|
327
330
|
|
|
328
|
-
const collector = createTraceCollector(
|
|
331
|
+
const collector = createTraceCollector({
|
|
332
|
+
now: () => isoTimestamp(runtime.clock.now()),
|
|
333
|
+
});
|
|
329
334
|
for (const line of content.split("\n")) {
|
|
330
335
|
collector.addLine(line);
|
|
331
336
|
}
|
package/src/discusser.js
CHANGED
|
@@ -226,8 +226,10 @@ export function createDiscusser({
|
|
|
226
226
|
callbackUrl,
|
|
227
227
|
inboxUrl,
|
|
228
228
|
correlationId,
|
|
229
|
+
runtime,
|
|
229
230
|
}) {
|
|
230
231
|
if (!redactor) throw new Error("redactor is required");
|
|
232
|
+
if (!runtime) throw new Error("runtime is required");
|
|
231
233
|
const resolvedLeadCwd = resolve(leadCwd ?? ".");
|
|
232
234
|
const resolvedProfilesDir =
|
|
233
235
|
profilesDir ?? resolve(resolvedLeadCwd, ".claude/agents");
|
|
@@ -326,6 +328,7 @@ export function createDiscusser({
|
|
|
326
328
|
profile: config.agentProfile,
|
|
327
329
|
profilesDir: resolvedProfilesDir,
|
|
328
330
|
trailer: agentTrailer,
|
|
331
|
+
runtime,
|
|
329
332
|
}),
|
|
330
333
|
redactor,
|
|
331
334
|
});
|
|
@@ -358,6 +361,7 @@ export function createDiscusser({
|
|
|
358
361
|
profile: leadProfile,
|
|
359
362
|
profilesDir: resolvedProfilesDir,
|
|
360
363
|
trailer: DISCUSS_SYSTEM_PROMPT,
|
|
364
|
+
runtime,
|
|
361
365
|
}),
|
|
362
366
|
redactor,
|
|
363
367
|
});
|
package/src/facilitator.js
CHANGED
|
@@ -109,8 +109,10 @@ export function createFacilitator({
|
|
|
109
109
|
profilesDir,
|
|
110
110
|
taskAmend,
|
|
111
111
|
redactor,
|
|
112
|
+
runtime,
|
|
112
113
|
}) {
|
|
113
114
|
if (!redactor) throw new Error("redactor is required");
|
|
115
|
+
if (!runtime) throw new Error("runtime is required");
|
|
114
116
|
const resolvedProfilesDir =
|
|
115
117
|
profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
|
|
116
118
|
const ctx = createOrchestrationContext();
|
|
@@ -151,6 +153,7 @@ export function createFacilitator({
|
|
|
151
153
|
profile: config.agentProfile,
|
|
152
154
|
profilesDir: resolvedProfilesDir,
|
|
153
155
|
trailer: agentTrailer,
|
|
156
|
+
runtime,
|
|
154
157
|
}),
|
|
155
158
|
redactor,
|
|
156
159
|
});
|
|
@@ -187,6 +190,7 @@ export function createFacilitator({
|
|
|
187
190
|
profile: facilitatorProfile,
|
|
188
191
|
profilesDir: resolvedProfilesDir,
|
|
189
192
|
trailer: FACILITATOR_SYSTEM_PROMPT,
|
|
193
|
+
runtime,
|
|
190
194
|
}),
|
|
191
195
|
redactor,
|
|
192
196
|
});
|
package/src/judge.js
CHANGED
|
@@ -167,17 +167,20 @@ export function createJudge({
|
|
|
167
167
|
judgeProfile,
|
|
168
168
|
profilesDir,
|
|
169
169
|
taskAmend,
|
|
170
|
+
runtime,
|
|
170
171
|
}) {
|
|
171
172
|
if (!cwd) throw new Error("cwd is required");
|
|
172
173
|
if (!query) throw new Error("query is required");
|
|
173
174
|
if (!output) throw new Error("output is required");
|
|
174
175
|
if (!redactor) throw new Error("redactor is required");
|
|
176
|
+
if (!runtime) throw new Error("runtime is required");
|
|
175
177
|
|
|
176
178
|
const resolvedProfilesDir = profilesDir ?? resolve(cwd, ".claude/agents");
|
|
177
179
|
const systemPrompt = judgeProfile
|
|
178
180
|
? composeProfilePrompt(judgeProfile, {
|
|
179
181
|
profilesDir: resolvedProfilesDir,
|
|
180
182
|
trailer: JUDGE_SYSTEM_PROMPT,
|
|
183
|
+
runtime,
|
|
181
184
|
})
|
|
182
185
|
: {
|
|
183
186
|
type: "preset",
|