@forwardimpact/libeval 0.1.51 → 0.1.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * Task-family loader. A task family is a directory under
3
3
  * <root>/
4
4
  * apm.lock.yaml
5
- * .claude/ # pre-staged skills + agents (P1)
5
+ * .claude/ # pre-staged skills + agents
6
6
  * tasks/<task_name>/
7
7
  * agent.task.md
8
8
  * supervisor.task.md # optional; appended to the task as supervisor context
@@ -17,45 +17,55 @@
17
17
  * a temp dir and `familyRevision` becomes `git:<sha>` of HEAD at clone time.
18
18
  * Local paths use the canonical-tree algorithm from design § Family revision
19
19
  * algorithm so the result is stable across operating systems.
20
+ *
21
+ * Filesystem and subprocess access route through the injected `runtime` bag
22
+ * (`runtime.fs` async, `runtime.subprocess.run` one-shot, `tmpdir` derived
23
+ * from `runtime.proc.env`).
20
24
  */
21
25
 
22
- import { spawn } from "node:child_process";
23
26
  import { createHash } from "node:crypto";
24
- import {
25
- access,
26
- constants,
27
- lstat,
28
- mkdtemp,
29
- readdir,
30
- readFile,
31
- realpath,
32
- } from "node:fs/promises";
33
- import { tmpdir } from "node:os";
34
27
  import { join, posix, relative, resolve, sep } from "node:path";
35
28
 
36
29
  const GIT_URL_RE = /^(git@|https?:\/\/|ssh:\/\/|git:\/\/)/;
37
30
  const SKIP_DIRS = new Set([".git", "node_modules"]);
31
+ // POSIX `X_OK` (execute permission); node's fs honours the numeric mode, so we
32
+ // avoid importing `node:fs`'s `constants` (which would light the fs smell).
33
+ const X_OK = 1;
34
+
35
+ /**
36
+ * Derive the system temp dir from the env (node's `os.tmpdir()` is itself an
37
+ * env-respecting wrapper). The runtime bag has no `os` slot by design.
38
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
39
+ * @returns {string}
40
+ */
41
+ function tmpdir(runtime) {
42
+ return runtime.proc.env.TMPDIR ?? "/tmp";
43
+ }
38
44
 
39
45
  /**
40
46
  * Load a task family from a local path or git URL.
41
47
  * @param {string} rootPathOrGitUrl
48
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
42
49
  * @returns {Promise<TaskFamily>}
43
50
  */
44
- export async function loadTaskFamily(rootPathOrGitUrl) {
51
+ export async function loadTaskFamily(rootPathOrGitUrl, runtime) {
52
+ if (!runtime) throw new Error("runtime is required");
45
53
  const isGit = GIT_URL_RE.test(rootPathOrGitUrl);
46
54
  let rootPath;
47
55
  let familyRevision;
48
56
  if (isGit) {
49
- const dir = await mkdtemp(join(tmpdir(), "fit-benchmark-family-"));
50
- await gitClone(rootPathOrGitUrl, dir);
57
+ const dir = await runtime.fs.mkdtemp(
58
+ join(tmpdir(runtime), "fit-benchmark-family-"),
59
+ );
60
+ await gitClone(runtime, rootPathOrGitUrl, dir);
51
61
  rootPath = dir;
52
- familyRevision = "git:" + (await gitHead(dir));
62
+ familyRevision = "git:" + (await gitHead(runtime, dir));
53
63
  } else {
54
64
  rootPath = resolve(rootPathOrGitUrl);
55
- familyRevision = "sha256:" + (await canonicalTreeHash(rootPath));
65
+ familyRevision = "sha256:" + (await canonicalTreeHash(runtime, rootPath));
56
66
  }
57
67
 
58
- const tasks = await discoverTasks(rootPath);
68
+ const tasks = await discoverTasks(runtime, rootPath);
59
69
 
60
70
  return {
61
71
  rootPath,
@@ -73,27 +83,30 @@ export async function loadTaskFamily(rootPathOrGitUrl) {
73
83
  * @param {TaskFamily} _family
74
84
  * @param {string} judgeProfilesDir
75
85
  * @param {string} judgeProfile
86
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
76
87
  * @returns {Promise<void>}
77
88
  */
78
89
  export async function assertJudgeProfileStaged(
79
90
  _family,
80
91
  judgeProfilesDir,
81
92
  judgeProfile,
93
+ runtime,
82
94
  ) {
83
95
  const candidate = join(judgeProfilesDir, `${judgeProfile}.md`);
84
96
  try {
85
- await access(candidate);
97
+ await runtime.fs.access(candidate);
86
98
  } catch {
87
99
  throw new Error(`judge profile not staged: ${candidate}`);
88
100
  }
89
101
  }
90
102
 
91
- async function discoverTasks(rootPath) {
103
+ async function discoverTasks(runtime, rootPath) {
104
+ const fs = runtime.fs;
92
105
  const tasksRoot = join(rootPath, "tasks");
93
106
  const tasks = [];
94
107
  let entries;
95
108
  try {
96
- entries = await readdir(tasksRoot, { withFileTypes: true });
109
+ entries = await fs.readdir(tasksRoot, { withFileTypes: true });
97
110
  } catch (e) {
98
111
  if (e.code === "ENOENT") return tasks;
99
112
  throw e;
@@ -110,11 +123,15 @@ async function discoverTasks(rootPath) {
110
123
  paths: {
111
124
  taskDir,
112
125
  instructions: join(taskDir, "agent.task.md"),
113
- supervisor: (await fileExists(supervisorPath)) ? supervisorPath : null,
114
- judge: (await fileExists(judgePath)) ? judgePath : null,
126
+ supervisor: (await fileExists(fs, supervisorPath))
127
+ ? supervisorPath
128
+ : null,
129
+ judge: (await fileExists(fs, judgePath)) ? judgePath : null,
115
130
  hooks: join(taskDir, "hooks"),
116
- preflight: (await fileExecutable(preflightPath)) ? preflightPath : null,
117
- invariants: (await fileExecutable(invariantsPath))
131
+ preflight: (await fileExecutable(fs, preflightPath))
132
+ ? preflightPath
133
+ : null,
134
+ invariants: (await fileExecutable(fs, invariantsPath))
118
135
  ? invariantsPath
119
136
  : null,
120
137
  specs: join(taskDir, "specs"),
@@ -126,18 +143,18 @@ async function discoverTasks(rootPath) {
126
143
  return tasks;
127
144
  }
128
145
 
129
- async function fileExists(path) {
146
+ async function fileExists(fs, path) {
130
147
  try {
131
- await access(path);
148
+ await fs.access(path);
132
149
  return true;
133
150
  } catch {
134
151
  return false;
135
152
  }
136
153
  }
137
154
 
138
- async function fileExecutable(path) {
155
+ async function fileExecutable(fs, path) {
139
156
  try {
140
- await access(path, constants.X_OK);
157
+ await fs.access(path, X_OK);
141
158
  return true;
142
159
  } catch {
143
160
  return false;
@@ -151,16 +168,18 @@ async function fileExecutable(path) {
151
168
  * sort by NFC-normalised POSIX-style root-relative path
152
169
  * row = <rel-path>\0<hex-sha256>\n
153
170
  * sha256(concat(rows))
171
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
154
172
  * @param {string} rootPath
155
173
  * @returns {Promise<string>} hex digest
156
174
  */
157
- async function canonicalTreeHash(rootPath) {
158
- const real = await realpath(rootPath);
175
+ async function canonicalTreeHash(runtime, rootPath) {
176
+ const fs = runtime.fs;
177
+ const real = await fs.realpath(rootPath);
159
178
  const rows = [];
160
- for await (const filePath of walkFiles(real)) {
179
+ for await (const filePath of walkFiles(fs, real)) {
161
180
  const rel = toPosix(relative(real, filePath)).normalize("NFC");
162
- const target = await realpath(filePath);
163
- const bytes = await readFile(target);
181
+ const target = await fs.realpath(filePath);
182
+ const bytes = await fs.readFile(target);
164
183
  const hex = createHash("sha256").update(bytes).digest("hex");
165
184
  rows.push({ rel, hex });
166
185
  }
@@ -170,15 +189,15 @@ async function canonicalTreeHash(rootPath) {
170
189
  return acc.digest("hex");
171
190
  }
172
191
 
173
- async function* walkFiles(dir) {
174
- const entries = await readdir(dir, { withFileTypes: true });
192
+ async function* walkFiles(fs, dir) {
193
+ const entries = await fs.readdir(dir, { withFileTypes: true });
175
194
  for (const entry of entries) {
176
195
  const full = join(dir, entry.name);
177
196
  if (entry.isDirectory()) {
178
197
  if (SKIP_DIRS.has(entry.name)) continue;
179
- yield* walkFiles(full);
198
+ yield* walkFiles(fs, full);
180
199
  } else if (entry.isSymbolicLink()) {
181
- const resolvedFile = await resolveSymlinkToFile(full);
200
+ const resolvedFile = await resolveSymlinkToFile(fs, full);
182
201
  if (resolvedFile) yield full;
183
202
  } else if (entry.isFile()) {
184
203
  yield full;
@@ -190,12 +209,12 @@ async function* walkFiles(dir) {
190
209
  * Return the resolved path if `linkPath` is a symlink to a regular file.
191
210
  * Returns null for dangling symlinks or links to non-file targets.
192
211
  */
193
- async function resolveSymlinkToFile(linkPath) {
194
- const st = await lstat(linkPath);
212
+ async function resolveSymlinkToFile(fs, linkPath) {
213
+ const st = await fs.lstat(linkPath);
195
214
  if (!st.isSymbolicLink()) return null;
196
215
  try {
197
- const resolved = await realpath(linkPath);
198
- const tstat = await lstat(resolved);
216
+ const resolved = await fs.realpath(linkPath);
217
+ const tstat = await fs.lstat(resolved);
199
218
  return tstat.isFile() ? resolved : null;
200
219
  } catch {
201
220
  return null;
@@ -207,32 +226,24 @@ function toPosix(p) {
207
226
  return p.split(sep).join(posix.sep);
208
227
  }
209
228
 
210
- async function gitClone(url, dir) {
211
- await run("git", ["clone", "--depth", "1", url, dir]);
229
+ async function gitClone(runtime, url, dir) {
230
+ await git(runtime, ["clone", "--depth", "1", url, dir]);
212
231
  }
213
232
 
214
- async function gitHead(dir) {
215
- const out = await run("git", ["-C", dir, "rev-parse", "HEAD"]);
233
+ async function gitHead(runtime, dir) {
234
+ const out = await git(runtime, ["-C", dir, "rev-parse", "HEAD"]);
216
235
  return out.trim();
217
236
  }
218
237
 
219
- function run(cmd, args) {
220
- return new Promise((res, rej) => {
221
- const child = spawn(cmd, args, { stdio: ["ignore", "pipe", "pipe"] });
222
- let stdout = "";
223
- let stderr = "";
224
- child.stdout.on("data", (d) => {
225
- stdout += d.toString();
226
- });
227
- child.stderr.on("data", (d) => {
228
- stderr += d.toString();
229
- });
230
- child.on("error", rej);
231
- child.on("close", (code) => {
232
- if (code === 0) res(stdout);
233
- else rej(new Error(`${cmd} ${args.join(" ")} exited ${code}: ${stderr}`));
234
- });
235
- });
238
+ async function git(runtime, args) {
239
+ const { stdout, stderr, exitCode } = await runtime.subprocess.run(
240
+ "git",
241
+ args,
242
+ );
243
+ if (exitCode !== 0) {
244
+ throw new Error(`git ${args.join(" ")} exited ${exitCode}: ${stderr}`);
245
+ }
246
+ return stdout;
236
247
  }
237
248
 
238
249
  /**
@@ -5,16 +5,17 @@
5
5
  *
6
6
  * The Workdir handle threads `cwd`, `port`, `pgid`, and trace paths through
7
7
  * runAgent → invariants → judge → teardown.
8
+ *
9
+ * Filesystem, subprocess, clock, and process-signal access all route through
10
+ * the injected `runtime` bag. Only raw TCP plumbing (`node:net`) stays direct —
11
+ * it is not an ambient-dependency smell and the runtime bag models no socket
12
+ * surface.
8
13
  */
9
14
 
10
- import { spawn } from "node:child_process";
11
- import { cp, mkdir } from "node:fs/promises";
12
15
  import { createServer } from "node:net";
13
16
  import { connect } from "node:net";
14
17
  import { join } from "node:path";
15
18
 
16
- import { createDefaultRuntime } from "@forwardimpact/libutil/runtime";
17
-
18
19
  import { loadEnv } from "./env-loader.js";
19
20
 
20
21
  const DEFAULT_TERM_GRACE_MS = 5_000;
@@ -39,6 +40,8 @@ export class WorkdirManager {
39
40
  * @param {object} deps
40
41
  * @param {string} deps.stagingDir - Output of `installApm(...)`.
41
42
  * @param {string} deps.runOutputDir - Root run-output directory (parent of `runs/`).
43
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
44
+ * Ambient collaborators; uses `fs`, `subprocess`, `clock`, `proc`.
42
45
  */
43
46
  constructor({
44
47
  stagingDir,
@@ -49,14 +52,12 @@ export class WorkdirManager {
49
52
  }) {
50
53
  if (!stagingDir) throw new Error("stagingDir is required");
51
54
  if (!runOutputDir) throw new Error("runOutputDir is required");
55
+ if (!runtime) throw new Error("runtime is required");
52
56
  this.stagingDir = stagingDir;
53
57
  this.runOutputDir = runOutputDir;
54
58
  this.termGraceMs = termGraceMs ?? DEFAULT_TERM_GRACE_MS;
55
59
  this.familyRootPath = familyRootPath ?? null;
56
- // `loadEnv` is the only collaborator routed through the runtime today; the
57
- // rest of this manager still uses raw streaming/net/process-group APIs the
58
- // runtime surface does not yet cover.
59
- this.runtime = runtime ?? null;
60
+ this.runtime = runtime;
60
61
  }
61
62
 
62
63
  /**
@@ -66,36 +67,39 @@ export class WorkdirManager {
66
67
  * @returns {Promise<Workdir>}
67
68
  */
68
69
  async start(task, runIndex) {
70
+ const fs = this.runtime.fs;
69
71
  const slug = task.id.replace("/", "__");
70
72
  const runDir = join(this.runOutputDir, "runs", slug, String(runIndex));
71
73
  const cwd = join(runDir, "cwd");
72
- await mkdir(cwd, { recursive: true });
74
+ await fs.mkdir(cwd, { recursive: true });
73
75
 
74
- await cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
75
- if (e.code !== "ENOENT") throw e;
76
- });
77
- await cp(task.paths.specs, join(cwd, "specs"), {
78
- recursive: true,
79
- }).catch((e) => {
76
+ await fs.cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
80
77
  if (e.code !== "ENOENT") throw e;
81
78
  });
82
- await cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
83
- recursive: true,
84
- });
85
- await cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
79
+ await fs
80
+ .cp(task.paths.specs, join(cwd, "specs"), {
81
+ recursive: true,
82
+ })
83
+ .catch((e) => {
84
+ if (e.code !== "ENOENT") throw e;
85
+ });
86
+ await fs.cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
86
87
  recursive: true,
87
- }).catch((e) => {
88
- if (e.code !== "ENOENT") throw e;
89
88
  });
89
+ await fs
90
+ .cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
91
+ recursive: true,
92
+ })
93
+ .catch((e) => {
94
+ if (e.code !== "ENOENT") throw e;
95
+ });
90
96
 
91
97
  const envDirs = [
92
98
  ...(this.familyRootPath ? [this.familyRootPath] : []),
93
99
  ...(task.paths.taskDir ? [task.paths.taskDir] : []),
94
100
  ];
95
101
  const envNames =
96
- envDirs.length > 0
97
- ? await loadEnv(envDirs, cwd, this.runtime ?? createDefaultRuntime())
98
- : [];
102
+ envDirs.length > 0 ? await loadEnv(envDirs, cwd, this.runtime) : [];
99
103
 
100
104
  const port = await allocatePort();
101
105
  const agentTracePath = join(runDir, "agent.ndjson");
@@ -103,7 +107,7 @@ export class WorkdirManager {
103
107
  const judgeTracePath = join(runDir, "judge.ndjson");
104
108
 
105
109
  const preflight = task.paths.preflight
106
- ? await runPreflight(task.paths.preflight, cwd, port)
110
+ ? await runPreflight(this.runtime, task.paths.preflight, cwd, port)
107
111
  : { pgid: 0 };
108
112
 
109
113
  return {
@@ -126,81 +130,71 @@ export class WorkdirManager {
126
130
  * @returns {Promise<{portFree: boolean, descendants: number}>}
127
131
  */
128
132
  async teardown(workdir) {
133
+ const { proc, clock } = this.runtime;
129
134
  if (workdir.pgid && workdir.pgid > 0) {
130
135
  try {
131
- process.kill(-workdir.pgid, "SIGTERM");
136
+ proc.kill(-workdir.pgid, "SIGTERM");
132
137
  } catch {
133
138
  // Process group already gone — fine.
134
139
  }
135
- await sleep(this.termGraceMs);
140
+ await clock.sleep(this.termGraceMs);
136
141
  try {
137
- process.kill(-workdir.pgid, "SIGKILL");
142
+ proc.kill(-workdir.pgid, "SIGKILL");
138
143
  } catch {
139
144
  // Already exited.
140
145
  }
141
146
  // Poll briefly until the process group is empty — SIGKILL returns
142
147
  // before the kernel finishes reaping descendants.
143
148
  await waitFor(
144
- async () => (await countDescendants(workdir.pgid)) === 0,
149
+ this.runtime,
150
+ async () => (await countDescendants(this.runtime, workdir.pgid)) === 0,
145
151
  2_000,
146
152
  );
147
153
  }
148
154
  const portFree = await isPortFree(workdir.port);
149
- const descendants = await countDescendants(workdir.pgid);
155
+ const descendants = await countDescendants(this.runtime, workdir.pgid);
150
156
  return { portFree, descendants };
151
157
  }
152
158
  }
153
159
 
154
160
  /**
155
161
  * Spawn preflight. Stays detached so we can SIGTERM the whole process group.
162
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
156
163
  * @param {string} script
157
164
  * @param {string} cwd - Agent CWD passed via $WORKDIR.
158
165
  * @param {number} port - Free TCP port passed via $PORT.
159
166
  * @returns {Promise<{pgid: number, error?: {phase: string, message: string, exitCode: number}}>}
160
167
  */
161
- function runPreflight(script, cwd, port) {
162
- return new Promise((res, rej) => {
163
- let stderr = "";
164
- const child = spawn(script, [], {
165
- cwd,
166
- env: { ...process.env, WORKDIR: cwd, PORT: String(port) },
167
- detached: true,
168
- stdio: ["ignore", "pipe", "pipe"],
169
- });
170
- if (child.pid === undefined) {
171
- rej(new Error(`failed to spawn preflight: ${script}`));
172
- return;
173
- }
174
- const pgid = child.pid;
175
- child.stderr.on("data", (d) => {
176
- stderr += d.toString();
177
- });
178
- child.on("error", (e) => {
179
- res({
180
- pgid,
181
- error: {
182
- phase: "preflight",
183
- message: `preflight failed to spawn: ${e.message}`,
184
- exitCode: -1,
185
- },
186
- });
187
- });
188
- child.on("exit", (code, signal) => {
189
- if (code === 0) {
190
- res({ pgid });
191
- return;
192
- }
193
- const message = stderr.trim() || `preflight exited with signal ${signal}`;
194
- res({
195
- pgid,
196
- error: {
197
- phase: "preflight",
198
- message,
199
- exitCode: typeof code === "number" ? code : -1,
200
- },
201
- });
202
- });
168
+ async function runPreflight(runtime, script, cwd, port) {
169
+ const child = runtime.subprocess.spawn(script, [], {
170
+ cwd,
171
+ env: { ...runtime.proc.env, WORKDIR: cwd, PORT: String(port) },
172
+ detached: true,
173
+ stdio: ["ignore", "pipe", "pipe"],
203
174
  });
175
+ if (child.pid === undefined) {
176
+ throw new Error(`failed to spawn preflight: ${script}`);
177
+ }
178
+ const pgid = child.pid;
179
+ let stderr = "";
180
+ const drainStdout = (async () => {
181
+ for await (const _chunk of child.stdout) {
182
+ // discard
183
+ }
184
+ })();
185
+ for await (const chunk of child.stderr) stderr += chunk.toString();
186
+ await drainStdout;
187
+ const code = await child.exitCode;
188
+ if (code === 0) return { pgid };
189
+ const message = stderr.trim() || `preflight exited with code ${code}`;
190
+ return {
191
+ pgid,
192
+ error: {
193
+ phase: "preflight",
194
+ message,
195
+ exitCode: typeof code === "number" ? code : -1,
196
+ },
197
+ };
204
198
  }
205
199
 
206
200
  function allocatePort() {
@@ -236,37 +230,35 @@ function isPortFree(port) {
236
230
  });
237
231
  }
238
232
 
239
- function countDescendants(pgid) {
240
- if (!pgid || pgid <= 0) return Promise.resolve(0);
241
- return new Promise((res) => {
242
- const child = spawn("ps", ["-o", "pid=", "-g", String(pgid)], {
233
+ async function countDescendants(runtime, pgid) {
234
+ if (!pgid || pgid <= 0) return 0;
235
+ const child = runtime.subprocess.spawn(
236
+ "ps",
237
+ ["-o", "pid=", "-g", String(pgid)],
238
+ {
243
239
  stdio: ["ignore", "pipe", "ignore"],
244
- });
245
- let out = "";
246
- child.stdout.on("data", (d) => {
247
- out += d.toString();
248
- });
249
- child.on("error", () => res(0));
250
- child.on("close", () => {
251
- const pids = out
252
- .split("\n")
253
- .map((s) => s.trim())
254
- .filter(Boolean)
255
- .filter((s) => Number(s) !== process.pid);
256
- res(pids.length);
257
- });
258
- });
259
- }
260
-
261
- function sleep(ms) {
262
- return new Promise((r) => setTimeout(r, ms));
240
+ },
241
+ );
242
+ let out = "";
243
+ try {
244
+ for await (const chunk of child.stdout) out += chunk.toString();
245
+ await child.exitCode;
246
+ } catch {
247
+ return 0;
248
+ }
249
+ const pids = out
250
+ .split("\n")
251
+ .map((s) => s.trim())
252
+ .filter(Boolean)
253
+ .filter((s) => Number(s) !== runtime.proc.pid);
254
+ return pids.length;
263
255
  }
264
256
 
265
- async function waitFor(predicate, timeoutMs) {
266
- const deadline = Date.now() + timeoutMs;
267
- while (Date.now() < deadline) {
257
+ async function waitFor(runtime, predicate, timeoutMs) {
258
+ const deadline = runtime.clock.now() + timeoutMs;
259
+ while (runtime.clock.now() < deadline) {
268
260
  if (await predicate()) return true;
269
- await sleep(50);
261
+ await runtime.clock.sleep(50);
270
262
  }
271
263
  return false;
272
264
  }
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * `fit-benchmark invariants` — check a single task's invariants against a
3
- * post-run workdir directory without invoking an agent (P6/P7). Useful for
3
+ * post-run workdir directory without invoking an agent. Useful for
4
4
  * re-checking an agent's output against revised grading material.
5
5
  */
6
6
 
@@ -27,7 +27,7 @@ export async function runBenchmarkInvariantsCommand(ctx) {
27
27
  if (!workdirArg)
28
28
  return { ok: false, code: 1, error: "--workdir is required" };
29
29
 
30
- const family = await loadTaskFamily(familyInput);
30
+ const family = await loadTaskFamily(familyInput, runtime);
31
31
  const task = family.tasks().find((t) => t.id === taskId);
32
32
  if (!task)
33
33
  return { ok: false, code: 1, error: `task not found in family: ${taskId}` };
@@ -36,7 +36,7 @@ export async function runBenchmarkInvariantsCommand(ctx) {
36
36
  const cwd = join(runDir, "cwd");
37
37
  const port = await allocatePort();
38
38
 
39
- const invariants = await runInvariants(task, { cwd, port, runDir });
39
+ const invariants = await runInvariants(task, { cwd, port, runDir }, runtime);
40
40
  const record = {
41
41
  taskId: task.id,
42
42
  invariants,
@@ -40,6 +40,7 @@ export async function runBenchmarkReportCommand(ctx) {
40
40
  inputDir: resolve(inputDir),
41
41
  kValues,
42
42
  includeRuns: format === "text",
43
+ runtime,
43
44
  });
44
45
  if (format === "text") {
45
46
  runtime.proc.stdout.write(renderTextReport(report, kValues) + "\n");
@@ -33,7 +33,7 @@ export async function runBenchmarkRunCommand(ctx) {
33
33
  delete runtime.proc.env.NODE_EXTRA_CA_CERTS;
34
34
 
35
35
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
36
- const runner = createBenchmarkRunner({ ...opts, query });
36
+ const runner = createBenchmarkRunner({ ...opts, query, runtime });
37
37
 
38
38
  let anyFail = false;
39
39
  for await (const record of runner.run()) {
@@ -1,4 +1,3 @@
1
- import { closeSync, openSync, readSync } from "node:fs";
2
1
  import { join } from "node:path";
3
2
 
4
3
  const FIRST_LINE_CAP = 64 * 1024;
@@ -6,25 +5,25 @@ const FIRST_LINE_CAP = 64 * 1024;
6
5
  /**
7
6
  * Read the first newline-terminated line of a file, bounded to the first
8
7
  * {@link FIRST_LINE_CAP} bytes. Trace `.ndjson` files can be many MB; the
9
- * Step 2.6 meta header is always small, so a bounded `readSync` avoids
10
- * loading whole files into memory just to inspect the header. This uses
11
- * `node:fs` directly because the `runtime.fsSync` surface exposes no
12
- * positional `openSync`/`readSync` — the file is grandfathered for
13
- * `import:fs` in `check-ambient-deps.deny.yml` until that seam exists.
8
+ * Step 2.6 meta header is always small, so a bounded positional read avoids
9
+ * loading whole files into memory just to inspect the header. The positional
10
+ * `openSync`/`readSync`/`closeSync` trio is read off the injected
11
+ * `runtime.fsSync` surface.
14
12
  *
13
+ * @param {object} fsSync - Sync filesystem surface (`runtime.fsSync`).
15
14
  * @param {string} path
16
15
  * @returns {string}
17
16
  */
18
- function readFirstLine(path) {
19
- const fd = openSync(path, "r");
17
+ function readFirstLine(fsSync, path) {
18
+ const fd = fsSync.openSync(path, "r");
20
19
  try {
21
20
  const buf = Buffer.alloc(FIRST_LINE_CAP);
22
- const bytes = readSync(fd, buf, 0, buf.length, 0);
21
+ const bytes = fsSync.readSync(fd, buf, 0, buf.length, 0);
23
22
  const text = buf.toString("utf8", 0, bytes);
24
23
  const nl = text.indexOf("\n");
25
24
  return nl === -1 ? text : text.slice(0, nl);
26
25
  } finally {
27
- closeSync(fd);
26
+ fsSync.closeSync(fd);
28
27
  }
29
28
  }
30
29
 
@@ -53,7 +52,7 @@ export function findTracesByDiscussion(dir, discussionId, fsSync) {
53
52
  const path = join(dir, entry);
54
53
  let firstLine;
55
54
  try {
56
- firstLine = readFirstLine(path);
55
+ firstLine = readFirstLine(fsSync, path);
57
56
  } catch {
58
57
  continue;
59
58
  }
@@ -1,5 +1,5 @@
1
- import { createWriteStream } from "node:fs";
2
1
  import { resolve } from "node:path";
2
+ import { isoTimestamp } from "@forwardimpact/libutil";
3
3
  import { createDiscusser } from "../discusser.js";
4
4
  import { createRedactor } from "../redaction.js";
5
5
  import { createTeeWriter } from "../tee-writer.js";
@@ -80,13 +80,14 @@ export async function runDiscussCommand(ctx) {
80
80
  const redactor = createRedactor({ runtime });
81
81
 
82
82
  const fileStream = opts.outputPath
83
- ? createWriteStream(opts.outputPath)
83
+ ? runtime.fs.createWriteStream(opts.outputPath)
84
84
  : null;
85
85
  const output = fileStream
86
86
  ? createTeeWriter({
87
87
  fileStream,
88
88
  textStream: runtime.proc.stdout,
89
89
  mode: "supervised",
90
+ now: () => isoTimestamp(runtime.clock.now()),
90
91
  })
91
92
  : runtime.proc.stdout;
92
93