@forwardimpact/libeval 0.1.51 → 0.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-eval.js CHANGED
@@ -15,19 +15,6 @@ import { runFacilitateCommand } from "../src/commands/facilitate.js";
15
15
  import { runDiscussCommand } from "../src/commands/discuss.js";
16
16
  import { runCallbackCommand } from "../src/commands/callback.js";
17
17
 
18
- // `tee` streams stdin→stdout via Node's `pipeline`, which needs real stream
19
- // objects the runtime surface does not expose; it keeps the legacy
20
- // `(values, args)` signature and this adapter bridges it into dispatch.
21
- async function teeHandler(ctx) {
22
- const out = ctx.args.output;
23
- try {
24
- await runTeeCommand(ctx.options, out ? [out] : []);
25
- return { ok: true };
26
- } catch (error) {
27
- return { ok: false, code: 1, error: error.message };
28
- }
29
- }
30
-
31
18
  // `bun build --compile` injects FIT_EVAL_VERSION via --define, eliminating
32
19
  // the readFileSync branch in the compiled binary (which would ENOENT against
33
20
  // the bunfs virtual mount). Source execution falls through to package.json.
@@ -249,7 +236,7 @@ const definition = {
249
236
  name: "tee",
250
237
  args: ["output"],
251
238
  argsUsage: "[output.ndjson]",
252
- handler: teeHandler,
239
+ handler: runTeeCommand,
253
240
  description:
254
241
  "Stream readable text to stdout while saving raw NDJSON to a file",
255
242
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.51",
3
+ "version": "0.1.52",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -4,26 +4,25 @@
4
4
  * staging directory, and computes the manifest fingerprint from the lockfile.
5
5
  * Per-task copy happens later in WorkdirManager.
6
6
  *
7
- * The class takes a `spawn` seam so tests can substitute a fake child process
8
- * without ever shelling out to a real `apm` binary. See `createApmInstaller`
9
- * for the real-dependency wiring; `installApm` is a thin free-function wrapper
10
- * for callers that don't need to inject anything.
7
+ * Subprocess and filesystem access route through the injected `runtime` bag
8
+ * (`runtime.subprocess.spawn` for the streaming `apm` child, `runtime.fs` for
9
+ * the async staging copies). See `createApmInstaller` for the real-dependency
10
+ * wiring; `installApm` is a thin free-function wrapper.
11
11
  */
12
12
 
13
- import { spawn as nodeSpawn } from "node:child_process";
14
13
  import { createHash } from "node:crypto";
15
- import { access, cp, mkdir, readFile, rm } from "node:fs/promises";
16
14
  import { join } from "node:path";
17
15
 
18
16
  /** Installs apm and stages `.claude/` for a task family. */
19
17
  export class ApmInstaller {
20
18
  /**
21
- * @param {object} [deps]
22
- * @param {typeof nodeSpawn} [deps.spawn] - Spawn seam (defaults to
23
- * `node:child_process` spawn). Tests inject a fake to avoid shelling out.
19
+ * @param {object} deps
20
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
21
+ * Ambient collaborators; uses `subprocess.spawn` and `fs`.
24
22
  */
25
- constructor({ spawn } = {}) {
26
- this.spawn = spawn ?? nodeSpawn;
23
+ constructor({ runtime }) {
24
+ if (!runtime) throw new Error("runtime is required");
25
+ this.runtime = runtime;
27
26
  }
28
27
 
29
28
  /**
@@ -32,19 +31,21 @@ export class ApmInstaller {
32
31
  * @returns {Promise<{stagingDir: string, skillSetHash: string, judgeProfilesDir: string}>}
33
32
  */
34
33
  async install(family, outputDir) {
34
+ const fs = this.runtime.fs;
35
35
  const stagingDir = join(outputDir, ".apm-staging");
36
36
  const stagedClaude = join(stagingDir, ".claude");
37
37
  const sourceClaude = join(family.rootPath, ".claude");
38
38
  const apmYml = join(family.rootPath, "apm.yml");
39
39
 
40
- const hasApm = await access(apmYml)
40
+ const hasApm = await fs
41
+ .access(apmYml)
41
42
  .then(() => true)
42
43
  .catch(() => false);
43
44
 
44
45
  if (hasApm) {
45
46
  await this.#runApmInstall(family.rootPath);
46
47
  try {
47
- await access(sourceClaude);
48
+ await fs.access(sourceClaude);
48
49
  } catch {
49
50
  throw new Error(
50
51
  `apm install did not produce .claude/ at ${sourceClaude}; check the family's apm.yml`,
@@ -52,14 +53,15 @@ export class ApmInstaller {
52
53
  }
53
54
  }
54
55
 
55
- await rm(stagingDir, { recursive: true, force: true });
56
- const hasClaudeDir = await access(sourceClaude)
56
+ await fs.rm(stagingDir, { recursive: true, force: true });
57
+ const hasClaudeDir = await fs
58
+ .access(sourceClaude)
57
59
  .then(() => true)
58
60
  .catch(() => false);
59
61
  if (hasClaudeDir) {
60
- await cp(sourceClaude, stagedClaude, { recursive: true });
62
+ await fs.cp(sourceClaude, stagedClaude, { recursive: true });
61
63
  } else {
62
- await mkdir(stagedClaude, { recursive: true });
64
+ await fs.mkdir(stagedClaude, { recursive: true });
63
65
  }
64
66
 
65
67
  // Stage the family-local judge profile outside .claude/ so it is available
@@ -67,15 +69,15 @@ export class ApmInstaller {
67
69
  const judgeSource = join(family.rootPath, "judge.md");
68
70
  const judgeProfilesDir = join(stagingDir, "judge-profiles");
69
71
  try {
70
- await access(judgeSource);
71
- await mkdir(judgeProfilesDir, { recursive: true });
72
- await cp(judgeSource, join(judgeProfilesDir, "judge.md"));
72
+ await fs.access(judgeSource);
73
+ await fs.mkdir(judgeProfilesDir, { recursive: true });
74
+ await fs.cp(judgeSource, join(judgeProfilesDir, "judge.md"));
73
75
  } catch {}
74
76
 
75
77
  const lockPath = join(family.rootPath, "apm.lock.yaml");
76
78
  let skillSetHash = "";
77
79
  try {
78
- const lockBytes = await readFile(lockPath);
80
+ const lockBytes = await fs.readFile(lockPath);
79
81
  skillSetHash =
80
82
  "sha256:" +
81
83
  createHash("sha256").update(normalizeLf(lockBytes)).digest("hex");
@@ -86,25 +88,26 @@ export class ApmInstaller {
86
88
  return { stagingDir, skillSetHash, judgeProfilesDir };
87
89
  }
88
90
 
89
- #runApmInstall(cwd) {
90
- return new Promise((res, rej) => {
91
- const child = this.spawn("apm", ["install", "--target", "claude"], {
92
- cwd,
93
- stdio: ["ignore", "pipe", "pipe"],
94
- });
95
- let stderr = "";
96
- child.stdout.on("data", () => {});
97
- child.stderr.on("data", (d) => {
98
- stderr += d.toString();
99
- });
100
- child.on("error", (e) => {
101
- rej(new Error(`failed to spawn apm: ${e.message}`));
102
- });
103
- child.on("close", (code) => {
104
- if (code === 0) res();
105
- else rej(new Error(`apm install exited ${code}: ${stderr}`));
106
- });
107
- });
91
+ async #runApmInstall(cwd) {
92
+ const child = this.runtime.subprocess.spawn(
93
+ "apm",
94
+ ["install", "--target", "claude"],
95
+ { cwd, stdio: ["ignore", "pipe", "pipe"] },
96
+ );
97
+ // Drain stdout concurrently so the child never blocks on backpressure;
98
+ // capture stderr for the failure message.
99
+ let stderr = "";
100
+ const drainStdout = (async () => {
101
+ for await (const _chunk of child.stdout) {
102
+ // discard
103
+ }
104
+ })();
105
+ for await (const chunk of child.stderr) stderr += chunk.toString();
106
+ await drainStdout;
107
+ const code = await child.exitCode;
108
+ if (code !== 0) {
109
+ throw new Error(`apm install exited ${code}: ${stderr}`);
110
+ }
108
111
  }
109
112
  }
110
113
 
@@ -119,7 +122,7 @@ function normalizeLf(buf) {
119
122
 
120
123
  /**
121
124
  * Factory function — wires real dependencies.
122
- * @param {ConstructorParameters<typeof ApmInstaller>[0]} [deps]
125
+ * @param {ConstructorParameters<typeof ApmInstaller>[0]} deps
123
126
  * @returns {ApmInstaller}
124
127
  */
125
128
  export function createApmInstaller(deps) {
@@ -127,10 +130,11 @@ export function createApmInstaller(deps) {
127
130
  }
128
131
 
129
132
  /**
130
- * Free-function shorthand for callers that don't need to inject a spawn seam.
133
+ * Free-function shorthand for callers that thread a runtime bag.
131
134
  * @param {import("./task-family.js").TaskFamily} family
132
135
  * @param {string} outputDir
136
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
133
137
  */
134
- export function installApm(family, outputDir) {
135
- return new ApmInstaller().install(family, outputDir);
138
+ export function installApm(family, outputDir, runtime) {
139
+ return new ApmInstaller({ runtime }).install(family, outputDir);
136
140
  }
@@ -2,16 +2,12 @@
2
2
  * Invariants — runs `<task.paths.hooks>/invariants.sh` from the template path
3
3
  * against the post-run agent CWD. The exit code is authoritative for the
4
4
  * verdict; structured per-check rows arrive on fd 3 (`$RESULTS_FD=3`) as NDJSON.
5
+ *
6
+ * Subprocess access flows through `runtime.subprocess.spawn`; the fd-3 backing
7
+ * store and the stderr log use the sync filesystem surface (`runtime.fsSync`) —
8
+ * the only surface this module touches, per design Decision 7.
5
9
  */
6
10
 
7
- import { spawn } from "node:child_process";
8
- import {
9
- closeSync,
10
- createWriteStream,
11
- openSync,
12
- readFileSync,
13
- unlinkSync,
14
- } from "node:fs";
15
11
  import { join } from "node:path";
16
12
 
17
13
  /**
@@ -25,72 +21,64 @@ import { join } from "node:path";
25
21
  * Run the task's invariants script.
26
22
  * @param {import("./task-family.js").Task} task
27
23
  * @param {{cwd: string, port: number, runDir: string}} ctx
24
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
28
25
  * @returns {Promise<InvariantsResult>}
29
26
  */
30
- export function runInvariants(task, ctx) {
27
+ export async function runInvariants(task, ctx, runtime) {
28
+ if (!runtime) throw new Error("runtime is required");
31
29
  if (!task.paths.invariants) {
32
- return Promise.resolve({ verdict: "pass", details: [], exitCode: 0 });
30
+ return { verdict: "pass", details: [], exitCode: 0 };
33
31
  }
34
- return new Promise((res, rej) => {
35
- const script = task.paths.invariants;
36
- const stderrLog = createWriteStream(
37
- join(ctx.runDir, "invariants.stderr.log"),
38
- );
32
+ const fsSync = runtime.fsSync;
33
+ const script = task.paths.invariants;
34
+ const stderrLogPath = join(ctx.runDir, "invariants.stderr.log");
39
35
 
40
- // Bun's child_process pipe setup for fd >= 3 is racy under load (it
41
- // creates a unix socket pair and the connect() can return ENOENT). Use
42
- // a temp file as the fd-3 backing store instead — the script still
43
- // writes via `$RESULTS_FD`, but we hand it a real file descriptor.
44
- const fd3Path = join(ctx.runDir, "invariants.fd3.ndjson");
45
- let fd3File;
46
- try {
47
- fd3File = openSync(fd3Path, "w+");
48
- } catch (e) {
49
- rej(e);
50
- return;
51
- }
36
+ // Bun's child_process pipe setup for fd >= 3 is racy under load (it
37
+ // creates a unix socket pair and the connect() can return ENOENT). Use
38
+ // a temp file as the fd-3 backing store instead — the script still
39
+ // writes via `$RESULTS_FD`, but we hand it a real file descriptor.
40
+ const fd3Path = join(ctx.runDir, "invariants.fd3.ndjson");
41
+ const fd3File = fsSync.openSync(fd3Path, "w+");
52
42
 
53
- const child = spawn(script, [], {
43
+ let child;
44
+ try {
45
+ child = runtime.subprocess.spawn(script, [], {
54
46
  env: {
55
- ...process.env,
47
+ ...runtime.proc.env,
56
48
  WORKDIR: ctx.cwd,
57
49
  PORT: String(ctx.port),
58
50
  RESULTS_FD: "3",
59
51
  },
60
52
  stdio: ["inherit", "pipe", "pipe", fd3File],
61
53
  });
62
- if (child.pid === undefined) {
63
- try {
64
- closeSync(fd3File);
65
- } catch {
66
- // already closed
67
- }
68
- rej(new Error(`failed to spawn invariants script: ${script}`));
69
- return;
54
+ } catch (e) {
55
+ tryClose(fsSync, fd3File);
56
+ throw e;
57
+ }
58
+
59
+ // Drain stdout (do not require consumers to read it); capture stderr to log.
60
+ const drainStdout = (async () => {
61
+ for await (const _chunk of child.stdout) {
62
+ // discard
70
63
  }
64
+ })();
65
+ let stderr = "";
66
+ for await (const chunk of child.stderr) stderr += chunk.toString();
67
+ await drainStdout;
68
+ const code = await child.exitCode;
71
69
 
72
- child.stderr.pipe(stderrLog);
73
- // Drain stdout (do not require consumers to read it).
74
- child.stdout.on("data", () => {});
70
+ fsSync.writeFileSync(stderrLogPath, stderr);
71
+ tryClose(fsSync, fd3File);
75
72
 
76
- child.on("error", (e) => {
77
- tryClose(fd3File);
78
- rej(e);
79
- });
80
- child.on("close", (code) => {
81
- stderrLog.end();
82
- tryClose(fd3File);
83
- const raw = readAndUnlink(fd3Path);
84
- const details = [];
85
- parseFd3Buffer(raw, details);
86
- const exitCode = typeof code === "number" ? code : -1;
87
- res({
88
- verdict: exitCode === 0 ? "pass" : "fail",
89
- details,
90
- exitCode,
91
- });
92
- });
93
- });
73
+ const raw = readAndUnlink(fsSync, fd3Path);
74
+ const details = [];
75
+ parseFd3Buffer(raw, details);
76
+ const exitCode = typeof code === "number" ? code : -1;
77
+ return {
78
+ verdict: exitCode === 0 ? "pass" : "fail",
79
+ details,
80
+ exitCode,
81
+ };
94
82
  }
95
83
 
96
84
  function pushRow(line, details) {
@@ -103,23 +91,23 @@ function pushRow(line, details) {
103
91
  }
104
92
  }
105
93
 
106
- function tryClose(fd) {
94
+ function tryClose(fsSync, fd) {
107
95
  try {
108
- closeSync(fd);
96
+ fsSync.closeSync(fd);
109
97
  } catch {
110
98
  // already closed
111
99
  }
112
100
  }
113
101
 
114
- function readAndUnlink(path) {
102
+ function readAndUnlink(fsSync, path) {
115
103
  let raw = "";
116
104
  try {
117
- raw = readFileSync(path, "utf8");
105
+ raw = fsSync.readFileSync(path, "utf8");
118
106
  } catch {
119
107
  // empty
120
108
  }
121
109
  try {
122
- unlinkSync(path);
110
+ fsSync.unlinkSync(path);
123
111
  } catch {
124
112
  // best-effort cleanup
125
113
  }
@@ -21,9 +21,6 @@
21
21
  * historical run from its judge.ndjson file).
22
22
  */
23
23
 
24
- import { createReadStream, createWriteStream } from "node:fs";
25
- import { readFile } from "node:fs/promises";
26
- import { createInterface } from "node:readline";
27
24
  import { createJudge } from "../judge.js";
28
25
  import { createRedactor } from "../redaction.js";
29
26
 
@@ -45,12 +42,15 @@ import { createRedactor } from "../redaction.js";
45
42
  * @param {import("./task-family.js").Task} task
46
43
  * @param {import("./workdir.js").Workdir} workdir
47
44
  * @param {import("./invariants.js").InvariantsResult} invariants
48
- * @param {{query: Function, model: string, judgeProfile?: string, profilesDir?: string}} deps
45
+ * @param {{query: Function, model: string, judgeProfile?: string, profilesDir?: string, runtime: import("@forwardimpact/libutil/runtime").Runtime}} deps
49
46
  * @param {JudgeContext} [context]
50
47
  * @returns {Promise<JudgeVerdict>}
51
48
  */
52
49
  export async function runJudge(task, workdir, invariants, deps, context) {
53
- const template = await readFile(task.paths.judge, "utf8");
50
+ const runtime = deps.runtime;
51
+ if (!runtime) throw new Error("runtime is required");
52
+ const fs = runtime.fs;
53
+ const template = await fs.readFile(task.paths.judge, "utf8");
54
54
  const invariantsJson = JSON.stringify(invariants, null, 2);
55
55
  const taskText = template
56
56
  .replaceAll("{{INVARIANTS_RESULT}}", invariantsJson)
@@ -61,7 +61,7 @@ export async function runJudge(task, workdir, invariants, deps, context) {
61
61
  .replaceAll("{{TASK_ID}}", task.id)
62
62
  .replaceAll("{{TASK_DIR}}", workdir.cwd);
63
63
 
64
- const output = createWriteStream(workdir.judgeTracePath);
64
+ const output = fs.createWriteStream(workdir.judgeTracePath);
65
65
  const judge = createJudge({
66
66
  cwd: workdir.cwd,
67
67
  query: deps.query,
@@ -70,7 +70,8 @@ export async function runJudge(task, workdir, invariants, deps, context) {
70
70
  judgeProfile: deps.judgeProfile,
71
71
  profilesDir: deps.profilesDir,
72
72
  maxTurns: 25,
73
- redactor: createRedactor(),
73
+ redactor: createRedactor({ runtime }),
74
+ runtime,
74
75
  });
75
76
 
76
77
  let outcome;
@@ -95,13 +96,14 @@ export async function runJudge(task, workdir, invariants, deps, context) {
95
96
  * and map the verdict (`success → pass`, `failure → fail`). Preserved for
96
97
  * offline analysis; not used on the runtime happy path.
97
98
  * @param {string} tracePath
99
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
98
100
  * @returns {Promise<JudgeVerdict | null>}
99
101
  */
100
- export async function parseConcludeFromTrace(tracePath) {
101
- const stream = createReadStream(tracePath);
102
- const rl = createInterface({ input: stream, crlfDelay: Infinity });
102
+ export async function parseConcludeFromTrace(tracePath, runtime) {
103
+ if (!runtime) throw new Error("runtime is required");
104
+ const content = await runtime.fs.readFile(tracePath, "utf8");
103
105
  let last = null;
104
- for await (const line of rl) {
106
+ for (const line of content.split("\n")) {
105
107
  const candidate = extractConcludeInput(line);
106
108
  if (candidate) last = candidate;
107
109
  }
@@ -3,23 +3,22 @@
3
3
  * is present, then copies the resulting `node_modules/` into the staging
4
4
  * directory so WorkdirManager can seed each per-task CWD.
5
5
  *
6
- * Symmetric to ApmInstaller: constructor injection of `spawn` for testability,
7
- * factory function, and a free-function shorthand.
6
+ * Symmetric to ApmInstaller: the subprocess and filesystem flow through the
7
+ * injected `runtime` bag (`runtime.subprocess.spawn` + `runtime.fs`).
8
8
  */
9
9
 
10
- import { spawn as nodeSpawn } from "node:child_process";
11
- import { access, cp } from "node:fs/promises";
12
10
  import { join } from "node:path";
13
11
 
14
12
  /** Run `bun install` in the family root and stage node_modules/ for per-task CWDs. */
15
13
  export class NpmInstaller {
16
14
  /**
17
- * @param {object} [deps]
18
- * @param {typeof nodeSpawn} [deps.spawn] - Spawn seam (defaults to
19
- * `node:child_process` spawn). Tests inject a fake to avoid shelling out.
15
+ * @param {object} deps
16
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
17
+ * Ambient collaborators; uses `subprocess.spawn` and `fs`.
20
18
  */
21
- constructor({ spawn } = {}) {
22
- this.spawn = spawn ?? nodeSpawn;
19
+ constructor({ runtime }) {
20
+ if (!runtime) throw new Error("runtime is required");
21
+ this.runtime = runtime;
23
22
  }
24
23
 
25
24
  /**
@@ -28,8 +27,10 @@ export class NpmInstaller {
28
27
  * @returns {Promise<void>}
29
28
  */
30
29
  async install(family, stagingDir) {
30
+ const fs = this.runtime.fs;
31
31
  const pkgJson = join(family.rootPath, "package.json");
32
- const hasPkg = await access(pkgJson)
32
+ const hasPkg = await fs
33
+ .access(pkgJson)
33
34
  .then(() => true)
34
35
  .catch(() => false);
35
36
  if (!hasPkg) return;
@@ -38,37 +39,35 @@ export class NpmInstaller {
38
39
 
39
40
  const sourceModules = join(family.rootPath, "node_modules");
40
41
  try {
41
- await access(sourceModules);
42
+ await fs.access(sourceModules);
42
43
  } catch {
43
44
  throw new Error(
44
45
  `bun install did not produce node_modules/ at ${sourceModules}; check the family's package.json`,
45
46
  );
46
47
  }
47
48
 
48
- await cp(sourceModules, join(stagingDir, "node_modules"), {
49
+ await fs.cp(sourceModules, join(stagingDir, "node_modules"), {
49
50
  recursive: true,
50
51
  });
51
52
  }
52
53
 
53
- #runBunInstall(cwd) {
54
- return new Promise((res, rej) => {
55
- const child = this.spawn("bun", ["install"], {
56
- cwd,
57
- stdio: ["ignore", "pipe", "pipe"],
58
- });
59
- let stderr = "";
60
- child.stdout.on("data", () => {});
61
- child.stderr.on("data", (d) => {
62
- stderr += d.toString();
63
- });
64
- child.on("error", (e) => {
65
- rej(new Error(`failed to spawn bun: ${e.message}`));
66
- });
67
- child.on("close", (code) => {
68
- if (code === 0) res();
69
- else rej(new Error(`bun install exited ${code}: ${stderr}`));
70
- });
54
+ async #runBunInstall(cwd) {
55
+ const child = this.runtime.subprocess.spawn("bun", ["install"], {
56
+ cwd,
57
+ stdio: ["ignore", "pipe", "pipe"],
71
58
  });
59
+ let stderr = "";
60
+ const drainStdout = (async () => {
61
+ for await (const _chunk of child.stdout) {
62
+ // discard
63
+ }
64
+ })();
65
+ for await (const chunk of child.stderr) stderr += chunk.toString();
66
+ await drainStdout;
67
+ const code = await child.exitCode;
68
+ if (code !== 0) {
69
+ throw new Error(`bun install exited ${code}: ${stderr}`);
70
+ }
72
71
  }
73
72
  }
74
73
 
@@ -78,10 +77,11 @@ export function createNpmInstaller(deps) {
78
77
  }
79
78
 
80
79
  /**
81
- * Free-function shorthand for callers that don't need to inject a spawn seam.
80
+ * Free-function shorthand for callers that thread a runtime bag.
82
81
  * @param {import("./task-family.js").TaskFamily} family
83
82
  * @param {string} stagingDir
83
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
84
84
  */
85
- export function installNpm(family, stagingDir) {
86
- return new NpmInstaller().install(family, stagingDir);
85
+ export function installNpm(family, stagingDir, runtime) {
86
+ return new NpmInstaller({ runtime }).install(family, stagingDir);
87
87
  }
@@ -12,9 +12,7 @@
12
12
  * whole report.
13
13
  */
14
14
 
15
- import { createReadStream } from "node:fs";
16
15
  import { join } from "node:path";
17
- import { createInterface } from "node:readline";
18
16
 
19
17
  import { validateResultRecord } from "./result.js";
20
18
 
@@ -41,11 +39,17 @@ import { validateResultRecord } from "./result.js";
41
39
  */
42
40
 
43
41
  /**
44
- * @param {{inputDir: string, kValues: number[], includeRuns?: boolean}} opts
42
+ * @param {{inputDir: string, kValues: number[], includeRuns?: boolean, runtime: import("@forwardimpact/libutil/runtime").Runtime}} opts
45
43
  * @returns {Promise<{tasks: TaskReport[], totals: object}>}
46
44
  */
47
- export async function aggregate({ inputDir, kValues, includeRuns = false }) {
48
- const records = await loadRecords(inputDir);
45
+ export async function aggregate({
46
+ inputDir,
47
+ kValues,
48
+ includeRuns = false,
49
+ runtime,
50
+ }) {
51
+ if (!runtime) throw new Error("runtime is required");
52
+ const records = await loadRecords(inputDir, runtime);
49
53
  const grouped = groupByTask(records.records);
50
54
  const tasks = [];
51
55
  let totalRuns = 0;
@@ -429,20 +433,30 @@ function median(arr) {
429
433
  // Record loading
430
434
  // ---------------------------------------------------------------------------
431
435
 
432
- async function loadRecords(inputDir) {
436
+ async function loadRecords(inputDir, runtime) {
433
437
  const path = join(inputDir, "results.jsonl");
434
- const stream = createReadStream(path);
435
- const rl = createInterface({ input: stream, crlfDelay: Infinity });
438
+ let content;
439
+ try {
440
+ content = await runtime.fs.readFile(path, "utf8");
441
+ } catch (e) {
442
+ // Re-throw with the stack collapsed to the message line so the CLI's
443
+ // error rendering stays free of node-internal async `readFile` frames
444
+ // (matching the pre-1370 stream-error shape the golden captured).
445
+ const err = new Error(e.message);
446
+ if (e.code) err.code = e.code;
447
+ err.stack = `Error: ${e.message}`;
448
+ throw err;
449
+ }
436
450
  const records = [];
437
451
  let skipped = 0;
438
- for await (const line of rl) {
452
+ for (const line of content.split("\n")) {
439
453
  const trimmed = line.trim();
440
454
  if (!trimmed) continue;
441
455
  let record;
442
456
  try {
443
457
  record = JSON.parse(trimmed);
444
458
  } catch (e) {
445
- process.stderr.write(
459
+ runtime.proc.stderr.write(
446
460
  `benchmark report: skipped malformed JSON line — ${e.message}\n`,
447
461
  );
448
462
  skipped++;
@@ -451,7 +465,7 @@ async function loadRecords(inputDir) {
451
465
  try {
452
466
  validateResultRecord(record);
453
467
  } catch (e) {
454
- process.stderr.write(
468
+ runtime.proc.stderr.write(
455
469
  `benchmark report: skipped record failing schema — ${describeError(e)}\n`,
456
470
  );
457
471
  skipped++;