@forwardimpact/libeval 0.1.50 → 0.1.52

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +11 -8
  2. package/bin/fit-benchmark.js +26 -27
  3. package/bin/fit-eval.js +36 -30
  4. package/bin/fit-trace.js +83 -57
  5. package/package.json +1 -1
  6. package/src/agent-runner.js +20 -12
  7. package/src/benchmark/apm-installer.js +48 -44
  8. package/src/benchmark/env-loader.js +35 -23
  9. package/src/benchmark/invariants.js +128 -0
  10. package/src/benchmark/judge.js +18 -19
  11. package/src/benchmark/npm-installer.js +33 -33
  12. package/src/benchmark/report.js +40 -26
  13. package/src/benchmark/result.js +11 -11
  14. package/src/benchmark/runner.js +90 -46
  15. package/src/benchmark/task-family.js +78 -65
  16. package/src/benchmark/workdir.js +100 -93
  17. package/src/commands/assert.js +30 -22
  18. package/src/commands/benchmark-invariants.js +74 -0
  19. package/src/commands/benchmark-report.js +24 -15
  20. package/src/commands/benchmark-run.js +16 -9
  21. package/src/commands/by-discussion.js +33 -23
  22. package/src/commands/callback.js +20 -11
  23. package/src/commands/discuss.js +31 -13
  24. package/src/commands/facilitate.js +21 -14
  25. package/src/commands/output.js +15 -13
  26. package/src/commands/run.js +28 -14
  27. package/src/commands/supervise.js +29 -19
  28. package/src/commands/task-input.js +10 -5
  29. package/src/commands/tee.js +24 -9
  30. package/src/commands/trace.js +181 -99
  31. package/src/discuss-tools.js +48 -2
  32. package/src/discusser.js +53 -2
  33. package/src/events/github.js +27 -5
  34. package/src/facilitator.js +4 -0
  35. package/src/inbox-poller.js +84 -0
  36. package/src/judge.js +4 -1
  37. package/src/message-bus.js +6 -0
  38. package/src/orchestration-loop.js +14 -4
  39. package/src/orchestration-toolkit.js +14 -0
  40. package/src/profile-prompt.js +22 -9
  41. package/src/redaction.js +31 -9
  42. package/src/reply-emitter.js +47 -0
  43. package/src/supervisor.js +4 -0
  44. package/src/tee-writer.js +4 -2
  45. package/src/trace-collector.js +9 -2
  46. package/src/trace-github.js +47 -27
  47. package/src/benchmark/scorer.js +0 -138
  48. package/src/commands/benchmark-score.js +0 -68
@@ -9,7 +9,7 @@
9
9
  * judge.task.md
10
10
  * hooks/ # harness-only; never copied to agent CWD
11
11
  * preflight.sh
12
- * score.sh
12
+ * invariants.sh
13
13
  * specs/ # copied into agent CWD
14
14
  * workdir/ # copied into agent CWD
15
15
  *
@@ -17,45 +17,55 @@
17
17
  * a temp dir and `familyRevision` becomes `git:<sha>` of HEAD at clone time.
18
18
  * Local paths use the canonical-tree algorithm from design § Family revision
19
19
  * algorithm so the result is stable across operating systems.
20
+ *
21
+ * Filesystem and subprocess access route through the injected `runtime` bag
22
+ * (`runtime.fs` async, `runtime.subprocess.run` one-shot, `tmpdir` derived
23
+ * from `runtime.proc.env`).
20
24
  */
21
25
 
22
- import { spawn } from "node:child_process";
23
26
  import { createHash } from "node:crypto";
24
- import {
25
- access,
26
- constants,
27
- lstat,
28
- mkdtemp,
29
- readdir,
30
- readFile,
31
- realpath,
32
- } from "node:fs/promises";
33
- import { tmpdir } from "node:os";
34
27
  import { join, posix, relative, resolve, sep } from "node:path";
35
28
 
36
29
  const GIT_URL_RE = /^(git@|https?:\/\/|ssh:\/\/|git:\/\/)/;
37
30
  const SKIP_DIRS = new Set([".git", "node_modules"]);
31
+ // POSIX `X_OK` (execute permission); node's fs honours the numeric mode, so we
32
+ // avoid importing `node:fs`'s `constants` (which would light the fs smell).
33
+ const X_OK = 1;
34
+
35
+ /**
36
+ * Derive the system temp dir from the env (node's `os.tmpdir()` is itself an
37
+ * env-respecting wrapper). The runtime bag has no `os` slot by design.
38
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
39
+ * @returns {string}
40
+ */
41
+ function tmpdir(runtime) {
42
+ return runtime.proc.env.TMPDIR ?? "/tmp";
43
+ }
38
44
 
39
45
  /**
40
46
  * Load a task family from a local path or git URL.
41
47
  * @param {string} rootPathOrGitUrl
48
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
42
49
  * @returns {Promise<TaskFamily>}
43
50
  */
44
- export async function loadTaskFamily(rootPathOrGitUrl) {
51
+ export async function loadTaskFamily(rootPathOrGitUrl, runtime) {
52
+ if (!runtime) throw new Error("runtime is required");
45
53
  const isGit = GIT_URL_RE.test(rootPathOrGitUrl);
46
54
  let rootPath;
47
55
  let familyRevision;
48
56
  if (isGit) {
49
- const dir = await mkdtemp(join(tmpdir(), "fit-benchmark-family-"));
50
- await gitClone(rootPathOrGitUrl, dir);
57
+ const dir = await runtime.fs.mkdtemp(
58
+ join(tmpdir(runtime), "fit-benchmark-family-"),
59
+ );
60
+ await gitClone(runtime, rootPathOrGitUrl, dir);
51
61
  rootPath = dir;
52
- familyRevision = "git:" + (await gitHead(dir));
62
+ familyRevision = "git:" + (await gitHead(runtime, dir));
53
63
  } else {
54
64
  rootPath = resolve(rootPathOrGitUrl);
55
- familyRevision = "sha256:" + (await canonicalTreeHash(rootPath));
65
+ familyRevision = "sha256:" + (await canonicalTreeHash(runtime, rootPath));
56
66
  }
57
67
 
58
- const tasks = await discoverTasks(rootPath);
68
+ const tasks = await discoverTasks(runtime, rootPath);
59
69
 
60
70
  return {
61
71
  rootPath,
@@ -73,27 +83,30 @@ export async function loadTaskFamily(rootPathOrGitUrl) {
73
83
  * @param {TaskFamily} _family
74
84
  * @param {string} judgeProfilesDir
75
85
  * @param {string} judgeProfile
86
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
76
87
  * @returns {Promise<void>}
77
88
  */
78
89
  export async function assertJudgeProfileStaged(
79
90
  _family,
80
91
  judgeProfilesDir,
81
92
  judgeProfile,
93
+ runtime,
82
94
  ) {
83
95
  const candidate = join(judgeProfilesDir, `${judgeProfile}.md`);
84
96
  try {
85
- await access(candidate);
97
+ await runtime.fs.access(candidate);
86
98
  } catch {
87
99
  throw new Error(`judge profile not staged: ${candidate}`);
88
100
  }
89
101
  }
90
102
 
91
- async function discoverTasks(rootPath) {
103
+ async function discoverTasks(runtime, rootPath) {
104
+ const fs = runtime.fs;
92
105
  const tasksRoot = join(rootPath, "tasks");
93
106
  const tasks = [];
94
107
  let entries;
95
108
  try {
96
- entries = await readdir(tasksRoot, { withFileTypes: true });
109
+ entries = await fs.readdir(tasksRoot, { withFileTypes: true });
97
110
  } catch (e) {
98
111
  if (e.code === "ENOENT") return tasks;
99
112
  throw e;
@@ -104,17 +117,23 @@ async function discoverTasks(rootPath) {
104
117
  const supervisorPath = join(taskDir, "supervisor.task.md");
105
118
  const judgePath = join(taskDir, "judge.task.md");
106
119
  const preflightPath = join(taskDir, "hooks", "preflight.sh");
107
- const scorePath = join(taskDir, "hooks", "score.sh");
120
+ const invariantsPath = join(taskDir, "hooks", "invariants.sh");
108
121
  tasks.push({
109
122
  id: entry.name,
110
123
  paths: {
111
124
  taskDir,
112
125
  instructions: join(taskDir, "agent.task.md"),
113
- supervisor: (await fileExists(supervisorPath)) ? supervisorPath : null,
114
- judge: (await fileExists(judgePath)) ? judgePath : null,
126
+ supervisor: (await fileExists(fs, supervisorPath))
127
+ ? supervisorPath
128
+ : null,
129
+ judge: (await fileExists(fs, judgePath)) ? judgePath : null,
115
130
  hooks: join(taskDir, "hooks"),
116
- preflight: (await fileExecutable(preflightPath)) ? preflightPath : null,
117
- score: (await fileExecutable(scorePath)) ? scorePath : null,
131
+ preflight: (await fileExecutable(fs, preflightPath))
132
+ ? preflightPath
133
+ : null,
134
+ invariants: (await fileExecutable(fs, invariantsPath))
135
+ ? invariantsPath
136
+ : null,
118
137
  specs: join(taskDir, "specs"),
119
138
  workdir: join(taskDir, "workdir"),
120
139
  },
@@ -124,18 +143,18 @@ async function discoverTasks(rootPath) {
124
143
  return tasks;
125
144
  }
126
145
 
127
- async function fileExists(path) {
146
+ async function fileExists(fs, path) {
128
147
  try {
129
- await access(path);
148
+ await fs.access(path);
130
149
  return true;
131
150
  } catch {
132
151
  return false;
133
152
  }
134
153
  }
135
154
 
136
- async function fileExecutable(path) {
155
+ async function fileExecutable(fs, path) {
137
156
  try {
138
- await access(path, constants.X_OK);
157
+ await fs.access(path, X_OK);
139
158
  return true;
140
159
  } catch {
141
160
  return false;
@@ -149,16 +168,18 @@ async function fileExecutable(path) {
149
168
  * sort by NFC-normalised POSIX-style root-relative path
150
169
  * row = <rel-path>\0<hex-sha256>\n
151
170
  * sha256(concat(rows))
171
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
152
172
  * @param {string} rootPath
153
173
  * @returns {Promise<string>} hex digest
154
174
  */
155
- async function canonicalTreeHash(rootPath) {
156
- const real = await realpath(rootPath);
175
+ async function canonicalTreeHash(runtime, rootPath) {
176
+ const fs = runtime.fs;
177
+ const real = await fs.realpath(rootPath);
157
178
  const rows = [];
158
- for await (const filePath of walkFiles(real)) {
179
+ for await (const filePath of walkFiles(fs, real)) {
159
180
  const rel = toPosix(relative(real, filePath)).normalize("NFC");
160
- const target = await realpath(filePath);
161
- const bytes = await readFile(target);
181
+ const target = await fs.realpath(filePath);
182
+ const bytes = await fs.readFile(target);
162
183
  const hex = createHash("sha256").update(bytes).digest("hex");
163
184
  rows.push({ rel, hex });
164
185
  }
@@ -168,15 +189,15 @@ async function canonicalTreeHash(rootPath) {
168
189
  return acc.digest("hex");
169
190
  }
170
191
 
171
- async function* walkFiles(dir) {
172
- const entries = await readdir(dir, { withFileTypes: true });
192
+ async function* walkFiles(fs, dir) {
193
+ const entries = await fs.readdir(dir, { withFileTypes: true });
173
194
  for (const entry of entries) {
174
195
  const full = join(dir, entry.name);
175
196
  if (entry.isDirectory()) {
176
197
  if (SKIP_DIRS.has(entry.name)) continue;
177
- yield* walkFiles(full);
198
+ yield* walkFiles(fs, full);
178
199
  } else if (entry.isSymbolicLink()) {
179
- const resolvedFile = await resolveSymlinkToFile(full);
200
+ const resolvedFile = await resolveSymlinkToFile(fs, full);
180
201
  if (resolvedFile) yield full;
181
202
  } else if (entry.isFile()) {
182
203
  yield full;
@@ -188,12 +209,12 @@ async function* walkFiles(dir) {
188
209
  * Return the resolved path if `linkPath` is a symlink to a regular file.
189
210
  * Returns null for dangling symlinks or links to non-file targets.
190
211
  */
191
- async function resolveSymlinkToFile(linkPath) {
192
- const st = await lstat(linkPath);
212
+ async function resolveSymlinkToFile(fs, linkPath) {
213
+ const st = await fs.lstat(linkPath);
193
214
  if (!st.isSymbolicLink()) return null;
194
215
  try {
195
- const resolved = await realpath(linkPath);
196
- const tstat = await lstat(resolved);
216
+ const resolved = await fs.realpath(linkPath);
217
+ const tstat = await fs.lstat(resolved);
197
218
  return tstat.isFile() ? resolved : null;
198
219
  } catch {
199
220
  return null;
@@ -205,38 +226,30 @@ function toPosix(p) {
205
226
  return p.split(sep).join(posix.sep);
206
227
  }
207
228
 
208
- async function gitClone(url, dir) {
209
- await run("git", ["clone", "--depth", "1", url, dir]);
229
+ async function gitClone(runtime, url, dir) {
230
+ await git(runtime, ["clone", "--depth", "1", url, dir]);
210
231
  }
211
232
 
212
- async function gitHead(dir) {
213
- const out = await run("git", ["-C", dir, "rev-parse", "HEAD"]);
233
+ async function gitHead(runtime, dir) {
234
+ const out = await git(runtime, ["-C", dir, "rev-parse", "HEAD"]);
214
235
  return out.trim();
215
236
  }
216
237
 
217
- function run(cmd, args) {
218
- return new Promise((res, rej) => {
219
- const child = spawn(cmd, args, { stdio: ["ignore", "pipe", "pipe"] });
220
- let stdout = "";
221
- let stderr = "";
222
- child.stdout.on("data", (d) => {
223
- stdout += d.toString();
224
- });
225
- child.stderr.on("data", (d) => {
226
- stderr += d.toString();
227
- });
228
- child.on("error", rej);
229
- child.on("close", (code) => {
230
- if (code === 0) res(stdout);
231
- else rej(new Error(`${cmd} ${args.join(" ")} exited ${code}: ${stderr}`));
232
- });
233
- });
238
+ async function git(runtime, args) {
239
+ const { stdout, stderr, exitCode } = await runtime.subprocess.run(
240
+ "git",
241
+ args,
242
+ );
243
+ if (exitCode !== 0) {
244
+ throw new Error(`git ${args.join(" ")} exited ${exitCode}: ${stderr}`);
245
+ }
246
+ return stdout;
234
247
  }
235
248
 
236
249
  /**
237
250
  * @typedef {object} Task
238
251
  * @property {string} id - Task name (directory name under tasks/)
239
- * @property {{taskDir: string, instructions: string, supervisor: string|null, judge: string|null, hooks: string, preflight: string|null, score: string|null, specs: string, workdir: string}} paths
252
+ * @property {{taskDir: string, instructions: string, supervisor: string|null, judge: string|null, hooks: string, preflight: string|null, invariants: string|null, specs: string, workdir: string}} paths
240
253
  */
241
254
 
242
255
  /**
@@ -4,11 +4,14 @@
4
4
  * the pre-flight smoke probe, and tear down the process group at end of run.
5
5
  *
6
6
  * The Workdir handle threads `cwd`, `port`, `pgid`, and trace paths through
7
- * runAgent → score → judge → teardown.
7
+ * runAgent → invariants → judge → teardown.
8
+ *
9
+ * Filesystem, subprocess, clock, and process-signal access all route through
10
+ * the injected `runtime` bag. Only raw TCP plumbing (`node:net`) stays direct —
11
+ * it is not an ambient-dependency smell and the runtime bag models no socket
12
+ * surface.
8
13
  */
9
14
 
10
- import { spawn } from "node:child_process";
11
- import { cp, mkdir } from "node:fs/promises";
12
15
  import { createServer } from "node:net";
13
16
  import { connect } from "node:net";
14
17
  import { join } from "node:path";
@@ -37,14 +40,24 @@ export class WorkdirManager {
37
40
  * @param {object} deps
38
41
  * @param {string} deps.stagingDir - Output of `installApm(...)`.
39
42
  * @param {string} deps.runOutputDir - Root run-output directory (parent of `runs/`).
43
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
44
+ * Ambient collaborators; uses `fs`, `subprocess`, `clock`, `proc`.
40
45
  */
41
- constructor({ stagingDir, runOutputDir, termGraceMs, familyRootPath }) {
46
+ constructor({
47
+ stagingDir,
48
+ runOutputDir,
49
+ termGraceMs,
50
+ familyRootPath,
51
+ runtime,
52
+ }) {
42
53
  if (!stagingDir) throw new Error("stagingDir is required");
43
54
  if (!runOutputDir) throw new Error("runOutputDir is required");
55
+ if (!runtime) throw new Error("runtime is required");
44
56
  this.stagingDir = stagingDir;
45
57
  this.runOutputDir = runOutputDir;
46
58
  this.termGraceMs = termGraceMs ?? DEFAULT_TERM_GRACE_MS;
47
59
  this.familyRootPath = familyRootPath ?? null;
60
+ this.runtime = runtime;
48
61
  }
49
62
 
50
63
  /**
@@ -54,33 +67,39 @@ export class WorkdirManager {
54
67
  * @returns {Promise<Workdir>}
55
68
  */
56
69
  async start(task, runIndex) {
70
+ const fs = this.runtime.fs;
57
71
  const slug = task.id.replace("/", "__");
58
72
  const runDir = join(this.runOutputDir, "runs", slug, String(runIndex));
59
73
  const cwd = join(runDir, "cwd");
60
- await mkdir(cwd, { recursive: true });
74
+ await fs.mkdir(cwd, { recursive: true });
61
75
 
62
- await cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
63
- if (e.code !== "ENOENT") throw e;
64
- });
65
- await cp(task.paths.specs, join(cwd, "specs"), {
66
- recursive: true,
67
- }).catch((e) => {
76
+ await fs.cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
68
77
  if (e.code !== "ENOENT") throw e;
69
78
  });
70
- await cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
71
- recursive: true,
72
- });
73
- await cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
79
+ await fs
80
+ .cp(task.paths.specs, join(cwd, "specs"), {
81
+ recursive: true,
82
+ })
83
+ .catch((e) => {
84
+ if (e.code !== "ENOENT") throw e;
85
+ });
86
+ await fs.cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
74
87
  recursive: true,
75
- }).catch((e) => {
76
- if (e.code !== "ENOENT") throw e;
77
88
  });
89
+ await fs
90
+ .cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
91
+ recursive: true,
92
+ })
93
+ .catch((e) => {
94
+ if (e.code !== "ENOENT") throw e;
95
+ });
78
96
 
79
97
  const envDirs = [
80
98
  ...(this.familyRootPath ? [this.familyRootPath] : []),
81
99
  ...(task.paths.taskDir ? [task.paths.taskDir] : []),
82
100
  ];
83
- const envNames = envDirs.length > 0 ? await loadEnv(envDirs, cwd) : [];
101
+ const envNames =
102
+ envDirs.length > 0 ? await loadEnv(envDirs, cwd, this.runtime) : [];
84
103
 
85
104
  const port = await allocatePort();
86
105
  const agentTracePath = join(runDir, "agent.ndjson");
@@ -88,7 +107,7 @@ export class WorkdirManager {
88
107
  const judgeTracePath = join(runDir, "judge.ndjson");
89
108
 
90
109
  const preflight = task.paths.preflight
91
- ? await runPreflight(task.paths.preflight, cwd, port)
110
+ ? await runPreflight(this.runtime, task.paths.preflight, cwd, port)
92
111
  : { pgid: 0 };
93
112
 
94
113
  return {
@@ -111,81 +130,71 @@ export class WorkdirManager {
111
130
  * @returns {Promise<{portFree: boolean, descendants: number}>}
112
131
  */
113
132
  async teardown(workdir) {
133
+ const { proc, clock } = this.runtime;
114
134
  if (workdir.pgid && workdir.pgid > 0) {
115
135
  try {
116
- process.kill(-workdir.pgid, "SIGTERM");
136
+ proc.kill(-workdir.pgid, "SIGTERM");
117
137
  } catch {
118
138
  // Process group already gone — fine.
119
139
  }
120
- await sleep(this.termGraceMs);
140
+ await clock.sleep(this.termGraceMs);
121
141
  try {
122
- process.kill(-workdir.pgid, "SIGKILL");
142
+ proc.kill(-workdir.pgid, "SIGKILL");
123
143
  } catch {
124
144
  // Already exited.
125
145
  }
126
146
  // Poll briefly until the process group is empty — SIGKILL returns
127
147
  // before the kernel finishes reaping descendants.
128
148
  await waitFor(
129
- async () => (await countDescendants(workdir.pgid)) === 0,
149
+ this.runtime,
150
+ async () => (await countDescendants(this.runtime, workdir.pgid)) === 0,
130
151
  2_000,
131
152
  );
132
153
  }
133
154
  const portFree = await isPortFree(workdir.port);
134
- const descendants = await countDescendants(workdir.pgid);
155
+ const descendants = await countDescendants(this.runtime, workdir.pgid);
135
156
  return { portFree, descendants };
136
157
  }
137
158
  }
138
159
 
139
160
  /**
140
161
  * Spawn preflight. Stays detached so we can SIGTERM the whole process group.
162
+ * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
141
163
  * @param {string} script
142
164
  * @param {string} cwd - Agent CWD passed via $WORKDIR.
143
165
  * @param {number} port - Free TCP port passed via $PORT.
144
166
  * @returns {Promise<{pgid: number, error?: {phase: string, message: string, exitCode: number}}>}
145
167
  */
146
- function runPreflight(script, cwd, port) {
147
- return new Promise((res, rej) => {
148
- let stderr = "";
149
- const child = spawn(script, [], {
150
- cwd,
151
- env: { ...process.env, WORKDIR: cwd, PORT: String(port) },
152
- detached: true,
153
- stdio: ["ignore", "pipe", "pipe"],
154
- });
155
- if (child.pid === undefined) {
156
- rej(new Error(`failed to spawn preflight: ${script}`));
157
- return;
158
- }
159
- const pgid = child.pid;
160
- child.stderr.on("data", (d) => {
161
- stderr += d.toString();
162
- });
163
- child.on("error", (e) => {
164
- res({
165
- pgid,
166
- error: {
167
- phase: "preflight",
168
- message: `preflight failed to spawn: ${e.message}`,
169
- exitCode: -1,
170
- },
171
- });
172
- });
173
- child.on("exit", (code, signal) => {
174
- if (code === 0) {
175
- res({ pgid });
176
- return;
177
- }
178
- const message = stderr.trim() || `preflight exited with signal ${signal}`;
179
- res({
180
- pgid,
181
- error: {
182
- phase: "preflight",
183
- message,
184
- exitCode: typeof code === "number" ? code : -1,
185
- },
186
- });
187
- });
168
+ async function runPreflight(runtime, script, cwd, port) {
169
+ const child = runtime.subprocess.spawn(script, [], {
170
+ cwd,
171
+ env: { ...runtime.proc.env, WORKDIR: cwd, PORT: String(port) },
172
+ detached: true,
173
+ stdio: ["ignore", "pipe", "pipe"],
188
174
  });
175
+ if (child.pid === undefined) {
176
+ throw new Error(`failed to spawn preflight: ${script}`);
177
+ }
178
+ const pgid = child.pid;
179
+ let stderr = "";
180
+ const drainStdout = (async () => {
181
+ for await (const _chunk of child.stdout) {
182
+ // discard
183
+ }
184
+ })();
185
+ for await (const chunk of child.stderr) stderr += chunk.toString();
186
+ await drainStdout;
187
+ const code = await child.exitCode;
188
+ if (code === 0) return { pgid };
189
+ const message = stderr.trim() || `preflight exited with code ${code}`;
190
+ return {
191
+ pgid,
192
+ error: {
193
+ phase: "preflight",
194
+ message,
195
+ exitCode: typeof code === "number" ? code : -1,
196
+ },
197
+ };
189
198
  }
190
199
 
191
200
  function allocatePort() {
@@ -221,37 +230,35 @@ function isPortFree(port) {
221
230
  });
222
231
  }
223
232
 
224
- function countDescendants(pgid) {
225
- if (!pgid || pgid <= 0) return Promise.resolve(0);
226
- return new Promise((res) => {
227
- const child = spawn("ps", ["-o", "pid=", "-g", String(pgid)], {
233
+ async function countDescendants(runtime, pgid) {
234
+ if (!pgid || pgid <= 0) return 0;
235
+ const child = runtime.subprocess.spawn(
236
+ "ps",
237
+ ["-o", "pid=", "-g", String(pgid)],
238
+ {
228
239
  stdio: ["ignore", "pipe", "ignore"],
229
- });
230
- let out = "";
231
- child.stdout.on("data", (d) => {
232
- out += d.toString();
233
- });
234
- child.on("error", () => res(0));
235
- child.on("close", () => {
236
- const pids = out
237
- .split("\n")
238
- .map((s) => s.trim())
239
- .filter(Boolean)
240
- .filter((s) => Number(s) !== process.pid);
241
- res(pids.length);
242
- });
243
- });
244
- }
245
-
246
- function sleep(ms) {
247
- return new Promise((r) => setTimeout(r, ms));
240
+ },
241
+ );
242
+ let out = "";
243
+ try {
244
+ for await (const chunk of child.stdout) out += chunk.toString();
245
+ await child.exitCode;
246
+ } catch {
247
+ return 0;
248
+ }
249
+ const pids = out
250
+ .split("\n")
251
+ .map((s) => s.trim())
252
+ .filter(Boolean)
253
+ .filter((s) => Number(s) !== runtime.proc.pid);
254
+ return pids.length;
248
255
  }
249
256
 
250
- async function waitFor(predicate, timeoutMs) {
251
- const deadline = Date.now() + timeoutMs;
252
- while (Date.now() < deadline) {
257
+ async function waitFor(runtime, predicate, timeoutMs) {
258
+ const deadline = runtime.clock.now() + timeoutMs;
259
+ while (runtime.clock.now() < deadline) {
253
260
  if (await predicate()) return true;
254
- await sleep(50);
261
+ await runtime.clock.sleep(50);
255
262
  }
256
263
  return false;
257
264
  }
@@ -1,4 +1,3 @@
1
- import { existsSync, readFileSync } from "node:fs";
2
1
  import { basename } from "node:path";
3
2
  import jmespath from "jmespath";
4
3
 
@@ -6,10 +5,11 @@ import jmespath from "jmespath";
6
5
  * Evaluate an assertion and return the structured result.
7
6
  * @param {object} values - { grep?: string, query?: string, exists?: boolean, not?: boolean, message?: string }
8
7
  * @param {string[]} args - [testName, file]
8
+ * @param {object} fsSync - Sync filesystem surface (`runtime.fsSync`): `existsSync`, `readFileSync`.
9
9
  * @returns {{ test: string, pass: boolean, message?: string }}
10
10
  */
11
11
  // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: assertion dispatch by type
12
- export function evaluateAssertion(values, args) {
12
+ export function evaluateAssertion(values, args, fsSync) {
13
13
  const testName = args[0];
14
14
  if (!testName) throw new Error("assert: missing test name");
15
15
 
@@ -34,16 +34,16 @@ export function evaluateAssertion(values, args) {
34
34
  let result;
35
35
  if (values.exists) {
36
36
  if (!file) throw new Error("assert: missing file argument");
37
- result = assertExists(file);
37
+ result = assertExists(file, fsSync);
38
38
  } else if (values.grep) {
39
39
  if (!file) throw new Error("assert: missing file argument for --grep");
40
- result = assertGrep(values.grep, file);
40
+ result = assertGrep(values.grep, file, fsSync);
41
41
  } else if (values["cites-job"]) {
42
42
  if (!file) throw new Error("assert: missing file argument for --cites-job");
43
- result = assertCitesJob(values["cites-job"], file);
43
+ result = assertCitesJob(values["cites-job"], file, fsSync);
44
44
  } else {
45
45
  if (!file) throw new Error("assert: missing file argument for --query");
46
- result = assertQuery(values.query, file);
46
+ result = assertQuery(values.query, file, fsSync);
47
47
  }
48
48
 
49
49
  if (values.not) {
@@ -66,23 +66,31 @@ export function evaluateAssertion(values, args) {
66
66
  }
67
67
 
68
68
  /**
69
- * Run an assertion, write JSON to stdout, and set process.exitCode on failure.
70
- * @param {object} values
71
- * @param {string[]} args
69
+ * Run an assertion, write JSON to stdout, and return a failure envelope when
70
+ * the assertion does not pass.
71
+ * @param {import("@forwardimpact/libcli").InvocationContext} ctx
72
+ * @returns {Promise<{ok: true} | {ok: false, code: number, error: string}>}
72
73
  */
73
- export async function runAssertCommand(values, args) {
74
- const result = evaluateAssertion(values, args);
75
- process.stdout.write(JSON.stringify(result) + "\n");
76
- if (!result.pass) process.exitCode = 1;
74
+ export async function runAssertCommand(ctx) {
75
+ const runtime = ctx.deps.runtime;
76
+ const args = [ctx.args["test-name"], ctx.args.file];
77
+ let result;
78
+ try {
79
+ result = evaluateAssertion(ctx.options, args, runtime.fsSync);
80
+ } catch (err) {
81
+ return { ok: false, code: 1, error: err.message };
82
+ }
83
+ runtime.proc.stdout.write(JSON.stringify(result) + "\n");
84
+ return result.pass ? { ok: true } : { ok: false, code: 1, error: "" };
77
85
  }
78
86
 
79
- function assertExists(file) {
80
- if (existsSync(file)) return { pass: true };
87
+ function assertExists(file, fsSync) {
88
+ if (fsSync.existsSync(file)) return { pass: true };
81
89
  return { pass: false, message: `${file} not found` };
82
90
  }
83
91
 
84
- function assertGrep(pattern, file) {
85
- const content = readFileSync(file, "utf8");
92
+ function assertGrep(pattern, file, fsSync) {
93
+ const content = fsSync.readFileSync(file, "utf8");
86
94
  const re = new RegExp(pattern, "im");
87
95
  if (re.test(content)) return { pass: true };
88
96
  return {
@@ -91,8 +99,8 @@ function assertGrep(pattern, file) {
91
99
  };
92
100
  }
93
101
 
94
- function assertQuery(expression, file) {
95
- const content = readFileSync(file, "utf8");
102
+ function assertQuery(expression, file, fsSync) {
103
+ const content = fsSync.readFileSync(file, "utf8");
96
104
  const data = parseJsonOrNdjson(content);
97
105
  const result = jmespath.search(data, expression);
98
106
  const truthy =
@@ -109,8 +117,8 @@ function assertQuery(expression, file) {
109
117
 
110
118
  const JOB_TAG_RE = /<job\s+user="([^"]*)"\s+goal="([^"]*)">/;
111
119
 
112
- function assertCitesJob(jobFile, file) {
113
- const jobContent = readFileSync(jobFile, "utf8");
120
+ function assertCitesJob(jobFile, file, fsSync) {
121
+ const jobContent = fsSync.readFileSync(jobFile, "utf8");
114
122
  const match = JOB_TAG_RE.exec(jobContent);
115
123
  if (!match) {
116
124
  return {
@@ -119,7 +127,7 @@ function assertCitesJob(jobFile, file) {
119
127
  };
120
128
  }
121
129
  const citation = `${match[1]}: ${match[2]}`;
122
- const content = readFileSync(file, "utf8");
130
+ const content = fsSync.readFileSync(file, "utf8");
123
131
  if (content.includes(citation)) return { pass: true };
124
132
  return { pass: false, message: `missing "${citation}"` };
125
133
  }