@forwardimpact/libeval 0.1.50 → 0.1.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -8
- package/bin/fit-benchmark.js +26 -27
- package/bin/fit-eval.js +36 -30
- package/bin/fit-trace.js +83 -57
- package/package.json +1 -1
- package/src/agent-runner.js +20 -12
- package/src/benchmark/apm-installer.js +48 -44
- package/src/benchmark/env-loader.js +35 -23
- package/src/benchmark/invariants.js +128 -0
- package/src/benchmark/judge.js +18 -19
- package/src/benchmark/npm-installer.js +33 -33
- package/src/benchmark/report.js +40 -26
- package/src/benchmark/result.js +11 -11
- package/src/benchmark/runner.js +90 -46
- package/src/benchmark/task-family.js +78 -65
- package/src/benchmark/workdir.js +100 -93
- package/src/commands/assert.js +30 -22
- package/src/commands/benchmark-invariants.js +74 -0
- package/src/commands/benchmark-report.js +24 -15
- package/src/commands/benchmark-run.js +16 -9
- package/src/commands/by-discussion.js +33 -23
- package/src/commands/callback.js +20 -11
- package/src/commands/discuss.js +31 -13
- package/src/commands/facilitate.js +21 -14
- package/src/commands/output.js +15 -13
- package/src/commands/run.js +28 -14
- package/src/commands/supervise.js +29 -19
- package/src/commands/task-input.js +10 -5
- package/src/commands/tee.js +24 -9
- package/src/commands/trace.js +181 -99
- package/src/discuss-tools.js +48 -2
- package/src/discusser.js +53 -2
- package/src/events/github.js +27 -5
- package/src/facilitator.js +4 -0
- package/src/inbox-poller.js +84 -0
- package/src/judge.js +4 -1
- package/src/message-bus.js +6 -0
- package/src/orchestration-loop.js +14 -4
- package/src/orchestration-toolkit.js +14 -0
- package/src/profile-prompt.js +22 -9
- package/src/redaction.js +31 -9
- package/src/reply-emitter.js +47 -0
- package/src/supervisor.js +4 -0
- package/src/tee-writer.js +4 -2
- package/src/trace-collector.js +9 -2
- package/src/trace-github.js +47 -27
- package/src/benchmark/scorer.js +0 -138
- package/src/commands/benchmark-score.js +0 -68
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* judge.task.md
|
|
10
10
|
* hooks/ # harness-only; never copied to agent CWD
|
|
11
11
|
* preflight.sh
|
|
12
|
-
*
|
|
12
|
+
* invariants.sh
|
|
13
13
|
* specs/ # copied into agent CWD
|
|
14
14
|
* workdir/ # copied into agent CWD
|
|
15
15
|
*
|
|
@@ -17,45 +17,55 @@
|
|
|
17
17
|
* a temp dir and `familyRevision` becomes `git:<sha>` of HEAD at clone time.
|
|
18
18
|
* Local paths use the canonical-tree algorithm from design § Family revision
|
|
19
19
|
* algorithm so the result is stable across operating systems.
|
|
20
|
+
*
|
|
21
|
+
* Filesystem and subprocess access route through the injected `runtime` bag
|
|
22
|
+
* (`runtime.fs` async, `runtime.subprocess.run` one-shot, `tmpdir` derived
|
|
23
|
+
* from `runtime.proc.env`).
|
|
20
24
|
*/
|
|
21
25
|
|
|
22
|
-
import { spawn } from "node:child_process";
|
|
23
26
|
import { createHash } from "node:crypto";
|
|
24
|
-
import {
|
|
25
|
-
access,
|
|
26
|
-
constants,
|
|
27
|
-
lstat,
|
|
28
|
-
mkdtemp,
|
|
29
|
-
readdir,
|
|
30
|
-
readFile,
|
|
31
|
-
realpath,
|
|
32
|
-
} from "node:fs/promises";
|
|
33
|
-
import { tmpdir } from "node:os";
|
|
34
27
|
import { join, posix, relative, resolve, sep } from "node:path";
|
|
35
28
|
|
|
36
29
|
const GIT_URL_RE = /^(git@|https?:\/\/|ssh:\/\/|git:\/\/)/;
|
|
37
30
|
const SKIP_DIRS = new Set([".git", "node_modules"]);
|
|
31
|
+
// POSIX `X_OK` (execute permission); node's fs honours the numeric mode, so we
|
|
32
|
+
// avoid importing `node:fs`'s `constants` (which would light the fs smell).
|
|
33
|
+
const X_OK = 1;
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Derive the system temp dir from the env (node's `os.tmpdir()` is itself an
|
|
37
|
+
* env-respecting wrapper). The runtime bag has no `os` slot by design.
|
|
38
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
39
|
+
* @returns {string}
|
|
40
|
+
*/
|
|
41
|
+
function tmpdir(runtime) {
|
|
42
|
+
return runtime.proc.env.TMPDIR ?? "/tmp";
|
|
43
|
+
}
|
|
38
44
|
|
|
39
45
|
/**
|
|
40
46
|
* Load a task family from a local path or git URL.
|
|
41
47
|
* @param {string} rootPathOrGitUrl
|
|
48
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
42
49
|
* @returns {Promise<TaskFamily>}
|
|
43
50
|
*/
|
|
44
|
-
export async function loadTaskFamily(rootPathOrGitUrl) {
|
|
51
|
+
export async function loadTaskFamily(rootPathOrGitUrl, runtime) {
|
|
52
|
+
if (!runtime) throw new Error("runtime is required");
|
|
45
53
|
const isGit = GIT_URL_RE.test(rootPathOrGitUrl);
|
|
46
54
|
let rootPath;
|
|
47
55
|
let familyRevision;
|
|
48
56
|
if (isGit) {
|
|
49
|
-
const dir = await mkdtemp(
|
|
50
|
-
|
|
57
|
+
const dir = await runtime.fs.mkdtemp(
|
|
58
|
+
join(tmpdir(runtime), "fit-benchmark-family-"),
|
|
59
|
+
);
|
|
60
|
+
await gitClone(runtime, rootPathOrGitUrl, dir);
|
|
51
61
|
rootPath = dir;
|
|
52
|
-
familyRevision = "git:" + (await gitHead(dir));
|
|
62
|
+
familyRevision = "git:" + (await gitHead(runtime, dir));
|
|
53
63
|
} else {
|
|
54
64
|
rootPath = resolve(rootPathOrGitUrl);
|
|
55
|
-
familyRevision = "sha256:" + (await canonicalTreeHash(rootPath));
|
|
65
|
+
familyRevision = "sha256:" + (await canonicalTreeHash(runtime, rootPath));
|
|
56
66
|
}
|
|
57
67
|
|
|
58
|
-
const tasks = await discoverTasks(rootPath);
|
|
68
|
+
const tasks = await discoverTasks(runtime, rootPath);
|
|
59
69
|
|
|
60
70
|
return {
|
|
61
71
|
rootPath,
|
|
@@ -73,27 +83,30 @@ export async function loadTaskFamily(rootPathOrGitUrl) {
|
|
|
73
83
|
* @param {TaskFamily} _family
|
|
74
84
|
* @param {string} judgeProfilesDir
|
|
75
85
|
* @param {string} judgeProfile
|
|
86
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
76
87
|
* @returns {Promise<void>}
|
|
77
88
|
*/
|
|
78
89
|
export async function assertJudgeProfileStaged(
|
|
79
90
|
_family,
|
|
80
91
|
judgeProfilesDir,
|
|
81
92
|
judgeProfile,
|
|
93
|
+
runtime,
|
|
82
94
|
) {
|
|
83
95
|
const candidate = join(judgeProfilesDir, `${judgeProfile}.md`);
|
|
84
96
|
try {
|
|
85
|
-
await access(candidate);
|
|
97
|
+
await runtime.fs.access(candidate);
|
|
86
98
|
} catch {
|
|
87
99
|
throw new Error(`judge profile not staged: ${candidate}`);
|
|
88
100
|
}
|
|
89
101
|
}
|
|
90
102
|
|
|
91
|
-
async function discoverTasks(rootPath) {
|
|
103
|
+
async function discoverTasks(runtime, rootPath) {
|
|
104
|
+
const fs = runtime.fs;
|
|
92
105
|
const tasksRoot = join(rootPath, "tasks");
|
|
93
106
|
const tasks = [];
|
|
94
107
|
let entries;
|
|
95
108
|
try {
|
|
96
|
-
entries = await readdir(tasksRoot, { withFileTypes: true });
|
|
109
|
+
entries = await fs.readdir(tasksRoot, { withFileTypes: true });
|
|
97
110
|
} catch (e) {
|
|
98
111
|
if (e.code === "ENOENT") return tasks;
|
|
99
112
|
throw e;
|
|
@@ -104,17 +117,23 @@ async function discoverTasks(rootPath) {
|
|
|
104
117
|
const supervisorPath = join(taskDir, "supervisor.task.md");
|
|
105
118
|
const judgePath = join(taskDir, "judge.task.md");
|
|
106
119
|
const preflightPath = join(taskDir, "hooks", "preflight.sh");
|
|
107
|
-
const
|
|
120
|
+
const invariantsPath = join(taskDir, "hooks", "invariants.sh");
|
|
108
121
|
tasks.push({
|
|
109
122
|
id: entry.name,
|
|
110
123
|
paths: {
|
|
111
124
|
taskDir,
|
|
112
125
|
instructions: join(taskDir, "agent.task.md"),
|
|
113
|
-
supervisor: (await fileExists(supervisorPath))
|
|
114
|
-
|
|
126
|
+
supervisor: (await fileExists(fs, supervisorPath))
|
|
127
|
+
? supervisorPath
|
|
128
|
+
: null,
|
|
129
|
+
judge: (await fileExists(fs, judgePath)) ? judgePath : null,
|
|
115
130
|
hooks: join(taskDir, "hooks"),
|
|
116
|
-
preflight: (await fileExecutable(preflightPath))
|
|
117
|
-
|
|
131
|
+
preflight: (await fileExecutable(fs, preflightPath))
|
|
132
|
+
? preflightPath
|
|
133
|
+
: null,
|
|
134
|
+
invariants: (await fileExecutable(fs, invariantsPath))
|
|
135
|
+
? invariantsPath
|
|
136
|
+
: null,
|
|
118
137
|
specs: join(taskDir, "specs"),
|
|
119
138
|
workdir: join(taskDir, "workdir"),
|
|
120
139
|
},
|
|
@@ -124,18 +143,18 @@ async function discoverTasks(rootPath) {
|
|
|
124
143
|
return tasks;
|
|
125
144
|
}
|
|
126
145
|
|
|
127
|
-
async function fileExists(path) {
|
|
146
|
+
async function fileExists(fs, path) {
|
|
128
147
|
try {
|
|
129
|
-
await access(path);
|
|
148
|
+
await fs.access(path);
|
|
130
149
|
return true;
|
|
131
150
|
} catch {
|
|
132
151
|
return false;
|
|
133
152
|
}
|
|
134
153
|
}
|
|
135
154
|
|
|
136
|
-
async function fileExecutable(path) {
|
|
155
|
+
async function fileExecutable(fs, path) {
|
|
137
156
|
try {
|
|
138
|
-
await access(path,
|
|
157
|
+
await fs.access(path, X_OK);
|
|
139
158
|
return true;
|
|
140
159
|
} catch {
|
|
141
160
|
return false;
|
|
@@ -149,16 +168,18 @@ async function fileExecutable(path) {
|
|
|
149
168
|
* sort by NFC-normalised POSIX-style root-relative path
|
|
150
169
|
* row = <rel-path>\0<hex-sha256>\n
|
|
151
170
|
* sha256(concat(rows))
|
|
171
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
152
172
|
* @param {string} rootPath
|
|
153
173
|
* @returns {Promise<string>} hex digest
|
|
154
174
|
*/
|
|
155
|
-
async function canonicalTreeHash(rootPath) {
|
|
156
|
-
const
|
|
175
|
+
async function canonicalTreeHash(runtime, rootPath) {
|
|
176
|
+
const fs = runtime.fs;
|
|
177
|
+
const real = await fs.realpath(rootPath);
|
|
157
178
|
const rows = [];
|
|
158
|
-
for await (const filePath of walkFiles(real)) {
|
|
179
|
+
for await (const filePath of walkFiles(fs, real)) {
|
|
159
180
|
const rel = toPosix(relative(real, filePath)).normalize("NFC");
|
|
160
|
-
const target = await realpath(filePath);
|
|
161
|
-
const bytes = await readFile(target);
|
|
181
|
+
const target = await fs.realpath(filePath);
|
|
182
|
+
const bytes = await fs.readFile(target);
|
|
162
183
|
const hex = createHash("sha256").update(bytes).digest("hex");
|
|
163
184
|
rows.push({ rel, hex });
|
|
164
185
|
}
|
|
@@ -168,15 +189,15 @@ async function canonicalTreeHash(rootPath) {
|
|
|
168
189
|
return acc.digest("hex");
|
|
169
190
|
}
|
|
170
191
|
|
|
171
|
-
async function* walkFiles(dir) {
|
|
172
|
-
const entries = await readdir(dir, { withFileTypes: true });
|
|
192
|
+
async function* walkFiles(fs, dir) {
|
|
193
|
+
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
173
194
|
for (const entry of entries) {
|
|
174
195
|
const full = join(dir, entry.name);
|
|
175
196
|
if (entry.isDirectory()) {
|
|
176
197
|
if (SKIP_DIRS.has(entry.name)) continue;
|
|
177
|
-
yield* walkFiles(full);
|
|
198
|
+
yield* walkFiles(fs, full);
|
|
178
199
|
} else if (entry.isSymbolicLink()) {
|
|
179
|
-
const resolvedFile = await resolveSymlinkToFile(full);
|
|
200
|
+
const resolvedFile = await resolveSymlinkToFile(fs, full);
|
|
180
201
|
if (resolvedFile) yield full;
|
|
181
202
|
} else if (entry.isFile()) {
|
|
182
203
|
yield full;
|
|
@@ -188,12 +209,12 @@ async function* walkFiles(dir) {
|
|
|
188
209
|
* Return the resolved path if `linkPath` is a symlink to a regular file.
|
|
189
210
|
* Returns null for dangling symlinks or links to non-file targets.
|
|
190
211
|
*/
|
|
191
|
-
async function resolveSymlinkToFile(linkPath) {
|
|
192
|
-
const st = await lstat(linkPath);
|
|
212
|
+
async function resolveSymlinkToFile(fs, linkPath) {
|
|
213
|
+
const st = await fs.lstat(linkPath);
|
|
193
214
|
if (!st.isSymbolicLink()) return null;
|
|
194
215
|
try {
|
|
195
|
-
const resolved = await realpath(linkPath);
|
|
196
|
-
const tstat = await lstat(resolved);
|
|
216
|
+
const resolved = await fs.realpath(linkPath);
|
|
217
|
+
const tstat = await fs.lstat(resolved);
|
|
197
218
|
return tstat.isFile() ? resolved : null;
|
|
198
219
|
} catch {
|
|
199
220
|
return null;
|
|
@@ -205,38 +226,30 @@ function toPosix(p) {
|
|
|
205
226
|
return p.split(sep).join(posix.sep);
|
|
206
227
|
}
|
|
207
228
|
|
|
208
|
-
async function gitClone(url, dir) {
|
|
209
|
-
await
|
|
229
|
+
async function gitClone(runtime, url, dir) {
|
|
230
|
+
await git(runtime, ["clone", "--depth", "1", url, dir]);
|
|
210
231
|
}
|
|
211
232
|
|
|
212
|
-
async function gitHead(dir) {
|
|
213
|
-
const out = await
|
|
233
|
+
async function gitHead(runtime, dir) {
|
|
234
|
+
const out = await git(runtime, ["-C", dir, "rev-parse", "HEAD"]);
|
|
214
235
|
return out.trim();
|
|
215
236
|
}
|
|
216
237
|
|
|
217
|
-
function
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
stderr += d.toString();
|
|
227
|
-
});
|
|
228
|
-
child.on("error", rej);
|
|
229
|
-
child.on("close", (code) => {
|
|
230
|
-
if (code === 0) res(stdout);
|
|
231
|
-
else rej(new Error(`${cmd} ${args.join(" ")} exited ${code}: ${stderr}`));
|
|
232
|
-
});
|
|
233
|
-
});
|
|
238
|
+
async function git(runtime, args) {
|
|
239
|
+
const { stdout, stderr, exitCode } = await runtime.subprocess.run(
|
|
240
|
+
"git",
|
|
241
|
+
args,
|
|
242
|
+
);
|
|
243
|
+
if (exitCode !== 0) {
|
|
244
|
+
throw new Error(`git ${args.join(" ")} exited ${exitCode}: ${stderr}`);
|
|
245
|
+
}
|
|
246
|
+
return stdout;
|
|
234
247
|
}
|
|
235
248
|
|
|
236
249
|
/**
|
|
237
250
|
* @typedef {object} Task
|
|
238
251
|
* @property {string} id - Task name (directory name under tasks/)
|
|
239
|
-
* @property {{taskDir: string, instructions: string, supervisor: string|null, judge: string|null, hooks: string, preflight: string|null,
|
|
252
|
+
* @property {{taskDir: string, instructions: string, supervisor: string|null, judge: string|null, hooks: string, preflight: string|null, invariants: string|null, specs: string, workdir: string}} paths
|
|
240
253
|
*/
|
|
241
254
|
|
|
242
255
|
/**
|
package/src/benchmark/workdir.js
CHANGED
|
@@ -4,11 +4,14 @@
|
|
|
4
4
|
* the pre-flight smoke probe, and tear down the process group at end of run.
|
|
5
5
|
*
|
|
6
6
|
* The Workdir handle threads `cwd`, `port`, `pgid`, and trace paths through
|
|
7
|
-
* runAgent →
|
|
7
|
+
* runAgent → invariants → judge → teardown.
|
|
8
|
+
*
|
|
9
|
+
* Filesystem, subprocess, clock, and process-signal access all route through
|
|
10
|
+
* the injected `runtime` bag. Only raw TCP plumbing (`node:net`) stays direct —
|
|
11
|
+
* it is not an ambient-dependency smell and the runtime bag models no socket
|
|
12
|
+
* surface.
|
|
8
13
|
*/
|
|
9
14
|
|
|
10
|
-
import { spawn } from "node:child_process";
|
|
11
|
-
import { cp, mkdir } from "node:fs/promises";
|
|
12
15
|
import { createServer } from "node:net";
|
|
13
16
|
import { connect } from "node:net";
|
|
14
17
|
import { join } from "node:path";
|
|
@@ -37,14 +40,24 @@ export class WorkdirManager {
|
|
|
37
40
|
* @param {object} deps
|
|
38
41
|
* @param {string} deps.stagingDir - Output of `installApm(...)`.
|
|
39
42
|
* @param {string} deps.runOutputDir - Root run-output directory (parent of `runs/`).
|
|
43
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
|
|
44
|
+
* Ambient collaborators; uses `fs`, `subprocess`, `clock`, `proc`.
|
|
40
45
|
*/
|
|
41
|
-
constructor({
|
|
46
|
+
constructor({
|
|
47
|
+
stagingDir,
|
|
48
|
+
runOutputDir,
|
|
49
|
+
termGraceMs,
|
|
50
|
+
familyRootPath,
|
|
51
|
+
runtime,
|
|
52
|
+
}) {
|
|
42
53
|
if (!stagingDir) throw new Error("stagingDir is required");
|
|
43
54
|
if (!runOutputDir) throw new Error("runOutputDir is required");
|
|
55
|
+
if (!runtime) throw new Error("runtime is required");
|
|
44
56
|
this.stagingDir = stagingDir;
|
|
45
57
|
this.runOutputDir = runOutputDir;
|
|
46
58
|
this.termGraceMs = termGraceMs ?? DEFAULT_TERM_GRACE_MS;
|
|
47
59
|
this.familyRootPath = familyRootPath ?? null;
|
|
60
|
+
this.runtime = runtime;
|
|
48
61
|
}
|
|
49
62
|
|
|
50
63
|
/**
|
|
@@ -54,33 +67,39 @@ export class WorkdirManager {
|
|
|
54
67
|
* @returns {Promise<Workdir>}
|
|
55
68
|
*/
|
|
56
69
|
async start(task, runIndex) {
|
|
70
|
+
const fs = this.runtime.fs;
|
|
57
71
|
const slug = task.id.replace("/", "__");
|
|
58
72
|
const runDir = join(this.runOutputDir, "runs", slug, String(runIndex));
|
|
59
73
|
const cwd = join(runDir, "cwd");
|
|
60
|
-
await mkdir(cwd, { recursive: true });
|
|
74
|
+
await fs.mkdir(cwd, { recursive: true });
|
|
61
75
|
|
|
62
|
-
await cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
|
|
63
|
-
if (e.code !== "ENOENT") throw e;
|
|
64
|
-
});
|
|
65
|
-
await cp(task.paths.specs, join(cwd, "specs"), {
|
|
66
|
-
recursive: true,
|
|
67
|
-
}).catch((e) => {
|
|
76
|
+
await fs.cp(task.paths.workdir, cwd, { recursive: true }).catch((e) => {
|
|
68
77
|
if (e.code !== "ENOENT") throw e;
|
|
69
78
|
});
|
|
70
|
-
await
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
79
|
+
await fs
|
|
80
|
+
.cp(task.paths.specs, join(cwd, "specs"), {
|
|
81
|
+
recursive: true,
|
|
82
|
+
})
|
|
83
|
+
.catch((e) => {
|
|
84
|
+
if (e.code !== "ENOENT") throw e;
|
|
85
|
+
});
|
|
86
|
+
await fs.cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
|
|
74
87
|
recursive: true,
|
|
75
|
-
}).catch((e) => {
|
|
76
|
-
if (e.code !== "ENOENT") throw e;
|
|
77
88
|
});
|
|
89
|
+
await fs
|
|
90
|
+
.cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
|
|
91
|
+
recursive: true,
|
|
92
|
+
})
|
|
93
|
+
.catch((e) => {
|
|
94
|
+
if (e.code !== "ENOENT") throw e;
|
|
95
|
+
});
|
|
78
96
|
|
|
79
97
|
const envDirs = [
|
|
80
98
|
...(this.familyRootPath ? [this.familyRootPath] : []),
|
|
81
99
|
...(task.paths.taskDir ? [task.paths.taskDir] : []),
|
|
82
100
|
];
|
|
83
|
-
const envNames =
|
|
101
|
+
const envNames =
|
|
102
|
+
envDirs.length > 0 ? await loadEnv(envDirs, cwd, this.runtime) : [];
|
|
84
103
|
|
|
85
104
|
const port = await allocatePort();
|
|
86
105
|
const agentTracePath = join(runDir, "agent.ndjson");
|
|
@@ -88,7 +107,7 @@ export class WorkdirManager {
|
|
|
88
107
|
const judgeTracePath = join(runDir, "judge.ndjson");
|
|
89
108
|
|
|
90
109
|
const preflight = task.paths.preflight
|
|
91
|
-
? await runPreflight(task.paths.preflight, cwd, port)
|
|
110
|
+
? await runPreflight(this.runtime, task.paths.preflight, cwd, port)
|
|
92
111
|
: { pgid: 0 };
|
|
93
112
|
|
|
94
113
|
return {
|
|
@@ -111,81 +130,71 @@ export class WorkdirManager {
|
|
|
111
130
|
* @returns {Promise<{portFree: boolean, descendants: number}>}
|
|
112
131
|
*/
|
|
113
132
|
async teardown(workdir) {
|
|
133
|
+
const { proc, clock } = this.runtime;
|
|
114
134
|
if (workdir.pgid && workdir.pgid > 0) {
|
|
115
135
|
try {
|
|
116
|
-
|
|
136
|
+
proc.kill(-workdir.pgid, "SIGTERM");
|
|
117
137
|
} catch {
|
|
118
138
|
// Process group already gone — fine.
|
|
119
139
|
}
|
|
120
|
-
await sleep(this.termGraceMs);
|
|
140
|
+
await clock.sleep(this.termGraceMs);
|
|
121
141
|
try {
|
|
122
|
-
|
|
142
|
+
proc.kill(-workdir.pgid, "SIGKILL");
|
|
123
143
|
} catch {
|
|
124
144
|
// Already exited.
|
|
125
145
|
}
|
|
126
146
|
// Poll briefly until the process group is empty — SIGKILL returns
|
|
127
147
|
// before the kernel finishes reaping descendants.
|
|
128
148
|
await waitFor(
|
|
129
|
-
|
|
149
|
+
this.runtime,
|
|
150
|
+
async () => (await countDescendants(this.runtime, workdir.pgid)) === 0,
|
|
130
151
|
2_000,
|
|
131
152
|
);
|
|
132
153
|
}
|
|
133
154
|
const portFree = await isPortFree(workdir.port);
|
|
134
|
-
const descendants = await countDescendants(workdir.pgid);
|
|
155
|
+
const descendants = await countDescendants(this.runtime, workdir.pgid);
|
|
135
156
|
return { portFree, descendants };
|
|
136
157
|
}
|
|
137
158
|
}
|
|
138
159
|
|
|
139
160
|
/**
|
|
140
161
|
* Spawn preflight. Stays detached so we can SIGTERM the whole process group.
|
|
162
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
141
163
|
* @param {string} script
|
|
142
164
|
* @param {string} cwd - Agent CWD passed via $WORKDIR.
|
|
143
165
|
* @param {number} port - Free TCP port passed via $PORT.
|
|
144
166
|
* @returns {Promise<{pgid: number, error?: {phase: string, message: string, exitCode: number}}>}
|
|
145
167
|
*/
|
|
146
|
-
function runPreflight(script, cwd, port) {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
detached: true,
|
|
153
|
-
stdio: ["ignore", "pipe", "pipe"],
|
|
154
|
-
});
|
|
155
|
-
if (child.pid === undefined) {
|
|
156
|
-
rej(new Error(`failed to spawn preflight: ${script}`));
|
|
157
|
-
return;
|
|
158
|
-
}
|
|
159
|
-
const pgid = child.pid;
|
|
160
|
-
child.stderr.on("data", (d) => {
|
|
161
|
-
stderr += d.toString();
|
|
162
|
-
});
|
|
163
|
-
child.on("error", (e) => {
|
|
164
|
-
res({
|
|
165
|
-
pgid,
|
|
166
|
-
error: {
|
|
167
|
-
phase: "preflight",
|
|
168
|
-
message: `preflight failed to spawn: ${e.message}`,
|
|
169
|
-
exitCode: -1,
|
|
170
|
-
},
|
|
171
|
-
});
|
|
172
|
-
});
|
|
173
|
-
child.on("exit", (code, signal) => {
|
|
174
|
-
if (code === 0) {
|
|
175
|
-
res({ pgid });
|
|
176
|
-
return;
|
|
177
|
-
}
|
|
178
|
-
const message = stderr.trim() || `preflight exited with signal ${signal}`;
|
|
179
|
-
res({
|
|
180
|
-
pgid,
|
|
181
|
-
error: {
|
|
182
|
-
phase: "preflight",
|
|
183
|
-
message,
|
|
184
|
-
exitCode: typeof code === "number" ? code : -1,
|
|
185
|
-
},
|
|
186
|
-
});
|
|
187
|
-
});
|
|
168
|
+
async function runPreflight(runtime, script, cwd, port) {
|
|
169
|
+
const child = runtime.subprocess.spawn(script, [], {
|
|
170
|
+
cwd,
|
|
171
|
+
env: { ...runtime.proc.env, WORKDIR: cwd, PORT: String(port) },
|
|
172
|
+
detached: true,
|
|
173
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
188
174
|
});
|
|
175
|
+
if (child.pid === undefined) {
|
|
176
|
+
throw new Error(`failed to spawn preflight: ${script}`);
|
|
177
|
+
}
|
|
178
|
+
const pgid = child.pid;
|
|
179
|
+
let stderr = "";
|
|
180
|
+
const drainStdout = (async () => {
|
|
181
|
+
for await (const _chunk of child.stdout) {
|
|
182
|
+
// discard
|
|
183
|
+
}
|
|
184
|
+
})();
|
|
185
|
+
for await (const chunk of child.stderr) stderr += chunk.toString();
|
|
186
|
+
await drainStdout;
|
|
187
|
+
const code = await child.exitCode;
|
|
188
|
+
if (code === 0) return { pgid };
|
|
189
|
+
const message = stderr.trim() || `preflight exited with code ${code}`;
|
|
190
|
+
return {
|
|
191
|
+
pgid,
|
|
192
|
+
error: {
|
|
193
|
+
phase: "preflight",
|
|
194
|
+
message,
|
|
195
|
+
exitCode: typeof code === "number" ? code : -1,
|
|
196
|
+
},
|
|
197
|
+
};
|
|
189
198
|
}
|
|
190
199
|
|
|
191
200
|
function allocatePort() {
|
|
@@ -221,37 +230,35 @@ function isPortFree(port) {
|
|
|
221
230
|
});
|
|
222
231
|
}
|
|
223
232
|
|
|
224
|
-
function countDescendants(pgid) {
|
|
225
|
-
if (!pgid || pgid <= 0) return
|
|
226
|
-
|
|
227
|
-
|
|
233
|
+
async function countDescendants(runtime, pgid) {
|
|
234
|
+
if (!pgid || pgid <= 0) return 0;
|
|
235
|
+
const child = runtime.subprocess.spawn(
|
|
236
|
+
"ps",
|
|
237
|
+
["-o", "pid=", "-g", String(pgid)],
|
|
238
|
+
{
|
|
228
239
|
stdio: ["ignore", "pipe", "ignore"],
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
child.
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
function sleep(ms) {
|
|
247
|
-
return new Promise((r) => setTimeout(r, ms));
|
|
240
|
+
},
|
|
241
|
+
);
|
|
242
|
+
let out = "";
|
|
243
|
+
try {
|
|
244
|
+
for await (const chunk of child.stdout) out += chunk.toString();
|
|
245
|
+
await child.exitCode;
|
|
246
|
+
} catch {
|
|
247
|
+
return 0;
|
|
248
|
+
}
|
|
249
|
+
const pids = out
|
|
250
|
+
.split("\n")
|
|
251
|
+
.map((s) => s.trim())
|
|
252
|
+
.filter(Boolean)
|
|
253
|
+
.filter((s) => Number(s) !== runtime.proc.pid);
|
|
254
|
+
return pids.length;
|
|
248
255
|
}
|
|
249
256
|
|
|
250
|
-
async function waitFor(predicate, timeoutMs) {
|
|
251
|
-
const deadline =
|
|
252
|
-
while (
|
|
257
|
+
async function waitFor(runtime, predicate, timeoutMs) {
|
|
258
|
+
const deadline = runtime.clock.now() + timeoutMs;
|
|
259
|
+
while (runtime.clock.now() < deadline) {
|
|
253
260
|
if (await predicate()) return true;
|
|
254
|
-
await sleep(50);
|
|
261
|
+
await runtime.clock.sleep(50);
|
|
255
262
|
}
|
|
256
263
|
return false;
|
|
257
264
|
}
|
package/src/commands/assert.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { existsSync, readFileSync } from "node:fs";
|
|
2
1
|
import { basename } from "node:path";
|
|
3
2
|
import jmespath from "jmespath";
|
|
4
3
|
|
|
@@ -6,10 +5,11 @@ import jmespath from "jmespath";
|
|
|
6
5
|
* Evaluate an assertion and return the structured result.
|
|
7
6
|
* @param {object} values - { grep?: string, query?: string, exists?: boolean, not?: boolean, message?: string }
|
|
8
7
|
* @param {string[]} args - [testName, file]
|
|
8
|
+
* @param {object} fsSync - Sync filesystem surface (`runtime.fsSync`): `existsSync`, `readFileSync`.
|
|
9
9
|
* @returns {{ test: string, pass: boolean, message?: string }}
|
|
10
10
|
*/
|
|
11
11
|
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: assertion dispatch by type
|
|
12
|
-
export function evaluateAssertion(values, args) {
|
|
12
|
+
export function evaluateAssertion(values, args, fsSync) {
|
|
13
13
|
const testName = args[0];
|
|
14
14
|
if (!testName) throw new Error("assert: missing test name");
|
|
15
15
|
|
|
@@ -34,16 +34,16 @@ export function evaluateAssertion(values, args) {
|
|
|
34
34
|
let result;
|
|
35
35
|
if (values.exists) {
|
|
36
36
|
if (!file) throw new Error("assert: missing file argument");
|
|
37
|
-
result = assertExists(file);
|
|
37
|
+
result = assertExists(file, fsSync);
|
|
38
38
|
} else if (values.grep) {
|
|
39
39
|
if (!file) throw new Error("assert: missing file argument for --grep");
|
|
40
|
-
result = assertGrep(values.grep, file);
|
|
40
|
+
result = assertGrep(values.grep, file, fsSync);
|
|
41
41
|
} else if (values["cites-job"]) {
|
|
42
42
|
if (!file) throw new Error("assert: missing file argument for --cites-job");
|
|
43
|
-
result = assertCitesJob(values["cites-job"], file);
|
|
43
|
+
result = assertCitesJob(values["cites-job"], file, fsSync);
|
|
44
44
|
} else {
|
|
45
45
|
if (!file) throw new Error("assert: missing file argument for --query");
|
|
46
|
-
result = assertQuery(values.query, file);
|
|
46
|
+
result = assertQuery(values.query, file, fsSync);
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
if (values.not) {
|
|
@@ -66,23 +66,31 @@ export function evaluateAssertion(values, args) {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
/**
|
|
69
|
-
* Run an assertion, write JSON to stdout, and
|
|
70
|
-
*
|
|
71
|
-
* @param {
|
|
69
|
+
* Run an assertion, write JSON to stdout, and return a failure envelope when
|
|
70
|
+
* the assertion does not pass.
|
|
71
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
72
|
+
* @returns {Promise<{ok: true} | {ok: false, code: number, error: string}>}
|
|
72
73
|
*/
|
|
73
|
-
export async function runAssertCommand(
|
|
74
|
-
const
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
export async function runAssertCommand(ctx) {
|
|
75
|
+
const runtime = ctx.deps.runtime;
|
|
76
|
+
const args = [ctx.args["test-name"], ctx.args.file];
|
|
77
|
+
let result;
|
|
78
|
+
try {
|
|
79
|
+
result = evaluateAssertion(ctx.options, args, runtime.fsSync);
|
|
80
|
+
} catch (err) {
|
|
81
|
+
return { ok: false, code: 1, error: err.message };
|
|
82
|
+
}
|
|
83
|
+
runtime.proc.stdout.write(JSON.stringify(result) + "\n");
|
|
84
|
+
return result.pass ? { ok: true } : { ok: false, code: 1, error: "" };
|
|
77
85
|
}
|
|
78
86
|
|
|
79
|
-
function assertExists(file) {
|
|
80
|
-
if (existsSync(file)) return { pass: true };
|
|
87
|
+
function assertExists(file, fsSync) {
|
|
88
|
+
if (fsSync.existsSync(file)) return { pass: true };
|
|
81
89
|
return { pass: false, message: `${file} not found` };
|
|
82
90
|
}
|
|
83
91
|
|
|
84
|
-
function assertGrep(pattern, file) {
|
|
85
|
-
const content = readFileSync(file, "utf8");
|
|
92
|
+
function assertGrep(pattern, file, fsSync) {
|
|
93
|
+
const content = fsSync.readFileSync(file, "utf8");
|
|
86
94
|
const re = new RegExp(pattern, "im");
|
|
87
95
|
if (re.test(content)) return { pass: true };
|
|
88
96
|
return {
|
|
@@ -91,8 +99,8 @@ function assertGrep(pattern, file) {
|
|
|
91
99
|
};
|
|
92
100
|
}
|
|
93
101
|
|
|
94
|
-
function assertQuery(expression, file) {
|
|
95
|
-
const content = readFileSync(file, "utf8");
|
|
102
|
+
function assertQuery(expression, file, fsSync) {
|
|
103
|
+
const content = fsSync.readFileSync(file, "utf8");
|
|
96
104
|
const data = parseJsonOrNdjson(content);
|
|
97
105
|
const result = jmespath.search(data, expression);
|
|
98
106
|
const truthy =
|
|
@@ -109,8 +117,8 @@ function assertQuery(expression, file) {
|
|
|
109
117
|
|
|
110
118
|
const JOB_TAG_RE = /<job\s+user="([^"]*)"\s+goal="([^"]*)">/;
|
|
111
119
|
|
|
112
|
-
function assertCitesJob(jobFile, file) {
|
|
113
|
-
const jobContent = readFileSync(jobFile, "utf8");
|
|
120
|
+
function assertCitesJob(jobFile, file, fsSync) {
|
|
121
|
+
const jobContent = fsSync.readFileSync(jobFile, "utf8");
|
|
114
122
|
const match = JOB_TAG_RE.exec(jobContent);
|
|
115
123
|
if (!match) {
|
|
116
124
|
return {
|
|
@@ -119,7 +127,7 @@ function assertCitesJob(jobFile, file) {
|
|
|
119
127
|
};
|
|
120
128
|
}
|
|
121
129
|
const citation = `${match[1]}: ${match[2]}`;
|
|
122
|
-
const content = readFileSync(file, "utf8");
|
|
130
|
+
const content = fsSync.readFileSync(file, "utf8");
|
|
123
131
|
if (content.includes(citation)) return { pass: true };
|
|
124
132
|
return { pass: false, message: `missing "${citation}"` };
|
|
125
133
|
}
|