@forwardimpact/libeval 0.1.51 → 0.1.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-benchmark.js +8 -14
- package/bin/fit-eval.js +8 -28
- package/bin/fit-selfedit.js +6 -4
- package/bin/fit-trace.js +7 -14
- package/package.json +1 -1
- package/src/benchmark/apm-installer.js +48 -44
- package/src/benchmark/invariants.js +51 -63
- package/src/benchmark/judge.js +13 -11
- package/src/benchmark/npm-installer.js +33 -33
- package/src/benchmark/report.js +25 -11
- package/src/benchmark/result.js +2 -2
- package/src/benchmark/runner.js +82 -38
- package/src/benchmark/task-family.js +74 -63
- package/src/benchmark/workdir.js +91 -99
- package/src/commands/benchmark-invariants.js +3 -3
- package/src/commands/benchmark-report.js +1 -0
- package/src/commands/benchmark-run.js +1 -1
- package/src/commands/by-discussion.js +10 -11
- package/src/commands/discuss.js +3 -2
- package/src/commands/facilitate.js +3 -2
- package/src/commands/output.js +4 -1
- package/src/commands/run.js +6 -2
- package/src/commands/supervise.js +3 -2
- package/src/commands/tee.js +24 -9
- package/src/commands/trace.js +7 -2
- package/src/discusser.js +7 -5
- package/src/events/github.js +7 -1
- package/src/facilitator.js +6 -5
- package/src/inbox-poller.js +5 -8
- package/src/judge.js +12 -13
- package/src/profile-prompt.js +124 -26
- package/src/redaction.js +3 -16
- package/src/supervisor.js +7 -0
- package/src/tee-writer.js +4 -2
- package/src/trace-collector.js +9 -2
- package/src/trace-github.js +47 -27
package/src/tee-writer.js
CHANGED
|
@@ -27,15 +27,17 @@ export class TeeWriter extends Writable {
|
|
|
27
27
|
* @param {import("stream").Writable} deps.fileStream - Stream to write raw NDJSON to
|
|
28
28
|
* @param {import("stream").Writable} deps.textStream - Stream to write human-readable text to
|
|
29
29
|
* @param {"raw"|"supervised"} [deps.mode] - Display mode: "raw" (no source labels) or "supervised" (source labels) (default: "raw")
|
|
30
|
+
* @param {function} [deps.now] - Injected ISO-timestamp source threaded into
|
|
31
|
+
* the internal `TraceCollector` (`() => isoTimestamp(runtime.clock.now())`).
|
|
30
32
|
*/
|
|
31
|
-
constructor({ fileStream, textStream, mode }) {
|
|
33
|
+
constructor({ fileStream, textStream, mode, now }) {
|
|
32
34
|
super();
|
|
33
35
|
if (!fileStream) throw new Error("fileStream is required");
|
|
34
36
|
if (!textStream) throw new Error("textStream is required");
|
|
35
37
|
this.fileStream = fileStream;
|
|
36
38
|
this.textStream = textStream;
|
|
37
39
|
this.mode = mode ?? "raw";
|
|
38
|
-
this.collector = new TraceCollector();
|
|
40
|
+
this.collector = new TraceCollector({ now });
|
|
39
41
|
this.turnsEmitted = 0;
|
|
40
42
|
}
|
|
41
43
|
|
package/src/trace-collector.js
CHANGED
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* one formatting path.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
13
|
+
|
|
12
14
|
import { renderTurnLines } from "./render/turn-renderer.js";
|
|
13
15
|
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
14
16
|
|
|
@@ -16,11 +18,16 @@ import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
|
16
18
|
export class TraceCollector {
|
|
17
19
|
/**
|
|
18
20
|
* @param {object} [deps]
|
|
19
|
-
* @param {function} [deps.now] - Returns ISO timestamp string.
|
|
21
|
+
* @param {function} [deps.now] - Returns an ISO timestamp string. Injected
|
|
22
|
+
* so the collector never reads the wall clock directly; construct it as
|
|
23
|
+
* `() => isoTimestamp(runtime.clock.now())`. When omitted (pure
|
|
24
|
+
* structural/replay use where every event already carries a `timestamp`),
|
|
25
|
+
* the fallback formats the epoch — a deterministic sentinel, not a clock
|
|
26
|
+
* read.
|
|
20
27
|
*/
|
|
21
28
|
constructor(deps = {}) {
|
|
22
29
|
/** @type {function} */
|
|
23
|
-
this.now = deps.now ?? (() =>
|
|
30
|
+
this.now = deps.now ?? (() => isoTimestamp(0));
|
|
24
31
|
/** @type {object|null} */
|
|
25
32
|
this.metadata = null;
|
|
26
33
|
/** @type {Array<object>} */
|
package/src/trace-github.js
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
import { execSync } from "node:child_process";
|
|
2
|
-
import { createWriteStream } from "node:fs";
|
|
3
|
-
import { mkdir } from "node:fs/promises";
|
|
4
1
|
import path from "node:path";
|
|
5
2
|
import { pipeline } from "node:stream/promises";
|
|
6
3
|
import { Readable } from "node:stream";
|
|
7
4
|
|
|
5
|
+
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
6
|
+
|
|
8
7
|
const API = "https://api.github.com";
|
|
9
8
|
|
|
10
9
|
/**
|
|
@@ -17,11 +16,15 @@ export class TraceGitHub {
|
|
|
17
16
|
* @param {string} deps.token - GitHub token
|
|
18
17
|
* @param {string} deps.owner - Repository owner
|
|
19
18
|
* @param {string} deps.repo - Repository name
|
|
19
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} deps.runtime -
|
|
20
|
+
* Ambient collaborators; uses `fs`, `subprocess`, `clock`.
|
|
20
21
|
*/
|
|
21
|
-
constructor({ token, owner, repo }) {
|
|
22
|
+
constructor({ token, owner, repo, runtime }) {
|
|
23
|
+
if (!runtime) throw new Error("runtime is required");
|
|
22
24
|
this.token = token;
|
|
23
25
|
this.owner = owner;
|
|
24
26
|
this.repo = repo;
|
|
27
|
+
this.runtime = runtime;
|
|
25
28
|
}
|
|
26
29
|
|
|
27
30
|
/**
|
|
@@ -35,7 +38,7 @@ export class TraceGitHub {
|
|
|
35
38
|
*/
|
|
36
39
|
async listRuns(opts = {}) {
|
|
37
40
|
const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
|
|
38
|
-
const cutoff = parseLookback(lookback);
|
|
41
|
+
const cutoff = parseLookback(lookback, this.runtime.clock.now());
|
|
39
42
|
|
|
40
43
|
const params = new URLSearchParams({
|
|
41
44
|
per_page: String(Math.min(limit, 100)),
|
|
@@ -77,8 +80,9 @@ export class TraceGitHub {
|
|
|
77
80
|
* @returns {Promise<{dir: string, artifact: string, files: string[]}>}
|
|
78
81
|
*/
|
|
79
82
|
async downloadTrace(runId, opts = {}) {
|
|
83
|
+
const fs = this.runtime.fs;
|
|
80
84
|
const dir = opts.dir ?? `/tmp/trace-${runId}`;
|
|
81
|
-
await mkdir(dir, { recursive: true });
|
|
85
|
+
await fs.mkdir(dir, { recursive: true });
|
|
82
86
|
|
|
83
87
|
// List artifacts for this run.
|
|
84
88
|
const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
|
|
@@ -121,15 +125,27 @@ export class TraceGitHub {
|
|
|
121
125
|
}
|
|
122
126
|
|
|
123
127
|
// Stream to disk then extract.
|
|
124
|
-
await pipeline(
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
`unzip -o -q ${JSON.stringify(zipPath)} -d ${JSON.stringify(dir)}`,
|
|
128
|
+
await pipeline(
|
|
129
|
+
Readable.fromWeb(response.body),
|
|
130
|
+
fs.createWriteStream(zipPath),
|
|
128
131
|
);
|
|
129
132
|
|
|
133
|
+
const unzip = await this.runtime.subprocess.run("unzip", [
|
|
134
|
+
"-o",
|
|
135
|
+
"-q",
|
|
136
|
+
zipPath,
|
|
137
|
+
"-d",
|
|
138
|
+
dir,
|
|
139
|
+
]);
|
|
140
|
+
if (unzip.exitCode !== 0) {
|
|
141
|
+
throw new Error(
|
|
142
|
+
`unzip failed (${unzip.exitCode}): ${unzip.stderr || unzip.stdout}`,
|
|
143
|
+
);
|
|
144
|
+
}
|
|
145
|
+
|
|
130
146
|
// List extracted files.
|
|
131
|
-
const
|
|
132
|
-
const files =
|
|
147
|
+
const entries = await fs.readdir(dir);
|
|
148
|
+
const files = entries.filter((f) => !f.endsWith(".zip"));
|
|
133
149
|
|
|
134
150
|
return { dir, artifact: artifact.name, files };
|
|
135
151
|
}
|
|
@@ -160,14 +176,15 @@ export class TraceGitHub {
|
|
|
160
176
|
* Parse a lookback duration string into an ISO date string.
|
|
161
177
|
* Supports: Nd (days), Nh (hours), Nw (weeks).
|
|
162
178
|
* @param {string} lookback
|
|
179
|
+
* @param {number} nowMs - Current time in ms (`runtime.clock.now()`).
|
|
163
180
|
* @returns {string|null} ISO date string or null if unparseable
|
|
164
181
|
*/
|
|
165
|
-
function parseLookback(lookback) {
|
|
182
|
+
function parseLookback(lookback, nowMs) {
|
|
166
183
|
const match = lookback.match(/^(\d+)([dhw])$/);
|
|
167
184
|
if (!match) return null;
|
|
168
185
|
const [, val, unit] = match;
|
|
169
186
|
const ms = { d: 86400000, h: 3600000, w: 604800000 }[unit];
|
|
170
|
-
return
|
|
187
|
+
return isoTimestamp(nowMs - parseInt(val, 10) * ms);
|
|
171
188
|
}
|
|
172
189
|
|
|
173
190
|
/**
|
|
@@ -203,22 +220,23 @@ export function parseGitRemote(remote) {
|
|
|
203
220
|
* 1. `GITHUB_REPOSITORY` env var (set automatically by GitHub Actions).
|
|
204
221
|
* 2. `git remote get-url origin` in the current working directory.
|
|
205
222
|
*
|
|
206
|
-
* @
|
|
223
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
224
|
+
* @returns {Promise<{owner: string, repo: string}>}
|
|
207
225
|
* @throws {Error} with a clear message if neither source yields a parseable slug.
|
|
208
226
|
*/
|
|
209
|
-
export function detectRepoSlug() {
|
|
210
|
-
const env =
|
|
227
|
+
export async function detectRepoSlug(runtime) {
|
|
228
|
+
const env = runtime.proc.env.GITHUB_REPOSITORY;
|
|
211
229
|
if (env && env.trim()) {
|
|
212
230
|
return parseGitRemote(env.trim());
|
|
213
231
|
}
|
|
214
232
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
233
|
+
const result = await runtime.subprocess.run("git", [
|
|
234
|
+
"remote",
|
|
235
|
+
"get-url",
|
|
236
|
+
"origin",
|
|
237
|
+
]);
|
|
238
|
+
const remote = result.exitCode === 0 ? result.stdout.trim() : "";
|
|
239
|
+
if (result.exitCode !== 0) {
|
|
222
240
|
throw new Error(
|
|
223
241
|
"Cannot detect repository: set --repo <owner/repo>, export GITHUB_REPOSITORY, or run inside a git checkout with an 'origin' remote.",
|
|
224
242
|
);
|
|
@@ -245,10 +263,12 @@ export function detectRepoSlug() {
|
|
|
245
263
|
* @param {object} opts
|
|
246
264
|
* @param {string} opts.token - GitHub token (e.g. from `Config.ghToken()`)
|
|
247
265
|
* @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
|
|
266
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} opts.runtime - Ambient collaborators.
|
|
248
267
|
* @returns {Promise<TraceGitHub>}
|
|
249
268
|
*/
|
|
250
269
|
export async function createTraceGitHub(opts = {}) {
|
|
251
|
-
const { token, repo: repoOverride } = opts;
|
|
270
|
+
const { token, repo: repoOverride, runtime } = opts;
|
|
271
|
+
if (!runtime) throw new Error("createTraceGitHub: runtime is required");
|
|
252
272
|
if (!token) {
|
|
253
273
|
throw new Error(
|
|
254
274
|
"createTraceGitHub: token is required (pass Config.ghToken())",
|
|
@@ -257,7 +277,7 @@ export async function createTraceGitHub(opts = {}) {
|
|
|
257
277
|
|
|
258
278
|
const { owner, repo } = repoOverride
|
|
259
279
|
? parseGitRemote(repoOverride)
|
|
260
|
-
: detectRepoSlug();
|
|
280
|
+
: await detectRepoSlug(runtime);
|
|
261
281
|
|
|
262
|
-
return new TraceGitHub({ token, owner, repo });
|
|
282
|
+
return new TraceGitHub({ token, owner, repo, runtime });
|
|
263
283
|
}
|