@agentv/core 4.10.0 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-3WGHC7LC.js +149 -0
- package/dist/chunk-3WGHC7LC.js.map +1 -0
- package/dist/{chunk-BWHUWLGW.js → chunk-5POFMJJ7.js} +1 -1
- package/dist/chunk-5POFMJJ7.js.map +1 -0
- package/dist/chunk-SDIANPEY.js +181 -0
- package/dist/chunk-SDIANPEY.js.map +1 -0
- package/dist/docker-workspace-RPPXBT27.js +9 -0
- package/dist/docker-workspace-RPPXBT27.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +70 -3
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +71 -4
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/exec-AR6JUUN5.js +9 -0
- package/dist/exec-AR6JUUN5.js.map +1 -0
- package/dist/index.cjs +1264 -468
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +191 -5
- package/dist/index.d.ts +191 -5
- package/dist/index.js +780 -342
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-BWHUWLGW.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -37,6 +37,161 @@ var init_cjs_shims = __esm({
|
|
|
37
37
|
}
|
|
38
38
|
});
|
|
39
39
|
|
|
40
|
+
// src/runtime/exec.ts
|
|
41
|
+
// Export namespace for src/runtime/exec.ts. Every entry is a zero-argument
// accessor passed to the bundler's __export helper (defined outside this view),
// so each binding is only looked up when its accessor is invoked.
var exec_exports = {};
{
  const execAccessors = {
    execFileWithStdin: () => execFileWithStdin,
    execShellWithStdin: () => execShellWithStdin
  };
  __export(exec_exports, execAccessors);
}
|
|
46
|
+
/**
 * Quote a path so it can be spliced into a shell command line.
 *
 * On win32 the value is wrapped in double quotes and every embedded `"` is
 * doubled. Everywhere else it is wrapped in single quotes and every embedded
 * `'` is replaced by the sequence `'"'"'`.
 */
function shellEscapePath(value) {
  const onWindows = process.platform === "win32";
  const quote = onWindows ? '"' : "'";
  const escapedQuote = onWindows ? '""' : `'"'"'`;
  const inner = value.split(quote).join(escapedQuote);
  return `${quote}${inner}${quote}`;
}
|
|
52
|
+
/**
 * Run an executable (no shell) with `stdinPayload` as its standard input.
 *
 * Rejects when `argv` is empty. Otherwise the call is delegated to the Bun
 * implementation when a global `Bun` exists, and to the Node implementation
 * in every other case; the delegate's result is returned unchanged.
 */
async function execFileWithStdin(argv, stdinPayload, options = {}) {
  if (argv.length === 0) {
    throw new Error("Executable argv must include at least one entry");
  }
  const runningUnderBun = typeof Bun !== "undefined";
  const runner = runningUnderBun ? execFileWithStdinBun : execFileWithStdinNode;
  return runner(argv, stdinPayload, options);
}
|
|
61
|
+
/**
 * Bun implementation of execFileWithStdin.
 *
 * Spawns a copy of `argv` through `Bun.spawn`, passing the UTF-8 encoded
 * payload as stdin and piping stdout/stderr. `options.env`, when given, is
 * layered over `process.env`. When `options.timeoutMs` is defined, the process
 * is sent SIGKILL after that delay and the call rejects with a timeout error
 * once the process has settled. CRLF in both output streams is normalized to LF.
 */
async function execFileWithStdinBun(argv, stdinPayload, options) {
  const proc = Bun.spawn([...argv], {
    cwd: options.cwd,
    stdin: new TextEncoder().encode(stdinPayload),
    stdout: "pipe",
    stderr: "pipe",
    // Extra variables are merged on top of the parent environment.
    env: options.env ? { ...process.env, ...options.env } : process.env
  });

  let timedOut = false;
  let killTimer;
  if (options.timeoutMs !== void 0) {
    killTimer = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGKILL");
    }, options.timeoutMs);
  }

  const drain = (stream) => (stream ? new Response(stream).text() : Promise.resolve(""));
  const toLf = (text) => text.replace(/\r\n/g, "\n");

  try {
    const [rawStdout, rawStderr, exitCode] = await Promise.all([
      drain(proc.stdout),
      drain(proc.stderr),
      proc.exited
    ]);
    if (timedOut) {
      throw new Error(`Process timed out after ${options.timeoutMs}ms`);
    }
    return { stdout: toLf(rawStdout), stderr: toLf(rawStderr), exitCode };
  } finally {
    if (killTimer !== void 0) {
      clearTimeout(killTimer);
    }
  }
}
|
|
99
|
+
/**
 * Node implementation of execFileWithStdin.
 *
 * Spawns `argv[0]` with the remaining entries as arguments (no shell), writes
 * `stdinPayload` to the child's stdin, and collects stdout/stderr.
 *
 * @param argv          Executable followed by its arguments.
 * @param stdinPayload  Text written to the child's stdin before it is closed.
 * @param options       `cwd`, `env` (merged over `process.env`) and
 *                      `timeoutMs` (SIGKILL after this delay) are read.
 * @returns Promise of `{ stdout, stderr, exitCode }` with CRLF normalized to LF.
 *          A child terminated by a signal reports `128 + signal number`
 *          instead of being mistaken for a successful exit.
 * @throws Rejects with the spawn error, with a timeout error when the
 *         deadline fired, or with an unexpected stdin write error.
 */
async function execFileWithStdinNode(argv, stdinPayload, options) {
  const { spawn: spawn5 } = await import("child_process");
  const { constants: osConstants } = await import("os");
  return new Promise((resolve, reject) => {
    const [cmd, ...args] = argv;
    const child = spawn5(cmd, args, {
      cwd: options.cwd,
      stdio: ["pipe", "pipe", "pipe"],
      // Merge additional env vars with process.env
      env: options.env ? { ...process.env, ...options.env } : process.env
    });
    const stdoutChunks = [];
    const stderrChunks = [];
    child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
    child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
    let timedOut = false;
    const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
    }, options.timeoutMs) : void 0;
    child.on("error", (error) => {
      if (timeout !== void 0) clearTimeout(timeout);
      reject(error);
    });
    child.on("close", (code, signal) => {
      if (timeout !== void 0) clearTimeout(timeout);
      if (timedOut) {
        reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
        return;
      }
      const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
      const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
      // `code` is null when the child was terminated by a signal. Reporting 0
      // there would make a killed process look successful, so use the
      // conventional 128 + signal number instead.
      const exitCode = code ?? (signal ? 128 + (osConstants.signals[signal] ?? 0) : 0);
      resolve({ stdout, stderr, exitCode });
    });
    if (child.stdin) {
      // A child that exits (or closes stdin) before the payload is drained
      // makes the write fail. Without a listener that becomes an unhandled
      // stream 'error'. The exit status is still delivered through 'close'.
      child.stdin.on("error", (error) => {
        if (error?.code === "EPIPE" || error?.code === "ERR_STREAM_DESTROYED") {
          return;
        }
        if (timeout !== void 0) clearTimeout(timeout);
        reject(error);
      });
      child.stdin.end(stdinPayload);
    }
  });
}
|
|
142
|
+
/**
 * Run `command` through the system shell with `stdinPayload` as its input.
 *
 * The payload is written to a file inside a fresh temporary directory and the
 * command is wrapped as `(command) < stdin > stdout 2> stderr`. All three
 * streams go through files, and the child itself is spawned with its stdio
 * ignored. The directory is removed afterwards, including on failure or timeout.
 *
 * @param command       Shell command line (interpreted by the shell as-is).
 * @param stdinPayload  Text made available on the command's stdin.
 * @param options       `cwd`, `env` (merged over `process.env`) and
 *                      `timeoutMs` are read; a falsy `timeoutMs` (including 0)
 *                      means no timeout.
 * @returns Promise of `{ stdout, stderr, exitCode }` with CRLF normalized to LF.
 *          A shell terminated by a signal reports `128 + signal number`.
 * @throws Rejects with the spawn error, a timeout error, or a file-system
 *         error from writing the payload or reading the captured output.
 */
async function execShellWithStdin(command, stdinPayload, options = {}) {
  const { mkdir, readFile, rm, writeFile } = await import("fs/promises");
  const { tmpdir, constants: osConstants } = await import("os");
  const path = await import("path");
  const { randomUUID } = await import("crypto");
  const { spawn } = await import("child_process");

  const dir = path.join(tmpdir(), `agentv-exec-${randomUUID()}`);
  const stdinPath = path.join(dir, "stdin.txt");
  const stdoutPath = path.join(dir, "stdout.txt");
  const stderrPath = path.join(dir, "stderr.txt");
  // The redirection syntax is the same for cmd.exe and POSIX shells; only the
  // path quoting differs, and shellEscapePath already handles that.
  const wrappedCommand = `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;

  try {
    // Directory creation and the payload write sit inside the try so a failure
    // at either step still reaches the cleanup below.
    await mkdir(dir, { recursive: true });
    await writeFile(stdinPath, stdinPayload, "utf8");

    const exitCode = await new Promise((resolve, reject) => {
      const child = spawn(wrappedCommand, {
        shell: true,
        cwd: options.cwd,
        stdio: ["ignore", "ignore", "ignore"],
        // Merge additional env vars with process.env
        env: options.env ? { ...process.env, ...options.env } : process.env
      });
      const timeout = options.timeoutMs ? setTimeout(() => {
        child.kill();
        reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
      }, options.timeoutMs) : void 0;
      child.on("error", (error) => {
        if (timeout !== void 0) {
          clearTimeout(timeout);
        }
        reject(error);
      });
      child.on("exit", (code, signal) => {
        if (timeout !== void 0) {
          clearTimeout(timeout);
        }
        // `code` is null when the shell was terminated by a signal; do not
        // report that as a successful exit.
        resolve(code ?? (signal ? 128 + (osConstants.signals[signal] ?? 0) : 0));
      });
    });

    const stdout = (await readFile(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
    const stderr = (await readFile(stderrPath, "utf8")).replace(/\r\n/g, "\n");
    return { stdout, stderr, exitCode };
  } finally {
    await rm(dir, { recursive: true, force: true });
  }
}
|
|
188
|
+
// Initializer for src/runtime/exec.ts, wrapped by the bundler's __esm helper
// (defined outside this view). The module body only runs the CJS shim setup.
var init_exec = __esm({
  "src/runtime/exec.ts": function () {
    "use strict";
    init_cjs_shims();
  }
});
|
|
194
|
+
|
|
40
195
|
// src/evaluation/providers/agentv-provider.ts
|
|
41
196
|
var agentv_provider_exports = {};
|
|
42
197
|
__export(agentv_provider_exports, {
|
|
@@ -109,6 +264,194 @@ var init_agentv_provider = __esm({
|
|
|
109
264
|
}
|
|
110
265
|
});
|
|
111
266
|
|
|
267
|
+
// src/evaluation/workspace/docker-workspace.ts
|
|
268
|
+
// Export namespace for src/evaluation/workspace/docker-workspace.ts. Every
// entry is a zero-argument accessor passed to the bundler's __export helper
// (defined outside this view), so the class bindings are read on access.
var docker_workspace_exports = {};
{
  const dockerWorkspaceAccessors = {
    DefaultCommandExecutor: () => DefaultCommandExecutor,
    DockerWorkspaceProvider: () => DockerWorkspaceProvider
  };
  __export(docker_workspace_exports, dockerWorkspaceAccessors);
}
|
|
273
|
+
/**
 * Build the argv for a git invocation.
 *
 * When `target` carries a truthy `path`, the command is pointed at that
 * directory with `-C <path>`; otherwise git is invoked without `-C`.
 */
function buildGitCommand(target, args) {
  const repoPath = target?.path;
  const directoryFlag = repoPath ? ["-C", repoPath] : [];
  return ["git", ...directoryFlag, ...args];
}
|
|
279
|
+
var DefaultCommandExecutor, DEFAULT_TIMEOUT_S, DockerWorkspaceProvider;
var init_docker_workspace = __esm({
  "src/evaluation/workspace/docker-workspace.ts"() {
    "use strict";
    init_cjs_shims();

    // Throws `<label> failed (exit N): <trimmed stderr>` unless the command
    // result reports exit code 0.
    const requireZeroExit = (label, outcome) => {
      if (outcome.exitCode !== 0) {
        throw new Error(`${label} failed (exit ${outcome.exitCode}): ${outcome.stderr.trim()}`);
      }
    };

    /** Executor that runs argv on the host via execFileWithStdin. */
    DefaultCommandExecutor = class {
      /** Run `argv`, feeding `options.stdin` (empty string if absent); only `timeoutMs` is forwarded. */
      async exec(argv, options = {}) {
        const { execFileWithStdin: runFile } = await Promise.resolve().then(() => (init_exec(), exec_exports));
        const { stdin, timeoutMs } = options;
        return runFile(argv, stdin ?? "", { timeoutMs });
      }
    };

    // Default per-command timeout in seconds, used when config.timeout is unset.
    DEFAULT_TIMEOUT_S = 1800;

    /** Drives the Docker CLI (through an executor) to manage a grading container. */
    DockerWorkspaceProvider = class {
      config;
      executor;
      timeoutMs;

      /** `executor` defaults to DefaultCommandExecutor; `config.timeout` is in seconds. */
      constructor(config, executor) {
        this.config = config;
        this.executor = executor ?? new DefaultCommandExecutor();
        const timeoutSeconds = config.timeout ?? DEFAULT_TIMEOUT_S;
        this.timeoutMs = timeoutSeconds * 1e3;
      }

      /** Resolves true when `docker version` exits 0, false on any failure. */
      async isDockerAvailable() {
        const probe = ["docker", "version", "--format", "{{.Server.Version}}"];
        try {
          const result = await this.executor.exec(probe, { timeoutMs: 1e4 });
          return result.exitCode === 0;
        } catch {
          return false;
        }
      }

      /** Pull the configured image unless `docker image inspect` already succeeds for it. */
      async pullImage() {
        const inspected = await this.executor.exec(
          ["docker", "image", "inspect", this.config.image],
          { timeoutMs: 1e4 }
        );
        if (inspected.exitCode === 0) {
          return;
        }
        const pulled = await this.executor.exec(
          ["docker", "pull", this.config.image],
          { timeoutMs: this.timeoutMs }
        );
        requireZeroExit("docker pull", pulled);
      }

      /** `docker create` the image running `sleep infinity`; returns trimmed stdout (the container ID). */
      async createContainer() {
        const { memory, cpus, image } = this.config;
        const memoryFlag = memory ? [`--memory=${memory}`] : [];
        const cpuFlag = cpus !== void 0 ? [`--cpus=${cpus}`] : [];
        const argv = ["docker", "create", ...memoryFlag, ...cpuFlag, image, "sleep", "infinity"];
        const created = await this.executor.exec(argv, { timeoutMs: 3e4 });
        requireZeroExit("docker create", created);
        return created.stdout.trim();
      }

      /** `docker start` a previously created container. */
      async startContainer(containerId) {
        const started = await this.executor.exec(
          ["docker", "start", containerId],
          { timeoutMs: 3e4 }
        );
        requireZeroExit("docker start", started);
      }

      /**
       * For each checkout target, hard-reset the repository inside the
       * container to `target.ref`, then confirm that `git rev-parse HEAD`
       * prints exactly that ref. No-op when no targets are given.
       */
      async resetContainerCheckout(containerId, repoCheckouts) {
        const nothingToReset = !repoCheckouts || repoCheckouts.length === 0;
        if (nothingToReset) {
          return;
        }
        for (const target of repoCheckouts) {
          const resetResult = await this.execInContainer({
            containerId,
            command: buildGitCommand(target, ["reset", "--hard", target.ref])
          });
          requireZeroExit("docker git reset", resetResult);

          const verifyResult = await this.execInContainer({
            containerId,
            command: buildGitCommand(target, ["rev-parse", "HEAD"]),
            timeoutMs: 3e4
          });
          requireZeroExit("docker checkout verification", verifyResult);

          const head = verifyResult.stdout.trim();
          if (head === target.ref) {
            continue;
          }
          throw new Error(
            `docker checkout verification failed: expected ${target.ref} but found ${head || "<empty>"}`
          );
        }
      }

      /** `docker cp` a local path to `containerId:containerPath`. */
      async copyToContainer(containerId, localPath, containerPath) {
        const destination = `${containerId}:${containerPath}`;
        const copied = await this.executor.exec(
          ["docker", "cp", localPath, destination],
          { timeoutMs: 6e4 }
        );
        requireZeroExit("docker cp", copied);
      }

      /**
       * `docker exec` a command in the container. `-i` is added only when
       * stdin is provided; the timeout falls back to the provider default.
       */
      async execInContainer(options) {
        const { containerId, command, timeoutMs, stdin } = options;
        const interactiveFlag = stdin !== void 0 ? ["-i"] : [];
        const argv = ["docker", "exec", ...interactiveFlag, containerId, ...command];
        return this.executor.exec(argv, {
          timeoutMs: timeoutMs ?? this.timeoutMs,
          stdin
        });
      }

      /** `docker rm -f` the container; executor errors are deliberately ignored. */
      async removeContainer(containerId) {
        try {
          await this.executor.exec(["docker", "rm", "-f", containerId], {
            timeoutMs: 3e4
          });
        } catch {
        }
      }

      /**
       * Create a container, start it, reset checkouts, copy files in, run the
       * command, and always remove the container afterwards.
       */
      async runGraderInContainer(options) {
        const containerId = await this.createContainer();
        try {
          await this.startContainer(containerId);
          await this.resetContainerCheckout(containerId, options.repoCheckouts);
          for (const file of options.copyFiles || []) {
            await this.copyToContainer(containerId, file.localPath, file.containerPath);
          }
          const { command, stdin } = options;
          return await this.execInContainer({ containerId, command, stdin });
        } finally {
          await this.removeContainer(containerId);
        }
      }
    };
  }
});
|
|
454
|
+
|
|
112
455
|
// ../../node_modules/.bun/@opentelemetry+core@2.5.1+460773ef8ff1e07c/node_modules/@opentelemetry/core/build/esm/trace/suppress-tracing.js
|
|
113
456
|
function suppressTracing(context2) {
|
|
114
457
|
return context2.setValue(SUPPRESS_TRACING_KEY, true);
|
|
@@ -1355,13 +1698,13 @@ function serializeAttributeValue(value) {
|
|
|
1355
1698
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1356
1699
|
return { stringValue: String(value) };
|
|
1357
1700
|
}
|
|
1358
|
-
var
|
|
1701
|
+
var import_promises39, import_node_path55, OtlpJsonFileExporter;
|
|
1359
1702
|
var init_otlp_json_file_exporter = __esm({
|
|
1360
1703
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1361
1704
|
"use strict";
|
|
1362
1705
|
init_cjs_shims();
|
|
1363
|
-
|
|
1364
|
-
|
|
1706
|
+
import_promises39 = require("fs/promises");
|
|
1707
|
+
import_node_path55 = require("path");
|
|
1365
1708
|
OtlpJsonFileExporter = class {
|
|
1366
1709
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1367
1710
|
spans = [];
|
|
@@ -1400,7 +1743,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1400
1743
|
}
|
|
1401
1744
|
async flush() {
|
|
1402
1745
|
if (this.spans.length === 0) return;
|
|
1403
|
-
await (0,
|
|
1746
|
+
await (0, import_promises39.mkdir)((0, import_node_path55.dirname)(this.filePath), { recursive: true });
|
|
1404
1747
|
const otlpJson = {
|
|
1405
1748
|
resourceSpans: [
|
|
1406
1749
|
{
|
|
@@ -1434,6 +1777,7 @@ __export(index_exports, {
|
|
|
1434
1777
|
DEFAULT_EXPLORATION_TOOLS: () => DEFAULT_EXPLORATION_TOOLS,
|
|
1435
1778
|
DEFAULT_THRESHOLD: () => DEFAULT_THRESHOLD,
|
|
1436
1779
|
DeterministicAssertionEvaluator: () => DeterministicAssertionEvaluator,
|
|
1780
|
+
DockerWorkspaceProvider: () => DockerWorkspaceProvider,
|
|
1437
1781
|
EvaluatorRegistry: () => EvaluatorRegistry,
|
|
1438
1782
|
ExecutionMetricsEvaluator: () => ExecutionMetricsEvaluator,
|
|
1439
1783
|
FieldAccuracyEvaluator: () => FieldAccuracyEvaluator,
|
|
@@ -1469,9 +1813,11 @@ __export(index_exports, {
|
|
|
1469
1813
|
buildSearchRoots: () => buildSearchRoots2,
|
|
1470
1814
|
calculateRubricScore: () => calculateRubricScore,
|
|
1471
1815
|
captureFileChanges: () => captureFileChanges,
|
|
1816
|
+
checkoutResultsRepoBranch: () => checkoutResultsRepoBranch,
|
|
1472
1817
|
clampScore: () => clampScore,
|
|
1473
1818
|
cleanupEvalWorkspaces: () => cleanupEvalWorkspaces,
|
|
1474
1819
|
cleanupWorkspace: () => cleanupWorkspace,
|
|
1820
|
+
commitAndPushResultsBranch: () => commitAndPushResultsBranch,
|
|
1475
1821
|
computeTraceSummary: () => computeTraceSummary,
|
|
1476
1822
|
computeWorkspaceFingerprint: () => computeWorkspaceFingerprint,
|
|
1477
1823
|
consumeClaudeLogEntries: () => consumeClaudeLogEntries,
|
|
@@ -1482,6 +1828,7 @@ __export(index_exports, {
|
|
|
1482
1828
|
createAgentKernel: () => createAgentKernel,
|
|
1483
1829
|
createBuiltinProviderRegistry: () => createBuiltinProviderRegistry,
|
|
1484
1830
|
createBuiltinRegistry: () => createBuiltinRegistry,
|
|
1831
|
+
createDraftResultsPr: () => createDraftResultsPr,
|
|
1485
1832
|
createProvider: () => createProvider,
|
|
1486
1833
|
createTempWorkspace: () => createTempWorkspace,
|
|
1487
1834
|
deepEqual: () => deepEqual,
|
|
@@ -1489,6 +1836,7 @@ __export(index_exports, {
|
|
|
1489
1836
|
deriveCategory: () => deriveCategory,
|
|
1490
1837
|
deriveProjectId: () => deriveProjectId,
|
|
1491
1838
|
detectFormat: () => detectFormat,
|
|
1839
|
+
directorySizeBytes: () => directorySizeBytes,
|
|
1492
1840
|
discoverAssertions: () => discoverAssertions,
|
|
1493
1841
|
discoverClaudeSessions: () => discoverClaudeSessions,
|
|
1494
1842
|
discoverCodexSessions: () => discoverCodexSessions,
|
|
@@ -1497,6 +1845,7 @@ __export(index_exports, {
|
|
|
1497
1845
|
discoverJudges: () => discoverGraders,
|
|
1498
1846
|
discoverProjects: () => discoverProjects,
|
|
1499
1847
|
discoverProviders: () => discoverProviders,
|
|
1848
|
+
ensureResultsRepoClone: () => ensureResultsRepoClone,
|
|
1500
1849
|
ensureVSCodeSubagents: () => ensureVSCodeSubagents,
|
|
1501
1850
|
evaluate: () => evaluate,
|
|
1502
1851
|
executeScript: () => executeScript,
|
|
@@ -1521,6 +1870,8 @@ __export(index_exports, {
|
|
|
1521
1870
|
getOutputFilenames: () => getOutputFilenames,
|
|
1522
1871
|
getProject: () => getProject,
|
|
1523
1872
|
getProjectsRegistryPath: () => getProjectsRegistryPath,
|
|
1873
|
+
getResultsRepoCachePaths: () => getResultsRepoCachePaths,
|
|
1874
|
+
getResultsRepoStatus: () => getResultsRepoStatus,
|
|
1524
1875
|
getSubagentsRoot: () => getSubagentsRoot,
|
|
1525
1876
|
getTextContent: () => getTextContent,
|
|
1526
1877
|
getTraceStateRoot: () => getTraceStateRoot,
|
|
@@ -1550,12 +1901,15 @@ __export(index_exports, {
|
|
|
1550
1901
|
mergeExecutionMetrics: () => mergeExecutionMetrics,
|
|
1551
1902
|
negateScore: () => negateScore,
|
|
1552
1903
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
1904
|
+
normalizeResultsExportConfig: () => normalizeResultsExportConfig,
|
|
1553
1905
|
parseAgentSkillsEvals: () => parseAgentSkillsEvals,
|
|
1554
1906
|
parseClaudeSession: () => parseClaudeSession,
|
|
1555
1907
|
parseCodexSession: () => parseCodexSession,
|
|
1556
1908
|
parseCopilotEvents: () => parseCopilotEvents,
|
|
1557
1909
|
parseJsonFromText: () => parseJsonFromText,
|
|
1558
1910
|
parseJsonSafe: () => parseJsonSafe,
|
|
1911
|
+
prepareResultsRepoBranch: () => prepareResultsRepoBranch,
|
|
1912
|
+
pushResultsRepoBranch: () => pushResultsRepoBranch,
|
|
1559
1913
|
readJsonFile: () => readJsonFile,
|
|
1560
1914
|
readTargetDefinitions: () => readTargetDefinitions,
|
|
1561
1915
|
readTestSuiteMetadata: () => readTestSuiteMetadata,
|
|
@@ -1566,6 +1920,8 @@ __export(index_exports, {
|
|
|
1566
1920
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
1567
1921
|
resolveDelegatedTargetDefinition: () => resolveDelegatedTargetDefinition,
|
|
1568
1922
|
resolveFileReference: () => resolveFileReference3,
|
|
1923
|
+
resolveResultsRepoRunsDir: () => resolveResultsRepoRunsDir,
|
|
1924
|
+
resolveResultsRepoUrl: () => resolveResultsRepoUrl,
|
|
1569
1925
|
resolveTargetDefinition: () => resolveTargetDefinition,
|
|
1570
1926
|
resolveWorkspaceTemplate: () => resolveWorkspaceTemplate,
|
|
1571
1927
|
rubricEvaluationSchema: () => rubricEvaluationSchema,
|
|
@@ -1587,12 +1943,14 @@ __export(index_exports, {
|
|
|
1587
1943
|
scoreToVerdict: () => scoreToVerdict,
|
|
1588
1944
|
shouldEnableCache: () => shouldEnableCache,
|
|
1589
1945
|
shouldSkipCacheForTemperature: () => shouldSkipCacheForTemperature,
|
|
1946
|
+
stageResultsArtifacts: () => stageResultsArtifacts,
|
|
1590
1947
|
subscribeToClaudeLogEntries: () => subscribeToClaudeLogEntries,
|
|
1591
1948
|
subscribeToCodexLogEntries: () => subscribeToCodexLogEntries,
|
|
1592
1949
|
subscribeToCopilotCliLogEntries: () => subscribeToCopilotCliLogEntries,
|
|
1593
1950
|
subscribeToCopilotSdkLogEntries: () => subscribeToCopilotSdkLogEntries,
|
|
1594
1951
|
subscribeToPiLogEntries: () => subscribeToPiLogEntries,
|
|
1595
1952
|
substituteVariables: () => substituteVariables,
|
|
1953
|
+
syncResultsRepo: () => syncResultsRepo,
|
|
1596
1954
|
toCamelCaseDeep: () => toCamelCaseDeep,
|
|
1597
1955
|
toSnakeCaseDeep: () => toSnakeCaseDeep,
|
|
1598
1956
|
toTranscriptJsonLine: () => toTranscriptJsonLine,
|
|
@@ -1829,10 +2187,10 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
1829
2187
|
|
|
1830
2188
|
// src/evaluation/yaml-parser.ts
|
|
1831
2189
|
init_cjs_shims();
|
|
1832
|
-
var
|
|
2190
|
+
var import_promises10 = require("fs/promises");
|
|
1833
2191
|
var import_node_path9 = __toESM(require("path"), 1);
|
|
1834
2192
|
var import_micromatch2 = __toESM(require("micromatch"), 1);
|
|
1835
|
-
var
|
|
2193
|
+
var import_yaml5 = require("yaml");
|
|
1836
2194
|
|
|
1837
2195
|
// src/evaluation/input-message-utils.ts
|
|
1838
2196
|
init_cjs_shims();
|
|
@@ -2261,10 +2619,12 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2261
2619
|
parsed.execution,
|
|
2262
2620
|
configPath
|
|
2263
2621
|
);
|
|
2622
|
+
const results = parseResultsConfig(parsed.results, configPath);
|
|
2264
2623
|
return {
|
|
2265
2624
|
required_version: requiredVersion,
|
|
2266
2625
|
eval_patterns: evalPatterns,
|
|
2267
|
-
execution: executionDefaults
|
|
2626
|
+
execution: executionDefaults,
|
|
2627
|
+
results
|
|
2268
2628
|
};
|
|
2269
2629
|
} catch (error) {
|
|
2270
2630
|
logWarning(
|
|
@@ -2499,166 +2859,77 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
2499
2859
|
}
|
|
2500
2860
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
2501
2861
|
}
|
|
2862
|
+
/**
 * Parse the `results` section of a config file.
 *
 * Returns `{ export }` when the section is an object whose `export` entry
 * parses successfully. Returns undefined when the section is absent or when
 * the export entry yields nothing. A section that is not a plain
 * (non-array) object is reported with a warning and also yields undefined.
 */
function parseResultsConfig(raw, configPath) {
  if (raw == null) {
    return void 0;
  }
  const isObjectSection = typeof raw === "object" && !Array.isArray(raw);
  if (!isObjectSection) {
    logWarning(`Invalid results in ${configPath}, expected object`);
    return void 0;
  }
  const exportConfig = parseResultsExportConfig(raw.export, configPath);
  return exportConfig ? { export: exportConfig } : void 0;
}
|
|
2877
|
+
/**
 * Parse the `results.export` section of a config file.
 *
 * Required: `repo` and `path`, both non-empty strings after trimming.
 * Optional: `auto_push` (boolean) and `branch_prefix` (non-empty string,
 * trimmed). Any invalid field is reported through logWarning and makes the
 * whole section resolve to undefined; an absent section also yields undefined.
 */
function parseResultsExportConfig(raw, configPath) {
  if (raw == null) {
    return void 0;
  }
  if (typeof raw !== "object" || Array.isArray(raw)) {
    logWarning(`Invalid results.export in ${configPath}, expected object`);
    return void 0;
  }

  const trimmedOrEmpty = (candidate) => (typeof candidate === "string" ? candidate.trim() : "");
  const repo = trimmedOrEmpty(raw.repo);
  const exportPath = trimmedOrEmpty(raw.path);
  if (repo === "") {
    logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`);
    return void 0;
  }
  if (exportPath === "") {
    logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`);
    return void 0;
  }

  const autoPush = raw.auto_push;
  const autoPushIsBoolean = typeof autoPush === "boolean";
  if (autoPush !== void 0 && !autoPushIsBoolean) {
    logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`);
    return void 0;
  }

  const rawPrefix = raw.branch_prefix;
  let branchPrefix;
  if (rawPrefix !== void 0) {
    const prefixIsUsable = typeof rawPrefix === "string" && rawPrefix.trim().length > 0;
    if (!prefixIsUsable) {
      logWarning(
        `Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`
      );
      return void 0;
    }
    branchPrefix = rawPrefix.trim();
  }

  // Optional keys are only present on the result when they were supplied.
  const exportConfig = { repo, path: exportPath };
  if (autoPushIsBoolean) {
    exportConfig.auto_push = autoPush;
  }
  if (branchPrefix) {
    exportConfig.branch_prefix = branchPrefix;
  }
  return exportConfig;
}
|
|
2502
2917
|
/**
 * Emit `Warning: <message>` through console.warn, wrapped in the
 * ANSI_YELLOW2 / ANSI_RESET3 sequences (constants defined outside this view).
 */
function logWarning(message) {
  const line = `${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`;
  console.warn(line);
}
|
|
2505
2920
|
|
|
2506
2921
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
2507
2922
|
init_cjs_shims();
|
|
2923
|
+
var import_promises7 = require("fs/promises");
|
|
2508
2924
|
var import_node_path6 = __toESM(require("path"), 1);
|
|
2925
|
+
var import_yaml3 = require("yaml");
|
|
2509
2926
|
|
|
2510
2927
|
// src/evaluation/content-preprocessor.ts
|
|
2511
2928
|
init_cjs_shims();
|
|
2512
2929
|
var import_promises5 = require("fs/promises");
|
|
2513
2930
|
var import_node_path5 = __toESM(require("path"), 1);
|
|
2514
2931
|
var import_node_url2 = require("url");
|
|
2515
|
-
|
|
2516
|
-
// src/runtime/exec.ts
|
|
2517
|
-
init_cjs_shims();
|
|
2518
|
-
function shellEscapePath(value) {
|
|
2519
|
-
if (process.platform === "win32") {
|
|
2520
|
-
return `"${value.replaceAll('"', '""')}"`;
|
|
2521
|
-
}
|
|
2522
|
-
return `'${value.replaceAll("'", `'"'"'`)}'`;
|
|
2523
|
-
}
|
|
2524
|
-
async function execFileWithStdin(argv, stdinPayload, options = {}) {
|
|
2525
|
-
if (argv.length === 0) {
|
|
2526
|
-
throw new Error("Executable argv must include at least one entry");
|
|
2527
|
-
}
|
|
2528
|
-
if (typeof Bun !== "undefined") {
|
|
2529
|
-
return execFileWithStdinBun(argv, stdinPayload, options);
|
|
2530
|
-
}
|
|
2531
|
-
return execFileWithStdinNode(argv, stdinPayload, options);
|
|
2532
|
-
}
|
|
2533
|
-
async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
2534
|
-
const command = [...argv];
|
|
2535
|
-
const encoder = new TextEncoder();
|
|
2536
|
-
const proc = Bun.spawn(command, {
|
|
2537
|
-
cwd: options.cwd,
|
|
2538
|
-
stdin: encoder.encode(stdinPayload),
|
|
2539
|
-
stdout: "pipe",
|
|
2540
|
-
stderr: "pipe",
|
|
2541
|
-
// Merge additional env vars with process.env
|
|
2542
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
2543
|
-
});
|
|
2544
|
-
let timedOut = false;
|
|
2545
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
2546
|
-
timedOut = true;
|
|
2547
|
-
proc.kill("SIGKILL");
|
|
2548
|
-
}, options.timeoutMs) : void 0;
|
|
2549
|
-
try {
|
|
2550
|
-
const stdoutPromise = proc.stdout ? new Response(proc.stdout).text() : Promise.resolve("");
|
|
2551
|
-
const stderrPromise = proc.stderr ? new Response(proc.stderr).text() : Promise.resolve("");
|
|
2552
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
2553
|
-
stdoutPromise,
|
|
2554
|
-
stderrPromise,
|
|
2555
|
-
proc.exited
|
|
2556
|
-
]);
|
|
2557
|
-
if (timedOut) {
|
|
2558
|
-
throw new Error(`Process timed out after ${options.timeoutMs}ms`);
|
|
2559
|
-
}
|
|
2560
|
-
return {
|
|
2561
|
-
stdout: stdout.replace(/\r\n/g, "\n"),
|
|
2562
|
-
stderr: stderr.replace(/\r\n/g, "\n"),
|
|
2563
|
-
exitCode
|
|
2564
|
-
};
|
|
2565
|
-
} finally {
|
|
2566
|
-
if (timeout !== void 0) {
|
|
2567
|
-
clearTimeout(timeout);
|
|
2568
|
-
}
|
|
2569
|
-
}
|
|
2570
|
-
}
|
|
2571
|
-
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
2572
|
-
const { spawn: spawn5 } = await import("child_process");
|
|
2573
|
-
return new Promise((resolve, reject) => {
|
|
2574
|
-
const [cmd, ...args] = argv;
|
|
2575
|
-
const child = spawn5(cmd, args, {
|
|
2576
|
-
cwd: options.cwd,
|
|
2577
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
2578
|
-
// Merge additional env vars with process.env
|
|
2579
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
2580
|
-
});
|
|
2581
|
-
const stdoutChunks = [];
|
|
2582
|
-
const stderrChunks = [];
|
|
2583
|
-
child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
|
|
2584
|
-
child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
|
|
2585
|
-
let timedOut = false;
|
|
2586
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
2587
|
-
timedOut = true;
|
|
2588
|
-
child.kill("SIGKILL");
|
|
2589
|
-
}, options.timeoutMs) : void 0;
|
|
2590
|
-
child.on("error", (error) => {
|
|
2591
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
2592
|
-
reject(error);
|
|
2593
|
-
});
|
|
2594
|
-
child.on("close", (code) => {
|
|
2595
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
2596
|
-
if (timedOut) {
|
|
2597
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
2598
|
-
return;
|
|
2599
|
-
}
|
|
2600
|
-
const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
2601
|
-
const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
2602
|
-
resolve({
|
|
2603
|
-
stdout,
|
|
2604
|
-
stderr,
|
|
2605
|
-
exitCode: code ?? 0
|
|
2606
|
-
});
|
|
2607
|
-
});
|
|
2608
|
-
if (child.stdin) {
|
|
2609
|
-
child.stdin.write(stdinPayload);
|
|
2610
|
-
child.stdin.end();
|
|
2611
|
-
}
|
|
2612
|
-
});
|
|
2613
|
-
}
|
|
2614
|
-
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
2615
|
-
const { mkdir: mkdir17, readFile: readFile19, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
2616
|
-
const { tmpdir: tmpdir3 } = await import("os");
|
|
2617
|
-
const path55 = await import("path");
|
|
2618
|
-
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
2619
|
-
const dir = path55.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
2620
|
-
await mkdir17(dir, { recursive: true });
|
|
2621
|
-
const stdinPath = path55.join(dir, "stdin.txt");
|
|
2622
|
-
const stdoutPath = path55.join(dir, "stdout.txt");
|
|
2623
|
-
const stderrPath = path55.join(dir, "stderr.txt");
|
|
2624
|
-
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
2625
|
-
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
2626
|
-
const { spawn: spawn5 } = await import("child_process");
|
|
2627
|
-
try {
|
|
2628
|
-
const exitCode = await new Promise((resolve, reject) => {
|
|
2629
|
-
const child = spawn5(wrappedCommand, {
|
|
2630
|
-
shell: true,
|
|
2631
|
-
cwd: options.cwd,
|
|
2632
|
-
stdio: ["ignore", "ignore", "ignore"],
|
|
2633
|
-
// Merge additional env vars with process.env
|
|
2634
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
2635
|
-
});
|
|
2636
|
-
const timeout = options.timeoutMs ? setTimeout(() => {
|
|
2637
|
-
child.kill();
|
|
2638
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
2639
|
-
}, options.timeoutMs) : void 0;
|
|
2640
|
-
child.on("error", (error) => {
|
|
2641
|
-
if (timeout !== void 0) {
|
|
2642
|
-
clearTimeout(timeout);
|
|
2643
|
-
}
|
|
2644
|
-
reject(error);
|
|
2645
|
-
});
|
|
2646
|
-
child.on("exit", (code) => {
|
|
2647
|
-
if (timeout !== void 0) {
|
|
2648
|
-
clearTimeout(timeout);
|
|
2649
|
-
}
|
|
2650
|
-
resolve(code ?? 0);
|
|
2651
|
-
});
|
|
2652
|
-
});
|
|
2653
|
-
const stdout = (await readFile19(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2654
|
-
const stderr = (await readFile19(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2655
|
-
return { stdout, stderr, exitCode };
|
|
2656
|
-
} finally {
|
|
2657
|
-
await rm6(dir, { recursive: true, force: true });
|
|
2658
|
-
}
|
|
2659
|
-
}
|
|
2660
|
-
|
|
2661
|
-
// src/evaluation/content-preprocessor.ts
|
|
2932
|
+
init_exec();
|
|
2662
2933
|
var MIME_TYPE_ALIASES = {
|
|
2663
2934
|
csv: "text/csv",
|
|
2664
2935
|
docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
@@ -2901,6 +3172,7 @@ function validateTemplateVariables(content, source) {
|
|
|
2901
3172
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
2902
3173
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2903
3174
|
var ANSI_RESET5 = "\x1B[0m";
|
|
3175
|
+
var MAX_ASSERTION_INCLUDE_DEPTH = 3;
|
|
2904
3176
|
var PROMPT_FILE_PREFIX = "file://";
|
|
2905
3177
|
function normalizeEvaluatorType(type) {
|
|
2906
3178
|
return type.replace(/_/g, "-");
|
|
@@ -2933,7 +3205,79 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
|
|
|
2933
3205
|
const evaluators = [...parsedCase ?? [], ...parsedRoot ?? []];
|
|
2934
3206
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
2935
3207
|
}
|
|
2936
|
-
|
|
3208
|
+
function isIncludeEntry(value) {
|
|
3209
|
+
return isJsonObject2(value) && typeof value.include === "string" && Object.keys(value).length === 1;
|
|
3210
|
+
}
|
|
3211
|
+
function isTemplateReference(value) {
|
|
3212
|
+
return !value.startsWith(".") && !value.includes("/") && !value.includes("\\");
|
|
3213
|
+
}
|
|
3214
|
+
async function resolveAssertionTemplateReference(include, searchRoots) {
|
|
3215
|
+
const templateCandidates = isTemplateReference(include) ? [
|
|
3216
|
+
import_node_path6.default.join(".agentv", "templates", `${include}.yaml`),
|
|
3217
|
+
import_node_path6.default.join(".agentv", "templates", `${include}.yml`)
|
|
3218
|
+
] : [include];
|
|
3219
|
+
const attempted = [];
|
|
3220
|
+
for (const candidate of templateCandidates) {
|
|
3221
|
+
const resolved = await resolveFileReference2(candidate, searchRoots);
|
|
3222
|
+
attempted.push(...resolved.attempted);
|
|
3223
|
+
if (resolved.resolvedPath) {
|
|
3224
|
+
return {
|
|
3225
|
+
displayPath: resolved.displayPath,
|
|
3226
|
+
resolvedPath: resolved.resolvedPath,
|
|
3227
|
+
attempted
|
|
3228
|
+
};
|
|
3229
|
+
}
|
|
3230
|
+
}
|
|
3231
|
+
return {
|
|
3232
|
+
displayPath: templateCandidates[0] ?? include,
|
|
3233
|
+
resolvedPath: "",
|
|
3234
|
+
attempted
|
|
3235
|
+
};
|
|
3236
|
+
}
|
|
3237
|
+
async function loadAssertionTemplateEntries(include, searchRoots, evalId, includeContext) {
|
|
3238
|
+
const nextDepth = includeContext.depth + 1;
|
|
3239
|
+
if (nextDepth > MAX_ASSERTION_INCLUDE_DEPTH) {
|
|
3240
|
+
const chain = [...includeContext.chain, include].join(" -> ");
|
|
3241
|
+
throw new Error(
|
|
3242
|
+
`Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${chain}`
|
|
3243
|
+
);
|
|
3244
|
+
}
|
|
3245
|
+
const resolved = await resolveAssertionTemplateReference(include, searchRoots);
|
|
3246
|
+
if (!resolved.resolvedPath) {
|
|
3247
|
+
const attempted = resolved.attempted.length > 0 ? `
|
|
3248
|
+
${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
3249
|
+
throw new Error(
|
|
3250
|
+
`Assertion template not found in '${evalId}': ${resolved.displayPath}${attempted}`
|
|
3251
|
+
);
|
|
3252
|
+
}
|
|
3253
|
+
if (includeContext.chain.includes(resolved.resolvedPath)) {
|
|
3254
|
+
const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
|
|
3255
|
+
throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
|
|
3256
|
+
}
|
|
3257
|
+
const content = await (0, import_promises7.readFile)(resolved.resolvedPath, "utf8");
|
|
3258
|
+
const parsed = interpolateEnv((0, import_yaml3.parse)(content), process.env);
|
|
3259
|
+
if (!isJsonObject2(parsed)) {
|
|
3260
|
+
throw new Error(
|
|
3261
|
+
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} (expected a YAML object with an assertions array)`
|
|
3262
|
+
);
|
|
3263
|
+
}
|
|
3264
|
+
const assertions = parsed.assertions;
|
|
3265
|
+
if (!Array.isArray(assertions)) {
|
|
3266
|
+
throw new Error(
|
|
3267
|
+
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
|
|
3268
|
+
);
|
|
3269
|
+
}
|
|
3270
|
+
const templateDir = import_node_path6.default.dirname(resolved.resolvedPath);
|
|
3271
|
+
const nestedSearchRoots = [
|
|
3272
|
+
templateDir,
|
|
3273
|
+
...searchRoots.filter((root) => import_node_path6.default.resolve(root) !== templateDir)
|
|
3274
|
+
];
|
|
3275
|
+
return await expandEvaluatorEntries(assertions, nestedSearchRoots, evalId, {
|
|
3276
|
+
depth: nextDepth,
|
|
3277
|
+
chain: [...includeContext.chain, resolved.resolvedPath]
|
|
3278
|
+
}) ?? [];
|
|
3279
|
+
}
|
|
3280
|
+
async function expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
|
|
2937
3281
|
if (candidateEvaluators === void 0) {
|
|
2938
3282
|
return void 0;
|
|
2939
3283
|
}
|
|
@@ -2941,13 +3285,34 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
|
|
|
2941
3285
|
logWarning2(`Skipping evaluators for '${evalId}': expected array`);
|
|
2942
3286
|
return void 0;
|
|
2943
3287
|
}
|
|
2944
|
-
const
|
|
2945
|
-
const
|
|
3288
|
+
const expanded = [];
|
|
3289
|
+
for (const rawEvaluator of candidateEvaluators) {
|
|
3290
|
+
if (isIncludeEntry(rawEvaluator)) {
|
|
3291
|
+
const included = await loadAssertionTemplateEntries(
|
|
3292
|
+
rawEvaluator.include,
|
|
3293
|
+
searchRoots,
|
|
3294
|
+
evalId,
|
|
3295
|
+
includeContext
|
|
3296
|
+
);
|
|
3297
|
+
expanded.push(...included);
|
|
3298
|
+
continue;
|
|
3299
|
+
}
|
|
3300
|
+
expanded.push(rawEvaluator);
|
|
3301
|
+
}
|
|
3302
|
+
return expanded;
|
|
3303
|
+
}
|
|
3304
|
+
async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
|
|
3305
|
+
const expandedEvaluators = await expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId);
|
|
3306
|
+
if (!expandedEvaluators) {
|
|
3307
|
+
return void 0;
|
|
3308
|
+
}
|
|
3309
|
+
const firstStringIndex = expandedEvaluators.findIndex((e) => typeof e === "string");
|
|
3310
|
+
const processedEvaluators = firstStringIndex === -1 ? [...expandedEvaluators] : (() => {
|
|
2946
3311
|
const PLACEHOLDER = Symbol("rubric-placeholder");
|
|
2947
3312
|
const strings = [];
|
|
2948
3313
|
const result = [];
|
|
2949
3314
|
let rubricInserted = false;
|
|
2950
|
-
for (const item of
|
|
3315
|
+
for (const item of expandedEvaluators) {
|
|
2951
3316
|
if (typeof item === "string") {
|
|
2952
3317
|
const trimmed = item.trim();
|
|
2953
3318
|
if (trimmed.length === 0) {
|
|
@@ -3162,8 +3527,16 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
|
|
|
3162
3527
|
);
|
|
3163
3528
|
continue;
|
|
3164
3529
|
}
|
|
3530
|
+
const expandedMembers = await expandEvaluatorEntries(
|
|
3531
|
+
rawMembers,
|
|
3532
|
+
searchRoots,
|
|
3533
|
+
`${evalId}:${name}`
|
|
3534
|
+
);
|
|
3535
|
+
if (!expandedMembers) {
|
|
3536
|
+
continue;
|
|
3537
|
+
}
|
|
3165
3538
|
const memberEvaluators = [];
|
|
3166
|
-
for (const rawMember of
|
|
3539
|
+
for (const rawMember of expandedMembers) {
|
|
3167
3540
|
if (!isJsonObject2(rawMember)) {
|
|
3168
3541
|
logWarning2(`Skipping invalid member evaluator in composite '${name}' (expected object)`);
|
|
3169
3542
|
continue;
|
|
@@ -4490,14 +4863,14 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
4490
4863
|
|
|
4491
4864
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
4492
4865
|
init_cjs_shims();
|
|
4493
|
-
var
|
|
4866
|
+
var import_promises9 = require("fs/promises");
|
|
4494
4867
|
var import_node_path8 = __toESM(require("path"), 1);
|
|
4495
4868
|
var import_micromatch = __toESM(require("micromatch"), 1);
|
|
4496
|
-
var
|
|
4869
|
+
var import_yaml4 = require("yaml");
|
|
4497
4870
|
|
|
4498
4871
|
// src/evaluation/loaders/message-processor.ts
|
|
4499
4872
|
init_cjs_shims();
|
|
4500
|
-
var
|
|
4873
|
+
var import_promises8 = require("fs/promises");
|
|
4501
4874
|
var import_node_path7 = __toESM(require("path"), 1);
|
|
4502
4875
|
|
|
4503
4876
|
// src/evaluation/formatting/segment-formatter.ts
|
|
@@ -4615,7 +4988,7 @@ async function processMessages(options) {
|
|
|
4615
4988
|
continue;
|
|
4616
4989
|
}
|
|
4617
4990
|
try {
|
|
4618
|
-
const fileContent = (await (0,
|
|
4991
|
+
const fileContent = (await (0, import_promises8.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
4619
4992
|
processedContent.push({
|
|
4620
4993
|
...cloneJsonObject(rawSegment),
|
|
4621
4994
|
path: displayPath,
|
|
@@ -4656,7 +5029,7 @@ async function processMessages(options) {
|
|
|
4656
5029
|
continue;
|
|
4657
5030
|
}
|
|
4658
5031
|
try {
|
|
4659
|
-
const imageBuffer = await (0,
|
|
5032
|
+
const imageBuffer = await (0, import_promises8.readFile)(resolvedPath);
|
|
4660
5033
|
const base64 = imageBuffer.toString("base64");
|
|
4661
5034
|
processedContent.push({
|
|
4662
5035
|
type: "image",
|
|
@@ -4733,7 +5106,7 @@ async function processExpectedMessages(options) {
|
|
|
4733
5106
|
continue;
|
|
4734
5107
|
}
|
|
4735
5108
|
try {
|
|
4736
|
-
const fileContent = (await (0,
|
|
5109
|
+
const fileContent = (await (0, import_promises8.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
4737
5110
|
processedContent.push({
|
|
4738
5111
|
type: "file",
|
|
4739
5112
|
path: displayPath,
|
|
@@ -4773,7 +5146,7 @@ async function processExpectedMessages(options) {
|
|
|
4773
5146
|
continue;
|
|
4774
5147
|
}
|
|
4775
5148
|
try {
|
|
4776
|
-
const imageBuffer = await (0,
|
|
5149
|
+
const imageBuffer = await (0, import_promises8.readFile)(resolvedPath);
|
|
4777
5150
|
const base64 = imageBuffer.toString("base64");
|
|
4778
5151
|
processedContent.push({
|
|
4779
5152
|
type: "image",
|
|
@@ -4902,8 +5275,8 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
4902
5275
|
return {};
|
|
4903
5276
|
}
|
|
4904
5277
|
try {
|
|
4905
|
-
const content = await (0,
|
|
4906
|
-
const parsed = interpolateEnv((0,
|
|
5278
|
+
const content = await (0, import_promises9.readFile)(sidecarPath, "utf8");
|
|
5279
|
+
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4907
5280
|
if (!isJsonObject(parsed)) {
|
|
4908
5281
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
4909
5282
|
return {};
|
|
@@ -4947,7 +5320,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4947
5320
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4948
5321
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
4949
5322
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
4950
|
-
const rawFile = await (0,
|
|
5323
|
+
const rawFile = await (0, import_promises9.readFile)(absoluteTestPath, "utf8");
|
|
4951
5324
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
4952
5325
|
const fallbackSuiteName = import_node_path8.default.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
4953
5326
|
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
@@ -5131,11 +5504,13 @@ function parseRepoCheckout(raw) {
|
|
|
5131
5504
|
if (!isJsonObject(raw)) return void 0;
|
|
5132
5505
|
const obj = raw;
|
|
5133
5506
|
const ref = typeof obj.ref === "string" ? obj.ref : void 0;
|
|
5507
|
+
const baseCommit = typeof obj.base_commit === "string" ? obj.base_commit : void 0;
|
|
5134
5508
|
const resolve = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
|
|
5135
5509
|
const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
|
|
5136
|
-
if (!ref && !resolve && ancestor === void 0) return void 0;
|
|
5510
|
+
if (!ref && !baseCommit && !resolve && ancestor === void 0) return void 0;
|
|
5137
5511
|
return {
|
|
5138
5512
|
...ref !== void 0 && { ref },
|
|
5513
|
+
...baseCommit !== void 0 && { base_commit: baseCommit },
|
|
5139
5514
|
...resolve !== void 0 && { resolve },
|
|
5140
5515
|
...ancestor !== void 0 && { ancestor }
|
|
5141
5516
|
};
|
|
@@ -5158,12 +5533,12 @@ function parseRepoConfig(raw) {
|
|
|
5158
5533
|
const obj = raw;
|
|
5159
5534
|
const repoPath = typeof obj.path === "string" ? obj.path : void 0;
|
|
5160
5535
|
const source = parseRepoSource(obj.source);
|
|
5161
|
-
if (!repoPath || !source) return void 0;
|
|
5162
5536
|
const checkout = parseRepoCheckout(obj.checkout);
|
|
5163
5537
|
const clone = parseRepoClone(obj.clone);
|
|
5538
|
+
if (!repoPath && !source && !checkout && !clone) return void 0;
|
|
5164
5539
|
return {
|
|
5165
|
-
path: repoPath,
|
|
5166
|
-
source,
|
|
5540
|
+
...repoPath !== void 0 && { path: repoPath },
|
|
5541
|
+
...source !== void 0 && { source },
|
|
5167
5542
|
...checkout !== void 0 && { checkout },
|
|
5168
5543
|
...clone !== void 0 && { clone }
|
|
5169
5544
|
};
|
|
@@ -5215,7 +5590,8 @@ ${messageContent}`);
|
|
|
5215
5590
|
segmentsByMessage,
|
|
5216
5591
|
mode
|
|
5217
5592
|
}) : void 0;
|
|
5218
|
-
|
|
5593
|
+
const systemMessage = extractSystemMessage(testCase.input, segmentsByMessage, mode);
|
|
5594
|
+
return { question, chatPrompt, systemMessage };
|
|
5219
5595
|
}
|
|
5220
5596
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
5221
5597
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -5229,6 +5605,26 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
5229
5605
|
}
|
|
5230
5606
|
return messagesWithContent > 1;
|
|
5231
5607
|
}
|
|
5608
|
+
function extractSystemMessage(messages, segmentsByMessage, mode) {
|
|
5609
|
+
const systemParts = [];
|
|
5610
|
+
for (let i = 0; i < messages.length; i++) {
|
|
5611
|
+
if (messages[i].role !== "system") {
|
|
5612
|
+
break;
|
|
5613
|
+
}
|
|
5614
|
+
const segments = segmentsByMessage[i];
|
|
5615
|
+
const contentParts = [];
|
|
5616
|
+
for (const segment of segments) {
|
|
5617
|
+
const formatted = formatSegment(segment, mode);
|
|
5618
|
+
if (formatted) {
|
|
5619
|
+
contentParts.push(formatted);
|
|
5620
|
+
}
|
|
5621
|
+
}
|
|
5622
|
+
if (contentParts.length > 0) {
|
|
5623
|
+
systemParts.push(contentParts.join("\n"));
|
|
5624
|
+
}
|
|
5625
|
+
}
|
|
5626
|
+
return systemParts.length > 0 ? systemParts.join("\n\n") : void 0;
|
|
5627
|
+
}
|
|
5232
5628
|
function buildChatPromptFromSegments(options) {
|
|
5233
5629
|
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
5234
5630
|
if (messages.length === 0) {
|
|
@@ -5312,8 +5708,8 @@ function resolveTests(suite) {
|
|
|
5312
5708
|
async function readTestSuiteMetadata(testFilePath) {
|
|
5313
5709
|
try {
|
|
5314
5710
|
const absolutePath = import_node_path9.default.resolve(testFilePath);
|
|
5315
|
-
const content = await (0,
|
|
5316
|
-
const parsed = interpolateEnv((0,
|
|
5711
|
+
const content = await (0, import_promises10.readFile)(absolutePath, "utf8");
|
|
5712
|
+
const parsed = interpolateEnv((0, import_yaml5.parse)(content), process.env);
|
|
5317
5713
|
if (!isJsonObject(parsed)) {
|
|
5318
5714
|
return {};
|
|
5319
5715
|
}
|
|
@@ -5370,8 +5766,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
5370
5766
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
5371
5767
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
5372
5768
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
5373
|
-
const rawFile = await (0,
|
|
5374
|
-
const interpolated = interpolateEnv((0,
|
|
5769
|
+
const rawFile = await (0, import_promises10.readFile)(absoluteTestPath, "utf8");
|
|
5770
|
+
const interpolated = interpolateEnv((0, import_yaml5.parse)(rawFile), process.env);
|
|
5375
5771
|
if (!isJsonObject(interpolated)) {
|
|
5376
5772
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
5377
5773
|
}
|
|
@@ -5512,7 +5908,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
5512
5908
|
const testCase = {
|
|
5513
5909
|
id,
|
|
5514
5910
|
suite: suiteName,
|
|
5515
|
-
category: options?.category,
|
|
5911
|
+
category: suite.category ?? options?.category,
|
|
5516
5912
|
conversation_id: conversationId,
|
|
5517
5913
|
question,
|
|
5518
5914
|
input: inputMessages,
|
|
@@ -5605,11 +6001,11 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
5605
6001
|
const workspaceFilePath = import_node_path9.default.resolve(evalFileDir, raw);
|
|
5606
6002
|
let content;
|
|
5607
6003
|
try {
|
|
5608
|
-
content = await (0,
|
|
6004
|
+
content = await (0, import_promises10.readFile)(workspaceFilePath, "utf8");
|
|
5609
6005
|
} catch {
|
|
5610
6006
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
5611
6007
|
}
|
|
5612
|
-
const parsed = interpolateEnv((0,
|
|
6008
|
+
const parsed = interpolateEnv((0, import_yaml5.parse)(content), process.env);
|
|
5613
6009
|
if (!isJsonObject(parsed)) {
|
|
5614
6010
|
throw new Error(
|
|
5615
6011
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -5644,14 +6040,28 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
5644
6040
|
const explicitMode = obj.mode === "pooled" || obj.mode === "temp" || obj.mode === "static" ? obj.mode : void 0;
|
|
5645
6041
|
const workspacePath = typeof obj.path === "string" ? obj.path : void 0;
|
|
5646
6042
|
const mode = explicitMode ?? (workspacePath ? "static" : void 0);
|
|
5647
|
-
|
|
6043
|
+
const docker = parseDockerWorkspaceConfig(obj.docker);
|
|
6044
|
+
if (!template && !isolation && !repos && !hooks && !mode && !workspacePath && !docker)
|
|
6045
|
+
return void 0;
|
|
5648
6046
|
return {
|
|
5649
6047
|
...template !== void 0 && { template },
|
|
5650
6048
|
...isolation !== void 0 && { isolation },
|
|
5651
6049
|
...repos !== void 0 && { repos },
|
|
5652
6050
|
...hooks !== void 0 && { hooks },
|
|
5653
6051
|
...mode !== void 0 && { mode },
|
|
5654
|
-
...workspacePath !== void 0 && { path: workspacePath }
|
|
6052
|
+
...workspacePath !== void 0 && { path: workspacePath },
|
|
6053
|
+
...docker !== void 0 && { docker }
|
|
6054
|
+
};
|
|
6055
|
+
}
|
|
6056
|
+
function parseDockerWorkspaceConfig(raw) {
|
|
6057
|
+
if (!isJsonObject(raw)) return void 0;
|
|
6058
|
+
const obj = raw;
|
|
6059
|
+
if (typeof obj.image !== "string") return void 0;
|
|
6060
|
+
return {
|
|
6061
|
+
image: obj.image,
|
|
6062
|
+
...typeof obj.timeout === "number" && { timeout: obj.timeout },
|
|
6063
|
+
...typeof obj.memory === "string" && { memory: obj.memory },
|
|
6064
|
+
...typeof obj.cpus === "number" && { cpus: obj.cpus }
|
|
5655
6065
|
};
|
|
5656
6066
|
}
|
|
5657
6067
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
@@ -5680,7 +6090,8 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
5680
6090
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
5681
6091
|
...hasHooks && { hooks: mergedHooks },
|
|
5682
6092
|
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
5683
|
-
path: caseLevel.path ?? suiteLevel.path
|
|
6093
|
+
path: caseLevel.path ?? suiteLevel.path,
|
|
6094
|
+
docker: caseLevel.docker ?? suiteLevel.docker
|
|
5684
6095
|
};
|
|
5685
6096
|
}
|
|
5686
6097
|
function asString5(value) {
|
|
@@ -5709,7 +6120,7 @@ ${detailBlock}${ANSI_RESET8}`);
|
|
|
5709
6120
|
init_cjs_shims();
|
|
5710
6121
|
var import_node_fs2 = require("fs");
|
|
5711
6122
|
var import_node_path10 = __toESM(require("path"), 1);
|
|
5712
|
-
var
|
|
6123
|
+
var import_yaml6 = require("yaml");
|
|
5713
6124
|
function codeGraderInstruction(graderName, description) {
|
|
5714
6125
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
5715
6126
|
return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
|
|
@@ -5948,7 +6359,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
5948
6359
|
}
|
|
5949
6360
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
5950
6361
|
const content = (0, import_node_fs2.readFileSync)(evalYamlPath, "utf8");
|
|
5951
|
-
const parsed = (0,
|
|
6362
|
+
const parsed = (0, import_yaml6.parse)(content);
|
|
5952
6363
|
return transpileEvalYaml(parsed, import_node_path10.default.basename(evalYamlPath));
|
|
5953
6364
|
}
|
|
5954
6365
|
function getOutputFilenames(result) {
|
|
@@ -5969,11 +6380,11 @@ function getOutputFilenames(result) {
|
|
|
5969
6380
|
// src/evaluation/file-utils.ts
|
|
5970
6381
|
init_cjs_shims();
|
|
5971
6382
|
var import_node_fs3 = require("fs");
|
|
5972
|
-
var
|
|
6383
|
+
var import_promises11 = require("fs/promises");
|
|
5973
6384
|
var import_node_path11 = __toESM(require("path"), 1);
|
|
5974
6385
|
async function fileExists2(filePath) {
|
|
5975
6386
|
try {
|
|
5976
|
-
await (0,
|
|
6387
|
+
await (0, import_promises11.access)(filePath, import_node_fs3.constants.F_OK);
|
|
5977
6388
|
return true;
|
|
5978
6389
|
} catch {
|
|
5979
6390
|
return false;
|
|
@@ -5983,11 +6394,11 @@ function normalizeLineEndings(content) {
|
|
|
5983
6394
|
return content.replace(/\r\n/g, "\n");
|
|
5984
6395
|
}
|
|
5985
6396
|
async function readTextFile(filePath) {
|
|
5986
|
-
const content = await (0,
|
|
6397
|
+
const content = await (0, import_promises11.readFile)(filePath, "utf8");
|
|
5987
6398
|
return normalizeLineEndings(content);
|
|
5988
6399
|
}
|
|
5989
6400
|
async function readJsonFile(filePath) {
|
|
5990
|
-
const content = await (0,
|
|
6401
|
+
const content = await (0, import_promises11.readFile)(filePath, "utf8");
|
|
5991
6402
|
return JSON.parse(content);
|
|
5992
6403
|
}
|
|
5993
6404
|
async function findGitRoot(startPath) {
|
|
@@ -6508,7 +6919,7 @@ init_cjs_shims();
|
|
|
6508
6919
|
var import_node_child_process = require("child_process");
|
|
6509
6920
|
var import_node_crypto = require("crypto");
|
|
6510
6921
|
var import_node_fs4 = require("fs");
|
|
6511
|
-
var
|
|
6922
|
+
var import_promises12 = require("fs/promises");
|
|
6512
6923
|
var import_node_path13 = __toESM(require("path"), 1);
|
|
6513
6924
|
|
|
6514
6925
|
// src/evaluation/providers/claude-content.ts
|
|
@@ -6840,7 +7251,7 @@ var ClaudeCliProvider = class {
|
|
|
6840
7251
|
return void 0;
|
|
6841
7252
|
}
|
|
6842
7253
|
try {
|
|
6843
|
-
await (0,
|
|
7254
|
+
await (0, import_promises12.mkdir)(logDir, { recursive: true });
|
|
6844
7255
|
} catch (error) {
|
|
6845
7256
|
const message = error instanceof Error ? error.message : String(error);
|
|
6846
7257
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -7148,7 +7559,7 @@ function tryParseJson(line) {
|
|
|
7148
7559
|
init_cjs_shims();
|
|
7149
7560
|
var import_node_crypto2 = require("crypto");
|
|
7150
7561
|
var import_node_fs5 = require("fs");
|
|
7151
|
-
var
|
|
7562
|
+
var import_promises13 = require("fs/promises");
|
|
7152
7563
|
var import_node_path14 = __toESM(require("path"), 1);
|
|
7153
7564
|
var claudeSdkModule = null;
|
|
7154
7565
|
async function loadClaudeSdk() {
|
|
@@ -7333,7 +7744,7 @@ var ClaudeSdkProvider = class {
|
|
|
7333
7744
|
return void 0;
|
|
7334
7745
|
}
|
|
7335
7746
|
try {
|
|
7336
|
-
await (0,
|
|
7747
|
+
await (0, import_promises13.mkdir)(logDir, { recursive: true });
|
|
7337
7748
|
} catch (error) {
|
|
7338
7749
|
const message = error instanceof Error ? error.message : String(error);
|
|
7339
7750
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -7526,7 +7937,7 @@ function formatElapsed2(startedAt) {
|
|
|
7526
7937
|
// src/evaluation/providers/cli.ts
|
|
7527
7938
|
init_cjs_shims();
|
|
7528
7939
|
var import_node_child_process2 = require("child_process");
|
|
7529
|
-
var
|
|
7940
|
+
var import_promises14 = __toESM(require("fs/promises"), 1);
|
|
7530
7941
|
var import_node_os = __toESM(require("os"), 1);
|
|
7531
7942
|
var import_node_path15 = __toESM(require("path"), 1);
|
|
7532
7943
|
var import_node_util = require("util");
|
|
@@ -7925,7 +8336,7 @@ var CliProvider = class {
|
|
|
7925
8336
|
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
7926
8337
|
} finally {
|
|
7927
8338
|
if (!this.keepTempFiles) {
|
|
7928
|
-
await
|
|
8339
|
+
await import_promises14.default.unlink(filePath).catch(() => {
|
|
7929
8340
|
});
|
|
7930
8341
|
}
|
|
7931
8342
|
}
|
|
@@ -8005,7 +8416,7 @@ var CliProvider = class {
|
|
|
8005
8416
|
async function buildTemplateValues(request, config, outputFilePath) {
|
|
8006
8417
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
8007
8418
|
const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
|
|
8008
|
-
await
|
|
8419
|
+
await import_promises14.default.writeFile(promptFilePath, request.question ?? "", "utf8");
|
|
8009
8420
|
return {
|
|
8010
8421
|
values: {
|
|
8011
8422
|
PROMPT: shellEscape(request.question ?? ""),
|
|
@@ -8022,7 +8433,7 @@ async function cleanupTempFile(filePath, keepTempFiles) {
|
|
|
8022
8433
|
if (!filePath || keepTempFiles) {
|
|
8023
8434
|
return;
|
|
8024
8435
|
}
|
|
8025
|
-
await
|
|
8436
|
+
await import_promises14.default.unlink(filePath).catch(() => {
|
|
8026
8437
|
});
|
|
8027
8438
|
}
|
|
8028
8439
|
function normalizeInputFiles2(inputFiles) {
|
|
@@ -8083,7 +8494,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
8083
8494
|
init_cjs_shims();
|
|
8084
8495
|
var import_node_crypto3 = require("crypto");
|
|
8085
8496
|
var import_node_fs6 = require("fs");
|
|
8086
|
-
var
|
|
8497
|
+
var import_promises15 = require("fs/promises");
|
|
8087
8498
|
var import_node_path16 = __toESM(require("path"), 1);
|
|
8088
8499
|
|
|
8089
8500
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
@@ -8343,7 +8754,7 @@ ${basePrompt}` : basePrompt;
|
|
|
8343
8754
|
return void 0;
|
|
8344
8755
|
}
|
|
8345
8756
|
try {
|
|
8346
|
-
await (0,
|
|
8757
|
+
await (0, import_promises15.mkdir)(logDir, { recursive: true });
|
|
8347
8758
|
} catch (error) {
|
|
8348
8759
|
const message = error instanceof Error ? error.message : String(error);
|
|
8349
8760
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -8493,7 +8904,7 @@ function formatElapsed3(startedAt) {
|
|
|
8493
8904
|
// src/evaluation/providers/copilot-cli.ts
|
|
8494
8905
|
init_cjs_shims();
|
|
8495
8906
|
var import_node_crypto5 = require("crypto");
|
|
8496
|
-
var
|
|
8907
|
+
var import_promises16 = require("fs/promises");
|
|
8497
8908
|
var import_node_path18 = __toESM(require("path"), 1);
|
|
8498
8909
|
var import_node_stream = require("stream");
|
|
8499
8910
|
var import_node_child_process3 = require("child_process");
|
|
@@ -8562,7 +8973,7 @@ var import_node_path17 = __toESM(require("path"), 1);
|
|
|
8562
8973
|
var import_node_url3 = require("url");
|
|
8563
8974
|
var import_meta = {};
|
|
8564
8975
|
function resolvePlatformCliPath() {
|
|
8565
|
-
const
|
|
8976
|
+
const os4 = (0, import_node_os2.platform)();
|
|
8566
8977
|
const cpu = (0, import_node_os2.arch)();
|
|
8567
8978
|
const platformMap = {
|
|
8568
8979
|
linux: "linux",
|
|
@@ -8573,13 +8984,13 @@ function resolvePlatformCliPath() {
|
|
|
8573
8984
|
x64: "x64",
|
|
8574
8985
|
arm64: "arm64"
|
|
8575
8986
|
};
|
|
8576
|
-
const osPart = platformMap[
|
|
8987
|
+
const osPart = platformMap[os4];
|
|
8577
8988
|
const archPart = archMap[cpu];
|
|
8578
8989
|
if (!osPart || !archPart) {
|
|
8579
8990
|
return void 0;
|
|
8580
8991
|
}
|
|
8581
8992
|
const packageName = `@github/copilot-${osPart}-${archPart}`;
|
|
8582
|
-
const binaryName =
|
|
8993
|
+
const binaryName = os4 === "win32" ? "copilot.exe" : "copilot";
|
|
8583
8994
|
try {
|
|
8584
8995
|
const resolved = import_meta.resolve(`${packageName}/package.json`);
|
|
8585
8996
|
const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url3.fileURLToPath)(resolved) : resolved;
|
|
@@ -8997,7 +9408,7 @@ var CopilotCliProvider = class {
|
|
|
8997
9408
|
return void 0;
|
|
8998
9409
|
}
|
|
8999
9410
|
try {
|
|
9000
|
-
await (0,
|
|
9411
|
+
await (0, import_promises16.mkdir)(logDir, { recursive: true });
|
|
9001
9412
|
} catch (error) {
|
|
9002
9413
|
const message = error instanceof Error ? error.message : String(error);
|
|
9003
9414
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -9097,7 +9508,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
9097
9508
|
|
|
9098
9509
|
// src/evaluation/providers/copilot-log.ts
|
|
9099
9510
|
init_cjs_shims();
|
|
9100
|
-
var
|
|
9511
|
+
var import_promises18 = require("fs/promises");
|
|
9101
9512
|
var import_node_os4 = require("os");
|
|
9102
9513
|
var import_node_path20 = __toESM(require("path"), 1);
|
|
9103
9514
|
|
|
@@ -9233,17 +9644,17 @@ function parseCopilotEvents(eventsJsonl) {
|
|
|
9233
9644
|
|
|
9234
9645
|
// src/evaluation/providers/copilot-session-discovery.ts
|
|
9235
9646
|
init_cjs_shims();
|
|
9236
|
-
var
|
|
9647
|
+
var import_promises17 = require("fs/promises");
|
|
9237
9648
|
var import_node_os3 = require("os");
|
|
9238
9649
|
var import_node_path19 = __toESM(require("path"), 1);
|
|
9239
|
-
var
|
|
9650
|
+
var import_yaml7 = require("yaml");
|
|
9240
9651
|
var DEFAULT_SESSION_STATE_DIR = () => import_node_path19.default.join((0, import_node_os3.homedir)(), ".copilot", "session-state");
|
|
9241
9652
|
async function discoverCopilotSessions(opts) {
|
|
9242
9653
|
const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
|
|
9243
9654
|
const limit = opts?.limit ?? 10;
|
|
9244
9655
|
let entries;
|
|
9245
9656
|
try {
|
|
9246
|
-
entries = await (0,
|
|
9657
|
+
entries = await (0, import_promises17.readdir)(sessionStateDir);
|
|
9247
9658
|
} catch {
|
|
9248
9659
|
return [];
|
|
9249
9660
|
}
|
|
@@ -9253,12 +9664,12 @@ async function discoverCopilotSessions(opts) {
|
|
|
9253
9664
|
const workspacePath = import_node_path19.default.join(sessionDir, "workspace.yaml");
|
|
9254
9665
|
const eventsPath = import_node_path19.default.join(sessionDir, "events.jsonl");
|
|
9255
9666
|
try {
|
|
9256
|
-
const workspaceContent = await (0,
|
|
9257
|
-
const workspace = (0,
|
|
9667
|
+
const workspaceContent = await (0, import_promises17.readFile)(workspacePath, "utf8");
|
|
9668
|
+
const workspace = (0, import_yaml7.parse)(workspaceContent) ?? {};
|
|
9258
9669
|
const cwd = String(workspace.cwd ?? "");
|
|
9259
9670
|
let updatedAt;
|
|
9260
9671
|
try {
|
|
9261
|
-
const eventsStat = await (0,
|
|
9672
|
+
const eventsStat = await (0, import_promises17.stat)(eventsPath);
|
|
9262
9673
|
updatedAt = eventsStat.mtime;
|
|
9263
9674
|
} catch {
|
|
9264
9675
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -9315,7 +9726,7 @@ var CopilotLogProvider = class {
|
|
|
9315
9726
|
const eventsPath = import_node_path20.default.join(sessionDir, "events.jsonl");
|
|
9316
9727
|
let eventsContent;
|
|
9317
9728
|
try {
|
|
9318
|
-
eventsContent = await (0,
|
|
9729
|
+
eventsContent = await (0, import_promises18.readFile)(eventsPath, "utf8");
|
|
9319
9730
|
} catch (err) {
|
|
9320
9731
|
throw new Error(
|
|
9321
9732
|
`Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -9360,7 +9771,7 @@ var CopilotLogProvider = class {
|
|
|
9360
9771
|
init_cjs_shims();
|
|
9361
9772
|
var import_node_crypto6 = require("crypto");
|
|
9362
9773
|
var import_node_fs8 = require("fs");
|
|
9363
|
-
var
|
|
9774
|
+
var import_promises19 = require("fs/promises");
|
|
9364
9775
|
var import_node_path21 = __toESM(require("path"), 1);
|
|
9365
9776
|
|
|
9366
9777
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
@@ -9694,7 +10105,7 @@ var CopilotSdkProvider = class {
|
|
|
9694
10105
|
return void 0;
|
|
9695
10106
|
}
|
|
9696
10107
|
try {
|
|
9697
|
-
await (0,
|
|
10108
|
+
await (0, import_promises19.mkdir)(logDir, { recursive: true });
|
|
9698
10109
|
} catch (error) {
|
|
9699
10110
|
const message = error instanceof Error ? error.message : String(error);
|
|
9700
10111
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -9815,7 +10226,7 @@ init_cjs_shims();
|
|
|
9815
10226
|
var import_node_child_process4 = require("child_process");
|
|
9816
10227
|
var import_node_crypto7 = require("crypto");
|
|
9817
10228
|
var import_node_fs9 = require("fs");
|
|
9818
|
-
var
|
|
10229
|
+
var import_promises20 = require("fs/promises");
|
|
9819
10230
|
var import_node_os5 = require("os");
|
|
9820
10231
|
var import_node_path22 = __toESM(require("path"), 1);
|
|
9821
10232
|
|
|
@@ -10027,7 +10438,7 @@ var PiCliProvider = class {
|
|
|
10027
10438
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
10028
10439
|
try {
|
|
10029
10440
|
const promptFile = import_node_path22.default.join(cwd, PROMPT_FILENAME);
|
|
10030
|
-
await (0,
|
|
10441
|
+
await (0, import_promises20.writeFile)(promptFile, request.question, "utf8");
|
|
10031
10442
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
10032
10443
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
10033
10444
|
if (result.timedOut) {
|
|
@@ -10198,11 +10609,11 @@ ${prompt}` : prompt;
|
|
|
10198
10609
|
return env;
|
|
10199
10610
|
}
|
|
10200
10611
|
async createWorkspace() {
|
|
10201
|
-
return await (0,
|
|
10612
|
+
return await (0, import_promises20.mkdtemp)(import_node_path22.default.join((0, import_node_os5.tmpdir)(), WORKSPACE_PREFIX));
|
|
10202
10613
|
}
|
|
10203
10614
|
async cleanupWorkspace(workspaceRoot) {
|
|
10204
10615
|
try {
|
|
10205
|
-
await (0,
|
|
10616
|
+
await (0, import_promises20.rm)(workspaceRoot, { recursive: true, force: true });
|
|
10206
10617
|
} catch {
|
|
10207
10618
|
}
|
|
10208
10619
|
}
|
|
@@ -10218,7 +10629,7 @@ ${prompt}` : prompt;
|
|
|
10218
10629
|
return void 0;
|
|
10219
10630
|
}
|
|
10220
10631
|
try {
|
|
10221
|
-
await (0,
|
|
10632
|
+
await (0, import_promises20.mkdir)(logDir, { recursive: true });
|
|
10222
10633
|
} catch (error) {
|
|
10223
10634
|
const message = error instanceof Error ? error.message : String(error);
|
|
10224
10635
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -10775,7 +11186,7 @@ init_cjs_shims();
|
|
|
10775
11186
|
var import_node_child_process5 = require("child_process");
|
|
10776
11187
|
var import_node_crypto8 = require("crypto");
|
|
10777
11188
|
var import_node_fs10 = require("fs");
|
|
10778
|
-
var
|
|
11189
|
+
var import_promises21 = require("fs/promises");
|
|
10779
11190
|
var import_node_path24 = __toESM(require("path"), 1);
|
|
10780
11191
|
var import_node_readline = require("readline");
|
|
10781
11192
|
var import_node_url4 = require("url");
|
|
@@ -11236,7 +11647,7 @@ ${fileList}`;
|
|
|
11236
11647
|
return void 0;
|
|
11237
11648
|
}
|
|
11238
11649
|
try {
|
|
11239
|
-
await (0,
|
|
11650
|
+
await (0, import_promises21.mkdir)(logDir, { recursive: true });
|
|
11240
11651
|
} catch (error) {
|
|
11241
11652
|
const message = error instanceof Error ? error.message : String(error);
|
|
11242
11653
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -12675,8 +13086,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
12675
13086
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
12676
13087
|
if (!parseResult.success) {
|
|
12677
13088
|
const firstError = parseResult.error.errors[0];
|
|
12678
|
-
const
|
|
12679
|
-
const prefix =
|
|
13089
|
+
const path56 = firstError?.path.join(".") || "";
|
|
13090
|
+
const prefix = path56 ? `${target.name} ${path56}: ` : `${target.name}: `;
|
|
12680
13091
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
12681
13092
|
}
|
|
12682
13093
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -12923,7 +13334,7 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
12923
13334
|
// src/evaluation/providers/vscode-provider.ts
|
|
12924
13335
|
init_cjs_shims();
|
|
12925
13336
|
var import_node_child_process7 = require("child_process");
|
|
12926
|
-
var
|
|
13337
|
+
var import_promises28 = require("fs/promises");
|
|
12927
13338
|
var import_node_path36 = __toESM(require("path"), 1);
|
|
12928
13339
|
var import_node_util3 = require("util");
|
|
12929
13340
|
|
|
@@ -12932,27 +13343,27 @@ init_cjs_shims();
|
|
|
12932
13343
|
|
|
12933
13344
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
12934
13345
|
init_cjs_shims();
|
|
12935
|
-
var
|
|
13346
|
+
var import_promises26 = require("fs/promises");
|
|
12936
13347
|
var import_node_path34 = __toESM(require("path"), 1);
|
|
12937
13348
|
|
|
12938
13349
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
12939
13350
|
init_cjs_shims();
|
|
12940
13351
|
var import_node_fs11 = require("fs");
|
|
12941
|
-
var
|
|
13352
|
+
var import_promises22 = require("fs/promises");
|
|
12942
13353
|
var import_node_path26 = __toESM(require("path"), 1);
|
|
12943
13354
|
async function pathExists(target) {
|
|
12944
13355
|
try {
|
|
12945
|
-
await (0,
|
|
13356
|
+
await (0, import_promises22.access)(target, import_node_fs11.constants.F_OK);
|
|
12946
13357
|
return true;
|
|
12947
13358
|
} catch {
|
|
12948
13359
|
return false;
|
|
12949
13360
|
}
|
|
12950
13361
|
}
|
|
12951
13362
|
async function ensureDir(target) {
|
|
12952
|
-
await (0,
|
|
13363
|
+
await (0, import_promises22.mkdir)(target, { recursive: true });
|
|
12953
13364
|
}
|
|
12954
13365
|
async function readDirEntries(target) {
|
|
12955
|
-
const entries = await (0,
|
|
13366
|
+
const entries = await (0, import_promises22.readdir)(target, { withFileTypes: true });
|
|
12956
13367
|
return entries.map((entry) => ({
|
|
12957
13368
|
name: entry.name,
|
|
12958
13369
|
absolutePath: import_node_path26.default.join(target, entry.name),
|
|
@@ -12961,7 +13372,7 @@ async function readDirEntries(target) {
|
|
|
12961
13372
|
}
|
|
12962
13373
|
async function removeIfExists(target) {
|
|
12963
13374
|
try {
|
|
12964
|
-
await (0,
|
|
13375
|
+
await (0, import_promises22.rm)(target, { force: true, recursive: false });
|
|
12965
13376
|
} catch (error) {
|
|
12966
13377
|
if (error.code !== "ENOENT") {
|
|
12967
13378
|
throw error;
|
|
@@ -13087,7 +13498,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
13087
13498
|
|
|
13088
13499
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
13089
13500
|
init_cjs_shims();
|
|
13090
|
-
var
|
|
13501
|
+
var import_promises23 = require("fs/promises");
|
|
13091
13502
|
var import_node_path29 = __toESM(require("path"), 1);
|
|
13092
13503
|
|
|
13093
13504
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
@@ -13127,7 +13538,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
13127
13538
|
const maxAttempts = 10;
|
|
13128
13539
|
while (attempts < maxAttempts) {
|
|
13129
13540
|
try {
|
|
13130
|
-
const content = await (0,
|
|
13541
|
+
const content = await (0, import_promises23.readFile)(responseFileFinal, { encoding: "utf8" });
|
|
13131
13542
|
if (!silent) {
|
|
13132
13543
|
process.stdout.write(`${content}
|
|
13133
13544
|
`);
|
|
@@ -13184,7 +13595,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
13184
13595
|
const maxAttempts = 10;
|
|
13185
13596
|
while (attempts < maxAttempts) {
|
|
13186
13597
|
try {
|
|
13187
|
-
const content = await (0,
|
|
13598
|
+
const content = await (0, import_promises23.readFile)(file, { encoding: "utf8" });
|
|
13188
13599
|
if (!silent) {
|
|
13189
13600
|
process.stdout.write(`${content}
|
|
13190
13601
|
`);
|
|
@@ -13208,7 +13619,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
13208
13619
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
13209
13620
|
init_cjs_shims();
|
|
13210
13621
|
var import_node_child_process6 = require("child_process");
|
|
13211
|
-
var
|
|
13622
|
+
var import_promises24 = require("fs/promises");
|
|
13212
13623
|
var import_node_path31 = __toESM(require("path"), 1);
|
|
13213
13624
|
var import_node_util2 = require("util");
|
|
13214
13625
|
|
|
@@ -13289,9 +13700,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
13289
13700
|
const aliveFile = import_node_path31.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
13290
13701
|
await removeIfExists(aliveFile);
|
|
13291
13702
|
const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
|
|
13292
|
-
await (0,
|
|
13703
|
+
await (0, import_promises24.mkdir)(githubAgentsDir, { recursive: true });
|
|
13293
13704
|
const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
|
|
13294
|
-
await (0,
|
|
13705
|
+
await (0, import_promises24.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
13295
13706
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
13296
13707
|
label: "open-workspace"
|
|
13297
13708
|
});
|
|
@@ -13320,9 +13731,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
13320
13731
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
13321
13732
|
const workspacePath = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
13322
13733
|
const messagesDir = import_node_path31.default.join(subagentDir, "messages");
|
|
13323
|
-
await (0,
|
|
13734
|
+
await (0, import_promises24.mkdir)(messagesDir, { recursive: true });
|
|
13324
13735
|
const reqFile = import_node_path31.default.join(messagesDir, `${timestamp}_req.md`);
|
|
13325
|
-
await (0,
|
|
13736
|
+
await (0, import_promises24.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
13326
13737
|
const reqUri = pathToFileUri2(reqFile);
|
|
13327
13738
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
13328
13739
|
for (const attachment of attachmentPaths) {
|
|
@@ -13348,7 +13759,7 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
13348
13759
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
13349
13760
|
const workspacePath = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
13350
13761
|
const messagesDir = import_node_path31.default.join(subagentDir, "messages");
|
|
13351
|
-
await (0,
|
|
13762
|
+
await (0, import_promises24.mkdir)(messagesDir, { recursive: true });
|
|
13352
13763
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
13353
13764
|
for (const attachment of attachmentPaths) {
|
|
13354
13765
|
chatArgs.push("-a", attachment);
|
|
@@ -13372,7 +13783,7 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
13372
13783
|
|
|
13373
13784
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
13374
13785
|
init_cjs_shims();
|
|
13375
|
-
var
|
|
13786
|
+
var import_promises25 = require("fs/promises");
|
|
13376
13787
|
var import_node_path33 = __toESM(require("path"), 1);
|
|
13377
13788
|
|
|
13378
13789
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
@@ -13486,11 +13897,11 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
13486
13897
|
if (!await pathExists(workspaceSrc)) {
|
|
13487
13898
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
13488
13899
|
}
|
|
13489
|
-
const stats = await (0,
|
|
13900
|
+
const stats = await (0, import_promises25.stat)(workspaceSrc);
|
|
13490
13901
|
if (!stats.isFile()) {
|
|
13491
13902
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
13492
13903
|
}
|
|
13493
|
-
const templateText = await (0,
|
|
13904
|
+
const templateText = await (0, import_promises25.readFile)(workspaceSrc, "utf8");
|
|
13494
13905
|
workspaceContent = JSON.parse(templateText);
|
|
13495
13906
|
} else {
|
|
13496
13907
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
@@ -13509,15 +13920,15 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
13509
13920
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
13510
13921
|
}
|
|
13511
13922
|
}
|
|
13512
|
-
await (0,
|
|
13923
|
+
await (0, import_promises25.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
13513
13924
|
const messagesDir = import_node_path33.default.join(subagentDir, "messages");
|
|
13514
|
-
await (0,
|
|
13925
|
+
await (0, import_promises25.mkdir)(messagesDir, { recursive: true });
|
|
13515
13926
|
return { workspace: workspaceDst, messagesDir };
|
|
13516
13927
|
}
|
|
13517
13928
|
async function createSubagentLock(subagentDir) {
|
|
13518
13929
|
const messagesDir = import_node_path33.default.join(subagentDir, "messages");
|
|
13519
13930
|
if (await pathExists(messagesDir)) {
|
|
13520
|
-
const files = await (0,
|
|
13931
|
+
const files = await (0, import_promises25.readdir)(messagesDir);
|
|
13521
13932
|
await Promise.all(
|
|
13522
13933
|
files.map(async (file) => {
|
|
13523
13934
|
const target = import_node_path33.default.join(messagesDir, file);
|
|
@@ -13527,14 +13938,14 @@ async function createSubagentLock(subagentDir) {
|
|
|
13527
13938
|
}
|
|
13528
13939
|
const githubAgentsDir = import_node_path33.default.join(subagentDir, ".github", "agents");
|
|
13529
13940
|
if (await pathExists(githubAgentsDir)) {
|
|
13530
|
-
const agentFiles = await (0,
|
|
13941
|
+
const agentFiles = await (0, import_promises25.readdir)(githubAgentsDir);
|
|
13531
13942
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
13532
13943
|
await Promise.all(
|
|
13533
13944
|
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path33.default.join(githubAgentsDir, file)))
|
|
13534
13945
|
);
|
|
13535
13946
|
}
|
|
13536
13947
|
const lockFile = import_node_path33.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
13537
|
-
await (0,
|
|
13948
|
+
await (0, import_promises25.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
13538
13949
|
return lockFile;
|
|
13539
13950
|
}
|
|
13540
13951
|
async function removeSubagentLock(subagentDir) {
|
|
@@ -13559,10 +13970,10 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
13559
13970
|
}
|
|
13560
13971
|
if (promptFile) {
|
|
13561
13972
|
const githubAgentsDir = import_node_path33.default.join(subagentDir, ".github", "agents");
|
|
13562
|
-
await (0,
|
|
13973
|
+
await (0, import_promises25.mkdir)(githubAgentsDir, { recursive: true });
|
|
13563
13974
|
const agentFile = import_node_path33.default.join(githubAgentsDir, `${chatId}.md`);
|
|
13564
13975
|
try {
|
|
13565
|
-
await (0,
|
|
13976
|
+
await (0, import_promises25.copyFile)(promptFile, agentFile);
|
|
13566
13977
|
} catch (error) {
|
|
13567
13978
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
13568
13979
|
return 1;
|
|
@@ -13583,7 +13994,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
13583
13994
|
if (!await pathExists(resolvedPrompt)) {
|
|
13584
13995
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
13585
13996
|
}
|
|
13586
|
-
const promptStats = await (0,
|
|
13997
|
+
const promptStats = await (0, import_promises26.stat)(resolvedPrompt);
|
|
13587
13998
|
if (!promptStats.isFile()) {
|
|
13588
13999
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
13589
14000
|
}
|
|
@@ -13820,7 +14231,7 @@ async function dispatchBatchAgent(options) {
|
|
|
13820
14231
|
const reqFile = requestFiles[index];
|
|
13821
14232
|
const tmpFile = responseTmpFiles[index];
|
|
13822
14233
|
const finalFile = responseFilesFinal[index];
|
|
13823
|
-
return (0,
|
|
14234
|
+
return (0, import_promises26.writeFile)(
|
|
13824
14235
|
reqFile,
|
|
13825
14236
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
13826
14237
|
{ encoding: "utf8" }
|
|
@@ -13832,7 +14243,7 @@ async function dispatchBatchAgent(options) {
|
|
|
13832
14243
|
responseFilesFinal,
|
|
13833
14244
|
orchestratorTemplateContent
|
|
13834
14245
|
);
|
|
13835
|
-
await (0,
|
|
14246
|
+
await (0, import_promises26.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
13836
14247
|
}
|
|
13837
14248
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
13838
14249
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -13899,7 +14310,7 @@ async function dispatchBatchAgent(options) {
|
|
|
13899
14310
|
|
|
13900
14311
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
13901
14312
|
init_cjs_shims();
|
|
13902
|
-
var
|
|
14313
|
+
var import_promises27 = require("fs/promises");
|
|
13903
14314
|
var import_node_path35 = __toESM(require("path"), 1);
|
|
13904
14315
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
13905
14316
|
folders: [
|
|
@@ -13980,8 +14391,8 @@ async function provisionSubagents(options) {
|
|
|
13980
14391
|
if (!dryRun) {
|
|
13981
14392
|
await removeIfExists(lockFile);
|
|
13982
14393
|
await ensureDir(githubAgentsDir);
|
|
13983
|
-
await (0,
|
|
13984
|
-
await (0,
|
|
14394
|
+
await (0, import_promises27.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14395
|
+
await (0, import_promises27.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
13985
14396
|
}
|
|
13986
14397
|
created.push(subagentDir);
|
|
13987
14398
|
lockedSubagents.delete(subagentDir);
|
|
@@ -13991,8 +14402,8 @@ async function provisionSubagents(options) {
|
|
|
13991
14402
|
if (!isLocked && force) {
|
|
13992
14403
|
if (!dryRun) {
|
|
13993
14404
|
await ensureDir(githubAgentsDir);
|
|
13994
|
-
await (0,
|
|
13995
|
-
await (0,
|
|
14405
|
+
await (0, import_promises27.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14406
|
+
await (0, import_promises27.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
13996
14407
|
}
|
|
13997
14408
|
created.push(subagentDir);
|
|
13998
14409
|
subagentsProvisioned += 1;
|
|
@@ -14000,8 +14411,8 @@ async function provisionSubagents(options) {
|
|
|
14000
14411
|
}
|
|
14001
14412
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
14002
14413
|
await ensureDir(githubAgentsDir);
|
|
14003
|
-
await (0,
|
|
14004
|
-
await (0,
|
|
14414
|
+
await (0, import_promises27.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14415
|
+
await (0, import_promises27.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
14005
14416
|
}
|
|
14006
14417
|
skippedExisting.push(subagentDir);
|
|
14007
14418
|
subagentsProvisioned += 1;
|
|
@@ -14016,8 +14427,8 @@ async function provisionSubagents(options) {
|
|
|
14016
14427
|
if (!dryRun) {
|
|
14017
14428
|
await ensureDir(subagentDir);
|
|
14018
14429
|
await ensureDir(githubAgentsDir);
|
|
14019
|
-
await (0,
|
|
14020
|
-
await (0,
|
|
14430
|
+
await (0, import_promises27.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14431
|
+
await (0, import_promises27.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
14021
14432
|
}
|
|
14022
14433
|
created.push(subagentDir);
|
|
14023
14434
|
subagentsProvisioned += 1;
|
|
@@ -14205,7 +14616,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
14205
14616
|
if (includesPathSeparator) {
|
|
14206
14617
|
const resolved = import_node_path36.default.isAbsolute(candidate) ? candidate : import_node_path36.default.resolve(candidate);
|
|
14207
14618
|
try {
|
|
14208
|
-
await (0,
|
|
14619
|
+
await (0, import_promises28.access)(resolved, import_promises28.constants.F_OK);
|
|
14209
14620
|
return resolved;
|
|
14210
14621
|
} catch {
|
|
14211
14622
|
throw new Error(
|
|
@@ -14218,7 +14629,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
14218
14629
|
const { stdout } = await execAsync3(`${locator} ${candidate}`);
|
|
14219
14630
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
14220
14631
|
if (lines.length > 0 && lines[0]) {
|
|
14221
|
-
await (0,
|
|
14632
|
+
await (0, import_promises28.access)(lines[0], import_promises28.constants.F_OK);
|
|
14222
14633
|
return lines[0];
|
|
14223
14634
|
}
|
|
14224
14635
|
} catch {
|
|
@@ -14232,7 +14643,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
14232
14643
|
return void 0;
|
|
14233
14644
|
}
|
|
14234
14645
|
try {
|
|
14235
|
-
const stats = await (0,
|
|
14646
|
+
const stats = await (0, import_promises28.stat)(import_node_path36.default.resolve(template));
|
|
14236
14647
|
return stats.isFile() ? template : void 0;
|
|
14237
14648
|
} catch {
|
|
14238
14649
|
return template;
|
|
@@ -14401,9 +14812,9 @@ function isAgentProvider(provider) {
|
|
|
14401
14812
|
// src/evaluation/providers/targets-file.ts
|
|
14402
14813
|
init_cjs_shims();
|
|
14403
14814
|
var import_node_fs12 = require("fs");
|
|
14404
|
-
var
|
|
14815
|
+
var import_promises29 = require("fs/promises");
|
|
14405
14816
|
var import_node_path37 = __toESM(require("path"), 1);
|
|
14406
|
-
var
|
|
14817
|
+
var import_yaml8 = require("yaml");
|
|
14407
14818
|
function isRecord(value) {
|
|
14408
14819
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
14409
14820
|
}
|
|
@@ -14435,7 +14846,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
14435
14846
|
}
|
|
14436
14847
|
async function fileExists3(filePath) {
|
|
14437
14848
|
try {
|
|
14438
|
-
await (0,
|
|
14849
|
+
await (0, import_promises29.access)(filePath, import_node_fs12.constants.F_OK);
|
|
14439
14850
|
return true;
|
|
14440
14851
|
} catch {
|
|
14441
14852
|
return false;
|
|
@@ -14446,8 +14857,8 @@ async function readTargetDefinitions(filePath) {
|
|
|
14446
14857
|
if (!await fileExists3(absolutePath)) {
|
|
14447
14858
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
14448
14859
|
}
|
|
14449
|
-
const raw = await (0,
|
|
14450
|
-
const parsed = (0,
|
|
14860
|
+
const raw = await (0, import_promises29.readFile)(absolutePath, "utf8");
|
|
14861
|
+
const parsed = (0, import_yaml8.parse)(raw);
|
|
14451
14862
|
if (!isRecord(parsed)) {
|
|
14452
14863
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
14453
14864
|
}
|
|
@@ -14618,9 +15029,10 @@ function negateScore(score) {
|
|
|
14618
15029
|
|
|
14619
15030
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
14620
15031
|
init_cjs_shims();
|
|
14621
|
-
var
|
|
15032
|
+
var import_promises30 = require("fs/promises");
|
|
14622
15033
|
var import_node_os7 = require("os");
|
|
14623
15034
|
var import_node_path39 = require("path");
|
|
15035
|
+
init_exec();
|
|
14624
15036
|
|
|
14625
15037
|
// src/runtime/target-proxy.ts
|
|
14626
15038
|
init_cjs_shims();
|
|
@@ -14900,6 +15312,19 @@ function toCamelCaseDeep(obj) {
|
|
|
14900
15312
|
return obj;
|
|
14901
15313
|
}
|
|
14902
15314
|
|
|
15315
|
+
// src/evaluation/workspace/repo-checkout.ts
|
|
15316
|
+
init_cjs_shims();
|
|
15317
|
+
function getRepoCheckoutRef(checkout) {
|
|
15318
|
+
return checkout?.base_commit ?? checkout?.ref ?? "HEAD";
|
|
15319
|
+
}
|
|
15320
|
+
function getRepoCheckoutTargets(repos) {
|
|
15321
|
+
if (!repos) return [];
|
|
15322
|
+
return repos.filter((repo) => repo.checkout?.base_commit || repo.checkout?.ref).map((repo) => ({
|
|
15323
|
+
path: repo.path,
|
|
15324
|
+
ref: getRepoCheckoutRef(repo.checkout)
|
|
15325
|
+
}));
|
|
15326
|
+
}
|
|
15327
|
+
|
|
14903
15328
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
14904
15329
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
14905
15330
|
var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
|
|
@@ -14942,7 +15367,7 @@ async function materializeContentForGrader(messages, getWorkDir) {
|
|
|
14942
15367
|
const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
|
|
14943
15368
|
const dir = await getWorkDir();
|
|
14944
15369
|
const filePath = (0, import_node_path39.join)(dir, `img-${counter++}.${ext}`);
|
|
14945
|
-
await (0,
|
|
15370
|
+
await (0, import_promises30.writeFile)(filePath, Buffer.from(base64Data, "base64"));
|
|
14946
15371
|
blocks.push({ type: "image", media_type: img.media_type, path: filePath });
|
|
14947
15372
|
} else {
|
|
14948
15373
|
blocks.push({ type: "image", media_type: img.media_type, path: img.source });
|
|
@@ -14970,7 +15395,7 @@ var CodeEvaluator = class {
|
|
|
14970
15395
|
let imageTmpDir;
|
|
14971
15396
|
const getImageDir = async () => {
|
|
14972
15397
|
if (!imageTmpDir) {
|
|
14973
|
-
imageTmpDir = await (0,
|
|
15398
|
+
imageTmpDir = await (0, import_promises30.mkdtemp)((0, import_node_path39.join)((0, import_node_os7.tmpdir)(), "agentv-img-"));
|
|
14974
15399
|
}
|
|
14975
15400
|
return imageTmpDir;
|
|
14976
15401
|
};
|
|
@@ -14983,9 +15408,9 @@ var CodeEvaluator = class {
|
|
|
14983
15408
|
if (outputForPayload) {
|
|
14984
15409
|
const serialized = JSON.stringify(outputForPayload);
|
|
14985
15410
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
14986
|
-
const tmpDir = await (0,
|
|
15411
|
+
const tmpDir = await (0, import_promises30.mkdtemp)((0, import_node_path39.join)((0, import_node_os7.tmpdir)(), "agentv-grader-"));
|
|
14987
15412
|
outputPath = (0, import_node_path39.join)(tmpDir, "output.json");
|
|
14988
|
-
await (0,
|
|
15413
|
+
await (0, import_promises30.writeFile)(outputPath, serialized);
|
|
14989
15414
|
outputForPayload = null;
|
|
14990
15415
|
}
|
|
14991
15416
|
}
|
|
@@ -15034,13 +15459,31 @@ var CodeEvaluator = class {
|
|
|
15034
15459
|
const workspaceEnv = context2.workspacePath ? { AGENTV_WORKSPACE_PATH: context2.workspacePath } : void 0;
|
|
15035
15460
|
const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
|
|
15036
15461
|
try {
|
|
15037
|
-
|
|
15038
|
-
|
|
15039
|
-
|
|
15040
|
-
|
|
15041
|
-
|
|
15042
|
-
|
|
15043
|
-
|
|
15462
|
+
let stdout;
|
|
15463
|
+
if (context2.dockerConfig) {
|
|
15464
|
+
const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await Promise.resolve().then(() => (init_docker_workspace(), docker_workspace_exports));
|
|
15465
|
+
const dockerProvider = new DockerWorkspaceProvider2(context2.dockerConfig);
|
|
15466
|
+
const result = await dockerProvider.runGraderInContainer({
|
|
15467
|
+
command: [...this.command],
|
|
15468
|
+
stdin: inputPayload,
|
|
15469
|
+
repoCheckouts: getRepoCheckoutTargets(context2.evalCase.workspace?.repos)
|
|
15470
|
+
});
|
|
15471
|
+
if (result.exitCode !== 0) {
|
|
15472
|
+
const trimmedErr = result.stderr.trim();
|
|
15473
|
+
throw new Error(
|
|
15474
|
+
trimmedErr.length > 0 ? `Code evaluator exited with code ${result.exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${result.exitCode}`
|
|
15475
|
+
);
|
|
15476
|
+
}
|
|
15477
|
+
stdout = result.stdout.trim();
|
|
15478
|
+
} else {
|
|
15479
|
+
stdout = await executeScript(
|
|
15480
|
+
this.command,
|
|
15481
|
+
inputPayload,
|
|
15482
|
+
this.agentTimeoutMs,
|
|
15483
|
+
this.cwd,
|
|
15484
|
+
env
|
|
15485
|
+
);
|
|
15486
|
+
}
|
|
15044
15487
|
const parsed = parseJsonSafe(stdout);
|
|
15045
15488
|
const score = clampScore(typeof parsed?.score === "number" ? parsed.score : 0);
|
|
15046
15489
|
const assertions = Array.isArray(parsed?.assertions) ? parsed.assertions.filter(
|
|
@@ -15097,11 +15540,11 @@ var CodeEvaluator = class {
|
|
|
15097
15540
|
await proxyShutdown();
|
|
15098
15541
|
}
|
|
15099
15542
|
if (outputPath) {
|
|
15100
|
-
await (0,
|
|
15543
|
+
await (0, import_promises30.rm)((0, import_node_path39.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
15101
15544
|
});
|
|
15102
15545
|
}
|
|
15103
15546
|
if (imageTmpDir) {
|
|
15104
|
-
await (0,
|
|
15547
|
+
await (0, import_promises30.rm)(imageTmpDir, { recursive: true, force: true }).catch(() => {
|
|
15105
15548
|
});
|
|
15106
15549
|
}
|
|
15107
15550
|
}
|
|
@@ -15134,7 +15577,7 @@ var import_ai3 = require("ai");
|
|
|
15134
15577
|
|
|
15135
15578
|
// src/evaluation/evaluators/llm-grader.ts
|
|
15136
15579
|
init_cjs_shims();
|
|
15137
|
-
var
|
|
15580
|
+
var import_promises31 = __toESM(require("fs/promises"), 1);
|
|
15138
15581
|
var import_node_path40 = __toESM(require("path"), 1);
|
|
15139
15582
|
var import_ai2 = require("ai");
|
|
15140
15583
|
var import_zod4 = require("zod");
|
|
@@ -16185,7 +16628,7 @@ function createFilesystemTools(workspacePath) {
|
|
|
16185
16628
|
execute: async (input) => {
|
|
16186
16629
|
try {
|
|
16187
16630
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
16188
|
-
const entries = await
|
|
16631
|
+
const entries = await import_promises31.default.readdir(resolved, { withFileTypes: true });
|
|
16189
16632
|
return entries.map((e) => ({
|
|
16190
16633
|
name: e.name,
|
|
16191
16634
|
type: e.isDirectory() ? "directory" : "file"
|
|
@@ -16203,20 +16646,20 @@ function createFilesystemTools(workspacePath) {
|
|
|
16203
16646
|
execute: async (input) => {
|
|
16204
16647
|
try {
|
|
16205
16648
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
16206
|
-
const
|
|
16207
|
-
if (
|
|
16649
|
+
const stat12 = await import_promises31.default.stat(resolved);
|
|
16650
|
+
if (stat12.isDirectory()) {
|
|
16208
16651
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
16209
16652
|
}
|
|
16210
|
-
const buffer = Buffer.alloc(Math.min(
|
|
16211
|
-
const fd = await
|
|
16653
|
+
const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
|
|
16654
|
+
const fd = await import_promises31.default.open(resolved, "r");
|
|
16212
16655
|
try {
|
|
16213
16656
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
16214
16657
|
} finally {
|
|
16215
16658
|
await fd.close();
|
|
16216
16659
|
}
|
|
16217
16660
|
const content = buffer.toString("utf-8");
|
|
16218
|
-
const truncated =
|
|
16219
|
-
return { content, truncated, size:
|
|
16661
|
+
const truncated = stat12.size > MAX_FILE_SIZE;
|
|
16662
|
+
return { content, truncated, size: stat12.size };
|
|
16220
16663
|
} catch (error) {
|
|
16221
16664
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
16222
16665
|
}
|
|
@@ -16253,7 +16696,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
16253
16696
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
16254
16697
|
let entries;
|
|
16255
16698
|
try {
|
|
16256
|
-
entries = await
|
|
16699
|
+
entries = await import_promises31.default.readdir(dirPath, { withFileTypes: true });
|
|
16257
16700
|
} catch {
|
|
16258
16701
|
return;
|
|
16259
16702
|
}
|
|
@@ -16267,9 +16710,9 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
16267
16710
|
const ext = import_node_path40.default.extname(entry.name).toLowerCase();
|
|
16268
16711
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
16269
16712
|
try {
|
|
16270
|
-
const
|
|
16271
|
-
if (
|
|
16272
|
-
const content = await
|
|
16713
|
+
const stat12 = await import_promises31.default.stat(fullPath);
|
|
16714
|
+
if (stat12.size > MAX_FILE_SIZE) continue;
|
|
16715
|
+
const content = await import_promises31.default.readFile(fullPath, "utf-8");
|
|
16273
16716
|
const lines = content.split("\n");
|
|
16274
16717
|
for (let i = 0; i < lines.length; i++) {
|
|
16275
16718
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
@@ -16912,115 +17355,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
16912
17355
|
* Evaluate a single field against the expected value.
|
|
16913
17356
|
*/
|
|
16914
17357
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
16915
|
-
const { path:
|
|
16916
|
-
const candidateValue = resolvePath(candidateData,
|
|
16917
|
-
const expectedValue = resolvePath(expectedData,
|
|
17358
|
+
const { path: path56, match, required = true, weight = 1 } = fieldConfig;
|
|
17359
|
+
const candidateValue = resolvePath(candidateData, path56);
|
|
17360
|
+
const expectedValue = resolvePath(expectedData, path56);
|
|
16918
17361
|
if (expectedValue === void 0) {
|
|
16919
17362
|
return {
|
|
16920
|
-
path:
|
|
17363
|
+
path: path56,
|
|
16921
17364
|
score: 1,
|
|
16922
17365
|
// No expected value means no comparison needed
|
|
16923
17366
|
weight,
|
|
16924
17367
|
hit: true,
|
|
16925
|
-
message: `${
|
|
17368
|
+
message: `${path56}: no expected value`
|
|
16926
17369
|
};
|
|
16927
17370
|
}
|
|
16928
17371
|
if (candidateValue === void 0) {
|
|
16929
17372
|
if (required) {
|
|
16930
17373
|
return {
|
|
16931
|
-
path:
|
|
17374
|
+
path: path56,
|
|
16932
17375
|
score: 0,
|
|
16933
17376
|
weight,
|
|
16934
17377
|
hit: false,
|
|
16935
|
-
message: `${
|
|
17378
|
+
message: `${path56} (required, missing)`
|
|
16936
17379
|
};
|
|
16937
17380
|
}
|
|
16938
17381
|
return {
|
|
16939
|
-
path:
|
|
17382
|
+
path: path56,
|
|
16940
17383
|
score: 1,
|
|
16941
17384
|
// Don't penalize missing optional fields
|
|
16942
17385
|
weight: 0,
|
|
16943
17386
|
// Zero weight means it won't affect the score
|
|
16944
17387
|
hit: true,
|
|
16945
|
-
message: `${
|
|
17388
|
+
message: `${path56}: optional field missing`
|
|
16946
17389
|
};
|
|
16947
17390
|
}
|
|
16948
17391
|
switch (match) {
|
|
16949
17392
|
case "exact":
|
|
16950
|
-
return this.compareExact(
|
|
17393
|
+
return this.compareExact(path56, candidateValue, expectedValue, weight);
|
|
16951
17394
|
case "numeric_tolerance":
|
|
16952
17395
|
return this.compareNumericTolerance(
|
|
16953
|
-
|
|
17396
|
+
path56,
|
|
16954
17397
|
candidateValue,
|
|
16955
17398
|
expectedValue,
|
|
16956
17399
|
fieldConfig,
|
|
16957
17400
|
weight
|
|
16958
17401
|
);
|
|
16959
17402
|
case "date":
|
|
16960
|
-
return this.compareDate(
|
|
17403
|
+
return this.compareDate(path56, candidateValue, expectedValue, fieldConfig, weight);
|
|
16961
17404
|
default:
|
|
16962
17405
|
return {
|
|
16963
|
-
path:
|
|
17406
|
+
path: path56,
|
|
16964
17407
|
score: 0,
|
|
16965
17408
|
weight,
|
|
16966
17409
|
hit: false,
|
|
16967
|
-
message: `${
|
|
17410
|
+
message: `${path56}: unknown match type "${match}"`
|
|
16968
17411
|
};
|
|
16969
17412
|
}
|
|
16970
17413
|
}
|
|
16971
17414
|
/**
|
|
16972
17415
|
* Exact equality comparison.
|
|
16973
17416
|
*/
|
|
16974
|
-
compareExact(
|
|
17417
|
+
compareExact(path56, candidateValue, expectedValue, weight) {
|
|
16975
17418
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
16976
17419
|
return {
|
|
16977
|
-
path:
|
|
17420
|
+
path: path56,
|
|
16978
17421
|
score: 1,
|
|
16979
17422
|
weight,
|
|
16980
17423
|
hit: true,
|
|
16981
|
-
message:
|
|
17424
|
+
message: path56
|
|
16982
17425
|
};
|
|
16983
17426
|
}
|
|
16984
17427
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
16985
17428
|
return {
|
|
16986
|
-
path:
|
|
17429
|
+
path: path56,
|
|
16987
17430
|
score: 0,
|
|
16988
17431
|
weight,
|
|
16989
17432
|
hit: false,
|
|
16990
|
-
message: `${
|
|
17433
|
+
message: `${path56} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
16991
17434
|
};
|
|
16992
17435
|
}
|
|
16993
17436
|
return {
|
|
16994
|
-
path:
|
|
17437
|
+
path: path56,
|
|
16995
17438
|
score: 0,
|
|
16996
17439
|
weight,
|
|
16997
17440
|
hit: false,
|
|
16998
|
-
message: `${
|
|
17441
|
+
message: `${path56} (value mismatch)`
|
|
16999
17442
|
};
|
|
17000
17443
|
}
|
|
17001
17444
|
/**
|
|
17002
17445
|
* Numeric comparison with absolute or relative tolerance.
|
|
17003
17446
|
*/
|
|
17004
|
-
compareNumericTolerance(
|
|
17447
|
+
compareNumericTolerance(path56, candidateValue, expectedValue, fieldConfig, weight) {
|
|
17005
17448
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
17006
17449
|
const candidateNum = toNumber(candidateValue);
|
|
17007
17450
|
const expectedNum = toNumber(expectedValue);
|
|
17008
17451
|
if (candidateNum === null || expectedNum === null) {
|
|
17009
17452
|
return {
|
|
17010
|
-
path:
|
|
17453
|
+
path: path56,
|
|
17011
17454
|
score: 0,
|
|
17012
17455
|
weight,
|
|
17013
17456
|
hit: false,
|
|
17014
|
-
message: `${
|
|
17457
|
+
message: `${path56} (non-numeric value)`
|
|
17015
17458
|
};
|
|
17016
17459
|
}
|
|
17017
17460
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
17018
17461
|
return {
|
|
17019
|
-
path:
|
|
17462
|
+
path: path56,
|
|
17020
17463
|
score: 0,
|
|
17021
17464
|
weight,
|
|
17022
17465
|
hit: false,
|
|
17023
|
-
message: `${
|
|
17466
|
+
message: `${path56} (invalid numeric value)`
|
|
17024
17467
|
};
|
|
17025
17468
|
}
|
|
17026
17469
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -17033,61 +17476,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
17033
17476
|
}
|
|
17034
17477
|
if (withinTolerance) {
|
|
17035
17478
|
return {
|
|
17036
|
-
path:
|
|
17479
|
+
path: path56,
|
|
17037
17480
|
score: 1,
|
|
17038
17481
|
weight,
|
|
17039
17482
|
hit: true,
|
|
17040
|
-
message: `${
|
|
17483
|
+
message: `${path56} (within tolerance: diff=${diff.toFixed(2)})`
|
|
17041
17484
|
};
|
|
17042
17485
|
}
|
|
17043
17486
|
return {
|
|
17044
|
-
path:
|
|
17487
|
+
path: path56,
|
|
17045
17488
|
score: 0,
|
|
17046
17489
|
weight,
|
|
17047
17490
|
hit: false,
|
|
17048
|
-
message: `${
|
|
17491
|
+
message: `${path56} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
17049
17492
|
};
|
|
17050
17493
|
}
|
|
17051
17494
|
/**
|
|
17052
17495
|
* Date comparison with format normalization.
|
|
17053
17496
|
*/
|
|
17054
|
-
compareDate(
|
|
17497
|
+
compareDate(path56, candidateValue, expectedValue, fieldConfig, weight) {
|
|
17055
17498
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
17056
17499
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
17057
17500
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
17058
17501
|
if (candidateDate === null) {
|
|
17059
17502
|
return {
|
|
17060
|
-
path:
|
|
17503
|
+
path: path56,
|
|
17061
17504
|
score: 0,
|
|
17062
17505
|
weight,
|
|
17063
17506
|
hit: false,
|
|
17064
|
-
message: `${
|
|
17507
|
+
message: `${path56} (unparseable candidate date)`
|
|
17065
17508
|
};
|
|
17066
17509
|
}
|
|
17067
17510
|
if (expectedDate === null) {
|
|
17068
17511
|
return {
|
|
17069
|
-
path:
|
|
17512
|
+
path: path56,
|
|
17070
17513
|
score: 0,
|
|
17071
17514
|
weight,
|
|
17072
17515
|
hit: false,
|
|
17073
|
-
message: `${
|
|
17516
|
+
message: `${path56} (unparseable expected date)`
|
|
17074
17517
|
};
|
|
17075
17518
|
}
|
|
17076
17519
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
17077
17520
|
return {
|
|
17078
|
-
path:
|
|
17521
|
+
path: path56,
|
|
17079
17522
|
score: 1,
|
|
17080
17523
|
weight,
|
|
17081
17524
|
hit: true,
|
|
17082
|
-
message:
|
|
17525
|
+
message: path56
|
|
17083
17526
|
};
|
|
17084
17527
|
}
|
|
17085
17528
|
return {
|
|
17086
|
-
path:
|
|
17529
|
+
path: path56,
|
|
17087
17530
|
score: 0,
|
|
17088
17531
|
weight,
|
|
17089
17532
|
hit: false,
|
|
17090
|
-
message: `${
|
|
17533
|
+
message: `${path56} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
17091
17534
|
};
|
|
17092
17535
|
}
|
|
17093
17536
|
/**
|
|
@@ -17120,11 +17563,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
17120
17563
|
};
|
|
17121
17564
|
}
|
|
17122
17565
|
};
|
|
17123
|
-
function resolvePath(obj,
|
|
17124
|
-
if (!
|
|
17566
|
+
function resolvePath(obj, path56) {
|
|
17567
|
+
if (!path56 || !obj) {
|
|
17125
17568
|
return void 0;
|
|
17126
17569
|
}
|
|
17127
|
-
const parts =
|
|
17570
|
+
const parts = path56.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
17128
17571
|
let current = obj;
|
|
17129
17572
|
for (const part of parts) {
|
|
17130
17573
|
if (current === null || current === void 0) {
|
|
@@ -17621,8 +18064,8 @@ var TokenUsageEvaluator = class {
|
|
|
17621
18064
|
|
|
17622
18065
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
17623
18066
|
init_cjs_shims();
|
|
17624
|
-
function getNestedValue(obj,
|
|
17625
|
-
const parts =
|
|
18067
|
+
function getNestedValue(obj, path56) {
|
|
18068
|
+
const parts = path56.split(".");
|
|
17626
18069
|
let current = obj;
|
|
17627
18070
|
for (const part of parts) {
|
|
17628
18071
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -18246,7 +18689,7 @@ function runEqualsAssertion(output, value) {
|
|
|
18246
18689
|
init_cjs_shims();
|
|
18247
18690
|
var import_node_crypto11 = require("crypto");
|
|
18248
18691
|
var import_node_fs16 = require("fs");
|
|
18249
|
-
var
|
|
18692
|
+
var import_promises35 = require("fs/promises");
|
|
18250
18693
|
var import_node_path49 = __toESM(require("path"), 1);
|
|
18251
18694
|
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
18252
18695
|
|
|
@@ -18503,6 +18946,15 @@ async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
|
18503
18946
|
}
|
|
18504
18947
|
return void 0;
|
|
18505
18948
|
}
|
|
18949
|
+
function containsTemplateVariables(text) {
|
|
18950
|
+
const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
|
|
18951
|
+
for (const match of text.matchAll(variablePattern)) {
|
|
18952
|
+
if (VALID_TEMPLATE_VARIABLES.has(match[1])) {
|
|
18953
|
+
return true;
|
|
18954
|
+
}
|
|
18955
|
+
}
|
|
18956
|
+
return false;
|
|
18957
|
+
}
|
|
18506
18958
|
async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
18507
18959
|
const payload = {
|
|
18508
18960
|
criteria: context2.evalCase.criteria,
|
|
@@ -18575,9 +19027,20 @@ var llmGraderFactory = (config, context2) => {
|
|
|
18575
19027
|
},
|
|
18576
19028
|
agentTimeoutMs
|
|
18577
19029
|
);
|
|
19030
|
+
const isFromInlinePrompt = !c.resolvedPromptScript?.length && !c.resolvedPromptPath && !c.promptPath;
|
|
19031
|
+
let evaluatorTemplateOverride;
|
|
19032
|
+
let evalCase = evalContext.evalCase;
|
|
19033
|
+
if (customPrompt) {
|
|
19034
|
+
if (!isFromInlinePrompt || containsTemplateVariables(customPrompt)) {
|
|
19035
|
+
evaluatorTemplateOverride = customPrompt;
|
|
19036
|
+
} else {
|
|
19037
|
+
evalCase = { ...evalCase, criteria: customPrompt };
|
|
19038
|
+
}
|
|
19039
|
+
}
|
|
18578
19040
|
return evaluator.evaluate({
|
|
18579
19041
|
...evalContext,
|
|
18580
|
-
|
|
19042
|
+
evalCase,
|
|
19043
|
+
evaluatorTemplateOverride,
|
|
18581
19044
|
evaluator: c
|
|
18582
19045
|
});
|
|
18583
19046
|
}
|
|
@@ -19070,7 +19533,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
19070
19533
|
|
|
19071
19534
|
// src/evaluation/workspace/manager.ts
|
|
19072
19535
|
init_cjs_shims();
|
|
19073
|
-
var
|
|
19536
|
+
var import_promises32 = require("fs/promises");
|
|
19074
19537
|
var import_node_path45 = __toESM(require("path"), 1);
|
|
19075
19538
|
var TemplateNotFoundError = class extends Error {
|
|
19076
19539
|
constructor(templatePath) {
|
|
@@ -19093,7 +19556,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
19093
19556
|
};
|
|
19094
19557
|
async function isDirectory(filePath) {
|
|
19095
19558
|
try {
|
|
19096
|
-
const stats = await (0,
|
|
19559
|
+
const stats = await (0, import_promises32.stat)(filePath);
|
|
19097
19560
|
return stats.isDirectory();
|
|
19098
19561
|
} catch {
|
|
19099
19562
|
return false;
|
|
@@ -19104,8 +19567,8 @@ function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
|
19104
19567
|
return import_node_path45.default.join(root, evalRunId, caseId);
|
|
19105
19568
|
}
|
|
19106
19569
|
async function copyDirectoryRecursive(src, dest) {
|
|
19107
|
-
await (0,
|
|
19108
|
-
const entries = await (0,
|
|
19570
|
+
await (0, import_promises32.mkdir)(dest, { recursive: true });
|
|
19571
|
+
const entries = await (0, import_promises32.readdir)(src, { withFileTypes: true });
|
|
19109
19572
|
for (const entry of entries) {
|
|
19110
19573
|
const srcPath = import_node_path45.default.join(src, entry.name);
|
|
19111
19574
|
const destPath = import_node_path45.default.join(dest, entry.name);
|
|
@@ -19115,7 +19578,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
19115
19578
|
if (entry.isDirectory()) {
|
|
19116
19579
|
await copyDirectoryRecursive(srcPath, destPath);
|
|
19117
19580
|
} else {
|
|
19118
|
-
await (0,
|
|
19581
|
+
await (0, import_promises32.cp)(srcPath, destPath, { preserveTimestamps: true });
|
|
19119
19582
|
}
|
|
19120
19583
|
}
|
|
19121
19584
|
}
|
|
@@ -19130,7 +19593,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
19130
19593
|
const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
|
|
19131
19594
|
try {
|
|
19132
19595
|
if (await fileExists2(workspacePath)) {
|
|
19133
|
-
await (0,
|
|
19596
|
+
await (0, import_promises32.rm)(workspacePath, { recursive: true, force: true });
|
|
19134
19597
|
}
|
|
19135
19598
|
await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
|
|
19136
19599
|
return workspacePath;
|
|
@@ -19164,14 +19627,14 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
19164
19627
|
}
|
|
19165
19628
|
async function cleanupWorkspace(workspacePath) {
|
|
19166
19629
|
if (await fileExists2(workspacePath)) {
|
|
19167
|
-
await (0,
|
|
19630
|
+
await (0, import_promises32.rm)(workspacePath, { recursive: true, force: true });
|
|
19168
19631
|
}
|
|
19169
19632
|
}
|
|
19170
19633
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
19171
19634
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
19172
19635
|
const evalDir = import_node_path45.default.join(root, evalRunId);
|
|
19173
19636
|
if (await fileExists2(evalDir)) {
|
|
19174
|
-
await (0,
|
|
19637
|
+
await (0, import_promises32.rm)(evalDir, { recursive: true, force: true });
|
|
19175
19638
|
}
|
|
19176
19639
|
}
|
|
19177
19640
|
|
|
@@ -19180,7 +19643,7 @@ init_cjs_shims();
|
|
|
19180
19643
|
var import_node_child_process9 = require("child_process");
|
|
19181
19644
|
var import_node_crypto10 = require("crypto");
|
|
19182
19645
|
var import_node_fs14 = require("fs");
|
|
19183
|
-
var
|
|
19646
|
+
var import_promises33 = require("fs/promises");
|
|
19184
19647
|
var import_node_path46 = __toESM(require("path"), 1);
|
|
19185
19648
|
var import_node_util5 = require("util");
|
|
19186
19649
|
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process9.execFile);
|
|
@@ -19208,12 +19671,14 @@ async function git(args, opts) {
|
|
|
19208
19671
|
return stdout.trim();
|
|
19209
19672
|
}
|
|
19210
19673
|
function normalizeRepoForFingerprint(repo) {
|
|
19211
|
-
const
|
|
19212
|
-
|
|
19213
|
-
path
|
|
19214
|
-
|
|
19215
|
-
|
|
19216
|
-
|
|
19674
|
+
const result = {};
|
|
19675
|
+
if (repo.path) {
|
|
19676
|
+
result.path = repo.path;
|
|
19677
|
+
}
|
|
19678
|
+
if (repo.source) {
|
|
19679
|
+
result.source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
19680
|
+
}
|
|
19681
|
+
result.ref = getRepoCheckoutRef(repo.checkout);
|
|
19217
19682
|
if (repo.clone?.depth !== void 0) {
|
|
19218
19683
|
result.depth = repo.clone.depth;
|
|
19219
19684
|
}
|
|
@@ -19227,13 +19692,13 @@ function normalizeRepoForFingerprint(repo) {
|
|
|
19227
19692
|
}
|
|
19228
19693
|
function computeWorkspaceFingerprint(repos) {
|
|
19229
19694
|
const canonical = {
|
|
19230
|
-
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
19695
|
+
repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
|
|
19231
19696
|
};
|
|
19232
19697
|
return (0, import_node_crypto10.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
19233
19698
|
}
|
|
19234
19699
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
19235
|
-
await (0,
|
|
19236
|
-
const entries = await (0,
|
|
19700
|
+
await (0, import_promises33.mkdir)(dest, { recursive: true });
|
|
19701
|
+
const entries = await (0, import_promises33.readdir)(src, { withFileTypes: true });
|
|
19237
19702
|
for (const entry of entries) {
|
|
19238
19703
|
const srcPath = import_node_path46.default.join(src, entry.name);
|
|
19239
19704
|
const destPath = import_node_path46.default.join(dest, entry.name);
|
|
@@ -19246,7 +19711,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
19246
19711
|
}
|
|
19247
19712
|
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
19248
19713
|
} else {
|
|
19249
|
-
await (0,
|
|
19714
|
+
await (0, import_promises33.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
19250
19715
|
}
|
|
19251
19716
|
}
|
|
19252
19717
|
}
|
|
@@ -19270,7 +19735,7 @@ var WorkspacePoolManager = class {
|
|
|
19270
19735
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
19271
19736
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
19272
19737
|
const poolDir = import_node_path46.default.join(this.poolRoot, fingerprint);
|
|
19273
|
-
await (0,
|
|
19738
|
+
await (0, import_promises33.mkdir)(poolDir, { recursive: true });
|
|
19274
19739
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
19275
19740
|
if (drifted) {
|
|
19276
19741
|
console.warn(
|
|
@@ -19297,7 +19762,7 @@ var WorkspacePoolManager = class {
|
|
|
19297
19762
|
poolDir
|
|
19298
19763
|
};
|
|
19299
19764
|
}
|
|
19300
|
-
await (0,
|
|
19765
|
+
await (0, import_promises33.mkdir)(slotPath, { recursive: true });
|
|
19301
19766
|
if (templatePath) {
|
|
19302
19767
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
19303
19768
|
}
|
|
@@ -19321,7 +19786,7 @@ var WorkspacePoolManager = class {
|
|
|
19321
19786
|
/** Remove lock file to release a slot. */
|
|
19322
19787
|
async releaseSlot(slot) {
|
|
19323
19788
|
try {
|
|
19324
|
-
await (0,
|
|
19789
|
+
await (0, import_promises33.unlink)(slot.lockPath);
|
|
19325
19790
|
} catch {
|
|
19326
19791
|
}
|
|
19327
19792
|
}
|
|
@@ -19334,21 +19799,21 @@ var WorkspacePoolManager = class {
|
|
|
19334
19799
|
async tryLock(lockPath) {
|
|
19335
19800
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
19336
19801
|
try {
|
|
19337
|
-
await (0,
|
|
19802
|
+
await (0, import_promises33.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
19338
19803
|
return true;
|
|
19339
19804
|
} catch (err) {
|
|
19340
19805
|
if (err.code !== "EEXIST") {
|
|
19341
19806
|
throw err;
|
|
19342
19807
|
}
|
|
19343
19808
|
try {
|
|
19344
|
-
const pidStr = await (0,
|
|
19809
|
+
const pidStr = await (0, import_promises33.readFile)(lockPath, "utf-8");
|
|
19345
19810
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
19346
19811
|
if (!Number.isNaN(pid)) {
|
|
19347
19812
|
try {
|
|
19348
19813
|
process.kill(pid, 0);
|
|
19349
19814
|
return false;
|
|
19350
19815
|
} catch {
|
|
19351
|
-
await (0,
|
|
19816
|
+
await (0, import_promises33.unlink)(lockPath).catch(() => {
|
|
19352
19817
|
});
|
|
19353
19818
|
continue;
|
|
19354
19819
|
}
|
|
@@ -19368,7 +19833,7 @@ var WorkspacePoolManager = class {
|
|
|
19368
19833
|
async checkDrift(poolDir, fingerprint) {
|
|
19369
19834
|
const metadataPath = import_node_path46.default.join(poolDir, "metadata.json");
|
|
19370
19835
|
try {
|
|
19371
|
-
const raw = await (0,
|
|
19836
|
+
const raw = await (0, import_promises33.readFile)(metadataPath, "utf-8");
|
|
19372
19837
|
const metadata = JSON.parse(raw);
|
|
19373
19838
|
return metadata.fingerprint !== fingerprint;
|
|
19374
19839
|
} catch {
|
|
@@ -19383,17 +19848,17 @@ var WorkspacePoolManager = class {
|
|
|
19383
19848
|
repos,
|
|
19384
19849
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
19385
19850
|
};
|
|
19386
|
-
await (0,
|
|
19851
|
+
await (0, import_promises33.writeFile)(import_node_path46.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
19387
19852
|
}
|
|
19388
19853
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
19389
19854
|
async removeAllSlots(poolDir) {
|
|
19390
|
-
const entries = await (0,
|
|
19855
|
+
const entries = await (0, import_promises33.readdir)(poolDir);
|
|
19391
19856
|
for (const entry of entries) {
|
|
19392
19857
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
19393
19858
|
const lockPath = import_node_path46.default.join(poolDir, `${entry}.lock`);
|
|
19394
19859
|
if ((0, import_node_fs14.existsSync)(lockPath)) {
|
|
19395
19860
|
try {
|
|
19396
|
-
const pidStr = await (0,
|
|
19861
|
+
const pidStr = await (0, import_promises33.readFile)(lockPath, "utf-8");
|
|
19397
19862
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
19398
19863
|
if (!Number.isNaN(pid)) {
|
|
19399
19864
|
try {
|
|
@@ -19406,12 +19871,12 @@ var WorkspacePoolManager = class {
|
|
|
19406
19871
|
} catch {
|
|
19407
19872
|
}
|
|
19408
19873
|
}
|
|
19409
|
-
await (0,
|
|
19410
|
-
await (0,
|
|
19874
|
+
await (0, import_promises33.rm)(import_node_path46.default.join(poolDir, entry), { recursive: true, force: true });
|
|
19875
|
+
await (0, import_promises33.rm)(lockPath, { force: true }).catch(() => {
|
|
19411
19876
|
});
|
|
19412
19877
|
}
|
|
19413
19878
|
}
|
|
19414
|
-
await (0,
|
|
19879
|
+
await (0, import_promises33.rm)(import_node_path46.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
19415
19880
|
});
|
|
19416
19881
|
}
|
|
19417
19882
|
/**
|
|
@@ -19421,6 +19886,7 @@ var WorkspacePoolManager = class {
|
|
|
19421
19886
|
*/
|
|
19422
19887
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
19423
19888
|
for (const repo of repos) {
|
|
19889
|
+
if (!repo.path || !repo.source) continue;
|
|
19424
19890
|
const repoDir = import_node_path46.default.join(slotPath, repo.path);
|
|
19425
19891
|
if (!(0, import_node_fs14.existsSync)(repoDir)) {
|
|
19426
19892
|
continue;
|
|
@@ -19428,7 +19894,7 @@ var WorkspacePoolManager = class {
|
|
|
19428
19894
|
if (poolReset === "none") {
|
|
19429
19895
|
continue;
|
|
19430
19896
|
}
|
|
19431
|
-
const ref = repo.checkout
|
|
19897
|
+
const ref = getRepoCheckoutRef(repo.checkout);
|
|
19432
19898
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
19433
19899
|
if (resolve === "remote") {
|
|
19434
19900
|
const fetchArgs = ["fetch", "origin", ref];
|
|
@@ -19445,8 +19911,8 @@ var WorkspacePoolManager = class {
|
|
|
19445
19911
|
}
|
|
19446
19912
|
if (templatePath) {
|
|
19447
19913
|
const repoDirNames = new Set(
|
|
19448
|
-
repos.map((r) => {
|
|
19449
|
-
const normalized = r.path.replace(/^\.\//, "");
|
|
19914
|
+
repos.filter((r) => r.path).map((r) => {
|
|
19915
|
+
const normalized = (r.path ?? "").replace(/^\.\//, "");
|
|
19450
19916
|
return normalized.split("/")[0];
|
|
19451
19917
|
})
|
|
19452
19918
|
);
|
|
@@ -19502,17 +19968,17 @@ var RepoManager = class {
|
|
|
19502
19968
|
static validateLocalPaths(repos) {
|
|
19503
19969
|
const errors = [];
|
|
19504
19970
|
for (const repo of repos) {
|
|
19505
|
-
if (repo.source.type !== "local") continue;
|
|
19971
|
+
if (!repo.source || repo.source.type !== "local") continue;
|
|
19506
19972
|
const sourcePath = repo.source.path;
|
|
19507
19973
|
if (!sourcePath || sourcePath.trim() === "") {
|
|
19508
19974
|
errors.push({
|
|
19509
|
-
repoPath: repo.path,
|
|
19975
|
+
repoPath: repo.path ?? "(none)",
|
|
19510
19976
|
resolvedSourcePath: sourcePath ?? "",
|
|
19511
19977
|
reason: "empty_path"
|
|
19512
19978
|
});
|
|
19513
19979
|
} else if (!(0, import_node_fs15.existsSync)(sourcePath)) {
|
|
19514
19980
|
errors.push({
|
|
19515
|
-
repoPath: repo.path,
|
|
19981
|
+
repoPath: repo.path ?? "(none)",
|
|
19516
19982
|
resolvedSourcePath: sourcePath,
|
|
19517
19983
|
reason: "not_found"
|
|
19518
19984
|
});
|
|
@@ -19559,6 +20025,12 @@ ${lines.join("\n")}`;
|
|
|
19559
20025
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
19560
20026
|
*/
|
|
19561
20027
|
async materialize(repo, workspacePath) {
|
|
20028
|
+
if (!repo.source || !repo.path) {
|
|
20029
|
+
if (this.verbose) {
|
|
20030
|
+
console.log(`[repo] materialize skip path=${repo.path ?? "(none)"} (no source or path)`);
|
|
20031
|
+
}
|
|
20032
|
+
return;
|
|
20033
|
+
}
|
|
19562
20034
|
const targetDir = import_node_path47.default.join(workspacePath, repo.path);
|
|
19563
20035
|
const sourceUrl = getSourceUrl(repo.source);
|
|
19564
20036
|
const startedAt = Date.now();
|
|
@@ -19582,7 +20054,7 @@ ${lines.join("\n")}`;
|
|
|
19582
20054
|
await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
|
|
19583
20055
|
await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
|
|
19584
20056
|
}
|
|
19585
|
-
const ref = repo.checkout
|
|
20057
|
+
const ref = getRepoCheckoutRef(repo.checkout);
|
|
19586
20058
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
19587
20059
|
let resolvedSha;
|
|
19588
20060
|
if (resolve === "remote" && repo.source.type === "git") {
|
|
@@ -19634,22 +20106,26 @@ ${lines.join("\n")}`;
|
|
|
19634
20106
|
);
|
|
19635
20107
|
}
|
|
19636
20108
|
}
|
|
19637
|
-
/** Materialize all repos into the workspace. */
|
|
20109
|
+
/** Materialize all repos into the workspace. Skips repos without source (Docker-only repos). */
|
|
19638
20110
|
async materializeAll(repos, workspacePath) {
|
|
20111
|
+
const materializableRepos = repos.filter((r) => r.source);
|
|
19639
20112
|
if (this.verbose) {
|
|
19640
|
-
console.log(
|
|
20113
|
+
console.log(
|
|
20114
|
+
`[repo] materializeAll count=${materializableRepos.length} (${repos.length - materializableRepos.length} skipped, no source) workspace=${workspacePath}`
|
|
20115
|
+
);
|
|
19641
20116
|
}
|
|
19642
|
-
for (const repo of
|
|
20117
|
+
for (const repo of materializableRepos) {
|
|
19643
20118
|
await this.materialize(repo, workspacePath);
|
|
19644
20119
|
}
|
|
19645
20120
|
if (this.verbose) {
|
|
19646
20121
|
console.log("[repo] materializeAll complete");
|
|
19647
20122
|
}
|
|
19648
20123
|
}
|
|
19649
|
-
/** Reset repos in workspace to their checkout state. */
|
|
20124
|
+
/** Reset repos in workspace to their checkout state. Skips repos without path or source. */
|
|
19650
20125
|
async reset(repos, workspacePath, reset) {
|
|
19651
20126
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
19652
20127
|
for (const repo of repos) {
|
|
20128
|
+
if (!repo.path || !repo.source) continue;
|
|
19653
20129
|
const targetDir = import_node_path47.default.join(workspacePath, repo.path);
|
|
19654
20130
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
19655
20131
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
@@ -19659,14 +20135,14 @@ ${lines.join("\n")}`;
|
|
|
19659
20135
|
|
|
19660
20136
|
// src/evaluation/workspace/resolve.ts
|
|
19661
20137
|
init_cjs_shims();
|
|
19662
|
-
var
|
|
20138
|
+
var import_promises34 = require("fs/promises");
|
|
19663
20139
|
var import_node_path48 = __toESM(require("path"), 1);
|
|
19664
20140
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
19665
20141
|
if (!templatePath) {
|
|
19666
20142
|
return void 0;
|
|
19667
20143
|
}
|
|
19668
20144
|
const resolved = import_node_path48.default.resolve(templatePath);
|
|
19669
|
-
const stats = await (0,
|
|
20145
|
+
const stats = await (0, import_promises34.stat)(resolved);
|
|
19670
20146
|
if (stats.isFile()) {
|
|
19671
20147
|
return {
|
|
19672
20148
|
dir: import_node_path48.default.dirname(resolved),
|
|
@@ -19676,7 +20152,7 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
19676
20152
|
if (!stats.isDirectory()) {
|
|
19677
20153
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
19678
20154
|
}
|
|
19679
|
-
const entries = await (0,
|
|
20155
|
+
const entries = await (0, import_promises34.readdir)(resolved);
|
|
19680
20156
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
19681
20157
|
if (workspaceFiles.length === 1) {
|
|
19682
20158
|
return {
|
|
@@ -19696,6 +20172,7 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
19696
20172
|
|
|
19697
20173
|
// src/evaluation/workspace/script-executor.ts
|
|
19698
20174
|
init_cjs_shims();
|
|
20175
|
+
init_exec();
|
|
19699
20176
|
function interpolateArgs(args, context2) {
|
|
19700
20177
|
const vars = {
|
|
19701
20178
|
workspace_path: context2.workspacePath,
|
|
@@ -19975,7 +20452,8 @@ async function runEvaluation(options) {
|
|
|
19975
20452
|
for (const ec of filteredEvalCases) {
|
|
19976
20453
|
if (ec.workspace?.repos) {
|
|
19977
20454
|
for (const repo of ec.workspace.repos) {
|
|
19978
|
-
|
|
20455
|
+
if (!repo.source) continue;
|
|
20456
|
+
const key = `${repo.path ?? ""}::${repo.source.type === "local" ? repo.source.path : ""}`;
|
|
19979
20457
|
if (!allRepos.has(key)) {
|
|
19980
20458
|
allRepos.set(key, repo);
|
|
19981
20459
|
}
|
|
@@ -19988,7 +20466,7 @@ async function runEvaluation(options) {
|
|
|
19988
20466
|
const message = RepoManager.formatValidationErrors(localPathErrors);
|
|
19989
20467
|
console.warn(`Warning: ${message}`);
|
|
19990
20468
|
const invalidLocalRepoPaths = new Set(localPathErrors.map((e) => e.repoPath));
|
|
19991
|
-
if (suiteWorkspace?.repos?.some((r) => invalidLocalRepoPaths.has(r.path))) {
|
|
20469
|
+
if (suiteWorkspace?.repos?.some((r) => r.path && invalidLocalRepoPaths.has(r.path))) {
|
|
19992
20470
|
throw new Error(message);
|
|
19993
20471
|
}
|
|
19994
20472
|
}
|
|
@@ -20049,14 +20527,14 @@ async function runEvaluation(options) {
|
|
|
20049
20527
|
let staticMaterialised = false;
|
|
20050
20528
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
20051
20529
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
20052
|
-
const dirExists = await (0,
|
|
20530
|
+
const dirExists = await (0, import_promises35.stat)(configuredStaticPath).then(
|
|
20053
20531
|
(s) => s.isDirectory(),
|
|
20054
20532
|
() => false
|
|
20055
20533
|
);
|
|
20056
|
-
const isEmpty = dirExists ? (await (0,
|
|
20534
|
+
const isEmpty = dirExists ? (await (0, import_promises35.readdir)(configuredStaticPath)).length === 0 : false;
|
|
20057
20535
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
20058
20536
|
if (!dirExists) {
|
|
20059
|
-
await (0,
|
|
20537
|
+
await (0, import_promises35.mkdir)(configuredStaticPath, { recursive: true });
|
|
20060
20538
|
}
|
|
20061
20539
|
if (workspaceTemplate) {
|
|
20062
20540
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -20101,14 +20579,14 @@ async function runEvaluation(options) {
|
|
|
20101
20579
|
}
|
|
20102
20580
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
20103
20581
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
20104
|
-
await (0,
|
|
20582
|
+
await (0, import_promises35.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
20105
20583
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
20106
20584
|
}
|
|
20107
20585
|
try {
|
|
20108
20586
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
20109
20587
|
const copiedWorkspaceFile = import_node_path49.default.join(sharedWorkspacePath, import_node_path49.default.basename(suiteWorkspaceFile));
|
|
20110
20588
|
try {
|
|
20111
|
-
await (0,
|
|
20589
|
+
await (0, import_promises35.stat)(copiedWorkspaceFile);
|
|
20112
20590
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
20113
20591
|
} catch {
|
|
20114
20592
|
}
|
|
@@ -20121,6 +20599,7 @@ async function runEvaluation(options) {
|
|
|
20121
20599
|
try {
|
|
20122
20600
|
if (needsPerRepoCheck) {
|
|
20123
20601
|
for (const repo of suiteWorkspace.repos) {
|
|
20602
|
+
if (!repo.path || !repo.source) continue;
|
|
20124
20603
|
const targetDir = import_node_path49.default.join(sharedWorkspacePath, repo.path);
|
|
20125
20604
|
if ((0, import_node_fs16.existsSync)(targetDir)) {
|
|
20126
20605
|
setupLog(`reusing existing repo at: ${targetDir}`);
|
|
@@ -20145,6 +20624,19 @@ async function runEvaluation(options) {
|
|
|
20145
20624
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
20146
20625
|
}
|
|
20147
20626
|
}
|
|
20627
|
+
const suiteDockerConfig = suiteWorkspace?.docker;
|
|
20628
|
+
if (suiteDockerConfig) {
|
|
20629
|
+
setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
|
|
20630
|
+
const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await Promise.resolve().then(() => (init_docker_workspace(), docker_workspace_exports));
|
|
20631
|
+
const dockerSetup = new DockerWorkspaceProvider2(suiteDockerConfig);
|
|
20632
|
+
if (!await dockerSetup.isDockerAvailable()) {
|
|
20633
|
+
throw new Error(
|
|
20634
|
+
"Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
|
|
20635
|
+
);
|
|
20636
|
+
}
|
|
20637
|
+
await dockerSetup.pullImage();
|
|
20638
|
+
setupLog("Docker image pull complete");
|
|
20639
|
+
}
|
|
20148
20640
|
const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
|
|
20149
20641
|
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
20150
20642
|
if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
|
|
@@ -20505,11 +20997,9 @@ async function runBatchEvaluation(options) {
|
|
|
20505
20997
|
const promptInputs = promptInputsList[index];
|
|
20506
20998
|
return {
|
|
20507
20999
|
question: promptInputs.question,
|
|
21000
|
+
systemPrompt: promptInputs.systemMessage,
|
|
20508
21001
|
inputFiles: evalCase.file_paths,
|
|
20509
|
-
evalCaseId: evalCase.id
|
|
20510
|
-
metadata: {
|
|
20511
|
-
systemPrompt: promptInputs.systemMessage ?? ""
|
|
20512
|
-
}
|
|
21002
|
+
evalCaseId: evalCase.id
|
|
20513
21003
|
};
|
|
20514
21004
|
});
|
|
20515
21005
|
const batchResponse = await provider.invokeBatch?.(batchRequests);
|
|
@@ -20710,7 +21200,7 @@ async function runEvalCase(options) {
|
|
|
20710
21200
|
if (caseWorkspaceFile && workspacePath) {
|
|
20711
21201
|
const copiedFile = import_node_path49.default.join(workspacePath, import_node_path49.default.basename(caseWorkspaceFile));
|
|
20712
21202
|
try {
|
|
20713
|
-
await (0,
|
|
21203
|
+
await (0, import_promises35.stat)(copiedFile);
|
|
20714
21204
|
caseWorkspaceFile = copiedFile;
|
|
20715
21205
|
} catch {
|
|
20716
21206
|
}
|
|
@@ -20718,7 +21208,7 @@ async function runEvalCase(options) {
|
|
|
20718
21208
|
}
|
|
20719
21209
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
20720
21210
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
20721
|
-
await (0,
|
|
21211
|
+
await (0, import_promises35.mkdir)(workspacePath, { recursive: true });
|
|
20722
21212
|
}
|
|
20723
21213
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
20724
21214
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -20773,8 +21263,8 @@ async function runEvalCase(options) {
|
|
|
20773
21263
|
const srcPath = import_node_path49.default.resolve(baseDir, relPath);
|
|
20774
21264
|
const destPath = import_node_path49.default.resolve(workspacePath, relPath);
|
|
20775
21265
|
try {
|
|
20776
|
-
await (0,
|
|
20777
|
-
await (0,
|
|
21266
|
+
await (0, import_promises35.mkdir)(import_node_path49.default.dirname(destPath), { recursive: true });
|
|
21267
|
+
await (0, import_promises35.copyFile)(srcPath, destPath);
|
|
20778
21268
|
} catch (error) {
|
|
20779
21269
|
const message = error instanceof Error ? error.message : String(error);
|
|
20780
21270
|
return buildErrorResult(
|
|
@@ -21040,6 +21530,7 @@ async function runEvalCase(options) {
|
|
|
21040
21530
|
availableTargets,
|
|
21041
21531
|
fileChanges,
|
|
21042
21532
|
workspacePath,
|
|
21533
|
+
dockerConfig: evalCase.workspace?.docker,
|
|
21043
21534
|
verbose,
|
|
21044
21535
|
threshold: evalCase.threshold ?? caseThreshold
|
|
21045
21536
|
});
|
|
@@ -21233,6 +21724,7 @@ async function evaluateCandidate(options) {
|
|
|
21233
21724
|
availableTargets,
|
|
21234
21725
|
fileChanges,
|
|
21235
21726
|
workspacePath,
|
|
21727
|
+
dockerConfig,
|
|
21236
21728
|
threshold: evalThreshold
|
|
21237
21729
|
} = options;
|
|
21238
21730
|
const gradeTimestamp = nowFn();
|
|
@@ -21259,6 +21751,7 @@ async function evaluateCandidate(options) {
|
|
|
21259
21751
|
availableTargets,
|
|
21260
21752
|
fileChanges,
|
|
21261
21753
|
workspacePath,
|
|
21754
|
+
dockerConfig,
|
|
21262
21755
|
threshold: evalThreshold
|
|
21263
21756
|
});
|
|
21264
21757
|
const completedAt = nowFn();
|
|
@@ -21334,6 +21827,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
21334
21827
|
availableTargets,
|
|
21335
21828
|
fileChanges,
|
|
21336
21829
|
workspacePath,
|
|
21830
|
+
dockerConfig,
|
|
21337
21831
|
threshold
|
|
21338
21832
|
} = options;
|
|
21339
21833
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
@@ -21361,6 +21855,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
21361
21855
|
availableTargets,
|
|
21362
21856
|
fileChanges,
|
|
21363
21857
|
workspacePath,
|
|
21858
|
+
dockerConfig,
|
|
21364
21859
|
threshold
|
|
21365
21860
|
});
|
|
21366
21861
|
}
|
|
@@ -21390,6 +21885,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
21390
21885
|
availableTargets,
|
|
21391
21886
|
fileChanges,
|
|
21392
21887
|
workspacePath,
|
|
21888
|
+
dockerConfig,
|
|
21393
21889
|
...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
|
|
21394
21890
|
});
|
|
21395
21891
|
return { score };
|
|
@@ -21428,7 +21924,8 @@ async function runEvaluatorList(options) {
|
|
|
21428
21924
|
targetResolver,
|
|
21429
21925
|
availableTargets,
|
|
21430
21926
|
fileChanges,
|
|
21431
|
-
workspacePath
|
|
21927
|
+
workspacePath,
|
|
21928
|
+
dockerConfig
|
|
21432
21929
|
} = options;
|
|
21433
21930
|
const scored = [];
|
|
21434
21931
|
const scores = [];
|
|
@@ -21451,7 +21948,8 @@ async function runEvaluatorList(options) {
|
|
|
21451
21948
|
targetResolver,
|
|
21452
21949
|
availableTargets,
|
|
21453
21950
|
fileChanges,
|
|
21454
|
-
workspacePath
|
|
21951
|
+
workspacePath,
|
|
21952
|
+
dockerConfig
|
|
21455
21953
|
};
|
|
21456
21954
|
const evalFileDir = evalCase.file_paths[0] ? import_node_path49.default.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
21457
21955
|
const dispatchContext = {
|
|
@@ -21613,13 +22111,11 @@ async function invokeProvider(provider, options) {
|
|
|
21613
22111
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
21614
22112
|
return await provider.invoke({
|
|
21615
22113
|
question: promptInputs.question,
|
|
22114
|
+
systemPrompt: promptInputs.systemMessage,
|
|
21616
22115
|
chatPrompt: promptInputs.chatPrompt,
|
|
21617
22116
|
inputFiles: evalCase.file_paths,
|
|
21618
22117
|
evalCaseId: evalCase.id,
|
|
21619
22118
|
attempt,
|
|
21620
|
-
metadata: {
|
|
21621
|
-
systemPrompt: promptInputs.systemMessage ?? ""
|
|
21622
|
-
},
|
|
21623
22119
|
signal: controller.signal,
|
|
21624
22120
|
cwd,
|
|
21625
22121
|
workspaceFile,
|
|
@@ -21991,7 +22487,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
21991
22487
|
return null;
|
|
21992
22488
|
}
|
|
21993
22489
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
21994
|
-
const { readFileSync:
|
|
22490
|
+
const { readFileSync: readFileSync5 } = await import("fs");
|
|
21995
22491
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
21996
22492
|
const envFiles = [];
|
|
21997
22493
|
for (const dir of chain) {
|
|
@@ -22000,7 +22496,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
22000
22496
|
}
|
|
22001
22497
|
for (let i = 0; i < envFiles.length; i++) {
|
|
22002
22498
|
try {
|
|
22003
|
-
const content =
|
|
22499
|
+
const content = readFileSync5(envFiles[i], "utf8");
|
|
22004
22500
|
for (const line of content.split("\n")) {
|
|
22005
22501
|
const trimmed = line.trim();
|
|
22006
22502
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -22073,12 +22569,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
22073
22569
|
".agentv/config.js"
|
|
22074
22570
|
];
|
|
22075
22571
|
async function loadTsConfig(projectRoot) {
|
|
22076
|
-
const { existsSync:
|
|
22572
|
+
const { existsSync: existsSync9 } = await import("fs");
|
|
22077
22573
|
const { pathToFileURL: pathToFileURL2 } = await import("url");
|
|
22078
22574
|
const { join: join2 } = await import("path");
|
|
22079
22575
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
22080
22576
|
const filePath = join2(projectRoot, fileName);
|
|
22081
|
-
if (!
|
|
22577
|
+
if (!existsSync9(filePath)) {
|
|
22082
22578
|
continue;
|
|
22083
22579
|
}
|
|
22084
22580
|
try {
|
|
@@ -22183,9 +22679,9 @@ init_cjs_shims();
|
|
|
22183
22679
|
|
|
22184
22680
|
// src/evaluation/workspace/deps-scanner.ts
|
|
22185
22681
|
init_cjs_shims();
|
|
22186
|
-
var
|
|
22682
|
+
var import_promises36 = require("fs/promises");
|
|
22187
22683
|
var import_node_path51 = __toESM(require("path"), 1);
|
|
22188
|
-
var
|
|
22684
|
+
var import_yaml9 = require("yaml");
|
|
22189
22685
|
function normalizeGitUrl(url) {
|
|
22190
22686
|
let normalized = url.replace(/\.git$/, "");
|
|
22191
22687
|
try {
|
|
@@ -22203,7 +22699,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
22203
22699
|
try {
|
|
22204
22700
|
const repos = await extractReposFromEvalFile(filePath);
|
|
22205
22701
|
for (const repo of repos) {
|
|
22206
|
-
if (repo.source.type !== "git") continue;
|
|
22702
|
+
if (!repo.source || repo.source.type !== "git") continue;
|
|
22207
22703
|
const ref = repo.checkout?.ref;
|
|
22208
22704
|
const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
|
|
22209
22705
|
const existing = seen.get(key);
|
|
@@ -22231,8 +22727,8 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
22231
22727
|
return { repos: [...seen.values()], errors };
|
|
22232
22728
|
}
|
|
22233
22729
|
async function extractReposFromEvalFile(filePath) {
|
|
22234
|
-
const content = await (0,
|
|
22235
|
-
const parsed = interpolateEnv((0,
|
|
22730
|
+
const content = await (0, import_promises36.readFile)(filePath, "utf8");
|
|
22731
|
+
const parsed = interpolateEnv((0, import_yaml9.parse)(content), process.env);
|
|
22236
22732
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
22237
22733
|
const obj = parsed;
|
|
22238
22734
|
const evalFileDir = import_node_path51.default.dirname(import_node_path51.default.resolve(filePath));
|
|
@@ -22252,8 +22748,8 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
22252
22748
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
22253
22749
|
if (typeof raw === "string") {
|
|
22254
22750
|
const workspaceFilePath = import_node_path51.default.resolve(evalFileDir, raw);
|
|
22255
|
-
const content = await (0,
|
|
22256
|
-
const parsed = interpolateEnv((0,
|
|
22751
|
+
const content = await (0, import_promises36.readFile)(workspaceFilePath, "utf8");
|
|
22752
|
+
const parsed = interpolateEnv((0, import_yaml9.parse)(content), process.env);
|
|
22257
22753
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
22258
22754
|
return extractReposFromObject(parsed);
|
|
22259
22755
|
}
|
|
@@ -22279,9 +22775,12 @@ function extractReposFromObject(obj) {
|
|
|
22279
22775
|
return result;
|
|
22280
22776
|
}
|
|
22281
22777
|
|
|
22778
|
+
// src/evaluation/workspace/index.ts
|
|
22779
|
+
init_docker_workspace();
|
|
22780
|
+
|
|
22282
22781
|
// src/evaluation/cache/response-cache.ts
|
|
22283
22782
|
init_cjs_shims();
|
|
22284
|
-
var
|
|
22783
|
+
var import_promises37 = require("fs/promises");
|
|
22285
22784
|
var import_node_path52 = __toESM(require("path"), 1);
|
|
22286
22785
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
22287
22786
|
var ResponseCache = class {
|
|
@@ -22292,7 +22791,7 @@ var ResponseCache = class {
|
|
|
22292
22791
|
async get(key) {
|
|
22293
22792
|
const filePath = this.keyToPath(key);
|
|
22294
22793
|
try {
|
|
22295
|
-
const data = await (0,
|
|
22794
|
+
const data = await (0, import_promises37.readFile)(filePath, "utf8");
|
|
22296
22795
|
return JSON.parse(data);
|
|
22297
22796
|
} catch {
|
|
22298
22797
|
return void 0;
|
|
@@ -22301,8 +22800,8 @@ var ResponseCache = class {
|
|
|
22301
22800
|
async set(key, value) {
|
|
22302
22801
|
const filePath = this.keyToPath(key);
|
|
22303
22802
|
const dir = import_node_path52.default.dirname(filePath);
|
|
22304
|
-
await (0,
|
|
22305
|
-
await (0,
|
|
22803
|
+
await (0, import_promises37.mkdir)(dir, { recursive: true });
|
|
22804
|
+
await (0, import_promises37.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
22306
22805
|
}
|
|
22307
22806
|
keyToPath(key) {
|
|
22308
22807
|
const prefix = key.slice(0, 2);
|
|
@@ -22321,22 +22820,304 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
22321
22820
|
return false;
|
|
22322
22821
|
}
|
|
22323
22822
|
|
|
22324
|
-
// src/
|
|
22823
|
+
// src/evaluation/results-repo.ts
|
|
22325
22824
|
init_cjs_shims();
|
|
22825
|
+
var import_node_child_process11 = require("child_process");
|
|
22326
22826
|
var import_node_fs18 = require("fs");
|
|
22827
|
+
var import_promises38 = require("fs/promises");
|
|
22828
|
+
var import_node_os8 = __toESM(require("os"), 1);
|
|
22327
22829
|
var import_node_path53 = __toESM(require("path"), 1);
|
|
22328
|
-
var
|
|
22830
|
+
var import_node_util7 = require("util");
|
|
22831
|
+
var execFileAsync3 = (0, import_node_util7.promisify)(import_node_child_process11.execFile);
|
|
22832
|
+
function sanitizeRepoSlug(repo) {
|
|
22833
|
+
return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
|
|
22834
|
+
}
|
|
22835
|
+
function withFriendlyGitHubAuthError(error) {
|
|
22836
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
22837
|
+
const lower = message.toLowerCase();
|
|
22838
|
+
if (lower.includes("authentication failed") || lower.includes("could not read username") || lower.includes("permission denied") || lower.includes("not logged into any github hosts")) {
|
|
22839
|
+
return new Error(`${message}. Run 'gh auth login' to authenticate.`);
|
|
22840
|
+
}
|
|
22841
|
+
return new Error(message);
|
|
22842
|
+
}
|
|
22843
|
+
function normalizeResultsExportConfig(config) {
|
|
22844
|
+
return {
|
|
22845
|
+
repo: config.repo.trim(),
|
|
22846
|
+
path: config.path.trim().replace(/^\/+|\/+$/g, ""),
|
|
22847
|
+
auto_push: config.auto_push === true,
|
|
22848
|
+
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
22849
|
+
};
|
|
22850
|
+
}
|
|
22851
|
+
function resolveResultsRepoUrl(repo) {
|
|
22852
|
+
if (repo.includes("://") || repo.startsWith("git@")) {
|
|
22853
|
+
return repo;
|
|
22854
|
+
}
|
|
22855
|
+
return `https://github.com/${repo}.git`;
|
|
22856
|
+
}
|
|
22857
|
+
function getResultsRepoCachePaths(repo) {
|
|
22858
|
+
const rootDir = import_node_path53.default.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
22859
|
+
return {
|
|
22860
|
+
rootDir,
|
|
22861
|
+
repoDir: import_node_path53.default.join(rootDir, "repo"),
|
|
22862
|
+
statusFile: import_node_path53.default.join(rootDir, "status.json")
|
|
22863
|
+
};
|
|
22864
|
+
}
|
|
22865
|
+
function readPersistedStatus(statusFile) {
|
|
22866
|
+
if (!(0, import_node_fs18.existsSync)(statusFile)) {
|
|
22867
|
+
return {};
|
|
22868
|
+
}
|
|
22869
|
+
try {
|
|
22870
|
+
return JSON.parse((0, import_node_fs18.readFileSync)(statusFile, "utf8"));
|
|
22871
|
+
} catch {
|
|
22872
|
+
return {};
|
|
22873
|
+
}
|
|
22874
|
+
}
|
|
22875
|
+
function writePersistedStatus(statusFile, status) {
|
|
22876
|
+
(0, import_node_fs18.mkdirSync)(import_node_path53.default.dirname(statusFile), { recursive: true });
|
|
22877
|
+
(0, import_node_fs18.writeFileSync)(statusFile, `${JSON.stringify(status, null, 2)}
|
|
22878
|
+
`, "utf8");
|
|
22879
|
+
}
|
|
22880
|
+
async function runCommand(executable, args, options) {
|
|
22881
|
+
try {
|
|
22882
|
+
const { stdout, stderr } = await execFileAsync3(executable, [...args], {
|
|
22883
|
+
cwd: options?.cwd,
|
|
22884
|
+
env: process.env
|
|
22885
|
+
});
|
|
22886
|
+
return { stdout, stderr };
|
|
22887
|
+
} catch (error) {
|
|
22888
|
+
if (options?.check === false && error && typeof error === "object") {
|
|
22889
|
+
const execError = error;
|
|
22890
|
+
return {
|
|
22891
|
+
stdout: execError.stdout ?? "",
|
|
22892
|
+
stderr: execError.stderr ?? ""
|
|
22893
|
+
};
|
|
22894
|
+
}
|
|
22895
|
+
throw withFriendlyGitHubAuthError(error);
|
|
22896
|
+
}
|
|
22897
|
+
}
|
|
22898
|
+
async function runGit(args, options) {
|
|
22899
|
+
return runCommand("git", args, options);
|
|
22900
|
+
}
|
|
22901
|
+
async function runGh(args, options) {
|
|
22902
|
+
return runCommand("gh", args, options);
|
|
22903
|
+
}
|
|
22904
|
+
async function resolveDefaultBranch(repoDir) {
|
|
22905
|
+
try {
|
|
22906
|
+
const { stdout } = await runGit(["symbolic-ref", "refs/remotes/origin/HEAD"], { cwd: repoDir });
|
|
22907
|
+
const ref = stdout.trim();
|
|
22908
|
+
const prefix = "refs/remotes/origin/";
|
|
22909
|
+
if (ref.startsWith(prefix)) {
|
|
22910
|
+
return ref.slice(prefix.length);
|
|
22911
|
+
}
|
|
22912
|
+
} catch {
|
|
22913
|
+
}
|
|
22914
|
+
for (const candidate of ["main", "master"]) {
|
|
22915
|
+
try {
|
|
22916
|
+
await runGit(["rev-parse", "--verify", `origin/${candidate}`], { cwd: repoDir });
|
|
22917
|
+
return candidate;
|
|
22918
|
+
} catch {
|
|
22919
|
+
}
|
|
22920
|
+
}
|
|
22921
|
+
return "main";
|
|
22922
|
+
}
|
|
22923
|
+
async function updateCacheRepo(repoDir, baseBranch) {
|
|
22924
|
+
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
22925
|
+
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
22926
|
+
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
22927
|
+
}
|
|
22928
|
+
function updateStatusFile(config, patch) {
|
|
22929
|
+
const cachePaths = getResultsRepoCachePaths(config.repo);
|
|
22930
|
+
const current = readPersistedStatus(cachePaths.statusFile);
|
|
22931
|
+
writePersistedStatus(cachePaths.statusFile, {
|
|
22932
|
+
...current,
|
|
22933
|
+
...patch
|
|
22934
|
+
});
|
|
22935
|
+
}
|
|
22936
|
+
async function ensureResultsRepoClone(config) {
|
|
22937
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
22938
|
+
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
22939
|
+
(0, import_node_fs18.mkdirSync)(cachePaths.rootDir, { recursive: true });
|
|
22940
|
+
if (!(0, import_node_fs18.existsSync)(cachePaths.repoDir)) {
|
|
22941
|
+
try {
|
|
22942
|
+
await runGit([
|
|
22943
|
+
"clone",
|
|
22944
|
+
"--filter=blob:none",
|
|
22945
|
+
resolveResultsRepoUrl(normalized.repo),
|
|
22946
|
+
cachePaths.repoDir
|
|
22947
|
+
]);
|
|
22948
|
+
return cachePaths.repoDir;
|
|
22949
|
+
} catch (error) {
|
|
22950
|
+
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
22951
|
+
throw withFriendlyGitHubAuthError(error);
|
|
22952
|
+
}
|
|
22953
|
+
}
|
|
22954
|
+
if (!(0, import_node_fs18.existsSync)(import_node_path53.default.join(cachePaths.repoDir, ".git"))) {
|
|
22955
|
+
throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
|
|
22956
|
+
}
|
|
22957
|
+
return cachePaths.repoDir;
|
|
22958
|
+
}
|
|
22959
|
+
function getResultsRepoStatus(config) {
|
|
22960
|
+
if (!config) {
|
|
22961
|
+
return {
|
|
22962
|
+
configured: false,
|
|
22963
|
+
available: false,
|
|
22964
|
+
repo: "",
|
|
22965
|
+
cache_dir: ""
|
|
22966
|
+
};
|
|
22967
|
+
}
|
|
22968
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
22969
|
+
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
22970
|
+
const persisted = readPersistedStatus(cachePaths.statusFile);
|
|
22971
|
+
return {
|
|
22972
|
+
configured: true,
|
|
22973
|
+
available: (0, import_node_fs18.existsSync)(cachePaths.repoDir),
|
|
22974
|
+
repo: normalized.repo,
|
|
22975
|
+
path: normalized.path,
|
|
22976
|
+
auto_push: normalized.auto_push,
|
|
22977
|
+
branch_prefix: normalized.branch_prefix,
|
|
22978
|
+
cache_dir: cachePaths.repoDir,
|
|
22979
|
+
last_synced_at: persisted.last_synced_at,
|
|
22980
|
+
last_error: persisted.last_error
|
|
22981
|
+
};
|
|
22982
|
+
}
|
|
22983
|
+
async function syncResultsRepo(config) {
|
|
22984
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
22985
|
+
try {
|
|
22986
|
+
const repoDir = await ensureResultsRepoClone(normalized);
|
|
22987
|
+
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
22988
|
+
await updateCacheRepo(repoDir, baseBranch);
|
|
22989
|
+
updateStatusFile(normalized, {
|
|
22990
|
+
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
22991
|
+
last_error: void 0
|
|
22992
|
+
});
|
|
22993
|
+
} catch (error) {
|
|
22994
|
+
updateStatusFile(normalized, {
|
|
22995
|
+
last_error: withFriendlyGitHubAuthError(error).message
|
|
22996
|
+
});
|
|
22997
|
+
throw withFriendlyGitHubAuthError(error);
|
|
22998
|
+
}
|
|
22999
|
+
return getResultsRepoStatus(normalized);
|
|
23000
|
+
}
|
|
23001
|
+
async function checkoutResultsRepoBranch(config, branchName) {
|
|
23002
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
23003
|
+
const repoDir = await ensureResultsRepoClone(normalized);
|
|
23004
|
+
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
23005
|
+
await updateCacheRepo(repoDir, baseBranch);
|
|
23006
|
+
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
23007
|
+
updateStatusFile(normalized, { last_error: void 0 });
|
|
23008
|
+
return {
|
|
23009
|
+
branchName,
|
|
23010
|
+
baseBranch,
|
|
23011
|
+
repoDir
|
|
23012
|
+
};
|
|
23013
|
+
}
|
|
23014
|
+
async function prepareResultsRepoBranch(config, branchName) {
|
|
23015
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
23016
|
+
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
23017
|
+
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
23018
|
+
await updateCacheRepo(cloneDir, baseBranch);
|
|
23019
|
+
const worktreeRoot = await (0, import_promises38.mkdtemp)(import_node_path53.default.join(import_node_os8.default.tmpdir(), "agentv-results-repo-"));
|
|
23020
|
+
const worktreeDir = import_node_path53.default.join(worktreeRoot, "repo");
|
|
23021
|
+
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
23022
|
+
cwd: cloneDir
|
|
23023
|
+
});
|
|
23024
|
+
return {
|
|
23025
|
+
branchName,
|
|
23026
|
+
baseBranch,
|
|
23027
|
+
repoDir: worktreeDir,
|
|
23028
|
+
cleanup: async () => {
|
|
23029
|
+
try {
|
|
23030
|
+
await runGit(["worktree", "remove", "--force", worktreeDir], { cwd: cloneDir });
|
|
23031
|
+
} finally {
|
|
23032
|
+
await (0, import_promises38.rm)(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
|
|
23033
|
+
}
|
|
23034
|
+
}
|
|
23035
|
+
};
|
|
23036
|
+
}
|
|
23037
|
+
async function stageResultsArtifacts(params) {
|
|
23038
|
+
(0, import_node_fs18.rmSync)(params.destinationDir, { recursive: true, force: true });
|
|
23039
|
+
(0, import_node_fs18.mkdirSync)(import_node_path53.default.dirname(params.destinationDir), { recursive: true });
|
|
23040
|
+
await (0, import_promises38.cp)(params.sourceDir, params.destinationDir, { recursive: true });
|
|
23041
|
+
}
|
|
23042
|
+
function resolveResultsRepoRunsDir(config) {
|
|
23043
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
23044
|
+
return import_node_path53.default.join(
|
|
23045
|
+
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
23046
|
+
...normalized.path.split("/")
|
|
23047
|
+
);
|
|
23048
|
+
}
|
|
23049
|
+
async function directorySizeBytes(targetPath) {
|
|
23050
|
+
const entry = await (0, import_promises38.stat)(targetPath);
|
|
23051
|
+
if (entry.isFile()) {
|
|
23052
|
+
return entry.size;
|
|
23053
|
+
}
|
|
23054
|
+
let total = 0;
|
|
23055
|
+
for (const child of await (0, import_promises38.readdir)(targetPath, { withFileTypes: true })) {
|
|
23056
|
+
total += await directorySizeBytes(import_node_path53.default.join(targetPath, child.name));
|
|
23057
|
+
}
|
|
23058
|
+
return total;
|
|
23059
|
+
}
|
|
23060
|
+
async function commitAndPushResultsBranch(params) {
|
|
23061
|
+
await runGit(["add", "--all"], { cwd: params.repoDir });
|
|
23062
|
+
const { stdout: diffStdout } = await runGit(["status", "--porcelain"], {
|
|
23063
|
+
cwd: params.repoDir,
|
|
23064
|
+
check: false
|
|
23065
|
+
});
|
|
23066
|
+
if (diffStdout.trim().length === 0) {
|
|
23067
|
+
return false;
|
|
23068
|
+
}
|
|
23069
|
+
await runGit(["commit", "-m", params.commitMessage], { cwd: params.repoDir });
|
|
23070
|
+
await runGit(["push", "-u", "origin", params.branchName], { cwd: params.repoDir });
|
|
23071
|
+
return true;
|
|
23072
|
+
}
|
|
23073
|
+
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
23074
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
23075
|
+
await runGit(["push", "-u", "origin", branchName], {
|
|
23076
|
+
cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir
|
|
23077
|
+
});
|
|
23078
|
+
updateStatusFile(normalized, {
|
|
23079
|
+
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
23080
|
+
last_error: void 0
|
|
23081
|
+
});
|
|
23082
|
+
}
|
|
23083
|
+
async function createDraftResultsPr(params) {
|
|
23084
|
+
const { stdout } = await runGh(
|
|
23085
|
+
[
|
|
23086
|
+
"pr",
|
|
23087
|
+
"create",
|
|
23088
|
+
"--draft",
|
|
23089
|
+
"--repo",
|
|
23090
|
+
params.repo,
|
|
23091
|
+
"--base",
|
|
23092
|
+
params.baseBranch,
|
|
23093
|
+
"--head",
|
|
23094
|
+
params.branchName,
|
|
23095
|
+
"--title",
|
|
23096
|
+
params.title,
|
|
23097
|
+
"--body",
|
|
23098
|
+
params.body
|
|
23099
|
+
],
|
|
23100
|
+
{ cwd: params.repoDir }
|
|
23101
|
+
);
|
|
23102
|
+
return stdout.trim();
|
|
23103
|
+
}
|
|
23104
|
+
|
|
23105
|
+
// src/projects.ts
|
|
23106
|
+
init_cjs_shims();
|
|
23107
|
+
var import_node_fs19 = require("fs");
|
|
23108
|
+
var import_node_path54 = __toESM(require("path"), 1);
|
|
23109
|
+
var import_yaml10 = require("yaml");
|
|
22329
23110
|
function getProjectsRegistryPath() {
|
|
22330
|
-
return
|
|
23111
|
+
return import_node_path54.default.join(getAgentvHome(), "projects.yaml");
|
|
22331
23112
|
}
|
|
22332
23113
|
function loadProjectRegistry() {
|
|
22333
23114
|
const registryPath = getProjectsRegistryPath();
|
|
22334
|
-
if (!(0,
|
|
23115
|
+
if (!(0, import_node_fs19.existsSync)(registryPath)) {
|
|
22335
23116
|
return { projects: [] };
|
|
22336
23117
|
}
|
|
22337
23118
|
try {
|
|
22338
|
-
const raw = (0,
|
|
22339
|
-
const parsed = (0,
|
|
23119
|
+
const raw = (0, import_node_fs19.readFileSync)(registryPath, "utf-8");
|
|
23120
|
+
const parsed = (0, import_yaml10.parse)(raw);
|
|
22340
23121
|
if (!parsed || !Array.isArray(parsed.projects)) {
|
|
22341
23122
|
return { projects: [] };
|
|
22342
23123
|
}
|
|
@@ -22347,14 +23128,14 @@ function loadProjectRegistry() {
|
|
|
22347
23128
|
}
|
|
22348
23129
|
function saveProjectRegistry(registry) {
|
|
22349
23130
|
const registryPath = getProjectsRegistryPath();
|
|
22350
|
-
const dir =
|
|
22351
|
-
if (!(0,
|
|
22352
|
-
(0,
|
|
23131
|
+
const dir = import_node_path54.default.dirname(registryPath);
|
|
23132
|
+
if (!(0, import_node_fs19.existsSync)(dir)) {
|
|
23133
|
+
(0, import_node_fs19.mkdirSync)(dir, { recursive: true });
|
|
22353
23134
|
}
|
|
22354
|
-
(0,
|
|
23135
|
+
(0, import_node_fs19.writeFileSync)(registryPath, (0, import_yaml10.stringify)(registry), "utf-8");
|
|
22355
23136
|
}
|
|
22356
23137
|
function deriveProjectId(dirPath, existingIds) {
|
|
22357
|
-
const base =
|
|
23138
|
+
const base = import_node_path54.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
22358
23139
|
let candidate = base || "project";
|
|
22359
23140
|
let suffix = 2;
|
|
22360
23141
|
while (existingIds.includes(candidate)) {
|
|
@@ -22364,11 +23145,11 @@ function deriveProjectId(dirPath, existingIds) {
|
|
|
22364
23145
|
return candidate;
|
|
22365
23146
|
}
|
|
22366
23147
|
function addProject(projectPath) {
|
|
22367
|
-
const absPath =
|
|
22368
|
-
if (!(0,
|
|
23148
|
+
const absPath = import_node_path54.default.resolve(projectPath);
|
|
23149
|
+
if (!(0, import_node_fs19.existsSync)(absPath)) {
|
|
22369
23150
|
throw new Error(`Directory not found: ${absPath}`);
|
|
22370
23151
|
}
|
|
22371
|
-
if (!(0,
|
|
23152
|
+
if (!(0, import_node_fs19.existsSync)(import_node_path54.default.join(absPath, ".agentv"))) {
|
|
22372
23153
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
22373
23154
|
}
|
|
22374
23155
|
const registry = loadProjectRegistry();
|
|
@@ -22382,7 +23163,7 @@ function addProject(projectPath) {
|
|
|
22382
23163
|
absPath,
|
|
22383
23164
|
registry.projects.map((p) => p.id)
|
|
22384
23165
|
),
|
|
22385
|
-
name:
|
|
23166
|
+
name: import_node_path54.default.basename(absPath),
|
|
22386
23167
|
path: absPath,
|
|
22387
23168
|
addedAt: now,
|
|
22388
23169
|
lastOpenedAt: now
|
|
@@ -22411,24 +23192,24 @@ function touchProject(projectId) {
|
|
|
22411
23192
|
}
|
|
22412
23193
|
}
|
|
22413
23194
|
function discoverProjects(rootDir, maxDepth = 2) {
|
|
22414
|
-
const absRoot =
|
|
22415
|
-
if (!(0,
|
|
23195
|
+
const absRoot = import_node_path54.default.resolve(rootDir);
|
|
23196
|
+
if (!(0, import_node_fs19.existsSync)(absRoot) || !(0, import_node_fs19.statSync)(absRoot).isDirectory()) {
|
|
22416
23197
|
return [];
|
|
22417
23198
|
}
|
|
22418
23199
|
const results = [];
|
|
22419
23200
|
function scan(dir, depth) {
|
|
22420
23201
|
if (depth > maxDepth) return;
|
|
22421
|
-
if ((0,
|
|
23202
|
+
if ((0, import_node_fs19.existsSync)(import_node_path54.default.join(dir, ".agentv"))) {
|
|
22422
23203
|
results.push(dir);
|
|
22423
23204
|
return;
|
|
22424
23205
|
}
|
|
22425
23206
|
if (depth === maxDepth) return;
|
|
22426
23207
|
try {
|
|
22427
|
-
const entries = (0,
|
|
23208
|
+
const entries = (0, import_node_fs19.readdirSync)(dir, { withFileTypes: true });
|
|
22428
23209
|
for (const entry of entries) {
|
|
22429
23210
|
if (!entry.isDirectory()) continue;
|
|
22430
23211
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
22431
|
-
scan(
|
|
23212
|
+
scan(import_node_path54.default.join(dir, entry.name), depth + 1);
|
|
22432
23213
|
}
|
|
22433
23214
|
} catch {
|
|
22434
23215
|
}
|
|
@@ -23354,33 +24135,33 @@ function extractResponseItemContent(content) {
|
|
|
23354
24135
|
|
|
23355
24136
|
// src/import/codex-session-discovery.ts
|
|
23356
24137
|
init_cjs_shims();
|
|
23357
|
-
var
|
|
23358
|
-
var
|
|
23359
|
-
var
|
|
23360
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
24138
|
+
var import_promises40 = require("fs/promises");
|
|
24139
|
+
var import_node_os9 = require("os");
|
|
24140
|
+
var import_node_path56 = __toESM(require("path"), 1);
|
|
24141
|
+
var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0, import_node_os9.homedir)(), ".codex", "sessions");
|
|
23361
24142
|
async function discoverCodexSessions(opts) {
|
|
23362
24143
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
23363
24144
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
23364
24145
|
const sessions = [];
|
|
23365
24146
|
let yearDirs;
|
|
23366
24147
|
try {
|
|
23367
|
-
yearDirs = await (0,
|
|
24148
|
+
yearDirs = await (0, import_promises40.readdir)(sessionsDir);
|
|
23368
24149
|
} catch {
|
|
23369
24150
|
return [];
|
|
23370
24151
|
}
|
|
23371
24152
|
for (const year of yearDirs) {
|
|
23372
|
-
const yearPath =
|
|
24153
|
+
const yearPath = import_node_path56.default.join(sessionsDir, year);
|
|
23373
24154
|
let monthDirs;
|
|
23374
24155
|
try {
|
|
23375
|
-
monthDirs = await (0,
|
|
24156
|
+
monthDirs = await (0, import_promises40.readdir)(yearPath);
|
|
23376
24157
|
} catch {
|
|
23377
24158
|
continue;
|
|
23378
24159
|
}
|
|
23379
24160
|
for (const month of monthDirs) {
|
|
23380
|
-
const monthPath =
|
|
24161
|
+
const monthPath = import_node_path56.default.join(yearPath, month);
|
|
23381
24162
|
let dayDirs;
|
|
23382
24163
|
try {
|
|
23383
|
-
dayDirs = await (0,
|
|
24164
|
+
dayDirs = await (0, import_promises40.readdir)(monthPath);
|
|
23384
24165
|
} catch {
|
|
23385
24166
|
continue;
|
|
23386
24167
|
}
|
|
@@ -23389,22 +24170,22 @@ async function discoverCodexSessions(opts) {
|
|
|
23389
24170
|
const dirDate = `${year}-${month}-${day}`;
|
|
23390
24171
|
if (dirDate !== opts.date) continue;
|
|
23391
24172
|
}
|
|
23392
|
-
const dayPath =
|
|
24173
|
+
const dayPath = import_node_path56.default.join(monthPath, day);
|
|
23393
24174
|
let files;
|
|
23394
24175
|
try {
|
|
23395
|
-
files = await (0,
|
|
24176
|
+
files = await (0, import_promises40.readdir)(dayPath);
|
|
23396
24177
|
} catch {
|
|
23397
24178
|
continue;
|
|
23398
24179
|
}
|
|
23399
24180
|
for (const file of files) {
|
|
23400
24181
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
23401
|
-
const filePath =
|
|
24182
|
+
const filePath = import_node_path56.default.join(dayPath, file);
|
|
23402
24183
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
23403
24184
|
const parts = nameWithoutExt.split("-");
|
|
23404
24185
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
23405
24186
|
let updatedAt;
|
|
23406
24187
|
try {
|
|
23407
|
-
const fileStat = await (0,
|
|
24188
|
+
const fileStat = await (0, import_promises40.stat)(filePath);
|
|
23408
24189
|
updatedAt = fileStat.mtime;
|
|
23409
24190
|
} catch {
|
|
23410
24191
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -23420,10 +24201,10 @@ async function discoverCodexSessions(opts) {
|
|
|
23420
24201
|
|
|
23421
24202
|
// src/import/session-discovery.ts
|
|
23422
24203
|
init_cjs_shims();
|
|
23423
|
-
var
|
|
23424
|
-
var
|
|
23425
|
-
var
|
|
23426
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
24204
|
+
var import_promises41 = require("fs/promises");
|
|
24205
|
+
var import_node_os10 = require("os");
|
|
24206
|
+
var import_node_path57 = __toESM(require("path"), 1);
|
|
24207
|
+
var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0, import_node_os10.homedir)(), ".claude", "projects");
|
|
23427
24208
|
function encodeProjectPath(projectPath) {
|
|
23428
24209
|
return projectPath.replace(/\//g, "-");
|
|
23429
24210
|
}
|
|
@@ -23432,7 +24213,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
23432
24213
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
23433
24214
|
let projectDirs;
|
|
23434
24215
|
try {
|
|
23435
|
-
projectDirs = await (0,
|
|
24216
|
+
projectDirs = await (0, import_promises41.readdir)(projectsDir);
|
|
23436
24217
|
} catch {
|
|
23437
24218
|
return [];
|
|
23438
24219
|
}
|
|
@@ -23442,10 +24223,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
23442
24223
|
}
|
|
23443
24224
|
const sessions = [];
|
|
23444
24225
|
for (const projectDir of projectDirs) {
|
|
23445
|
-
const dirPath =
|
|
24226
|
+
const dirPath = import_node_path57.default.join(projectsDir, projectDir);
|
|
23446
24227
|
let entries;
|
|
23447
24228
|
try {
|
|
23448
|
-
entries = await (0,
|
|
24229
|
+
entries = await (0, import_promises41.readdir)(dirPath);
|
|
23449
24230
|
} catch {
|
|
23450
24231
|
continue;
|
|
23451
24232
|
}
|
|
@@ -23453,10 +24234,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
23453
24234
|
if (!entry.endsWith(".jsonl")) continue;
|
|
23454
24235
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
23455
24236
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
23456
|
-
const filePath =
|
|
24237
|
+
const filePath = import_node_path57.default.join(dirPath, entry);
|
|
23457
24238
|
let updatedAt;
|
|
23458
24239
|
try {
|
|
23459
|
-
const fileStat = await (0,
|
|
24240
|
+
const fileStat = await (0, import_promises41.stat)(filePath);
|
|
23460
24241
|
updatedAt = fileStat.mtime;
|
|
23461
24242
|
} catch {
|
|
23462
24243
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -23478,7 +24259,7 @@ init_cjs_shims();
|
|
|
23478
24259
|
|
|
23479
24260
|
// src/import/types.ts
|
|
23480
24261
|
init_cjs_shims();
|
|
23481
|
-
var
|
|
24262
|
+
var import_promises42 = require("fs/promises");
|
|
23482
24263
|
function toTranscriptJsonLine(entry) {
|
|
23483
24264
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
23484
24265
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -23504,11 +24285,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
23504
24285
|
};
|
|
23505
24286
|
}
|
|
23506
24287
|
async function readTranscriptJsonl(filePath) {
|
|
23507
|
-
const text = await (0,
|
|
24288
|
+
const text = await (0, import_promises42.readFile)(filePath, "utf8");
|
|
23508
24289
|
return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
23509
24290
|
}
|
|
23510
24291
|
async function readTranscriptFile(filePath) {
|
|
23511
|
-
return (0,
|
|
24292
|
+
return (0, import_promises42.readFile)(filePath, "utf8");
|
|
23512
24293
|
}
|
|
23513
24294
|
|
|
23514
24295
|
// src/import/transcript-provider.ts
|
|
@@ -23574,6 +24355,7 @@ function createAgentKernel() {
|
|
|
23574
24355
|
DEFAULT_EXPLORATION_TOOLS,
|
|
23575
24356
|
DEFAULT_THRESHOLD,
|
|
23576
24357
|
DeterministicAssertionEvaluator,
|
|
24358
|
+
DockerWorkspaceProvider,
|
|
23577
24359
|
EvaluatorRegistry,
|
|
23578
24360
|
ExecutionMetricsEvaluator,
|
|
23579
24361
|
FieldAccuracyEvaluator,
|
|
@@ -23609,9 +24391,11 @@ function createAgentKernel() {
|
|
|
23609
24391
|
buildSearchRoots,
|
|
23610
24392
|
calculateRubricScore,
|
|
23611
24393
|
captureFileChanges,
|
|
24394
|
+
checkoutResultsRepoBranch,
|
|
23612
24395
|
clampScore,
|
|
23613
24396
|
cleanupEvalWorkspaces,
|
|
23614
24397
|
cleanupWorkspace,
|
|
24398
|
+
commitAndPushResultsBranch,
|
|
23615
24399
|
computeTraceSummary,
|
|
23616
24400
|
computeWorkspaceFingerprint,
|
|
23617
24401
|
consumeClaudeLogEntries,
|
|
@@ -23622,6 +24406,7 @@ function createAgentKernel() {
|
|
|
23622
24406
|
createAgentKernel,
|
|
23623
24407
|
createBuiltinProviderRegistry,
|
|
23624
24408
|
createBuiltinRegistry,
|
|
24409
|
+
createDraftResultsPr,
|
|
23625
24410
|
createProvider,
|
|
23626
24411
|
createTempWorkspace,
|
|
23627
24412
|
deepEqual,
|
|
@@ -23629,6 +24414,7 @@ function createAgentKernel() {
|
|
|
23629
24414
|
deriveCategory,
|
|
23630
24415
|
deriveProjectId,
|
|
23631
24416
|
detectFormat,
|
|
24417
|
+
directorySizeBytes,
|
|
23632
24418
|
discoverAssertions,
|
|
23633
24419
|
discoverClaudeSessions,
|
|
23634
24420
|
discoverCodexSessions,
|
|
@@ -23637,6 +24423,7 @@ function createAgentKernel() {
|
|
|
23637
24423
|
discoverJudges,
|
|
23638
24424
|
discoverProjects,
|
|
23639
24425
|
discoverProviders,
|
|
24426
|
+
ensureResultsRepoClone,
|
|
23640
24427
|
ensureVSCodeSubagents,
|
|
23641
24428
|
evaluate,
|
|
23642
24429
|
executeScript,
|
|
@@ -23661,6 +24448,8 @@ function createAgentKernel() {
|
|
|
23661
24448
|
getOutputFilenames,
|
|
23662
24449
|
getProject,
|
|
23663
24450
|
getProjectsRegistryPath,
|
|
24451
|
+
getResultsRepoCachePaths,
|
|
24452
|
+
getResultsRepoStatus,
|
|
23664
24453
|
getSubagentsRoot,
|
|
23665
24454
|
getTextContent,
|
|
23666
24455
|
getTraceStateRoot,
|
|
@@ -23690,12 +24479,15 @@ function createAgentKernel() {
|
|
|
23690
24479
|
mergeExecutionMetrics,
|
|
23691
24480
|
negateScore,
|
|
23692
24481
|
normalizeLineEndings,
|
|
24482
|
+
normalizeResultsExportConfig,
|
|
23693
24483
|
parseAgentSkillsEvals,
|
|
23694
24484
|
parseClaudeSession,
|
|
23695
24485
|
parseCodexSession,
|
|
23696
24486
|
parseCopilotEvents,
|
|
23697
24487
|
parseJsonFromText,
|
|
23698
24488
|
parseJsonSafe,
|
|
24489
|
+
prepareResultsRepoBranch,
|
|
24490
|
+
pushResultsRepoBranch,
|
|
23699
24491
|
readJsonFile,
|
|
23700
24492
|
readTargetDefinitions,
|
|
23701
24493
|
readTestSuiteMetadata,
|
|
@@ -23706,6 +24498,8 @@ function createAgentKernel() {
|
|
|
23706
24498
|
resolveAndCreateProvider,
|
|
23707
24499
|
resolveDelegatedTargetDefinition,
|
|
23708
24500
|
resolveFileReference,
|
|
24501
|
+
resolveResultsRepoRunsDir,
|
|
24502
|
+
resolveResultsRepoUrl,
|
|
23709
24503
|
resolveTargetDefinition,
|
|
23710
24504
|
resolveWorkspaceTemplate,
|
|
23711
24505
|
rubricEvaluationSchema,
|
|
@@ -23727,12 +24521,14 @@ function createAgentKernel() {
|
|
|
23727
24521
|
scoreToVerdict,
|
|
23728
24522
|
shouldEnableCache,
|
|
23729
24523
|
shouldSkipCacheForTemperature,
|
|
24524
|
+
stageResultsArtifacts,
|
|
23730
24525
|
subscribeToClaudeLogEntries,
|
|
23731
24526
|
subscribeToCodexLogEntries,
|
|
23732
24527
|
subscribeToCopilotCliLogEntries,
|
|
23733
24528
|
subscribeToCopilotSdkLogEntries,
|
|
23734
24529
|
subscribeToPiLogEntries,
|
|
23735
24530
|
substituteVariables,
|
|
24531
|
+
syncResultsRepo,
|
|
23736
24532
|
toCamelCaseDeep,
|
|
23737
24533
|
toSnakeCaseDeep,
|
|
23738
24534
|
toTranscriptJsonLine,
|