@agentv/core 4.9.0 → 4.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-VCVVKCC4.js → chunk-BWHUWLGW.js} +1 -1
- package/dist/chunk-BWHUWLGW.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +1042 -749
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +20 -5
- package/dist/index.d.ts +20 -5
- package/dist/index.js +865 -573
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-VCVVKCC4.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1355,13 +1355,13 @@ function serializeAttributeValue(value) {
|
|
|
1355
1355
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1356
1356
|
return { stringValue: String(value) };
|
|
1357
1357
|
}
|
|
1358
|
-
var
|
|
1358
|
+
var import_promises37, import_node_path54, OtlpJsonFileExporter;
|
|
1359
1359
|
var init_otlp_json_file_exporter = __esm({
|
|
1360
1360
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1361
1361
|
"use strict";
|
|
1362
1362
|
init_cjs_shims();
|
|
1363
|
-
|
|
1364
|
-
|
|
1363
|
+
import_promises37 = require("fs/promises");
|
|
1364
|
+
import_node_path54 = require("path");
|
|
1365
1365
|
OtlpJsonFileExporter = class {
|
|
1366
1366
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1367
1367
|
spans = [];
|
|
@@ -1400,7 +1400,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1400
1400
|
}
|
|
1401
1401
|
async flush() {
|
|
1402
1402
|
if (this.spans.length === 0) return;
|
|
1403
|
-
await (0,
|
|
1403
|
+
await (0, import_promises37.mkdir)((0, import_node_path54.dirname)(this.filePath), { recursive: true });
|
|
1404
1404
|
const otlpJson = {
|
|
1405
1405
|
resourceSpans: [
|
|
1406
1406
|
{
|
|
@@ -1829,8 +1829,8 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
1829
1829
|
|
|
1830
1830
|
// src/evaluation/yaml-parser.ts
|
|
1831
1831
|
init_cjs_shims();
|
|
1832
|
-
var
|
|
1833
|
-
var
|
|
1832
|
+
var import_promises9 = require("fs/promises");
|
|
1833
|
+
var import_node_path9 = __toESM(require("path"), 1);
|
|
1834
1834
|
var import_micromatch2 = __toESM(require("micromatch"), 1);
|
|
1835
1835
|
var import_yaml4 = require("yaml");
|
|
1836
1836
|
|
|
@@ -2505,11 +2505,321 @@ function logWarning(message) {
|
|
|
2505
2505
|
|
|
2506
2506
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
2507
2507
|
init_cjs_shims();
|
|
2508
|
+
var import_node_path6 = __toESM(require("path"), 1);
|
|
2509
|
+
|
|
2510
|
+
// src/evaluation/content-preprocessor.ts
|
|
2511
|
+
init_cjs_shims();
|
|
2512
|
+
var import_promises5 = require("fs/promises");
|
|
2508
2513
|
var import_node_path5 = __toESM(require("path"), 1);
|
|
2514
|
+
var import_node_url2 = require("url");
|
|
2515
|
+
|
|
2516
|
+
// src/runtime/exec.ts
|
|
2517
|
+
init_cjs_shims();
|
|
2518
|
+
/**
 * Quote a path so it can be embedded in a shell command line.
 *
 * When `process.platform` is "win32" the value is wrapped in double quotes and
 * every embedded `"` is doubled. On every other platform the value is wrapped
 * in single quotes and every embedded `'` is replaced by `'"'"'`.
 *
 * @param {string} value - Path to quote.
 * @returns {string} The quoted path.
 */
function shellEscapePath(value) {
  const onWindows = process.platform === "win32";
  const quote = onWindows ? '"' : "'";
  const escapedQuote = onWindows ? '""' : `'"'"'`;
  return quote + value.split(quote).join(escapedQuote) + quote;
}
|
|
2524
|
+
/**
 * Run an executable (no shell) and feed it `stdinPayload` on stdin.
 *
 * Dispatches to the Bun implementation when a global `Bun` is defined and to
 * the Node implementation otherwise.
 *
 * @param {string[]} argv - Executable followed by its arguments; must be non-empty.
 * @param {string} stdinPayload - Text written to the child's stdin.
 * @param {object} [options] - Forwarded unchanged to the runtime-specific runner.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 * @throws {Error} When `argv` is empty.
 */
async function execFileWithStdin(argv, stdinPayload, options = {}) {
  if (argv.length === 0) {
    throw new Error("Executable argv must include at least one entry");
  }
  const runner = typeof Bun === "undefined" ? execFileWithStdinNode : execFileWithStdinBun;
  return runner(argv, stdinPayload, options);
}
|
|
2533
|
+
/**
 * Bun implementation of `execFileWithStdin`, built on `Bun.spawn`.
 *
 * stdout and stderr are piped and read to completion; CRLF sequences in both
 * are normalized to LF. When `options.timeoutMs` is set, the process is sent
 * SIGKILL after that many milliseconds and the call rejects once the process
 * has settled.
 *
 * @param {string[]} argv - Executable followed by its arguments.
 * @param {string} stdinPayload - Text encoded with TextEncoder and supplied as stdin.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} options
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 * @throws {Error} "Process timed out after <n>ms" when the timer fired.
 */
async function execFileWithStdinBun(argv, stdinPayload, options) {
  const readAll = (stream) => (stream ? new Response(stream).text() : Promise.resolve(""));
  const toLf = (text) => text.replace(/\r\n/g, "\n");

  const proc = Bun.spawn([...argv], {
    cwd: options.cwd,
    stdin: new TextEncoder().encode(stdinPayload),
    stdout: "pipe",
    stderr: "pipe",
    // Merge additional env vars with process.env
    env: options.env ? { ...process.env, ...options.env } : process.env
  });

  let timedOut = false;
  let timer;
  if (options.timeoutMs !== void 0) {
    timer = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGKILL");
    }, options.timeoutMs);
  }

  try {
    const [rawStdout, rawStderr, exitCode] = await Promise.all([
      readAll(proc.stdout),
      readAll(proc.stderr),
      proc.exited
    ]);
    // The kill above lets the awaited promises settle; report it as a timeout.
    if (timedOut) {
      throw new Error(`Process timed out after ${options.timeoutMs}ms`);
    }
    return { stdout: toLf(rawStdout), stderr: toLf(rawStderr), exitCode };
  } finally {
    if (timer !== void 0) {
      clearTimeout(timer);
    }
  }
}
|
|
2571
|
+
/**
 * Node.js implementation of `execFileWithStdin`, built on `child_process.spawn`.
 *
 * The payload is written to the child's stdin, stdout/stderr are collected
 * until the `close` event, and CRLF sequences are normalized to LF. When
 * `options.timeoutMs` is set the child is sent SIGKILL after that many
 * milliseconds and the promise rejects once the child has closed.
 *
 * @param {string[]} argv - Executable followed by its arguments.
 * @param {string} stdinPayload - Text written to the child's stdin.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} options
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   `exitCode` is 0 when the close event reports a null code.
 * @throws {Error} Spawn errors, or "Process timed out after <n>ms".
 */
async function execFileWithStdinNode(argv, stdinPayload, options) {
  const { spawn: spawn5 } = await import("child_process");
  // Error codes seen on stdin when the child stopped reading before the
  // payload was fully delivered.
  const brokenPipeCodes = ["EPIPE", "ECONNRESET", "EOF", "ERR_STREAM_DESTROYED"];
  return new Promise((resolve, reject) => {
    const [cmd, ...args] = argv;
    const child = spawn5(cmd, args, {
      cwd: options.cwd,
      stdio: ["pipe", "pipe", "pipe"],
      // Merge additional env vars with process.env
      env: options.env ? { ...process.env, ...options.env } : process.env
    });
    const stdoutChunks = [];
    const stderrChunks = [];
    child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
    child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
    let timedOut = false;
    const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
      timedOut = true;
      child.kill("SIGKILL");
    }, options.timeoutMs) : void 0;
    child.on("error", (error) => {
      if (timeout !== void 0) clearTimeout(timeout);
      reject(error);
    });
    child.on("close", (code) => {
      if (timeout !== void 0) clearTimeout(timeout);
      if (timedOut) {
        reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
        return;
      }
      const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
      const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
      resolve({
        stdout,
        stderr,
        exitCode: code ?? 0
      });
    });
    if (child.stdin) {
      // Without a listener, a write failure on stdin is an unhandled stream
      // 'error' event and takes down the whole host process. A broken pipe
      // only means the child exited or closed stdin early; its exit status is
      // still reported through the 'close' handler above, so it is ignored.
      child.stdin.on("error", (error) => {
        if (brokenPipeCodes.includes(error?.code)) return;
        // Anything else is unexpected: surface it. The timeout (if any) stays
        // armed so a lingering child is still killed.
        reject(error);
      });
      child.stdin.write(stdinPayload);
      child.stdin.end();
    }
  });
}
|
|
2614
|
+
/**
 * Run a shell command line with `stdinPayload` as its stdin and capture its
 * output through temporary files.
 *
 * A fresh directory `agentv-exec-<uuid>` is created under the OS temp dir.
 * The payload is written to `stdin.txt`, and the command is run as
 * `(<command>) < stdin.txt > stdout.txt 2> stderr.txt` with `shell: true` and
 * all stdio ignored. After the shell exits, both output files are read (CRLF
 * normalized to LF) and the directory is removed.
 *
 * @param {string} command - Shell command line, inserted verbatim into the wrapper.
 * @param {string} stdinPayload - Text made available on the command's stdin.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [options]
 *   A falsy `timeoutMs` (including 0) disables the timeout.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   `exitCode` is 0 when the exit event reports a null code.
 * @throws {Error} Spawn/file errors, or "Process timed out after <n>ms".
 */
async function execShellWithStdin(command, stdinPayload, options = {}) {
  const { mkdir: mkdir17, readFile: readFile19, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
  const { tmpdir: tmpdir3 } = await import("os");
  const path55 = await import("path");
  const { randomUUID: randomUUID10 } = await import("crypto");
  const dir = path55.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
  await mkdir17(dir, { recursive: true });
  // Everything after the directory exists runs under try/finally so the
  // directory is removed even when writing the payload or spawning fails.
  try {
    const stdinPath = path55.join(dir, "stdin.txt");
    const stdoutPath = path55.join(dir, "stdout.txt");
    const stderrPath = path55.join(dir, "stderr.txt");
    await writeFile9(stdinPath, stdinPayload, "utf8");
    // The redirection syntax is the same on every platform; only the quoting
    // differs, and shellEscapePath handles that.
    const wrappedCommand = `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
    const { spawn: spawn5 } = await import("child_process");
    const exitCode = await new Promise((resolve, reject) => {
      const child = spawn5(wrappedCommand, {
        shell: true,
        cwd: options.cwd,
        stdio: ["ignore", "ignore", "ignore"],
        // Merge additional env vars with process.env
        env: options.env ? { ...process.env, ...options.env } : process.env
      });
      const timeout = options.timeoutMs ? setTimeout(() => {
        child.kill();
        reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
      }, options.timeoutMs) : void 0;
      child.on("error", (error) => {
        if (timeout !== void 0) {
          clearTimeout(timeout);
        }
        reject(error);
      });
      child.on("exit", (code) => {
        if (timeout !== void 0) {
          clearTimeout(timeout);
        }
        resolve(code ?? 0);
      });
    });
    const stdout = (await readFile19(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
    const stderr = (await readFile19(stderrPath, "utf8")).replace(/\r\n/g, "\n");
    return { stdout, stderr, exitCode };
  } finally {
    await rm6(dir, { recursive: true, force: true });
  }
}
|
|
2660
|
+
|
|
2661
|
+
// src/evaluation/content-preprocessor.ts
|
|
2662
|
+
// Short aliases (mostly file extensions) mapped to the full media type they
// stand for. Keys are lower-case.
var MIME_TYPE_ALIASES = {
  "csv": "text/csv",
  "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "htm": "text/html",
  "html": "text/html",
  "json": "application/json",
  "markdown": "text/markdown",
  "md": "text/markdown",
  "pdf": "application/pdf",
  "sql": "application/sql",
  "txt": "text/plain",
  "xhtml": "application/xhtml+xml",
  "xls": "application/vnd.ms-excel",
  "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  "xml": "application/xml",
  "yaml": "application/yaml",
  "yml": "application/yaml"
};
// U+FFFD REPLACEMENT CHARACTER.
var REPLACEMENT_CHAR = String.fromCharCode(0xfffd);
|
|
2681
|
+
/**
 * Flatten message content into plain text, running file blocks through
 * `preprocessContentFile`.
 *
 * String content is returned as-is. For an array of blocks, `text` blocks
 * contribute their `text`, `file` blocks contribute whatever the preprocessing
 * step produces (plus its warnings), and every other block type is skipped.
 * Parts are joined with a newline, in block order.
 *
 * @param {string|Array<object>|null|undefined} content
 * @param {Array<object>|undefined} preprocessors - Forwarded to `preprocessContentFile`.
 * @param {{basePath?: string}} [options]
 * @returns {Promise<{text: string, warnings: Array<object>}>}
 */
async function extractTextWithPreprocessors(content, preprocessors, options = {}) {
  if (typeof content === "string") {
    return { text: content, warnings: [] };
  }
  if (!content || content.length === 0) {
    return { text: "", warnings: [] };
  }

  const textParts = [];
  const collectedWarnings = [];
  // Blocks are processed one at a time so the output keeps block order.
  for (const block of content) {
    switch (block.type) {
      case "text":
        textParts.push(block.text);
        break;
      case "file": {
        const outcome = await preprocessContentFile(block, preprocessors, options.basePath);
        if (outcome.text) {
          textParts.push(outcome.text);
        }
        collectedWarnings.push(...outcome.warnings);
        break;
      }
      default:
        break;
    }
  }
  return { text: textParts.join("\n"), warnings: collectedWarnings };
}
|
|
2706
|
+
/**
 * Turn a single `file` content block into text.
 *
 * Steps, in order:
 *  1. Resolve the block's path; a path that does not resolve yields a
 *     "remote file paths are not supported" warning.
 *  2. If a preprocessor whose normalized `type` equals the block's normalized
 *     `media_type` is configured, delegate to `runContentPreprocessor`.
 *  3. Otherwise read the file as UTF-8 (CRLF normalized to LF). Content with a
 *     NUL byte or a U+FFFD character is rejected with a warning.
 *
 * Failures never throw from step 3; they are reported as a warning entry with
 * empty text.
 *
 * @param {{path: string, media_type: string}} block
 * @param {Array<{type: string}>|undefined} preprocessors
 * @param {string|undefined} basePath - Forwarded to `resolveLocalFilePath`.
 * @returns {Promise<{text: string, warnings: Array<{file: string, mediaType: string, reason: string}>}>}
 */
async function preprocessContentFile(block, preprocessors, basePath) {
  // Every failure path reports the same shape; only the reason differs.
  const failure = (reason) => ({
    text: "",
    warnings: [{ file: block.path, mediaType: block.media_type, reason }]
  });

  const mediaType = normalizePreprocessorType(block.media_type);
  const resolvedPath = resolveLocalFilePath(block.path, basePath);
  if (!resolvedPath) {
    return failure("remote file paths are not supported for preprocessing");
  }

  const matching = preprocessors?.find(
    (entry) => normalizePreprocessorType(entry.type) === mediaType
  );
  if (matching) {
    return runContentPreprocessor(block, resolvedPath, matching);
  }

  const { readFile } = import_promises5;
  try {
    const bytes = await readFile(resolvedPath);
    const decoded = bytes.toString("utf8").replace(/\r\n/g, "\n");
    const looksBinary = bytes.includes(0) || decoded.includes(REPLACEMENT_CHAR);
    if (looksBinary) {
      return failure("default UTF-8 read produced binary or invalid text; configure a preprocessor");
    }
    return { text: formatFileText(block.path, decoded), warnings: [] };
  } catch (error) {
    return failure(error instanceof Error ? error.message : String(error));
  }
}
|
|
2756
|
+
/**
 * Run a configured preprocessor command for a file block.
 *
 * The command (`resolvedCommand` when present, otherwise `command`) is run
 * through `execFileWithStdin` with a JSON payload of
 * `{ path, original_path, media_type }` on stdin. A zero exit code yields the
 * trimmed stdout wrapped by `formatFileText`; a non-zero exit code or any
 * thrown error yields empty text plus one warning.
 *
 * @param {{path: string, media_type: string}} block
 * @param {string} resolvedPath - Resolved path of the file, sent as `path` in the payload.
 * @param {{command: string[], resolvedCommand?: string[]}} preprocessor
 * @returns {Promise<{text: string, warnings: Array<{file: string, mediaType: string, reason: string}>}>}
 */
async function runContentPreprocessor(block, resolvedPath, preprocessor) {
  const failure = (reason) => ({
    text: "",
    warnings: [{ file: block.path, mediaType: block.media_type, reason }]
  });

  try {
    const argv = preprocessor.resolvedCommand ?? preprocessor.command;
    const payload = JSON.stringify({
      path: resolvedPath,
      original_path: block.path,
      media_type: block.media_type
    });
    const outcome = await execFileWithStdin(argv, payload);
    if (outcome.exitCode !== 0) {
      // Prefer the command's own stderr; fall back to a generic message.
      return failure(outcome.stderr.trim() || `preprocessor exited with code ${outcome.exitCode}`);
    }
    return { text: formatFileText(block.path, outcome.stdout.trim()), warnings: [] };
  } catch (error) {
    return failure(error instanceof Error ? error.message : String(error));
  }
}
|
|
2793
|
+
/**
 * Append one "[file preprocessing warning] ..." line per warning to `text`.
 *
 * With no warnings, `text` is returned unchanged. Otherwise the result is the
 * (non-empty) text followed by the warning lines, joined with newlines.
 *
 * @param {string} text
 * @param {Array<{file: string, mediaType: string, reason: string}>} warnings
 * @returns {string}
 */
function appendPreprocessingWarnings(text, warnings) {
  if (warnings.length === 0) {
    return text;
  }
  const lines = [];
  // An empty text must not produce a leading blank line.
  if (text.length > 0) {
    lines.push(text);
  }
  for (const { file, mediaType, reason } of warnings) {
    lines.push(`[file preprocessing warning] ${file} (${mediaType}): ${reason}`);
  }
  return lines.join("\n");
}
|
|
2802
|
+
/**
 * Normalize a preprocessor / media type string for comparison.
 *
 * The value is trimmed and lower-cased; if the result is a key of
 * `MIME_TYPE_ALIASES` the mapped media type is returned, otherwise the
 * trimmed, lower-cased value itself.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizePreprocessorType(value) {
  const normalized = value.trim().toLowerCase();
  // Own-property check: a bare `MIME_TYPE_ALIASES[normalized]` would also hit
  // inherited members such as "constructor" or "__proto__" and return a
  // non-string.
  return Object.prototype.hasOwnProperty.call(MIME_TYPE_ALIASES, normalized)
    ? MIME_TYPE_ALIASES[normalized]
    : normalized;
}
|
|
2806
|
+
/**
 * Resolve a content-block path to an absolute local filesystem path.
 *
 * - `file://` URLs are converted with `fileURLToPath`.
 * - Any other `<scheme>://` value is treated as remote and yields `undefined`.
 * - Everything else is resolved against `basePath` when given, otherwise
 *   against the current working directory.
 *
 * @param {string} value - Path or URL from the content block.
 * @param {string} [basePath] - Directory that relative paths are resolved against.
 * @returns {string|undefined} Absolute path, or `undefined` for remote URLs.
 */
function resolveLocalFilePath(value, basePath) {
  if (value.startsWith("file://")) {
    return (0, import_node_url2.fileURLToPath)(value);
  }
  // URI schemes may contain digits, "+", "-" and "." after the first letter
  // (e.g. "s3://", "git+ssh://"); a letters-only pattern would treat those
  // URLs as local relative paths.
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(value)) {
    return void 0;
  }
  return basePath ? import_node_path5.default.resolve(basePath, value) : import_node_path5.default.resolve(value);
}
|
|
2815
|
+
/**
 * Prefix file content with a `[[ file: <path> ]]` header line.
 *
 * @param {string} filePath - Path shown in the header.
 * @param {string} text - File content placed on the line after the header.
 * @returns {string}
 */
function formatFileText(filePath, text) {
  const header = `[[ file: ${filePath} ]]`;
  return `${header}\n${text}`;
}
|
|
2509
2819
|
|
|
2510
2820
|
// src/evaluation/validation/prompt-validator.ts
|
|
2511
2821
|
init_cjs_shims();
|
|
2512
|
-
var
|
|
2822
|
+
var import_promises6 = require("fs/promises");
|
|
2513
2823
|
|
|
2514
2824
|
// src/evaluation/template-variables.ts
|
|
2515
2825
|
init_cjs_shims();
|
|
@@ -2541,7 +2851,7 @@ var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
|
|
|
2541
2851
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
2542
2852
|
var ANSI_RESET4 = "\x1B[0m";
|
|
2543
2853
|
/**
 * Read a custom prompt file as UTF-8 and run `validateTemplateVariables` on
 * its content, passing the path as the source.
 *
 * @param {string} promptPath - Path of the prompt file to read.
 * @returns {Promise<void>}
 */
async function validateCustomPromptContent(promptPath) {
  const { readFile } = import_promises6;
  const promptText = await readFile(promptPath, "utf8");
  validateTemplateVariables(promptText, promptPath);
}
|
|
2547
2857
|
function validateTemplateVariables(content, source) {
|
|
@@ -2598,22 +2908,32 @@ function normalizeEvaluatorType(type) {
|
|
|
2598
2908
|
/**
 * Report whether `type` is one of the "-judge" evaluator type names
 * ("code-judge" or "llm-judge").
 *
 * @param {unknown} type
 * @returns {boolean}
 */
function isDeprecatedJudgeType(type) {
  switch (type) {
    case "code-judge":
    case "llm-judge":
      return true;
    default:
      return false;
  }
}
|
|
2601
|
-
/**
 * Collect the evaluators that apply to one eval case.
 *
 * Case-level evaluators are taken from the first defined of
 * `assertions`, `assert`, `execution.evaluators`, `evaluators` (the trailing
 * fallbacks carry a "deprecated: use assertions" note in the source).
 * Suite-level evaluators come from the first defined of
 * `globalExecution.assertions`, `.assert`, `.evaluators`, unless the case sets
 * `execution.skip_defaults: true`. Both lists are parsed with
 * `parseEvaluatorList` and concatenated, case-level first.
 *
 * @param {object} rawEvalCase
 * @param {object|undefined} globalExecution
 * @param {Array<string>} searchRoots - Forwarded to `parseEvaluatorList`.
 * @param {string} evalId - Forwarded to `parseEvaluatorList`.
 * @param {Array<object>} [defaultPreprocessors] - Forwarded to `parseEvaluatorList`.
 * @returns {Promise<Array<object>|undefined>} `undefined` when nothing was parsed.
 */
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId, defaultPreprocessors) {
  const { execution } = rawEvalCase;
  const executionObject = isJsonObject2(execution) ? execution : void 0;

  const caseEvaluators =
    rawEvalCase.assertions ??
    rawEvalCase.assert ??
    (executionObject ? executionObject.evaluators : void 0) ??
    rawEvalCase.evaluators;

  let rootEvaluators;
  if (executionObject?.skip_defaults !== true) {
    rootEvaluators = globalExecution?.assertions ?? globalExecution?.assert ?? globalExecution?.evaluators;
  }

  const parseList = (candidates) =>
    parseEvaluatorList(candidates, searchRoots, evalId, defaultPreprocessors);
  const parsedCase = await parseList(caseEvaluators);
  const parsedRoot = await parseList(rootEvaluators);
  if (!parsedCase && !parsedRoot) {
    return void 0;
  }

  const combined = [...(parsedCase ?? []), ...(parsedRoot ?? [])];
  return combined.length > 0 ? combined : void 0;
}
|
|
2616
|
-
async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
2936
|
+
async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
|
|
2617
2937
|
if (candidateEvaluators === void 0) {
|
|
2618
2938
|
return void 0;
|
|
2619
2939
|
}
|
|
@@ -2678,6 +2998,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2678
2998
|
continue;
|
|
2679
2999
|
}
|
|
2680
3000
|
const negate = rawEvaluator.negate === true ? true : void 0;
|
|
3001
|
+
const mergedPreprocessors = await parseMergedPreprocessors(
|
|
3002
|
+
rawEvaluator.preprocessors,
|
|
3003
|
+
defaultPreprocessors,
|
|
3004
|
+
searchRoots,
|
|
3005
|
+
name,
|
|
3006
|
+
evalId
|
|
3007
|
+
);
|
|
2681
3008
|
if (isCustomType) {
|
|
2682
3009
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
2683
3010
|
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
@@ -2736,7 +3063,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2736
3063
|
if (cwd) {
|
|
2737
3064
|
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
2738
3065
|
if (resolved.resolvedPath) {
|
|
2739
|
-
resolvedCwd =
|
|
3066
|
+
resolvedCwd = import_node_path6.default.resolve(resolved.resolvedPath);
|
|
2740
3067
|
} else {
|
|
2741
3068
|
logWarning2(
|
|
2742
3069
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -2782,6 +3109,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2782
3109
|
"cwd",
|
|
2783
3110
|
"weight",
|
|
2784
3111
|
"target",
|
|
3112
|
+
"preprocessors",
|
|
2785
3113
|
"required",
|
|
2786
3114
|
"negate"
|
|
2787
3115
|
]);
|
|
@@ -2802,6 +3130,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2802
3130
|
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
2803
3131
|
...negate !== void 0 ? { negate } : {},
|
|
2804
3132
|
...Object.keys(config2).length > 0 ? { config: config2 } : {},
|
|
3133
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {},
|
|
2805
3134
|
...targetConfig !== void 0 ? { target: targetConfig } : {}
|
|
2806
3135
|
});
|
|
2807
3136
|
continue;
|
|
@@ -2911,7 +3240,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2911
3240
|
aggregatorPrompt = fileRef;
|
|
2912
3241
|
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
2913
3242
|
if (resolved.resolvedPath) {
|
|
2914
|
-
promptPath2 =
|
|
3243
|
+
promptPath2 = import_node_path6.default.resolve(resolved.resolvedPath);
|
|
2915
3244
|
} else {
|
|
2916
3245
|
throw new Error(
|
|
2917
3246
|
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
@@ -3565,7 +3894,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3565
3894
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3566
3895
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3567
3896
|
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3568
|
-
...negate !== void 0 ? { negate } : {}
|
|
3897
|
+
...negate !== void 0 ? { negate } : {},
|
|
3898
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
|
|
3569
3899
|
});
|
|
3570
3900
|
continue;
|
|
3571
3901
|
}
|
|
@@ -3590,7 +3920,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3590
3920
|
const commandPath = commandArray[commandArray.length - 1];
|
|
3591
3921
|
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
3592
3922
|
if (resolved.resolvedPath) {
|
|
3593
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
3923
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), import_node_path6.default.resolve(resolved.resolvedPath)];
|
|
3594
3924
|
} else {
|
|
3595
3925
|
throw new Error(
|
|
3596
3926
|
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -3605,7 +3935,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3605
3935
|
prompt = fileRef;
|
|
3606
3936
|
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
3607
3937
|
if (resolved.resolvedPath) {
|
|
3608
|
-
promptPath =
|
|
3938
|
+
promptPath = import_node_path6.default.resolve(resolved.resolvedPath);
|
|
3609
3939
|
try {
|
|
3610
3940
|
await validateCustomPromptContent(promptPath);
|
|
3611
3941
|
} catch (error) {
|
|
@@ -3648,7 +3978,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3648
3978
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
3649
3979
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
3650
3980
|
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
3651
|
-
...negate !== void 0 ? { negate } : {}
|
|
3981
|
+
...negate !== void 0 ? { negate } : {},
|
|
3982
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
|
|
3652
3983
|
});
|
|
3653
3984
|
continue;
|
|
3654
3985
|
}
|
|
@@ -3673,7 +4004,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3673
4004
|
"negate",
|
|
3674
4005
|
"max_steps",
|
|
3675
4006
|
"maxSteps",
|
|
3676
|
-
"temperature"
|
|
4007
|
+
"temperature",
|
|
4008
|
+
"preprocessors"
|
|
3677
4009
|
]);
|
|
3678
4010
|
const config = {};
|
|
3679
4011
|
for (const [key, value] of Object.entries(rawEvaluator)) {
|
|
@@ -3703,30 +4035,70 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
3703
4035
|
...negate !== void 0 ? { negate } : {},
|
|
3704
4036
|
...finalConfig ? { config: finalConfig } : {},
|
|
3705
4037
|
...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
|
|
3706
|
-
...llmTemperature !== void 0 ? { temperature: llmTemperature } : {}
|
|
4038
|
+
...llmTemperature !== void 0 ? { temperature: llmTemperature } : {},
|
|
4039
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
|
|
3707
4040
|
});
|
|
3708
4041
|
}
|
|
3709
4042
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
3710
4043
|
}
|
|
3711
|
-
|
|
3712
|
-
|
|
3713
|
-
|
|
3714
|
-
|
|
3715
|
-
|
|
3716
|
-
|
|
3717
|
-
|
|
3718
|
-
|
|
3719
|
-
|
|
3720
|
-
|
|
3721
|
-
|
|
3722
|
-
|
|
3723
|
-
|
|
3724
|
-
|
|
3725
|
-
|
|
3726
|
-
function
|
|
3727
|
-
if (
|
|
4044
|
+
/**
 * Merge an evaluator's own preprocessors over the default ones.
 *
 * `rawValue` is parsed with `parsePreprocessors`; entries are then keyed by
 * their normalized type. An evaluator entry replaces a default of the same
 * normalized type (keeping the default's position); other entries are
 * appended after the defaults.
 *
 * @param {unknown} rawValue - Raw `preprocessors` value from the evaluator config.
 * @param {Array<{type: string}>|undefined} defaultPreprocessors - Already-parsed defaults.
 * @param {Array<string>} searchRoots - Forwarded to `parsePreprocessors`.
 * @param {string} evaluatorName - Forwarded to `parsePreprocessors`.
 * @param {string} evalId - Forwarded to `parsePreprocessors`.
 * @returns {Promise<Array<object>|undefined>} `undefined` when both lists are empty.
 */
async function parseMergedPreprocessors(rawValue, defaultPreprocessors, searchRoots, evaluatorName, evalId) {
  const defaults = defaultPreprocessors ?? [];
  const overrides = (await parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId)) ?? [];
  if (defaults.length === 0 && overrides.length === 0) {
    return void 0;
  }
  const byType = /* @__PURE__ */ new Map();
  for (const entry of [...defaults, ...overrides]) {
    byType.set(normalizePreprocessorType(entry.type), entry);
  }
  return Array.from(byType.values());
}
|
|
4059
|
+
/**
 * Validate and resolve an evaluator's raw `preprocessors` list.
 *
 * Each entry must be an object with a non-empty string `type` and a non-empty
 * `command` array. The LAST element of `command` is treated as a file
 * reference and resolved through `resolveFileReference2`; the resolved
 * absolute path replaces it in `resolvedCommand`.
 *
 * @param {unknown} rawValue - Raw `preprocessors` value; `undefined` is allowed.
 * @param {Array<string>} searchRoots - Forwarded to `resolveFileReference2`.
 * @param {string} evaluatorName - Used in error messages.
 * @param {string} evalId - Used in error messages.
 * @returns {Promise<Array<{type: string, command: string[], resolvedCommand: string[]}>|undefined>}
 *   `undefined` when `rawValue` is `undefined`.
 * @throws {Error} When the value is not an array, an entry is malformed, or
 *   the command file cannot be found.
 */
async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId) {
  if (rawValue === void 0) {
    return void 0;
  }
  // Shared prefix of every error message raised below.
  const where = `Evaluator '${evaluatorName}' in '${evalId}'`;
  if (!Array.isArray(rawValue)) {
    throw new Error(`${where}: preprocessors must be an array`);
  }

  const parsed = [];
  for (const rawEntry of rawValue) {
    if (!isJsonObject2(rawEntry)) {
      throw new Error(`${where}: each preprocessor must be an object`);
    }

    const type = asString(rawEntry.type)?.trim();
    if (!type) {
      throw new Error(`${where}: preprocessor.type is required`);
    }

    const command = asStringArray(
      rawEntry.command,
      `preprocessor command for evaluator '${evaluatorName}' in '${evalId}'`
    );
    if (!command || command.length === 0) {
      throw new Error(`${where}: preprocessor '${type}' requires command`);
    }

    // Only the final argv element is resolved as a file reference.
    const leadingArgs = command.slice(0, -1);
    const scriptRef = command[command.length - 1];
    const resolved = await resolveFileReference2(scriptRef, searchRoots);
    if (!resolved.resolvedPath) {
      throw new Error(`${where}: preprocessor command file not found: ${resolved.displayPath}`);
    }

    parsed.push({
      type,
      command,
      resolvedCommand: [...leadingArgs, import_node_path6.default.resolve(resolved.resolvedPath)]
    });
  }
  return parsed;
}
|
|
4101
|
+
function generateAssertionName(typeValue, rawEvaluator) {
|
|
3730
4102
|
const value = asString(rawEvaluator.value);
|
|
3731
4103
|
const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : void 0;
|
|
3732
4104
|
switch (typeValue) {
|
|
@@ -3759,7 +4131,7 @@ function generateAssertionName(typeValue, rawEvaluator) {
|
|
|
3759
4131
|
case "rubrics":
|
|
3760
4132
|
return "rubrics";
|
|
3761
4133
|
default:
|
|
3762
|
-
return
|
|
4134
|
+
return typeValue;
|
|
3763
4135
|
}
|
|
3764
4136
|
}
|
|
3765
4137
|
function coerceEvaluator(candidate, contextId) {
|
|
@@ -4118,15 +4490,15 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
4118
4490
|
|
|
4119
4491
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
4120
4492
|
init_cjs_shims();
|
|
4121
|
-
var
|
|
4122
|
-
var
|
|
4493
|
+
var import_promises8 = require("fs/promises");
|
|
4494
|
+
var import_node_path8 = __toESM(require("path"), 1);
|
|
4123
4495
|
var import_micromatch = __toESM(require("micromatch"), 1);
|
|
4124
4496
|
var import_yaml3 = require("yaml");
|
|
4125
4497
|
|
|
4126
4498
|
// src/evaluation/loaders/message-processor.ts
|
|
4127
4499
|
init_cjs_shims();
|
|
4128
|
-
var
|
|
4129
|
-
var
|
|
4500
|
+
var import_promises7 = require("fs/promises");
|
|
4501
|
+
var import_node_path7 = __toESM(require("path"), 1);
|
|
4130
4502
|
|
|
4131
4503
|
// src/evaluation/formatting/segment-formatter.ts
|
|
4132
4504
|
init_cjs_shims();
|
|
@@ -4193,7 +4565,7 @@ var IMAGE_MEDIA_TYPES = {
|
|
|
4193
4565
|
".bmp": "image/bmp"
|
|
4194
4566
|
};
|
|
4195
4567
|
/**
 * Look up the image media type for a file by its lower-cased extension.
 *
 * @param {string} filePath
 * @returns {string|undefined} The `IMAGE_MEDIA_TYPES` entry for the
 *   extension, or `undefined` when there is none.
 */
function detectImageMediaType(filePath) {
  const { extname } = import_node_path7.default;
  const extension = extname(filePath).toLowerCase();
  return IMAGE_MEDIA_TYPES[extension];
}
|
|
4199
4571
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
@@ -4243,12 +4615,12 @@ async function processMessages(options) {
|
|
|
4243
4615
|
continue;
|
|
4244
4616
|
}
|
|
4245
4617
|
try {
|
|
4246
|
-
const fileContent = (await (0,
|
|
4618
|
+
const fileContent = (await (0, import_promises7.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
4247
4619
|
processedContent.push({
|
|
4248
4620
|
...cloneJsonObject(rawSegment),
|
|
4249
4621
|
path: displayPath,
|
|
4250
4622
|
text: fileContent,
|
|
4251
|
-
resolvedPath:
|
|
4623
|
+
resolvedPath: import_node_path7.default.resolve(resolvedPath)
|
|
4252
4624
|
});
|
|
4253
4625
|
if (verbose) {
|
|
4254
4626
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -4284,7 +4656,7 @@ async function processMessages(options) {
|
|
|
4284
4656
|
continue;
|
|
4285
4657
|
}
|
|
4286
4658
|
try {
|
|
4287
|
-
const imageBuffer = await (0,
|
|
4659
|
+
const imageBuffer = await (0, import_promises7.readFile)(resolvedPath);
|
|
4288
4660
|
const base64 = imageBuffer.toString("base64");
|
|
4289
4661
|
processedContent.push({
|
|
4290
4662
|
type: "image",
|
|
@@ -4361,12 +4733,12 @@ async function processExpectedMessages(options) {
|
|
|
4361
4733
|
continue;
|
|
4362
4734
|
}
|
|
4363
4735
|
try {
|
|
4364
|
-
const fileContent = (await (0,
|
|
4736
|
+
const fileContent = (await (0, import_promises7.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
4365
4737
|
processedContent.push({
|
|
4366
4738
|
type: "file",
|
|
4367
4739
|
path: displayPath,
|
|
4368
4740
|
text: fileContent,
|
|
4369
|
-
resolvedPath:
|
|
4741
|
+
resolvedPath: import_node_path7.default.resolve(resolvedPath)
|
|
4370
4742
|
});
|
|
4371
4743
|
if (verbose) {
|
|
4372
4744
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -4401,7 +4773,7 @@ async function processExpectedMessages(options) {
|
|
|
4401
4773
|
continue;
|
|
4402
4774
|
}
|
|
4403
4775
|
try {
|
|
4404
|
-
const imageBuffer = await (0,
|
|
4776
|
+
const imageBuffer = await (0, import_promises7.readFile)(resolvedPath);
|
|
4405
4777
|
const base64 = imageBuffer.toString("base64");
|
|
4406
4778
|
processedContent.push({
|
|
4407
4779
|
type: "image",
|
|
@@ -4511,7 +4883,7 @@ function matchesFilter(id, filter) {
|
|
|
4511
4883
|
return typeof filter === "string" ? import_micromatch.default.isMatch(id, filter) : filter.some((pattern) => import_micromatch.default.isMatch(id, pattern));
|
|
4512
4884
|
}
|
|
4513
4885
|
function detectFormat(filePath) {
|
|
4514
|
-
const ext =
|
|
4886
|
+
const ext = import_node_path8.default.extname(filePath).toLowerCase();
|
|
4515
4887
|
if (ext === ".jsonl") return "jsonl";
|
|
4516
4888
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
4517
4889
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -4520,9 +4892,9 @@ function detectFormat(filePath) {
|
|
|
4520
4892
|
);
|
|
4521
4893
|
}
|
|
4522
4894
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
4523
|
-
const dir =
|
|
4524
|
-
const base =
|
|
4525
|
-
const sidecarPath =
|
|
4895
|
+
const dir = import_node_path8.default.dirname(jsonlPath);
|
|
4896
|
+
const base = import_node_path8.default.basename(jsonlPath, ".jsonl");
|
|
4897
|
+
const sidecarPath = import_node_path8.default.join(dir, `${base}.yaml`);
|
|
4526
4898
|
if (!await fileExists(sidecarPath)) {
|
|
4527
4899
|
if (verbose) {
|
|
4528
4900
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -4530,7 +4902,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
4530
4902
|
return {};
|
|
4531
4903
|
}
|
|
4532
4904
|
try {
|
|
4533
|
-
const content = await (0,
|
|
4905
|
+
const content = await (0, import_promises8.readFile)(sidecarPath, "utf8");
|
|
4534
4906
|
const parsed = interpolateEnv((0, import_yaml3.parse)(content), process.env);
|
|
4535
4907
|
if (!isJsonObject(parsed)) {
|
|
4536
4908
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
@@ -4571,13 +4943,13 @@ function parseJsonlContent(content, filePath) {
|
|
|
4571
4943
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
4572
4944
|
const verbose = options?.verbose ?? false;
|
|
4573
4945
|
const filterPattern = options?.filter;
|
|
4574
|
-
const absoluteTestPath =
|
|
4946
|
+
const absoluteTestPath = import_node_path8.default.resolve(evalFilePath);
|
|
4575
4947
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4576
4948
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
4577
4949
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
4578
|
-
const rawFile = await (0,
|
|
4950
|
+
const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
|
|
4579
4951
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
4580
|
-
const fallbackSuiteName =
|
|
4952
|
+
const fallbackSuiteName = import_node_path8.default.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
4581
4953
|
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
4582
4954
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
4583
4955
|
const globalExecution = sidecar.execution;
|
|
@@ -4939,8 +5311,8 @@ function resolveTests(suite) {
|
|
|
4939
5311
|
}
|
|
4940
5312
|
async function readTestSuiteMetadata(testFilePath) {
|
|
4941
5313
|
try {
|
|
4942
|
-
const absolutePath =
|
|
4943
|
-
const content = await (0,
|
|
5314
|
+
const absolutePath = import_node_path9.default.resolve(testFilePath);
|
|
5315
|
+
const content = await (0, import_promises9.readFile)(absolutePath, "utf8");
|
|
4944
5316
|
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4945
5317
|
if (!isJsonObject(parsed)) {
|
|
4946
5318
|
return {};
|
|
@@ -4994,25 +5366,31 @@ var loadEvalCases = loadTests;
|
|
|
4994
5366
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
4995
5367
|
const verbose = options?.verbose ?? false;
|
|
4996
5368
|
const filterPattern = options?.filter;
|
|
4997
|
-
const absoluteTestPath =
|
|
5369
|
+
const absoluteTestPath = import_node_path9.default.resolve(evalFilePath);
|
|
4998
5370
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4999
5371
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
5000
5372
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
5001
|
-
const rawFile = await (0,
|
|
5373
|
+
const rawFile = await (0, import_promises9.readFile)(absoluteTestPath, "utf8");
|
|
5002
5374
|
const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
|
|
5003
5375
|
if (!isJsonObject(interpolated)) {
|
|
5004
5376
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
5005
5377
|
}
|
|
5006
5378
|
const suite = interpolated;
|
|
5007
5379
|
const suiteNameFromFile = asString5(suite.name)?.trim();
|
|
5008
|
-
const fallbackSuiteName =
|
|
5380
|
+
const fallbackSuiteName = import_node_path9.default.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
5009
5381
|
const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
|
|
5010
5382
|
const rawTestCases = resolveTests(suite);
|
|
5011
5383
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
5012
|
-
const
|
|
5384
|
+
const suitePreprocessors = await parsePreprocessors(
|
|
5385
|
+
suite.preprocessors,
|
|
5386
|
+
searchRoots,
|
|
5387
|
+
"<suite>",
|
|
5388
|
+
absoluteTestPath
|
|
5389
|
+
);
|
|
5390
|
+
const evalFileDir = import_node_path9.default.dirname(absoluteTestPath);
|
|
5013
5391
|
let expandedTestCases;
|
|
5014
5392
|
if (typeof rawTestCases === "string") {
|
|
5015
|
-
const externalPath =
|
|
5393
|
+
const externalPath = import_node_path9.default.resolve(evalFileDir, rawTestCases);
|
|
5016
5394
|
expandedTestCases = await loadCasesFromFile(externalPath);
|
|
5017
5395
|
} else if (Array.isArray(rawTestCases)) {
|
|
5018
5396
|
expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
|
|
@@ -5110,7 +5488,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
5110
5488
|
testCaseConfig,
|
|
5111
5489
|
globalExecution,
|
|
5112
5490
|
searchRoots,
|
|
5113
|
-
id ?? "unknown"
|
|
5491
|
+
id ?? "unknown",
|
|
5492
|
+
suitePreprocessors
|
|
5114
5493
|
);
|
|
5115
5494
|
} catch (error) {
|
|
5116
5495
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -5143,6 +5522,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
5143
5522
|
criteria: outcome ?? "",
|
|
5144
5523
|
evaluator: testCaseEvaluatorKind,
|
|
5145
5524
|
assertions: evaluators,
|
|
5525
|
+
...suitePreprocessors ? { preprocessors: suitePreprocessors } : {},
|
|
5146
5526
|
workspace: mergedWorkspace,
|
|
5147
5527
|
metadata,
|
|
5148
5528
|
targets: caseTargets,
|
|
@@ -5183,8 +5563,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
5183
5563
|
if (!command) return void 0;
|
|
5184
5564
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
5185
5565
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
5186
|
-
if (cwd && !
|
|
5187
|
-
cwd =
|
|
5566
|
+
if (cwd && !import_node_path9.default.isAbsolute(cwd)) {
|
|
5567
|
+
cwd = import_node_path9.default.resolve(evalFileDir, cwd);
|
|
5188
5568
|
}
|
|
5189
5569
|
const config = { command };
|
|
5190
5570
|
if (timeoutMs !== void 0) {
|
|
@@ -5222,10 +5602,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
5222
5602
|
}
|
|
5223
5603
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
5224
5604
|
if (typeof raw === "string") {
|
|
5225
|
-
const workspaceFilePath =
|
|
5605
|
+
const workspaceFilePath = import_node_path9.default.resolve(evalFileDir, raw);
|
|
5226
5606
|
let content;
|
|
5227
5607
|
try {
|
|
5228
|
-
content = await (0,
|
|
5608
|
+
content = await (0, import_promises9.readFile)(workspaceFilePath, "utf8");
|
|
5229
5609
|
} catch {
|
|
5230
5610
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
5231
5611
|
}
|
|
@@ -5235,7 +5615,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
5235
5615
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
5236
5616
|
);
|
|
5237
5617
|
}
|
|
5238
|
-
const workspaceFileDir =
|
|
5618
|
+
const workspaceFileDir = import_node_path9.default.dirname(workspaceFilePath);
|
|
5239
5619
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
5240
5620
|
}
|
|
5241
5621
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -5255,8 +5635,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
5255
5635
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
5256
5636
|
}
|
|
5257
5637
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
5258
|
-
if (template && !
|
|
5259
|
-
template =
|
|
5638
|
+
if (template && !import_node_path9.default.isAbsolute(template)) {
|
|
5639
|
+
template = import_node_path9.default.resolve(evalFileDir, template);
|
|
5260
5640
|
}
|
|
5261
5641
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
5262
5642
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -5328,7 +5708,7 @@ ${detailBlock}${ANSI_RESET8}`);
|
|
|
5328
5708
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
5329
5709
|
init_cjs_shims();
|
|
5330
5710
|
var import_node_fs2 = require("fs");
|
|
5331
|
-
var
|
|
5711
|
+
var import_node_path10 = __toESM(require("path"), 1);
|
|
5332
5712
|
var import_yaml5 = require("yaml");
|
|
5333
5713
|
function codeGraderInstruction(graderName, description) {
|
|
5334
5714
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -5569,7 +5949,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
5569
5949
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
5570
5950
|
const content = (0, import_node_fs2.readFileSync)(evalYamlPath, "utf8");
|
|
5571
5951
|
const parsed = (0, import_yaml5.parse)(content);
|
|
5572
|
-
return transpileEvalYaml(parsed,
|
|
5952
|
+
return transpileEvalYaml(parsed, import_node_path10.default.basename(evalYamlPath));
|
|
5573
5953
|
}
|
|
5574
5954
|
function getOutputFilenames(result) {
|
|
5575
5955
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -5589,11 +5969,11 @@ function getOutputFilenames(result) {
|
|
|
5589
5969
|
// src/evaluation/file-utils.ts
|
|
5590
5970
|
init_cjs_shims();
|
|
5591
5971
|
var import_node_fs3 = require("fs");
|
|
5592
|
-
var
|
|
5593
|
-
var
|
|
5972
|
+
var import_promises10 = require("fs/promises");
|
|
5973
|
+
var import_node_path11 = __toESM(require("path"), 1);
|
|
5594
5974
|
async function fileExists2(filePath) {
|
|
5595
5975
|
try {
|
|
5596
|
-
await (0,
|
|
5976
|
+
await (0, import_promises10.access)(filePath, import_node_fs3.constants.F_OK);
|
|
5597
5977
|
return true;
|
|
5598
5978
|
} catch {
|
|
5599
5979
|
return false;
|
|
@@ -5603,22 +5983,22 @@ function normalizeLineEndings(content) {
|
|
|
5603
5983
|
return content.replace(/\r\n/g, "\n");
|
|
5604
5984
|
}
|
|
5605
5985
|
async function readTextFile(filePath) {
|
|
5606
|
-
const content = await (0,
|
|
5986
|
+
const content = await (0, import_promises10.readFile)(filePath, "utf8");
|
|
5607
5987
|
return normalizeLineEndings(content);
|
|
5608
5988
|
}
|
|
5609
5989
|
async function readJsonFile(filePath) {
|
|
5610
|
-
const content = await (0,
|
|
5990
|
+
const content = await (0, import_promises10.readFile)(filePath, "utf8");
|
|
5611
5991
|
return JSON.parse(content);
|
|
5612
5992
|
}
|
|
5613
5993
|
async function findGitRoot(startPath) {
|
|
5614
|
-
let currentDir =
|
|
5615
|
-
const root =
|
|
5994
|
+
let currentDir = import_node_path11.default.dirname(import_node_path11.default.resolve(startPath));
|
|
5995
|
+
const root = import_node_path11.default.parse(currentDir).root;
|
|
5616
5996
|
while (currentDir !== root) {
|
|
5617
|
-
const gitPath =
|
|
5997
|
+
const gitPath = import_node_path11.default.join(currentDir, ".git");
|
|
5618
5998
|
if (await fileExists2(gitPath)) {
|
|
5619
5999
|
return currentDir;
|
|
5620
6000
|
}
|
|
5621
|
-
const parentDir =
|
|
6001
|
+
const parentDir = import_node_path11.default.dirname(currentDir);
|
|
5622
6002
|
if (parentDir === currentDir) {
|
|
5623
6003
|
break;
|
|
5624
6004
|
}
|
|
@@ -5629,8 +6009,8 @@ async function findGitRoot(startPath) {
|
|
|
5629
6009
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
5630
6010
|
const directories = [];
|
|
5631
6011
|
const seen = /* @__PURE__ */ new Set();
|
|
5632
|
-
const boundary =
|
|
5633
|
-
let current =
|
|
6012
|
+
const boundary = import_node_path11.default.resolve(repoRoot);
|
|
6013
|
+
let current = import_node_path11.default.resolve(import_node_path11.default.dirname(filePath));
|
|
5634
6014
|
while (current !== void 0) {
|
|
5635
6015
|
if (!seen.has(current)) {
|
|
5636
6016
|
directories.push(current);
|
|
@@ -5639,7 +6019,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
5639
6019
|
if (current === boundary) {
|
|
5640
6020
|
break;
|
|
5641
6021
|
}
|
|
5642
|
-
const parent =
|
|
6022
|
+
const parent = import_node_path11.default.dirname(current);
|
|
5643
6023
|
if (parent === current) {
|
|
5644
6024
|
break;
|
|
5645
6025
|
}
|
|
@@ -5653,16 +6033,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
5653
6033
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
5654
6034
|
const uniqueRoots = [];
|
|
5655
6035
|
const addRoot = (root) => {
|
|
5656
|
-
const normalized =
|
|
6036
|
+
const normalized = import_node_path11.default.resolve(root);
|
|
5657
6037
|
if (!uniqueRoots.includes(normalized)) {
|
|
5658
6038
|
uniqueRoots.push(normalized);
|
|
5659
6039
|
}
|
|
5660
6040
|
};
|
|
5661
|
-
let currentDir =
|
|
6041
|
+
let currentDir = import_node_path11.default.dirname(evalPath);
|
|
5662
6042
|
let reachedBoundary = false;
|
|
5663
6043
|
while (!reachedBoundary) {
|
|
5664
6044
|
addRoot(currentDir);
|
|
5665
|
-
const parentDir =
|
|
6045
|
+
const parentDir = import_node_path11.default.dirname(currentDir);
|
|
5666
6046
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
5667
6047
|
reachedBoundary = true;
|
|
5668
6048
|
} else {
|
|
@@ -5680,16 +6060,16 @@ function trimLeadingSeparators2(value) {
|
|
|
5680
6060
|
async function resolveFileReference3(rawValue, searchRoots) {
|
|
5681
6061
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
5682
6062
|
const potentialPaths = [];
|
|
5683
|
-
if (
|
|
5684
|
-
potentialPaths.push(
|
|
6063
|
+
if (import_node_path11.default.isAbsolute(rawValue)) {
|
|
6064
|
+
potentialPaths.push(import_node_path11.default.normalize(rawValue));
|
|
5685
6065
|
}
|
|
5686
6066
|
for (const base of searchRoots) {
|
|
5687
|
-
potentialPaths.push(
|
|
6067
|
+
potentialPaths.push(import_node_path11.default.resolve(base, displayPath));
|
|
5688
6068
|
}
|
|
5689
6069
|
const attempted = [];
|
|
5690
6070
|
const seen = /* @__PURE__ */ new Set();
|
|
5691
6071
|
for (const candidate of potentialPaths) {
|
|
5692
|
-
const absoluteCandidate =
|
|
6072
|
+
const absoluteCandidate = import_node_path11.default.resolve(candidate);
|
|
5693
6073
|
if (seen.has(absoluteCandidate)) {
|
|
5694
6074
|
continue;
|
|
5695
6075
|
}
|
|
@@ -6128,8 +6508,8 @@ init_cjs_shims();
|
|
|
6128
6508
|
var import_node_child_process = require("child_process");
|
|
6129
6509
|
var import_node_crypto = require("crypto");
|
|
6130
6510
|
var import_node_fs4 = require("fs");
|
|
6131
|
-
var
|
|
6132
|
-
var
|
|
6511
|
+
var import_promises11 = require("fs/promises");
|
|
6512
|
+
var import_node_path13 = __toESM(require("path"), 1);
|
|
6133
6513
|
|
|
6134
6514
|
// src/evaluation/providers/claude-content.ts
|
|
6135
6515
|
init_cjs_shims();
|
|
@@ -6231,7 +6611,7 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
6231
6611
|
|
|
6232
6612
|
// src/evaluation/providers/preread.ts
|
|
6233
6613
|
init_cjs_shims();
|
|
6234
|
-
var
|
|
6614
|
+
var import_node_path12 = __toESM(require("path"), 1);
|
|
6235
6615
|
function buildPromptDocument(request, inputFiles) {
|
|
6236
6616
|
const parts = [];
|
|
6237
6617
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
@@ -6248,7 +6628,7 @@ function normalizeInputFiles(inputFiles) {
|
|
|
6248
6628
|
}
|
|
6249
6629
|
const deduped = /* @__PURE__ */ new Map();
|
|
6250
6630
|
for (const inputFile of inputFiles) {
|
|
6251
|
-
const absolutePath =
|
|
6631
|
+
const absolutePath = import_node_path12.default.resolve(inputFile);
|
|
6252
6632
|
if (!deduped.has(absolutePath)) {
|
|
6253
6633
|
deduped.set(absolutePath, absolutePath);
|
|
6254
6634
|
}
|
|
@@ -6261,7 +6641,7 @@ function collectInputFiles(inputFiles) {
|
|
|
6261
6641
|
}
|
|
6262
6642
|
const unique = /* @__PURE__ */ new Map();
|
|
6263
6643
|
for (const inputFile of inputFiles) {
|
|
6264
|
-
const absolutePath =
|
|
6644
|
+
const absolutePath = import_node_path12.default.resolve(inputFile);
|
|
6265
6645
|
if (!unique.has(absolutePath)) {
|
|
6266
6646
|
unique.set(absolutePath, absolutePath);
|
|
6267
6647
|
}
|
|
@@ -6273,7 +6653,7 @@ function buildMandatoryPrereadBlock(inputFiles) {
|
|
|
6273
6653
|
return "";
|
|
6274
6654
|
}
|
|
6275
6655
|
const buildList = (files) => files.map((absolutePath) => {
|
|
6276
|
-
const fileName =
|
|
6656
|
+
const fileName = import_node_path12.default.basename(absolutePath);
|
|
6277
6657
|
const fileUri = pathToFileUri(absolutePath);
|
|
6278
6658
|
return `* [${fileName}](${fileUri})`;
|
|
6279
6659
|
});
|
|
@@ -6289,7 +6669,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
6289
6669
|
return sections.join("\n");
|
|
6290
6670
|
}
|
|
6291
6671
|
function pathToFileUri(filePath) {
|
|
6292
|
-
const absolutePath =
|
|
6672
|
+
const absolutePath = import_node_path12.default.isAbsolute(filePath) ? filePath : import_node_path12.default.resolve(filePath);
|
|
6293
6673
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
6294
6674
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
6295
6675
|
return `file:///${normalizedPath}`;
|
|
@@ -6437,10 +6817,10 @@ var ClaudeCliProvider = class {
|
|
|
6437
6817
|
}
|
|
6438
6818
|
resolveCwd(cwdOverride) {
|
|
6439
6819
|
if (cwdOverride) {
|
|
6440
|
-
return
|
|
6820
|
+
return import_node_path13.default.resolve(cwdOverride);
|
|
6441
6821
|
}
|
|
6442
6822
|
if (this.config.cwd) {
|
|
6443
|
-
return
|
|
6823
|
+
return import_node_path13.default.resolve(this.config.cwd);
|
|
6444
6824
|
}
|
|
6445
6825
|
return void 0;
|
|
6446
6826
|
}
|
|
@@ -6450,9 +6830,9 @@ var ClaudeCliProvider = class {
|
|
|
6450
6830
|
return void 0;
|
|
6451
6831
|
}
|
|
6452
6832
|
if (this.config.logDir) {
|
|
6453
|
-
return
|
|
6833
|
+
return import_node_path13.default.resolve(this.config.logDir);
|
|
6454
6834
|
}
|
|
6455
|
-
return
|
|
6835
|
+
return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
6456
6836
|
}
|
|
6457
6837
|
async createStreamLogger(request) {
|
|
6458
6838
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6460,13 +6840,13 @@ var ClaudeCliProvider = class {
|
|
|
6460
6840
|
return void 0;
|
|
6461
6841
|
}
|
|
6462
6842
|
try {
|
|
6463
|
-
await (0,
|
|
6843
|
+
await (0, import_promises11.mkdir)(logDir, { recursive: true });
|
|
6464
6844
|
} catch (error) {
|
|
6465
6845
|
const message = error instanceof Error ? error.message : String(error);
|
|
6466
6846
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6467
6847
|
return void 0;
|
|
6468
6848
|
}
|
|
6469
|
-
const filePath =
|
|
6849
|
+
const filePath = import_node_path13.default.join(logDir, buildLogFilename(request, this.targetName));
|
|
6470
6850
|
try {
|
|
6471
6851
|
const logger = await ClaudeCliStreamLogger.create({
|
|
6472
6852
|
filePath,
|
|
@@ -6768,8 +7148,8 @@ function tryParseJson(line) {
|
|
|
6768
7148
|
init_cjs_shims();
|
|
6769
7149
|
var import_node_crypto2 = require("crypto");
|
|
6770
7150
|
var import_node_fs5 = require("fs");
|
|
6771
|
-
var
|
|
6772
|
-
var
|
|
7151
|
+
var import_promises12 = require("fs/promises");
|
|
7152
|
+
var import_node_path14 = __toESM(require("path"), 1);
|
|
6773
7153
|
var claudeSdkModule = null;
|
|
6774
7154
|
async function loadClaudeSdk() {
|
|
6775
7155
|
if (!claudeSdkModule) {
|
|
@@ -6930,10 +7310,10 @@ var ClaudeSdkProvider = class {
|
|
|
6930
7310
|
}
|
|
6931
7311
|
resolveCwd(cwdOverride) {
|
|
6932
7312
|
if (cwdOverride) {
|
|
6933
|
-
return
|
|
7313
|
+
return import_node_path14.default.resolve(cwdOverride);
|
|
6934
7314
|
}
|
|
6935
7315
|
if (this.config.cwd) {
|
|
6936
|
-
return
|
|
7316
|
+
return import_node_path14.default.resolve(this.config.cwd);
|
|
6937
7317
|
}
|
|
6938
7318
|
return void 0;
|
|
6939
7319
|
}
|
|
@@ -6943,9 +7323,9 @@ var ClaudeSdkProvider = class {
|
|
|
6943
7323
|
return void 0;
|
|
6944
7324
|
}
|
|
6945
7325
|
if (this.config.logDir) {
|
|
6946
|
-
return
|
|
7326
|
+
return import_node_path14.default.resolve(this.config.logDir);
|
|
6947
7327
|
}
|
|
6948
|
-
return
|
|
7328
|
+
return import_node_path14.default.join(process.cwd(), ".agentv", "logs", "claude");
|
|
6949
7329
|
}
|
|
6950
7330
|
async createStreamLogger(request) {
|
|
6951
7331
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6953,13 +7333,13 @@ var ClaudeSdkProvider = class {
|
|
|
6953
7333
|
return void 0;
|
|
6954
7334
|
}
|
|
6955
7335
|
try {
|
|
6956
|
-
await (0,
|
|
7336
|
+
await (0, import_promises12.mkdir)(logDir, { recursive: true });
|
|
6957
7337
|
} catch (error) {
|
|
6958
7338
|
const message = error instanceof Error ? error.message : String(error);
|
|
6959
7339
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
6960
7340
|
return void 0;
|
|
6961
7341
|
}
|
|
6962
|
-
const filePath =
|
|
7342
|
+
const filePath = import_node_path14.default.join(logDir, buildLogFilename2(request, this.targetName));
|
|
6963
7343
|
try {
|
|
6964
7344
|
const logger = await ClaudeStreamLogger.create({
|
|
6965
7345
|
filePath,
|
|
@@ -7146,9 +7526,9 @@ function formatElapsed2(startedAt) {
|
|
|
7146
7526
|
// src/evaluation/providers/cli.ts
|
|
7147
7527
|
init_cjs_shims();
|
|
7148
7528
|
var import_node_child_process2 = require("child_process");
|
|
7149
|
-
var
|
|
7529
|
+
var import_promises13 = __toESM(require("fs/promises"), 1);
|
|
7150
7530
|
var import_node_os = __toESM(require("os"), 1);
|
|
7151
|
-
var
|
|
7531
|
+
var import_node_path15 = __toESM(require("path"), 1);
|
|
7152
7532
|
var import_node_util = require("util");
|
|
7153
7533
|
var import_zod2 = require("zod");
|
|
7154
7534
|
var ToolCallSchema = import_zod2.z.object({
|
|
@@ -7545,7 +7925,7 @@ var CliProvider = class {
|
|
|
7545
7925
|
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
7546
7926
|
} finally {
|
|
7547
7927
|
if (!this.keepTempFiles) {
|
|
7548
|
-
await
|
|
7928
|
+
await import_promises13.default.unlink(filePath).catch(() => {
|
|
7549
7929
|
});
|
|
7550
7930
|
}
|
|
7551
7931
|
}
|
|
@@ -7625,7 +8005,7 @@ var CliProvider = class {
|
|
|
7625
8005
|
async function buildTemplateValues(request, config, outputFilePath) {
|
|
7626
8006
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
7627
8007
|
const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
|
|
7628
|
-
await
|
|
8008
|
+
await import_promises13.default.writeFile(promptFilePath, request.question ?? "", "utf8");
|
|
7629
8009
|
return {
|
|
7630
8010
|
values: {
|
|
7631
8011
|
PROMPT: shellEscape(request.question ?? ""),
|
|
@@ -7642,7 +8022,7 @@ async function cleanupTempFile(filePath, keepTempFiles) {
|
|
|
7642
8022
|
if (!filePath || keepTempFiles) {
|
|
7643
8023
|
return;
|
|
7644
8024
|
}
|
|
7645
|
-
await
|
|
8025
|
+
await import_promises13.default.unlink(filePath).catch(() => {
|
|
7646
8026
|
});
|
|
7647
8027
|
}
|
|
7648
8028
|
function normalizeInputFiles2(inputFiles) {
|
|
@@ -7651,7 +8031,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
7651
8031
|
}
|
|
7652
8032
|
const unique = /* @__PURE__ */ new Map();
|
|
7653
8033
|
for (const inputFile of inputFiles) {
|
|
7654
|
-
const absolutePath =
|
|
8034
|
+
const absolutePath = import_node_path15.default.resolve(inputFile);
|
|
7655
8035
|
if (!unique.has(absolutePath)) {
|
|
7656
8036
|
unique.set(absolutePath, absolutePath);
|
|
7657
8037
|
}
|
|
@@ -7665,7 +8045,7 @@ function formatFileList(files, template) {
|
|
|
7665
8045
|
const formatter = template ?? "{path}";
|
|
7666
8046
|
return files.map((filePath) => {
|
|
7667
8047
|
const escapedPath = shellEscape(filePath);
|
|
7668
|
-
const escapedName = shellEscape(
|
|
8048
|
+
const escapedName = shellEscape(import_node_path15.default.basename(filePath));
|
|
7669
8049
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
7670
8050
|
}).join(" ");
|
|
7671
8051
|
}
|
|
@@ -7689,7 +8069,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
7689
8069
|
const safeEvalId = evalCaseId || "unknown";
|
|
7690
8070
|
const timestamp = Date.now();
|
|
7691
8071
|
const random = Math.random().toString(36).substring(2, 9);
|
|
7692
|
-
return
|
|
8072
|
+
return import_node_path15.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
7693
8073
|
}
|
|
7694
8074
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
7695
8075
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -7703,8 +8083,8 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
7703
8083
|
init_cjs_shims();
|
|
7704
8084
|
var import_node_crypto3 = require("crypto");
|
|
7705
8085
|
var import_node_fs6 = require("fs");
|
|
7706
|
-
var
|
|
7707
|
-
var
|
|
8086
|
+
var import_promises14 = require("fs/promises");
|
|
8087
|
+
var import_node_path16 = __toESM(require("path"), 1);
|
|
7708
8088
|
|
|
7709
8089
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
7710
8090
|
init_cjs_shims();
|
|
@@ -7940,10 +8320,10 @@ ${basePrompt}` : basePrompt;
|
|
|
7940
8320
|
}
|
|
7941
8321
|
resolveCwd(cwdOverride) {
|
|
7942
8322
|
if (cwdOverride) {
|
|
7943
|
-
return
|
|
8323
|
+
return import_node_path16.default.resolve(cwdOverride);
|
|
7944
8324
|
}
|
|
7945
8325
|
if (this.config.cwd) {
|
|
7946
|
-
return
|
|
8326
|
+
return import_node_path16.default.resolve(this.config.cwd);
|
|
7947
8327
|
}
|
|
7948
8328
|
return void 0;
|
|
7949
8329
|
}
|
|
@@ -7953,9 +8333,9 @@ ${basePrompt}` : basePrompt;
|
|
|
7953
8333
|
return void 0;
|
|
7954
8334
|
}
|
|
7955
8335
|
if (this.config.logDir) {
|
|
7956
|
-
return
|
|
8336
|
+
return import_node_path16.default.resolve(this.config.logDir);
|
|
7957
8337
|
}
|
|
7958
|
-
return
|
|
8338
|
+
return import_node_path16.default.join(process.cwd(), ".agentv", "logs", "codex");
|
|
7959
8339
|
}
|
|
7960
8340
|
async createStreamLogger(request) {
|
|
7961
8341
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7963,13 +8343,13 @@ ${basePrompt}` : basePrompt;
|
|
|
7963
8343
|
return void 0;
|
|
7964
8344
|
}
|
|
7965
8345
|
try {
|
|
7966
|
-
await (0,
|
|
8346
|
+
await (0, import_promises14.mkdir)(logDir, { recursive: true });
|
|
7967
8347
|
} catch (error) {
|
|
7968
8348
|
const message = error instanceof Error ? error.message : String(error);
|
|
7969
8349
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7970
8350
|
return void 0;
|
|
7971
8351
|
}
|
|
7972
|
-
const filePath =
|
|
8352
|
+
const filePath = import_node_path16.default.join(logDir, buildLogFilename3(request, this.targetName));
|
|
7973
8353
|
try {
|
|
7974
8354
|
const logger = await CodexSdkStreamLogger.create({
|
|
7975
8355
|
filePath,
|
|
@@ -8113,8 +8493,8 @@ function formatElapsed3(startedAt) {
|
|
|
8113
8493
|
// src/evaluation/providers/copilot-cli.ts
|
|
8114
8494
|
init_cjs_shims();
|
|
8115
8495
|
var import_node_crypto5 = require("crypto");
|
|
8116
|
-
var
|
|
8117
|
-
var
|
|
8496
|
+
var import_promises15 = require("fs/promises");
|
|
8497
|
+
var import_node_path18 = __toESM(require("path"), 1);
|
|
8118
8498
|
var import_node_stream = require("stream");
|
|
8119
8499
|
var import_node_child_process3 = require("child_process");
|
|
8120
8500
|
var acp = __toESM(require("@agentclientprotocol/sdk"), 1);
|
|
@@ -8178,8 +8558,8 @@ init_cjs_shims();
|
|
|
8178
8558
|
var import_node_crypto4 = require("crypto");
|
|
8179
8559
|
var import_node_fs7 = require("fs");
|
|
8180
8560
|
var import_node_os2 = require("os");
|
|
8181
|
-
var
|
|
8182
|
-
var
|
|
8561
|
+
var import_node_path17 = __toESM(require("path"), 1);
|
|
8562
|
+
var import_node_url3 = require("url");
|
|
8183
8563
|
var import_meta = {};
|
|
8184
8564
|
function resolvePlatformCliPath() {
|
|
8185
8565
|
const os3 = (0, import_node_os2.platform)();
|
|
@@ -8202,8 +8582,8 @@ function resolvePlatformCliPath() {
|
|
|
8202
8582
|
const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
|
|
8203
8583
|
try {
|
|
8204
8584
|
const resolved = import_meta.resolve(`${packageName}/package.json`);
|
|
8205
|
-
const packageJsonPath = resolved.startsWith("file:") ? (0,
|
|
8206
|
-
const binaryPath =
|
|
8585
|
+
const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url3.fileURLToPath)(resolved) : resolved;
|
|
8586
|
+
const binaryPath = import_node_path17.default.join(import_node_path17.default.dirname(packageJsonPath), binaryName);
|
|
8207
8587
|
if ((0, import_node_fs7.existsSync)(binaryPath)) {
|
|
8208
8588
|
return binaryPath;
|
|
8209
8589
|
}
|
|
@@ -8211,7 +8591,7 @@ function resolvePlatformCliPath() {
|
|
|
8211
8591
|
}
|
|
8212
8592
|
let searchDir = process.cwd();
|
|
8213
8593
|
for (let i = 0; i < 10; i++) {
|
|
8214
|
-
const standardPath =
|
|
8594
|
+
const standardPath = import_node_path17.default.join(
|
|
8215
8595
|
searchDir,
|
|
8216
8596
|
"node_modules",
|
|
8217
8597
|
...packageName.split("/"),
|
|
@@ -8220,13 +8600,13 @@ function resolvePlatformCliPath() {
|
|
|
8220
8600
|
if ((0, import_node_fs7.existsSync)(standardPath)) {
|
|
8221
8601
|
return standardPath;
|
|
8222
8602
|
}
|
|
8223
|
-
const bunDir =
|
|
8603
|
+
const bunDir = import_node_path17.default.join(searchDir, "node_modules", ".bun");
|
|
8224
8604
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
8225
8605
|
try {
|
|
8226
8606
|
const entries = (0, import_node_fs7.readdirSync)(bunDir);
|
|
8227
8607
|
for (const entry of entries) {
|
|
8228
8608
|
if (entry.startsWith(prefix)) {
|
|
8229
|
-
const candidate =
|
|
8609
|
+
const candidate = import_node_path17.default.join(
|
|
8230
8610
|
bunDir,
|
|
8231
8611
|
entry,
|
|
8232
8612
|
"node_modules",
|
|
@@ -8241,7 +8621,7 @@ function resolvePlatformCliPath() {
|
|
|
8241
8621
|
}
|
|
8242
8622
|
} catch {
|
|
8243
8623
|
}
|
|
8244
|
-
const parent =
|
|
8624
|
+
const parent = import_node_path17.default.dirname(searchDir);
|
|
8245
8625
|
if (parent === searchDir) break;
|
|
8246
8626
|
searchDir = parent;
|
|
8247
8627
|
}
|
|
@@ -8585,10 +8965,10 @@ var CopilotCliProvider = class {
|
|
|
8585
8965
|
}
|
|
8586
8966
|
resolveCwd(cwdOverride) {
|
|
8587
8967
|
if (cwdOverride) {
|
|
8588
|
-
return
|
|
8968
|
+
return import_node_path18.default.resolve(cwdOverride);
|
|
8589
8969
|
}
|
|
8590
8970
|
if (this.config.cwd) {
|
|
8591
|
-
return
|
|
8971
|
+
return import_node_path18.default.resolve(this.config.cwd);
|
|
8592
8972
|
}
|
|
8593
8973
|
return void 0;
|
|
8594
8974
|
}
|
|
@@ -8607,9 +8987,9 @@ var CopilotCliProvider = class {
|
|
|
8607
8987
|
return void 0;
|
|
8608
8988
|
}
|
|
8609
8989
|
if (this.config.logDir) {
|
|
8610
|
-
return
|
|
8990
|
+
return import_node_path18.default.resolve(this.config.logDir);
|
|
8611
8991
|
}
|
|
8612
|
-
return
|
|
8992
|
+
return import_node_path18.default.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
8613
8993
|
}
|
|
8614
8994
|
async createStreamLogger(request) {
|
|
8615
8995
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8617,13 +8997,13 @@ var CopilotCliProvider = class {
|
|
|
8617
8997
|
return void 0;
|
|
8618
8998
|
}
|
|
8619
8999
|
try {
|
|
8620
|
-
await (0,
|
|
9000
|
+
await (0, import_promises15.mkdir)(logDir, { recursive: true });
|
|
8621
9001
|
} catch (error) {
|
|
8622
9002
|
const message = error instanceof Error ? error.message : String(error);
|
|
8623
9003
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
8624
9004
|
return void 0;
|
|
8625
9005
|
}
|
|
8626
|
-
const filePath =
|
|
9006
|
+
const filePath = import_node_path18.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
8627
9007
|
try {
|
|
8628
9008
|
const logger = await CopilotStreamLogger.create(
|
|
8629
9009
|
{
|
|
@@ -8717,9 +9097,9 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
8717
9097
|
|
|
8718
9098
|
// src/evaluation/providers/copilot-log.ts
|
|
8719
9099
|
init_cjs_shims();
|
|
8720
|
-
var
|
|
9100
|
+
var import_promises17 = require("fs/promises");
|
|
8721
9101
|
var import_node_os4 = require("os");
|
|
8722
|
-
var
|
|
9102
|
+
var import_node_path20 = __toESM(require("path"), 1);
|
|
8723
9103
|
|
|
8724
9104
|
// src/evaluation/providers/copilot-log-parser.ts
|
|
8725
9105
|
init_cjs_shims();
|
|
@@ -8853,32 +9233,32 @@ function parseCopilotEvents(eventsJsonl) {
|
|
|
8853
9233
|
|
|
8854
9234
|
// src/evaluation/providers/copilot-session-discovery.ts
|
|
8855
9235
|
init_cjs_shims();
|
|
8856
|
-
var
|
|
9236
|
+
var import_promises16 = require("fs/promises");
|
|
8857
9237
|
var import_node_os3 = require("os");
|
|
8858
|
-
var
|
|
9238
|
+
var import_node_path19 = __toESM(require("path"), 1);
|
|
8859
9239
|
var import_yaml6 = require("yaml");
|
|
8860
|
-
var DEFAULT_SESSION_STATE_DIR = () =>
|
|
9240
|
+
var DEFAULT_SESSION_STATE_DIR = () => import_node_path19.default.join((0, import_node_os3.homedir)(), ".copilot", "session-state");
|
|
8861
9241
|
async function discoverCopilotSessions(opts) {
|
|
8862
9242
|
const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
|
|
8863
9243
|
const limit = opts?.limit ?? 10;
|
|
8864
9244
|
let entries;
|
|
8865
9245
|
try {
|
|
8866
|
-
entries = await (0,
|
|
9246
|
+
entries = await (0, import_promises16.readdir)(sessionStateDir);
|
|
8867
9247
|
} catch {
|
|
8868
9248
|
return [];
|
|
8869
9249
|
}
|
|
8870
9250
|
const sessions = [];
|
|
8871
9251
|
for (const entry of entries) {
|
|
8872
|
-
const sessionDir =
|
|
8873
|
-
const workspacePath =
|
|
8874
|
-
const eventsPath =
|
|
9252
|
+
const sessionDir = import_node_path19.default.join(sessionStateDir, entry);
|
|
9253
|
+
const workspacePath = import_node_path19.default.join(sessionDir, "workspace.yaml");
|
|
9254
|
+
const eventsPath = import_node_path19.default.join(sessionDir, "events.jsonl");
|
|
8875
9255
|
try {
|
|
8876
|
-
const workspaceContent = await (0,
|
|
9256
|
+
const workspaceContent = await (0, import_promises16.readFile)(workspacePath, "utf8");
|
|
8877
9257
|
const workspace = (0, import_yaml6.parse)(workspaceContent) ?? {};
|
|
8878
9258
|
const cwd = String(workspace.cwd ?? "");
|
|
8879
9259
|
let updatedAt;
|
|
8880
9260
|
try {
|
|
8881
|
-
const eventsStat = await (0,
|
|
9261
|
+
const eventsStat = await (0, import_promises16.stat)(eventsPath);
|
|
8882
9262
|
updatedAt = eventsStat.mtime;
|
|
8883
9263
|
} catch {
|
|
8884
9264
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -8932,10 +9312,10 @@ var CopilotLogProvider = class {
|
|
|
8932
9312
|
}
|
|
8933
9313
|
async invoke(_request) {
|
|
8934
9314
|
const sessionDir = await this.resolveSessionDir();
|
|
8935
|
-
const eventsPath =
|
|
9315
|
+
const eventsPath = import_node_path20.default.join(sessionDir, "events.jsonl");
|
|
8936
9316
|
let eventsContent;
|
|
8937
9317
|
try {
|
|
8938
|
-
eventsContent = await (0,
|
|
9318
|
+
eventsContent = await (0, import_promises17.readFile)(eventsPath, "utf8");
|
|
8939
9319
|
} catch (err) {
|
|
8940
9320
|
throw new Error(
|
|
8941
9321
|
`Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -8954,8 +9334,8 @@ var CopilotLogProvider = class {
|
|
|
8954
9334
|
return this.config.sessionDir;
|
|
8955
9335
|
}
|
|
8956
9336
|
if (this.config.sessionId) {
|
|
8957
|
-
const stateDir = this.config.sessionStateDir ??
|
|
8958
|
-
return
|
|
9337
|
+
const stateDir = this.config.sessionStateDir ?? import_node_path20.default.join((0, import_node_os4.homedir)(), ".copilot", "session-state");
|
|
9338
|
+
return import_node_path20.default.join(stateDir, this.config.sessionId);
|
|
8959
9339
|
}
|
|
8960
9340
|
if (this.config.discover === "latest") {
|
|
8961
9341
|
const sessions = await discoverCopilotSessions({
|
|
@@ -8980,8 +9360,8 @@ var CopilotLogProvider = class {
|
|
|
8980
9360
|
init_cjs_shims();
|
|
8981
9361
|
var import_node_crypto6 = require("crypto");
|
|
8982
9362
|
var import_node_fs8 = require("fs");
|
|
8983
|
-
var
|
|
8984
|
-
var
|
|
9363
|
+
var import_promises18 = require("fs/promises");
|
|
9364
|
+
var import_node_path21 = __toESM(require("path"), 1);
|
|
8985
9365
|
|
|
8986
9366
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
8987
9367
|
init_cjs_shims();
|
|
@@ -9292,10 +9672,10 @@ var CopilotSdkProvider = class {
|
|
|
9292
9672
|
}
|
|
9293
9673
|
resolveCwd(cwdOverride) {
|
|
9294
9674
|
if (cwdOverride) {
|
|
9295
|
-
return
|
|
9675
|
+
return import_node_path21.default.resolve(cwdOverride);
|
|
9296
9676
|
}
|
|
9297
9677
|
if (this.config.cwd) {
|
|
9298
|
-
return
|
|
9678
|
+
return import_node_path21.default.resolve(this.config.cwd);
|
|
9299
9679
|
}
|
|
9300
9680
|
return void 0;
|
|
9301
9681
|
}
|
|
@@ -9304,9 +9684,9 @@ var CopilotSdkProvider = class {
|
|
|
9304
9684
|
return void 0;
|
|
9305
9685
|
}
|
|
9306
9686
|
if (this.config.logDir) {
|
|
9307
|
-
return
|
|
9687
|
+
return import_node_path21.default.resolve(this.config.logDir);
|
|
9308
9688
|
}
|
|
9309
|
-
return
|
|
9689
|
+
return import_node_path21.default.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
9310
9690
|
}
|
|
9311
9691
|
async createStreamLogger(request) {
|
|
9312
9692
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9314,13 +9694,13 @@ var CopilotSdkProvider = class {
|
|
|
9314
9694
|
return void 0;
|
|
9315
9695
|
}
|
|
9316
9696
|
try {
|
|
9317
|
-
await (0,
|
|
9697
|
+
await (0, import_promises18.mkdir)(logDir, { recursive: true });
|
|
9318
9698
|
} catch (error) {
|
|
9319
9699
|
const message = error instanceof Error ? error.message : String(error);
|
|
9320
9700
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
9321
9701
|
return void 0;
|
|
9322
9702
|
}
|
|
9323
|
-
const filePath =
|
|
9703
|
+
const filePath = import_node_path21.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
9324
9704
|
try {
|
|
9325
9705
|
const logger = await CopilotStreamLogger.create(
|
|
9326
9706
|
{
|
|
@@ -9349,9 +9729,9 @@ var CopilotSdkProvider = class {
|
|
|
9349
9729
|
};
|
|
9350
9730
|
function resolveSkillDirectories(cwd) {
|
|
9351
9731
|
const candidates = [
|
|
9352
|
-
|
|
9353
|
-
|
|
9354
|
-
|
|
9732
|
+
import_node_path21.default.join(cwd, ".claude", "skills"),
|
|
9733
|
+
import_node_path21.default.join(cwd, ".agents", "skills"),
|
|
9734
|
+
import_node_path21.default.join(cwd, ".codex", "skills")
|
|
9355
9735
|
];
|
|
9356
9736
|
return candidates.filter((dir) => (0, import_node_fs8.existsSync)(dir));
|
|
9357
9737
|
}
|
|
@@ -9435,9 +9815,9 @@ init_cjs_shims();
|
|
|
9435
9815
|
var import_node_child_process4 = require("child_process");
|
|
9436
9816
|
var import_node_crypto7 = require("crypto");
|
|
9437
9817
|
var import_node_fs9 = require("fs");
|
|
9438
|
-
var
|
|
9818
|
+
var import_promises19 = require("fs/promises");
|
|
9439
9819
|
var import_node_os5 = require("os");
|
|
9440
|
-
var
|
|
9820
|
+
var import_node_path22 = __toESM(require("path"), 1);
|
|
9441
9821
|
|
|
9442
9822
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
9443
9823
|
init_cjs_shims();
|
|
@@ -9646,8 +10026,8 @@ var PiCliProvider = class {
|
|
|
9646
10026
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
9647
10027
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
9648
10028
|
try {
|
|
9649
|
-
const promptFile =
|
|
9650
|
-
await (0,
|
|
10029
|
+
const promptFile = import_node_path22.default.join(cwd, PROMPT_FILENAME);
|
|
10030
|
+
await (0, import_promises19.writeFile)(promptFile, request.question, "utf8");
|
|
9651
10031
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
9652
10032
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
9653
10033
|
if (result.timedOut) {
|
|
@@ -9709,10 +10089,10 @@ var PiCliProvider = class {
|
|
|
9709
10089
|
}
|
|
9710
10090
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
9711
10091
|
if (cwdOverride) {
|
|
9712
|
-
return
|
|
10092
|
+
return import_node_path22.default.resolve(cwdOverride);
|
|
9713
10093
|
}
|
|
9714
10094
|
if (this.config.cwd) {
|
|
9715
|
-
return
|
|
10095
|
+
return import_node_path22.default.resolve(this.config.cwd);
|
|
9716
10096
|
}
|
|
9717
10097
|
if (workspaceRoot) {
|
|
9718
10098
|
return workspaceRoot;
|
|
@@ -9818,19 +10198,19 @@ ${prompt}` : prompt;
|
|
|
9818
10198
|
return env;
|
|
9819
10199
|
}
|
|
9820
10200
|
async createWorkspace() {
|
|
9821
|
-
return await (0,
|
|
10201
|
+
return await (0, import_promises19.mkdtemp)(import_node_path22.default.join((0, import_node_os5.tmpdir)(), WORKSPACE_PREFIX));
|
|
9822
10202
|
}
|
|
9823
10203
|
async cleanupWorkspace(workspaceRoot) {
|
|
9824
10204
|
try {
|
|
9825
|
-
await (0,
|
|
10205
|
+
await (0, import_promises19.rm)(workspaceRoot, { recursive: true, force: true });
|
|
9826
10206
|
} catch {
|
|
9827
10207
|
}
|
|
9828
10208
|
}
|
|
9829
10209
|
resolveLogDirectory() {
|
|
9830
10210
|
if (this.config.logDir) {
|
|
9831
|
-
return
|
|
10211
|
+
return import_node_path22.default.resolve(this.config.logDir);
|
|
9832
10212
|
}
|
|
9833
|
-
return
|
|
10213
|
+
return import_node_path22.default.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
9834
10214
|
}
|
|
9835
10215
|
async createStreamLogger(request) {
|
|
9836
10216
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9838,13 +10218,13 @@ ${prompt}` : prompt;
|
|
|
9838
10218
|
return void 0;
|
|
9839
10219
|
}
|
|
9840
10220
|
try {
|
|
9841
|
-
await (0,
|
|
10221
|
+
await (0, import_promises19.mkdir)(logDir, { recursive: true });
|
|
9842
10222
|
} catch (error) {
|
|
9843
10223
|
const message = error instanceof Error ? error.message : String(error);
|
|
9844
10224
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
9845
10225
|
return void 0;
|
|
9846
10226
|
}
|
|
9847
|
-
const filePath =
|
|
10227
|
+
const filePath = import_node_path22.default.join(logDir, buildLogFilename5(request, this.targetName));
|
|
9848
10228
|
try {
|
|
9849
10229
|
const logger = await PiStreamLogger.create({
|
|
9850
10230
|
filePath,
|
|
@@ -10315,8 +10695,8 @@ function resolveWindowsCmd(executable) {
|
|
|
10315
10695
|
const content = (0, import_node_fs9.readFileSync)(cmdPath, "utf-8");
|
|
10316
10696
|
const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
|
|
10317
10697
|
if (match) {
|
|
10318
|
-
const dp0 =
|
|
10319
|
-
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${
|
|
10698
|
+
const dp0 = import_node_path22.default.dirname(import_node_path22.default.resolve(cmdPath));
|
|
10699
|
+
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${import_node_path22.default.sep}`);
|
|
10320
10700
|
try {
|
|
10321
10701
|
(0, import_node_fs9.accessSync)(scriptPath);
|
|
10322
10702
|
return ["node", [scriptPath]];
|
|
@@ -10395,15 +10775,15 @@ init_cjs_shims();
|
|
|
10395
10775
|
var import_node_child_process5 = require("child_process");
|
|
10396
10776
|
var import_node_crypto8 = require("crypto");
|
|
10397
10777
|
var import_node_fs10 = require("fs");
|
|
10398
|
-
var
|
|
10399
|
-
var
|
|
10778
|
+
var import_promises20 = require("fs/promises");
|
|
10779
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
10400
10780
|
var import_node_readline = require("readline");
|
|
10401
|
-
var
|
|
10781
|
+
var import_node_url4 = require("url");
|
|
10402
10782
|
|
|
10403
10783
|
// src/paths.ts
|
|
10404
10784
|
init_cjs_shims();
|
|
10405
10785
|
var import_node_os6 = __toESM(require("os"), 1);
|
|
10406
|
-
var
|
|
10786
|
+
var import_node_path23 = __toESM(require("path"), 1);
|
|
10407
10787
|
var logged = false;
|
|
10408
10788
|
function getAgentvHome() {
|
|
10409
10789
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -10414,19 +10794,19 @@ function getAgentvHome() {
|
|
|
10414
10794
|
}
|
|
10415
10795
|
return envHome;
|
|
10416
10796
|
}
|
|
10417
|
-
return
|
|
10797
|
+
return import_node_path23.default.join(import_node_os6.default.homedir(), ".agentv");
|
|
10418
10798
|
}
|
|
10419
10799
|
function getWorkspacesRoot() {
|
|
10420
|
-
return
|
|
10800
|
+
return import_node_path23.default.join(getAgentvHome(), "workspaces");
|
|
10421
10801
|
}
|
|
10422
10802
|
function getSubagentsRoot() {
|
|
10423
|
-
return
|
|
10803
|
+
return import_node_path23.default.join(getAgentvHome(), "subagents");
|
|
10424
10804
|
}
|
|
10425
10805
|
function getTraceStateRoot() {
|
|
10426
|
-
return
|
|
10806
|
+
return import_node_path23.default.join(getAgentvHome(), "trace-state");
|
|
10427
10807
|
}
|
|
10428
10808
|
function getWorkspacePoolRoot() {
|
|
10429
|
-
return
|
|
10809
|
+
return import_node_path23.default.join(getAgentvHome(), "workspace-pool");
|
|
10430
10810
|
}
|
|
10431
10811
|
|
|
10432
10812
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
@@ -10448,7 +10828,7 @@ async function promptInstall() {
|
|
|
10448
10828
|
}
|
|
10449
10829
|
}
|
|
10450
10830
|
function findManagedSdkInstallRoot() {
|
|
10451
|
-
return
|
|
10831
|
+
return import_node_path24.default.join(getAgentvHome(), "deps", "pi-sdk");
|
|
10452
10832
|
}
|
|
10453
10833
|
function resolveGlobalNpmRoot() {
|
|
10454
10834
|
try {
|
|
@@ -10462,7 +10842,7 @@ function resolveGlobalNpmRoot() {
|
|
|
10462
10842
|
}
|
|
10463
10843
|
}
|
|
10464
10844
|
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
|
|
10465
|
-
return
|
|
10845
|
+
return import_node_path24.default.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
|
|
10466
10846
|
}
|
|
10467
10847
|
function findAccessiblePath(paths) {
|
|
10468
10848
|
for (const candidate of paths) {
|
|
@@ -10488,11 +10868,11 @@ async function tryImportLocalSdkModules() {
|
|
|
10488
10868
|
async function tryImportManagedSdkModules() {
|
|
10489
10869
|
const managedRoot = findManagedSdkInstallRoot();
|
|
10490
10870
|
const piCodingAgentEntry = findAccessiblePath([
|
|
10491
|
-
|
|
10871
|
+
import_node_path24.default.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
|
|
10492
10872
|
]);
|
|
10493
10873
|
const piAiEntry = findAccessiblePath([
|
|
10494
|
-
|
|
10495
|
-
|
|
10874
|
+
import_node_path24.default.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
|
|
10875
|
+
import_node_path24.default.join(
|
|
10496
10876
|
managedRoot,
|
|
10497
10877
|
"node_modules",
|
|
10498
10878
|
"@mariozechner",
|
|
@@ -10507,8 +10887,8 @@ async function tryImportManagedSdkModules() {
|
|
|
10507
10887
|
if (!piCodingAgentEntry || !piAiEntry) return false;
|
|
10508
10888
|
try {
|
|
10509
10889
|
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
10510
|
-
import((0,
|
|
10511
|
-
import((0,
|
|
10890
|
+
import((0, import_node_url4.pathToFileURL)(piCodingAgentEntry).href),
|
|
10891
|
+
import((0, import_node_url4.pathToFileURL)(piAiEntry).href)
|
|
10512
10892
|
]);
|
|
10513
10893
|
return true;
|
|
10514
10894
|
} catch {
|
|
@@ -10523,7 +10903,7 @@ async function tryImportGlobalSdkModules() {
|
|
|
10523
10903
|
]);
|
|
10524
10904
|
const piAiEntry = findAccessiblePath([
|
|
10525
10905
|
buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
|
|
10526
|
-
|
|
10906
|
+
import_node_path24.default.join(
|
|
10527
10907
|
globalNpmRoot,
|
|
10528
10908
|
"@mariozechner",
|
|
10529
10909
|
"pi-coding-agent",
|
|
@@ -10537,8 +10917,8 @@ async function tryImportGlobalSdkModules() {
|
|
|
10537
10917
|
if (!piCodingAgentEntry || !piAiEntry) return false;
|
|
10538
10918
|
try {
|
|
10539
10919
|
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
10540
|
-
import((0,
|
|
10541
|
-
import((0,
|
|
10920
|
+
import((0, import_node_url4.pathToFileURL)(piCodingAgentEntry).href),
|
|
10921
|
+
import((0, import_node_url4.pathToFileURL)(piAiEntry).href)
|
|
10542
10922
|
]);
|
|
10543
10923
|
return true;
|
|
10544
10924
|
} catch {
|
|
@@ -10824,10 +11204,10 @@ ${fileList}`;
|
|
|
10824
11204
|
}
|
|
10825
11205
|
resolveCwd(cwdOverride) {
|
|
10826
11206
|
if (cwdOverride) {
|
|
10827
|
-
return
|
|
11207
|
+
return import_node_path24.default.resolve(cwdOverride);
|
|
10828
11208
|
}
|
|
10829
11209
|
if (this.config.cwd) {
|
|
10830
|
-
return
|
|
11210
|
+
return import_node_path24.default.resolve(this.config.cwd);
|
|
10831
11211
|
}
|
|
10832
11212
|
return process.cwd();
|
|
10833
11213
|
}
|
|
@@ -10846,9 +11226,9 @@ ${fileList}`;
|
|
|
10846
11226
|
}
|
|
10847
11227
|
resolveLogDirectory() {
|
|
10848
11228
|
if (this.config.logDir) {
|
|
10849
|
-
return
|
|
11229
|
+
return import_node_path24.default.resolve(this.config.logDir);
|
|
10850
11230
|
}
|
|
10851
|
-
return
|
|
11231
|
+
return import_node_path24.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
10852
11232
|
}
|
|
10853
11233
|
async createStreamLogger(request) {
|
|
10854
11234
|
const logDir = this.resolveLogDirectory();
|
|
@@ -10856,13 +11236,13 @@ ${fileList}`;
|
|
|
10856
11236
|
return void 0;
|
|
10857
11237
|
}
|
|
10858
11238
|
try {
|
|
10859
|
-
await (0,
|
|
11239
|
+
await (0, import_promises20.mkdir)(logDir, { recursive: true });
|
|
10860
11240
|
} catch (error) {
|
|
10861
11241
|
const message = error instanceof Error ? error.message : String(error);
|
|
10862
11242
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
10863
11243
|
return void 0;
|
|
10864
11244
|
}
|
|
10865
|
-
const filePath =
|
|
11245
|
+
const filePath = import_node_path24.default.join(logDir, buildLogFilename6(request, this.targetName));
|
|
10866
11246
|
try {
|
|
10867
11247
|
const logger = await PiStreamLogger2.create({
|
|
10868
11248
|
filePath,
|
|
@@ -11077,7 +11457,7 @@ var ProviderRegistry = class {
|
|
|
11077
11457
|
|
|
11078
11458
|
// src/evaluation/providers/targets.ts
|
|
11079
11459
|
init_cjs_shims();
|
|
11080
|
-
var
|
|
11460
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
11081
11461
|
var import_zod3 = require("zod");
|
|
11082
11462
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
11083
11463
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
@@ -11164,11 +11544,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
11164
11544
|
allowLiteral: true,
|
|
11165
11545
|
optionalEnv: true
|
|
11166
11546
|
});
|
|
11167
|
-
if (cwd && evalFilePath && !
|
|
11168
|
-
cwd =
|
|
11547
|
+
if (cwd && evalFilePath && !import_node_path25.default.isAbsolute(cwd)) {
|
|
11548
|
+
cwd = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), cwd);
|
|
11169
11549
|
}
|
|
11170
11550
|
if (!cwd && evalFilePath) {
|
|
11171
|
-
cwd =
|
|
11551
|
+
cwd = import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath));
|
|
11172
11552
|
}
|
|
11173
11553
|
return {
|
|
11174
11554
|
command,
|
|
@@ -11191,15 +11571,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
11191
11571
|
optionalEnv: true
|
|
11192
11572
|
}
|
|
11193
11573
|
);
|
|
11194
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11195
|
-
workspaceTemplate =
|
|
11574
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
11575
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11196
11576
|
}
|
|
11197
11577
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
11198
11578
|
allowLiteral: true,
|
|
11199
11579
|
optionalEnv: true
|
|
11200
11580
|
});
|
|
11201
|
-
if (cwd && evalFilePath && !
|
|
11202
|
-
cwd =
|
|
11581
|
+
if (cwd && evalFilePath && !import_node_path25.default.isAbsolute(cwd)) {
|
|
11582
|
+
cwd = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), cwd);
|
|
11203
11583
|
}
|
|
11204
11584
|
if (cwd && workspaceTemplate) {
|
|
11205
11585
|
throw new Error(
|
|
@@ -11207,7 +11587,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
11207
11587
|
);
|
|
11208
11588
|
}
|
|
11209
11589
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
11210
|
-
cwd =
|
|
11590
|
+
cwd = import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath));
|
|
11211
11591
|
}
|
|
11212
11592
|
const timeoutSeconds = input.timeout_seconds;
|
|
11213
11593
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
@@ -11759,8 +12139,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
11759
12139
|
optionalEnv: true
|
|
11760
12140
|
}
|
|
11761
12141
|
);
|
|
11762
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11763
|
-
workspaceTemplate =
|
|
12142
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12143
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11764
12144
|
}
|
|
11765
12145
|
if (cwd && workspaceTemplate) {
|
|
11766
12146
|
throw new Error(
|
|
@@ -11844,8 +12224,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
11844
12224
|
optionalEnv: true
|
|
11845
12225
|
}
|
|
11846
12226
|
);
|
|
11847
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11848
|
-
workspaceTemplate =
|
|
12227
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12228
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11849
12229
|
}
|
|
11850
12230
|
if (cwd && workspaceTemplate) {
|
|
11851
12231
|
throw new Error(
|
|
@@ -11961,8 +12341,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
11961
12341
|
optionalEnv: true
|
|
11962
12342
|
}
|
|
11963
12343
|
);
|
|
11964
|
-
if (workspaceTemplate && evalFilePath && !
|
|
11965
|
-
workspaceTemplate =
|
|
12344
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12345
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
11966
12346
|
}
|
|
11967
12347
|
if (cwd && workspaceTemplate) {
|
|
11968
12348
|
throw new Error(
|
|
@@ -12055,8 +12435,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
12055
12435
|
optionalEnv: true
|
|
12056
12436
|
}
|
|
12057
12437
|
);
|
|
12058
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12059
|
-
workspaceTemplate =
|
|
12438
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12439
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12060
12440
|
}
|
|
12061
12441
|
if (cwd && workspaceTemplate) {
|
|
12062
12442
|
throw new Error(
|
|
@@ -12141,8 +12521,8 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
12141
12521
|
`${target.name} pi-cli workspace template`,
|
|
12142
12522
|
{ allowLiteral: true, optionalEnv: true }
|
|
12143
12523
|
);
|
|
12144
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12145
|
-
workspaceTemplate =
|
|
12524
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12525
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12146
12526
|
}
|
|
12147
12527
|
if (cwd && workspaceTemplate) {
|
|
12148
12528
|
throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
|
|
@@ -12196,8 +12576,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
12196
12576
|
optionalEnv: true
|
|
12197
12577
|
}
|
|
12198
12578
|
);
|
|
12199
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12200
|
-
workspaceTemplate =
|
|
12579
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12580
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12201
12581
|
}
|
|
12202
12582
|
if (cwd && workspaceTemplate) {
|
|
12203
12583
|
throw new Error(
|
|
@@ -12253,8 +12633,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
12253
12633
|
optionalEnv: true
|
|
12254
12634
|
}
|
|
12255
12635
|
) : void 0;
|
|
12256
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12257
|
-
workspaceTemplate =
|
|
12636
|
+
if (workspaceTemplate && evalFilePath && !import_node_path25.default.isAbsolute(workspaceTemplate)) {
|
|
12637
|
+
workspaceTemplate = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12258
12638
|
}
|
|
12259
12639
|
const executableSource = target.executable;
|
|
12260
12640
|
const waitSource = target.wait;
|
|
@@ -12295,8 +12675,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
12295
12675
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
12296
12676
|
if (!parseResult.success) {
|
|
12297
12677
|
const firstError = parseResult.error.errors[0];
|
|
12298
|
-
const
|
|
12299
|
-
const prefix =
|
|
12678
|
+
const path55 = firstError?.path.join(".") || "";
|
|
12679
|
+
const prefix = path55 ? `${target.name} ${path55}: ` : `${target.name}: `;
|
|
12300
12680
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
12301
12681
|
}
|
|
12302
12682
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -12317,11 +12697,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
12317
12697
|
allowLiteral: true,
|
|
12318
12698
|
optionalEnv: true
|
|
12319
12699
|
});
|
|
12320
|
-
if (cwd && evalFilePath && !
|
|
12321
|
-
cwd =
|
|
12700
|
+
if (cwd && evalFilePath && !import_node_path25.default.isAbsolute(cwd)) {
|
|
12701
|
+
cwd = import_node_path25.default.resolve(import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath)), cwd);
|
|
12322
12702
|
}
|
|
12323
12703
|
if (!cwd && evalFilePath) {
|
|
12324
|
-
cwd =
|
|
12704
|
+
cwd = import_node_path25.default.dirname(import_node_path25.default.resolve(evalFilePath));
|
|
12325
12705
|
}
|
|
12326
12706
|
return {
|
|
12327
12707
|
command,
|
|
@@ -12543,8 +12923,8 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
12543
12923
|
// src/evaluation/providers/vscode-provider.ts
|
|
12544
12924
|
init_cjs_shims();
|
|
12545
12925
|
var import_node_child_process7 = require("child_process");
|
|
12546
|
-
var
|
|
12547
|
-
var
|
|
12926
|
+
var import_promises27 = require("fs/promises");
|
|
12927
|
+
var import_node_path36 = __toESM(require("path"), 1);
|
|
12548
12928
|
var import_node_util3 = require("util");
|
|
12549
12929
|
|
|
12550
12930
|
// src/evaluation/providers/vscode/index.ts
|
|
@@ -12552,36 +12932,36 @@ init_cjs_shims();
|
|
|
12552
12932
|
|
|
12553
12933
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
12554
12934
|
init_cjs_shims();
|
|
12555
|
-
var
|
|
12556
|
-
var
|
|
12935
|
+
var import_promises25 = require("fs/promises");
|
|
12936
|
+
var import_node_path34 = __toESM(require("path"), 1);
|
|
12557
12937
|
|
|
12558
12938
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
12559
12939
|
init_cjs_shims();
|
|
12560
12940
|
var import_node_fs11 = require("fs");
|
|
12561
|
-
var
|
|
12562
|
-
var
|
|
12941
|
+
var import_promises21 = require("fs/promises");
|
|
12942
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
12563
12943
|
async function pathExists(target) {
|
|
12564
12944
|
try {
|
|
12565
|
-
await (0,
|
|
12945
|
+
await (0, import_promises21.access)(target, import_node_fs11.constants.F_OK);
|
|
12566
12946
|
return true;
|
|
12567
12947
|
} catch {
|
|
12568
12948
|
return false;
|
|
12569
12949
|
}
|
|
12570
12950
|
}
|
|
12571
12951
|
async function ensureDir(target) {
|
|
12572
|
-
await (0,
|
|
12952
|
+
await (0, import_promises21.mkdir)(target, { recursive: true });
|
|
12573
12953
|
}
|
|
12574
12954
|
async function readDirEntries(target) {
|
|
12575
|
-
const entries = await (0,
|
|
12955
|
+
const entries = await (0, import_promises21.readdir)(target, { withFileTypes: true });
|
|
12576
12956
|
return entries.map((entry) => ({
|
|
12577
12957
|
name: entry.name,
|
|
12578
|
-
absolutePath:
|
|
12958
|
+
absolutePath: import_node_path26.default.join(target, entry.name),
|
|
12579
12959
|
isDirectory: entry.isDirectory()
|
|
12580
12960
|
}));
|
|
12581
12961
|
}
|
|
12582
12962
|
async function removeIfExists(target) {
|
|
12583
12963
|
try {
|
|
12584
|
-
await (0,
|
|
12964
|
+
await (0, import_promises21.rm)(target, { force: true, recursive: false });
|
|
12585
12965
|
} catch (error) {
|
|
12586
12966
|
if (error.code !== "ENOENT") {
|
|
12587
12967
|
throw error;
|
|
@@ -12591,9 +12971,9 @@ async function removeIfExists(target) {
|
|
|
12591
12971
|
|
|
12592
12972
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
12593
12973
|
init_cjs_shims();
|
|
12594
|
-
var
|
|
12974
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
12595
12975
|
function pathToFileUri2(filePath) {
|
|
12596
|
-
const absolutePath =
|
|
12976
|
+
const absolutePath = import_node_path27.default.isAbsolute(filePath) ? filePath : import_node_path27.default.resolve(filePath);
|
|
12597
12977
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
12598
12978
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
12599
12979
|
return `file:///${normalizedPath}`;
|
|
@@ -12603,7 +12983,7 @@ function pathToFileUri2(filePath) {
|
|
|
12603
12983
|
|
|
12604
12984
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
12605
12985
|
init_cjs_shims();
|
|
12606
|
-
var
|
|
12986
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
12607
12987
|
|
|
12608
12988
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
12609
12989
|
init_cjs_shims();
|
|
@@ -12697,8 +13077,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
12697
13077
|
});
|
|
12698
13078
|
}
|
|
12699
13079
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
12700
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
12701
|
-
const responseList = responseFiles.map((file) => `"${
|
|
13080
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path28.default.basename(file)}`).join("\n");
|
|
13081
|
+
const responseList = responseFiles.map((file) => `"${import_node_path28.default.basename(file)}"`).join(", ");
|
|
12702
13082
|
return renderTemplate2(templateContent, {
|
|
12703
13083
|
requestFiles: requestLines,
|
|
12704
13084
|
responseList
|
|
@@ -12707,8 +13087,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
12707
13087
|
|
|
12708
13088
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
12709
13089
|
init_cjs_shims();
|
|
12710
|
-
var
|
|
12711
|
-
var
|
|
13090
|
+
var import_promises22 = require("fs/promises");
|
|
13091
|
+
var import_node_path29 = __toESM(require("path"), 1);
|
|
12712
13092
|
|
|
12713
13093
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
12714
13094
|
init_cjs_shims();
|
|
@@ -12747,7 +13127,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
12747
13127
|
const maxAttempts = 10;
|
|
12748
13128
|
while (attempts < maxAttempts) {
|
|
12749
13129
|
try {
|
|
12750
|
-
const content = await (0,
|
|
13130
|
+
const content = await (0, import_promises22.readFile)(responseFileFinal, { encoding: "utf8" });
|
|
12751
13131
|
if (!silent) {
|
|
12752
13132
|
process.stdout.write(`${content}
|
|
12753
13133
|
`);
|
|
@@ -12768,7 +13148,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
12768
13148
|
}
|
|
12769
13149
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
12770
13150
|
if (!silent) {
|
|
12771
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
13151
|
+
const fileList = responseFilesFinal.map((file) => import_node_path29.default.basename(file)).join(", ");
|
|
12772
13152
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
12773
13153
|
}
|
|
12774
13154
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -12777,7 +13157,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
12777
13157
|
while (pending.size > 0) {
|
|
12778
13158
|
if (Date.now() >= deadline) {
|
|
12779
13159
|
if (!silent) {
|
|
12780
|
-
const remaining = [...pending].map((f) =>
|
|
13160
|
+
const remaining = [...pending].map((f) => import_node_path29.default.basename(f)).join(", ");
|
|
12781
13161
|
console.error(
|
|
12782
13162
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
12783
13163
|
);
|
|
@@ -12804,7 +13184,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
12804
13184
|
const maxAttempts = 10;
|
|
12805
13185
|
while (attempts < maxAttempts) {
|
|
12806
13186
|
try {
|
|
12807
|
-
const content = await (0,
|
|
13187
|
+
const content = await (0, import_promises22.readFile)(file, { encoding: "utf8" });
|
|
12808
13188
|
if (!silent) {
|
|
12809
13189
|
process.stdout.write(`${content}
|
|
12810
13190
|
`);
|
|
@@ -12828,18 +13208,18 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
12828
13208
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
12829
13209
|
init_cjs_shims();
|
|
12830
13210
|
var import_node_child_process6 = require("child_process");
|
|
12831
|
-
var
|
|
12832
|
-
var
|
|
13211
|
+
var import_promises23 = require("fs/promises");
|
|
13212
|
+
var import_node_path31 = __toESM(require("path"), 1);
|
|
12833
13213
|
var import_node_util2 = require("util");
|
|
12834
13214
|
|
|
12835
13215
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
12836
13216
|
init_cjs_shims();
|
|
12837
|
-
var
|
|
13217
|
+
var import_node_path30 = __toESM(require("path"), 1);
|
|
12838
13218
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
12839
13219
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
12840
13220
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
12841
13221
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
12842
|
-
return
|
|
13222
|
+
return import_node_path30.default.join(getSubagentsRoot(), folder);
|
|
12843
13223
|
}
|
|
12844
13224
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
12845
13225
|
|
|
@@ -12906,12 +13286,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
12906
13286
|
await raceSpawnError(child);
|
|
12907
13287
|
return true;
|
|
12908
13288
|
}
|
|
12909
|
-
const aliveFile =
|
|
13289
|
+
const aliveFile = import_node_path31.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
12910
13290
|
await removeIfExists(aliveFile);
|
|
12911
|
-
const githubAgentsDir =
|
|
12912
|
-
await (0,
|
|
12913
|
-
const wakeupDst =
|
|
12914
|
-
await (0,
|
|
13291
|
+
const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
|
|
13292
|
+
await (0, import_promises23.mkdir)(githubAgentsDir, { recursive: true });
|
|
13293
|
+
const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
|
|
13294
|
+
await (0, import_promises23.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
12915
13295
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
12916
13296
|
label: "open-workspace"
|
|
12917
13297
|
});
|
|
@@ -12923,7 +13303,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
12923
13303
|
"chat",
|
|
12924
13304
|
"-m",
|
|
12925
13305
|
wakeupChatId,
|
|
12926
|
-
`create a file named .alive in the ${
|
|
13306
|
+
`create a file named .alive in the ${import_node_path31.default.basename(subagentDir)} folder`
|
|
12927
13307
|
];
|
|
12928
13308
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
12929
13309
|
await raceSpawnError(wakeupChild);
|
|
@@ -12938,27 +13318,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
12938
13318
|
return true;
|
|
12939
13319
|
}
|
|
12940
13320
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
12941
|
-
const workspacePath =
|
|
12942
|
-
const messagesDir =
|
|
12943
|
-
await (0,
|
|
12944
|
-
const reqFile =
|
|
12945
|
-
await (0,
|
|
13321
|
+
const workspacePath = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
13322
|
+
const messagesDir = import_node_path31.default.join(subagentDir, "messages");
|
|
13323
|
+
await (0, import_promises23.mkdir)(messagesDir, { recursive: true });
|
|
13324
|
+
const reqFile = import_node_path31.default.join(messagesDir, `${timestamp}_req.md`);
|
|
13325
|
+
await (0, import_promises23.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
12946
13326
|
const reqUri = pathToFileUri2(reqFile);
|
|
12947
13327
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
12948
13328
|
for (const attachment of attachmentPaths) {
|
|
12949
13329
|
chatArgs.push("-a", attachment);
|
|
12950
13330
|
}
|
|
12951
13331
|
chatArgs.push("-a", reqFile);
|
|
12952
|
-
chatArgs.push(`Follow instructions in [${
|
|
13332
|
+
chatArgs.push(`Follow instructions in [${import_node_path31.default.basename(reqFile)}](${reqUri})`);
|
|
12953
13333
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
12954
13334
|
workspacePath,
|
|
12955
|
-
|
|
13335
|
+
import_node_path31.default.basename(subagentDir),
|
|
12956
13336
|
subagentDir,
|
|
12957
13337
|
vscodeCmd
|
|
12958
13338
|
);
|
|
12959
13339
|
if (!workspaceReady) {
|
|
12960
13340
|
throw new Error(
|
|
12961
|
-
`VS Code workspace '${
|
|
13341
|
+
`VS Code workspace '${import_node_path31.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
12962
13342
|
);
|
|
12963
13343
|
}
|
|
12964
13344
|
await sleep2(500);
|
|
@@ -12966,9 +13346,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
12966
13346
|
await raceSpawnError(child);
|
|
12967
13347
|
}
|
|
12968
13348
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
12969
|
-
const workspacePath =
|
|
12970
|
-
const messagesDir =
|
|
12971
|
-
await (0,
|
|
13349
|
+
const workspacePath = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
13350
|
+
const messagesDir = import_node_path31.default.join(subagentDir, "messages");
|
|
13351
|
+
await (0, import_promises23.mkdir)(messagesDir, { recursive: true });
|
|
12972
13352
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
12973
13353
|
for (const attachment of attachmentPaths) {
|
|
12974
13354
|
chatArgs.push("-a", attachment);
|
|
@@ -12976,13 +13356,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
12976
13356
|
chatArgs.push(chatInstruction);
|
|
12977
13357
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
12978
13358
|
workspacePath,
|
|
12979
|
-
|
|
13359
|
+
import_node_path31.default.basename(subagentDir),
|
|
12980
13360
|
subagentDir,
|
|
12981
13361
|
vscodeCmd
|
|
12982
13362
|
);
|
|
12983
13363
|
if (!workspaceReady) {
|
|
12984
13364
|
throw new Error(
|
|
12985
|
-
`VS Code workspace '${
|
|
13365
|
+
`VS Code workspace '${import_node_path31.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
12986
13366
|
);
|
|
12987
13367
|
}
|
|
12988
13368
|
await sleep2(500);
|
|
@@ -12992,12 +13372,12 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
12992
13372
|
|
|
12993
13373
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
12994
13374
|
init_cjs_shims();
|
|
12995
|
-
var
|
|
12996
|
-
var
|
|
13375
|
+
var import_promises24 = require("fs/promises");
|
|
13376
|
+
var import_node_path33 = __toESM(require("path"), 1);
|
|
12997
13377
|
|
|
12998
13378
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
12999
13379
|
init_cjs_shims();
|
|
13000
|
-
var
|
|
13380
|
+
var import_node_path32 = __toESM(require("path"), 1);
|
|
13001
13381
|
var import_json5 = __toESM(require("json5"), 1);
|
|
13002
13382
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
13003
13383
|
let workspace;
|
|
@@ -13014,10 +13394,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
13014
13394
|
}
|
|
13015
13395
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
13016
13396
|
const folderPath = folder.path;
|
|
13017
|
-
if (
|
|
13397
|
+
if (import_node_path32.default.isAbsolute(folderPath)) {
|
|
13018
13398
|
return folder;
|
|
13019
13399
|
}
|
|
13020
|
-
const absolutePath =
|
|
13400
|
+
const absolutePath = import_node_path32.default.resolve(templateDir, folderPath);
|
|
13021
13401
|
return {
|
|
13022
13402
|
...folder,
|
|
13023
13403
|
path: absolutePath
|
|
@@ -13039,19 +13419,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
13039
13419
|
if (locationMap && typeof locationMap === "object") {
|
|
13040
13420
|
const transformedMap = {};
|
|
13041
13421
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
13042
|
-
const isAbsolute =
|
|
13422
|
+
const isAbsolute = import_node_path32.default.isAbsolute(locationPath);
|
|
13043
13423
|
if (isAbsolute) {
|
|
13044
13424
|
transformedMap[locationPath] = value;
|
|
13045
13425
|
} else {
|
|
13046
13426
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
13047
13427
|
if (firstGlobIndex === -1) {
|
|
13048
|
-
const resolvedPath =
|
|
13428
|
+
const resolvedPath = import_node_path32.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
13049
13429
|
transformedMap[resolvedPath] = value;
|
|
13050
13430
|
} else {
|
|
13051
13431
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
13052
13432
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
13053
13433
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
13054
|
-
const resolvedPath = (
|
|
13434
|
+
const resolvedPath = (import_node_path32.default.resolve(templateDir, basePath) + patternPath).replace(
|
|
13055
13435
|
/\\/g,
|
|
13056
13436
|
"/"
|
|
13057
13437
|
);
|
|
@@ -13092,7 +13472,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
13092
13472
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
13093
13473
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
13094
13474
|
for (const subagent of subagents) {
|
|
13095
|
-
const lockFile =
|
|
13475
|
+
const lockFile = import_node_path33.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
13096
13476
|
if (!await pathExists(lockFile)) {
|
|
13097
13477
|
return subagent.absolutePath;
|
|
13098
13478
|
}
|
|
@@ -13102,26 +13482,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
13102
13482
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
13103
13483
|
let workspaceContent;
|
|
13104
13484
|
if (workspaceTemplate) {
|
|
13105
|
-
const workspaceSrc =
|
|
13485
|
+
const workspaceSrc = import_node_path33.default.resolve(workspaceTemplate);
|
|
13106
13486
|
if (!await pathExists(workspaceSrc)) {
|
|
13107
13487
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
13108
13488
|
}
|
|
13109
|
-
const stats = await (0,
|
|
13489
|
+
const stats = await (0, import_promises24.stat)(workspaceSrc);
|
|
13110
13490
|
if (!stats.isFile()) {
|
|
13111
13491
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
13112
13492
|
}
|
|
13113
|
-
const templateText = await (0,
|
|
13493
|
+
const templateText = await (0, import_promises24.readFile)(workspaceSrc, "utf8");
|
|
13114
13494
|
workspaceContent = JSON.parse(templateText);
|
|
13115
13495
|
} else {
|
|
13116
13496
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
13117
13497
|
}
|
|
13118
|
-
const workspaceName = `${
|
|
13119
|
-
const workspaceDst =
|
|
13120
|
-
const templateDir = workspaceTemplate ?
|
|
13498
|
+
const workspaceName = `${import_node_path33.default.basename(subagentDir)}.code-workspace`;
|
|
13499
|
+
const workspaceDst = import_node_path33.default.join(subagentDir, workspaceName);
|
|
13500
|
+
const templateDir = workspaceTemplate ? import_node_path33.default.dirname(import_node_path33.default.resolve(workspaceTemplate)) : subagentDir;
|
|
13121
13501
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
13122
13502
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
13123
13503
|
if (cwd) {
|
|
13124
|
-
const absCwd =
|
|
13504
|
+
const absCwd = import_node_path33.default.resolve(cwd);
|
|
13125
13505
|
const parsed = JSON.parse(transformedContent);
|
|
13126
13506
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
13127
13507
|
if (!alreadyPresent) {
|
|
@@ -13129,36 +13509,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
13129
13509
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
13130
13510
|
}
|
|
13131
13511
|
}
|
|
13132
|
-
await (0,
|
|
13133
|
-
const messagesDir =
|
|
13134
|
-
await (0,
|
|
13512
|
+
await (0, import_promises24.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
13513
|
+
const messagesDir = import_node_path33.default.join(subagentDir, "messages");
|
|
13514
|
+
await (0, import_promises24.mkdir)(messagesDir, { recursive: true });
|
|
13135
13515
|
return { workspace: workspaceDst, messagesDir };
|
|
13136
13516
|
}
|
|
13137
13517
|
async function createSubagentLock(subagentDir) {
|
|
13138
|
-
const messagesDir =
|
|
13518
|
+
const messagesDir = import_node_path33.default.join(subagentDir, "messages");
|
|
13139
13519
|
if (await pathExists(messagesDir)) {
|
|
13140
|
-
const files = await (0,
|
|
13520
|
+
const files = await (0, import_promises24.readdir)(messagesDir);
|
|
13141
13521
|
await Promise.all(
|
|
13142
13522
|
files.map(async (file) => {
|
|
13143
|
-
const target =
|
|
13523
|
+
const target = import_node_path33.default.join(messagesDir, file);
|
|
13144
13524
|
await removeIfExists(target);
|
|
13145
13525
|
})
|
|
13146
13526
|
);
|
|
13147
13527
|
}
|
|
13148
|
-
const githubAgentsDir =
|
|
13528
|
+
const githubAgentsDir = import_node_path33.default.join(subagentDir, ".github", "agents");
|
|
13149
13529
|
if (await pathExists(githubAgentsDir)) {
|
|
13150
|
-
const agentFiles = await (0,
|
|
13530
|
+
const agentFiles = await (0, import_promises24.readdir)(githubAgentsDir);
|
|
13151
13531
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
13152
13532
|
await Promise.all(
|
|
13153
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
13533
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path33.default.join(githubAgentsDir, file)))
|
|
13154
13534
|
);
|
|
13155
13535
|
}
|
|
13156
|
-
const lockFile =
|
|
13157
|
-
await (0,
|
|
13536
|
+
const lockFile = import_node_path33.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
13537
|
+
await (0, import_promises24.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
13158
13538
|
return lockFile;
|
|
13159
13539
|
}
|
|
13160
13540
|
async function removeSubagentLock(subagentDir) {
|
|
13161
|
-
const lockFile =
|
|
13541
|
+
const lockFile = import_node_path33.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
13162
13542
|
await removeIfExists(lockFile);
|
|
13163
13543
|
}
|
|
13164
13544
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -13178,11 +13558,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
13178
13558
|
return 1;
|
|
13179
13559
|
}
|
|
13180
13560
|
if (promptFile) {
|
|
13181
|
-
const githubAgentsDir =
|
|
13182
|
-
await (0,
|
|
13183
|
-
const agentFile =
|
|
13561
|
+
const githubAgentsDir = import_node_path33.default.join(subagentDir, ".github", "agents");
|
|
13562
|
+
await (0, import_promises24.mkdir)(githubAgentsDir, { recursive: true });
|
|
13563
|
+
const agentFile = import_node_path33.default.join(githubAgentsDir, `${chatId}.md`);
|
|
13184
13564
|
try {
|
|
13185
|
-
await (0,
|
|
13565
|
+
await (0, import_promises24.copyFile)(promptFile, agentFile);
|
|
13186
13566
|
} catch (error) {
|
|
13187
13567
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
13188
13568
|
return 1;
|
|
@@ -13199,11 +13579,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
13199
13579
|
if (!promptFile) {
|
|
13200
13580
|
return void 0;
|
|
13201
13581
|
}
|
|
13202
|
-
const resolvedPrompt =
|
|
13582
|
+
const resolvedPrompt = import_node_path34.default.resolve(promptFile);
|
|
13203
13583
|
if (!await pathExists(resolvedPrompt)) {
|
|
13204
13584
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
13205
13585
|
}
|
|
13206
|
-
const promptStats = await (0,
|
|
13586
|
+
const promptStats = await (0, import_promises25.stat)(resolvedPrompt);
|
|
13207
13587
|
if (!promptStats.isFile()) {
|
|
13208
13588
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
13209
13589
|
}
|
|
@@ -13215,7 +13595,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
13215
13595
|
}
|
|
13216
13596
|
const resolved = [];
|
|
13217
13597
|
for (const attachment of extraAttachments) {
|
|
13218
|
-
const resolvedPath =
|
|
13598
|
+
const resolvedPath = import_node_path34.default.resolve(attachment);
|
|
13219
13599
|
if (!await pathExists(resolvedPath)) {
|
|
13220
13600
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
13221
13601
|
}
|
|
@@ -13257,7 +13637,7 @@ async function dispatchAgentSession(options) {
|
|
|
13257
13637
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
13258
13638
|
};
|
|
13259
13639
|
}
|
|
13260
|
-
const subagentName =
|
|
13640
|
+
const subagentName = import_node_path34.default.basename(subagentDir);
|
|
13261
13641
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
13262
13642
|
const preparationResult = await prepareSubagentDirectory(
|
|
13263
13643
|
subagentDir,
|
|
@@ -13285,9 +13665,9 @@ async function dispatchAgentSession(options) {
|
|
|
13285
13665
|
};
|
|
13286
13666
|
}
|
|
13287
13667
|
const timestamp = generateTimestamp();
|
|
13288
|
-
const messagesDir =
|
|
13289
|
-
const responseFileTmp =
|
|
13290
|
-
const responseFileFinal =
|
|
13668
|
+
const messagesDir = import_node_path34.default.join(subagentDir, "messages");
|
|
13669
|
+
const responseFileTmp = import_node_path34.default.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
13670
|
+
const responseFileFinal = import_node_path34.default.join(messagesDir, `${timestamp}_res.md`);
|
|
13291
13671
|
const requestInstructions = createRequestPrompt(
|
|
13292
13672
|
userQuery,
|
|
13293
13673
|
responseFileTmp,
|
|
@@ -13392,7 +13772,7 @@ async function dispatchBatchAgent(options) {
|
|
|
13392
13772
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
13393
13773
|
};
|
|
13394
13774
|
}
|
|
13395
|
-
subagentName =
|
|
13775
|
+
subagentName = import_node_path34.default.basename(subagentDir);
|
|
13396
13776
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
13397
13777
|
const preparationResult = await prepareSubagentDirectory(
|
|
13398
13778
|
subagentDir,
|
|
@@ -13423,24 +13803,24 @@ async function dispatchBatchAgent(options) {
|
|
|
13423
13803
|
};
|
|
13424
13804
|
}
|
|
13425
13805
|
const timestamp = generateTimestamp();
|
|
13426
|
-
const messagesDir =
|
|
13806
|
+
const messagesDir = import_node_path34.default.join(subagentDir, "messages");
|
|
13427
13807
|
requestFiles = userQueries.map(
|
|
13428
|
-
(_, index) =>
|
|
13808
|
+
(_, index) => import_node_path34.default.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
13429
13809
|
);
|
|
13430
13810
|
const responseTmpFiles = userQueries.map(
|
|
13431
|
-
(_, index) =>
|
|
13811
|
+
(_, index) => import_node_path34.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
13432
13812
|
);
|
|
13433
13813
|
responseFilesFinal = userQueries.map(
|
|
13434
|
-
(_, index) =>
|
|
13814
|
+
(_, index) => import_node_path34.default.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
13435
13815
|
);
|
|
13436
|
-
const orchestratorFile =
|
|
13816
|
+
const orchestratorFile = import_node_path34.default.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
13437
13817
|
if (!dryRun) {
|
|
13438
13818
|
await Promise.all(
|
|
13439
13819
|
userQueries.map((query, index) => {
|
|
13440
13820
|
const reqFile = requestFiles[index];
|
|
13441
13821
|
const tmpFile = responseTmpFiles[index];
|
|
13442
13822
|
const finalFile = responseFilesFinal[index];
|
|
13443
|
-
return (0,
|
|
13823
|
+
return (0, import_promises25.writeFile)(
|
|
13444
13824
|
reqFile,
|
|
13445
13825
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
13446
13826
|
{ encoding: "utf8" }
|
|
@@ -13452,7 +13832,7 @@ async function dispatchBatchAgent(options) {
|
|
|
13452
13832
|
responseFilesFinal,
|
|
13453
13833
|
orchestratorTemplateContent
|
|
13454
13834
|
);
|
|
13455
|
-
await (0,
|
|
13835
|
+
await (0, import_promises25.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
13456
13836
|
}
|
|
13457
13837
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
13458
13838
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -13519,8 +13899,8 @@ async function dispatchBatchAgent(options) {
|
|
|
13519
13899
|
|
|
13520
13900
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
13521
13901
|
init_cjs_shims();
|
|
13522
|
-
var
|
|
13523
|
-
var
|
|
13902
|
+
var import_promises26 = require("fs/promises");
|
|
13903
|
+
var import_node_path35 = __toESM(require("path"), 1);
|
|
13524
13904
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
13525
13905
|
folders: [
|
|
13526
13906
|
{
|
|
@@ -13551,7 +13931,7 @@ async function provisionSubagents(options) {
|
|
|
13551
13931
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
13552
13932
|
throw new Error("subagents must be a positive integer");
|
|
13553
13933
|
}
|
|
13554
|
-
const targetPath =
|
|
13934
|
+
const targetPath = import_node_path35.default.resolve(targetRoot);
|
|
13555
13935
|
if (!dryRun) {
|
|
13556
13936
|
await ensureDir(targetPath);
|
|
13557
13937
|
}
|
|
@@ -13571,7 +13951,7 @@ async function provisionSubagents(options) {
|
|
|
13571
13951
|
continue;
|
|
13572
13952
|
}
|
|
13573
13953
|
highestNumber = Math.max(highestNumber, parsed);
|
|
13574
|
-
const lockFile =
|
|
13954
|
+
const lockFile = import_node_path35.default.join(entry.absolutePath, lockName);
|
|
13575
13955
|
const locked = await pathExists(lockFile);
|
|
13576
13956
|
if (locked) {
|
|
13577
13957
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -13588,10 +13968,10 @@ async function provisionSubagents(options) {
|
|
|
13588
13968
|
break;
|
|
13589
13969
|
}
|
|
13590
13970
|
const subagentDir = subagent.absolutePath;
|
|
13591
|
-
const githubAgentsDir =
|
|
13592
|
-
const lockFile =
|
|
13593
|
-
const workspaceDst =
|
|
13594
|
-
const wakeupDst =
|
|
13971
|
+
const githubAgentsDir = import_node_path35.default.join(subagentDir, ".github", "agents");
|
|
13972
|
+
const lockFile = import_node_path35.default.join(subagentDir, lockName);
|
|
13973
|
+
const workspaceDst = import_node_path35.default.join(subagentDir, `${import_node_path35.default.basename(subagentDir)}.code-workspace`);
|
|
13974
|
+
const wakeupDst = import_node_path35.default.join(githubAgentsDir, "wakeup.md");
|
|
13595
13975
|
const isLocked = await pathExists(lockFile);
|
|
13596
13976
|
if (isLocked && !force) {
|
|
13597
13977
|
continue;
|
|
@@ -13600,8 +13980,8 @@ async function provisionSubagents(options) {
|
|
|
13600
13980
|
if (!dryRun) {
|
|
13601
13981
|
await removeIfExists(lockFile);
|
|
13602
13982
|
await ensureDir(githubAgentsDir);
|
|
13603
|
-
await (0,
|
|
13604
|
-
await (0,
|
|
13983
|
+
await (0, import_promises26.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
13984
|
+
await (0, import_promises26.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
13605
13985
|
}
|
|
13606
13986
|
created.push(subagentDir);
|
|
13607
13987
|
lockedSubagents.delete(subagentDir);
|
|
@@ -13611,8 +13991,8 @@ async function provisionSubagents(options) {
|
|
|
13611
13991
|
if (!isLocked && force) {
|
|
13612
13992
|
if (!dryRun) {
|
|
13613
13993
|
await ensureDir(githubAgentsDir);
|
|
13614
|
-
await (0,
|
|
13615
|
-
await (0,
|
|
13994
|
+
await (0, import_promises26.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
13995
|
+
await (0, import_promises26.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
13616
13996
|
}
|
|
13617
13997
|
created.push(subagentDir);
|
|
13618
13998
|
subagentsProvisioned += 1;
|
|
@@ -13620,8 +14000,8 @@ async function provisionSubagents(options) {
|
|
|
13620
14000
|
}
|
|
13621
14001
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
13622
14002
|
await ensureDir(githubAgentsDir);
|
|
13623
|
-
await (0,
|
|
13624
|
-
await (0,
|
|
14003
|
+
await (0, import_promises26.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14004
|
+
await (0, import_promises26.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
13625
14005
|
}
|
|
13626
14006
|
skippedExisting.push(subagentDir);
|
|
13627
14007
|
subagentsProvisioned += 1;
|
|
@@ -13629,15 +14009,15 @@ async function provisionSubagents(options) {
|
|
|
13629
14009
|
let nextIndex = highestNumber;
|
|
13630
14010
|
while (subagentsProvisioned < subagents) {
|
|
13631
14011
|
nextIndex += 1;
|
|
13632
|
-
const subagentDir =
|
|
13633
|
-
const githubAgentsDir =
|
|
13634
|
-
const workspaceDst =
|
|
13635
|
-
const wakeupDst =
|
|
14012
|
+
const subagentDir = import_node_path35.default.join(targetPath, `subagent-${nextIndex}`);
|
|
14013
|
+
const githubAgentsDir = import_node_path35.default.join(subagentDir, ".github", "agents");
|
|
14014
|
+
const workspaceDst = import_node_path35.default.join(subagentDir, `${import_node_path35.default.basename(subagentDir)}.code-workspace`);
|
|
14015
|
+
const wakeupDst = import_node_path35.default.join(githubAgentsDir, "wakeup.md");
|
|
13636
14016
|
if (!dryRun) {
|
|
13637
14017
|
await ensureDir(subagentDir);
|
|
13638
14018
|
await ensureDir(githubAgentsDir);
|
|
13639
|
-
await (0,
|
|
13640
|
-
await (0,
|
|
14019
|
+
await (0, import_promises26.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14020
|
+
await (0, import_promises26.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
13641
14021
|
}
|
|
13642
14022
|
created.push(subagentDir);
|
|
13643
14023
|
subagentsProvisioned += 1;
|
|
@@ -13823,9 +14203,9 @@ var VSCodeProvider = class {
|
|
|
13823
14203
|
async function locateVSCodeExecutable(candidate) {
|
|
13824
14204
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
13825
14205
|
if (includesPathSeparator) {
|
|
13826
|
-
const resolved =
|
|
14206
|
+
const resolved = import_node_path36.default.isAbsolute(candidate) ? candidate : import_node_path36.default.resolve(candidate);
|
|
13827
14207
|
try {
|
|
13828
|
-
await (0,
|
|
14208
|
+
await (0, import_promises27.access)(resolved, import_promises27.constants.F_OK);
|
|
13829
14209
|
return resolved;
|
|
13830
14210
|
} catch {
|
|
13831
14211
|
throw new Error(
|
|
@@ -13838,7 +14218,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
13838
14218
|
const { stdout } = await execAsync3(`${locator} ${candidate}`);
|
|
13839
14219
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
13840
14220
|
if (lines.length > 0 && lines[0]) {
|
|
13841
|
-
await (0,
|
|
14221
|
+
await (0, import_promises27.access)(lines[0], import_promises27.constants.F_OK);
|
|
13842
14222
|
return lines[0];
|
|
13843
14223
|
}
|
|
13844
14224
|
} catch {
|
|
@@ -13852,7 +14232,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
13852
14232
|
return void 0;
|
|
13853
14233
|
}
|
|
13854
14234
|
try {
|
|
13855
|
-
const stats = await (0,
|
|
14235
|
+
const stats = await (0, import_promises27.stat)(import_node_path36.default.resolve(template));
|
|
13856
14236
|
return stats.isFile() ? template : void 0;
|
|
13857
14237
|
} catch {
|
|
13858
14238
|
return template;
|
|
@@ -13876,7 +14256,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
13876
14256
|
return "";
|
|
13877
14257
|
}
|
|
13878
14258
|
const buildList = (files) => files.map((absolutePath) => {
|
|
13879
|
-
const fileName =
|
|
14259
|
+
const fileName = import_node_path36.default.basename(absolutePath);
|
|
13880
14260
|
const fileUri = pathToFileUri3(absolutePath);
|
|
13881
14261
|
return `* [${fileName}](${fileUri})`;
|
|
13882
14262
|
});
|
|
@@ -13897,7 +14277,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
13897
14277
|
}
|
|
13898
14278
|
const unique = /* @__PURE__ */ new Map();
|
|
13899
14279
|
for (const attachment of attachments) {
|
|
13900
|
-
const absolutePath =
|
|
14280
|
+
const absolutePath = import_node_path36.default.resolve(attachment);
|
|
13901
14281
|
if (!unique.has(absolutePath)) {
|
|
13902
14282
|
unique.set(absolutePath, absolutePath);
|
|
13903
14283
|
}
|
|
@@ -13905,7 +14285,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
13905
14285
|
return Array.from(unique.values());
|
|
13906
14286
|
}
|
|
13907
14287
|
function pathToFileUri3(filePath) {
|
|
13908
|
-
const absolutePath =
|
|
14288
|
+
const absolutePath = import_node_path36.default.isAbsolute(filePath) ? filePath : import_node_path36.default.resolve(filePath);
|
|
13909
14289
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
13910
14290
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
13911
14291
|
return `file:///${normalizedPath}`;
|
|
@@ -13918,7 +14298,7 @@ function normalizeAttachments(attachments) {
|
|
|
13918
14298
|
}
|
|
13919
14299
|
const deduped = /* @__PURE__ */ new Set();
|
|
13920
14300
|
for (const attachment of attachments) {
|
|
13921
|
-
deduped.add(
|
|
14301
|
+
deduped.add(import_node_path36.default.resolve(attachment));
|
|
13922
14302
|
}
|
|
13923
14303
|
return Array.from(deduped);
|
|
13924
14304
|
}
|
|
@@ -13927,7 +14307,7 @@ function mergeAttachments(all) {
|
|
|
13927
14307
|
for (const list of all) {
|
|
13928
14308
|
if (!list) continue;
|
|
13929
14309
|
for (const inputFile of list) {
|
|
13930
|
-
deduped.add(
|
|
14310
|
+
deduped.add(import_node_path36.default.resolve(inputFile));
|
|
13931
14311
|
}
|
|
13932
14312
|
}
|
|
13933
14313
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -14021,8 +14401,8 @@ function isAgentProvider(provider) {
|
|
|
14021
14401
|
// src/evaluation/providers/targets-file.ts
|
|
14022
14402
|
init_cjs_shims();
|
|
14023
14403
|
var import_node_fs12 = require("fs");
|
|
14024
|
-
var
|
|
14025
|
-
var
|
|
14404
|
+
var import_promises28 = require("fs/promises");
|
|
14405
|
+
var import_node_path37 = __toESM(require("path"), 1);
|
|
14026
14406
|
var import_yaml7 = require("yaml");
|
|
14027
14407
|
function isRecord(value) {
|
|
14028
14408
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -14055,18 +14435,18 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
14055
14435
|
}
|
|
14056
14436
|
async function fileExists3(filePath) {
|
|
14057
14437
|
try {
|
|
14058
|
-
await (0,
|
|
14438
|
+
await (0, import_promises28.access)(filePath, import_node_fs12.constants.F_OK);
|
|
14059
14439
|
return true;
|
|
14060
14440
|
} catch {
|
|
14061
14441
|
return false;
|
|
14062
14442
|
}
|
|
14063
14443
|
}
|
|
14064
14444
|
async function readTargetDefinitions(filePath) {
|
|
14065
|
-
const absolutePath =
|
|
14445
|
+
const absolutePath = import_node_path37.default.resolve(filePath);
|
|
14066
14446
|
if (!await fileExists3(absolutePath)) {
|
|
14067
14447
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
14068
14448
|
}
|
|
14069
|
-
const raw = await (0,
|
|
14449
|
+
const raw = await (0, import_promises28.readFile)(absolutePath, "utf8");
|
|
14070
14450
|
const parsed = (0, import_yaml7.parse)(raw);
|
|
14071
14451
|
if (!isRecord(parsed)) {
|
|
14072
14452
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -14083,16 +14463,16 @@ function listTargetNames(definitions) {
|
|
|
14083
14463
|
|
|
14084
14464
|
// src/evaluation/providers/provider-discovery.ts
|
|
14085
14465
|
init_cjs_shims();
|
|
14086
|
-
var
|
|
14466
|
+
var import_node_path38 = __toESM(require("path"), 1);
|
|
14087
14467
|
var import_fast_glob2 = __toESM(require("fast-glob"), 1);
|
|
14088
14468
|
async function discoverProviders(registry, baseDir) {
|
|
14089
14469
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
14090
14470
|
const candidateDirs = [];
|
|
14091
|
-
let dir =
|
|
14092
|
-
const root =
|
|
14471
|
+
let dir = import_node_path38.default.resolve(baseDir);
|
|
14472
|
+
const root = import_node_path38.default.parse(dir).root;
|
|
14093
14473
|
while (dir !== root) {
|
|
14094
|
-
candidateDirs.push(
|
|
14095
|
-
dir =
|
|
14474
|
+
candidateDirs.push(import_node_path38.default.join(dir, ".agentv", "providers"));
|
|
14475
|
+
dir = import_node_path38.default.dirname(dir);
|
|
14096
14476
|
}
|
|
14097
14477
|
let files = [];
|
|
14098
14478
|
for (const providersDir of candidateDirs) {
|
|
@@ -14108,7 +14488,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
14108
14488
|
}
|
|
14109
14489
|
const discoveredKinds = [];
|
|
14110
14490
|
for (const filePath of files) {
|
|
14111
|
-
const basename =
|
|
14491
|
+
const basename = import_node_path38.default.basename(filePath);
|
|
14112
14492
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
14113
14493
|
if (registry.has(kindName)) {
|
|
14114
14494
|
continue;
|
|
@@ -14238,154 +14618,9 @@ function negateScore(score) {
|
|
|
14238
14618
|
|
|
14239
14619
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
14240
14620
|
init_cjs_shims();
|
|
14241
|
-
var
|
|
14621
|
+
var import_promises29 = require("fs/promises");
|
|
14242
14622
|
var import_node_os7 = require("os");
|
|
14243
|
-
var
|
|
14244
|
-
|
|
14245
|
-
// src/runtime/exec.ts
|
|
14246
|
-
init_cjs_shims();
|
|
14247
|
-
function shellEscapePath(value) {
|
|
14248
|
-
if (process.platform === "win32") {
|
|
14249
|
-
return `"${value.replaceAll('"', '""')}"`;
|
|
14250
|
-
}
|
|
14251
|
-
return `'${value.replaceAll("'", `'"'"'`)}'`;
|
|
14252
|
-
}
|
|
14253
|
-
async function execFileWithStdin(argv, stdinPayload, options = {}) {
|
|
14254
|
-
if (argv.length === 0) {
|
|
14255
|
-
throw new Error("Executable argv must include at least one entry");
|
|
14256
|
-
}
|
|
14257
|
-
if (typeof Bun !== "undefined") {
|
|
14258
|
-
return execFileWithStdinBun(argv, stdinPayload, options);
|
|
14259
|
-
}
|
|
14260
|
-
return execFileWithStdinNode(argv, stdinPayload, options);
|
|
14261
|
-
}
|
|
14262
|
-
async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
14263
|
-
const command = [...argv];
|
|
14264
|
-
const encoder = new TextEncoder();
|
|
14265
|
-
const proc = Bun.spawn(command, {
|
|
14266
|
-
cwd: options.cwd,
|
|
14267
|
-
stdin: encoder.encode(stdinPayload),
|
|
14268
|
-
stdout: "pipe",
|
|
14269
|
-
stderr: "pipe",
|
|
14270
|
-
// Merge additional env vars with process.env
|
|
14271
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
14272
|
-
});
|
|
14273
|
-
let timedOut = false;
|
|
14274
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
14275
|
-
timedOut = true;
|
|
14276
|
-
proc.kill("SIGKILL");
|
|
14277
|
-
}, options.timeoutMs) : void 0;
|
|
14278
|
-
try {
|
|
14279
|
-
const stdoutPromise = proc.stdout ? new Response(proc.stdout).text() : Promise.resolve("");
|
|
14280
|
-
const stderrPromise = proc.stderr ? new Response(proc.stderr).text() : Promise.resolve("");
|
|
14281
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
14282
|
-
stdoutPromise,
|
|
14283
|
-
stderrPromise,
|
|
14284
|
-
proc.exited
|
|
14285
|
-
]);
|
|
14286
|
-
if (timedOut) {
|
|
14287
|
-
throw new Error(`Process timed out after ${options.timeoutMs}ms`);
|
|
14288
|
-
}
|
|
14289
|
-
return {
|
|
14290
|
-
stdout: stdout.replace(/\r\n/g, "\n"),
|
|
14291
|
-
stderr: stderr.replace(/\r\n/g, "\n"),
|
|
14292
|
-
exitCode
|
|
14293
|
-
};
|
|
14294
|
-
} finally {
|
|
14295
|
-
if (timeout !== void 0) {
|
|
14296
|
-
clearTimeout(timeout);
|
|
14297
|
-
}
|
|
14298
|
-
}
|
|
14299
|
-
}
|
|
14300
|
-
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
14301
|
-
const { spawn: spawn5 } = await import("child_process");
|
|
14302
|
-
return new Promise((resolve, reject) => {
|
|
14303
|
-
const [cmd, ...args] = argv;
|
|
14304
|
-
const child = spawn5(cmd, args, {
|
|
14305
|
-
cwd: options.cwd,
|
|
14306
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
14307
|
-
// Merge additional env vars with process.env
|
|
14308
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
14309
|
-
});
|
|
14310
|
-
const stdoutChunks = [];
|
|
14311
|
-
const stderrChunks = [];
|
|
14312
|
-
child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
|
|
14313
|
-
child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
|
|
14314
|
-
let timedOut = false;
|
|
14315
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
14316
|
-
timedOut = true;
|
|
14317
|
-
child.kill("SIGKILL");
|
|
14318
|
-
}, options.timeoutMs) : void 0;
|
|
14319
|
-
child.on("error", (error) => {
|
|
14320
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
14321
|
-
reject(error);
|
|
14322
|
-
});
|
|
14323
|
-
child.on("close", (code) => {
|
|
14324
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
14325
|
-
if (timedOut) {
|
|
14326
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
14327
|
-
return;
|
|
14328
|
-
}
|
|
14329
|
-
const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
14330
|
-
const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
14331
|
-
resolve({
|
|
14332
|
-
stdout,
|
|
14333
|
-
stderr,
|
|
14334
|
-
exitCode: code ?? 0
|
|
14335
|
-
});
|
|
14336
|
-
});
|
|
14337
|
-
if (child.stdin) {
|
|
14338
|
-
child.stdin.write(stdinPayload);
|
|
14339
|
-
child.stdin.end();
|
|
14340
|
-
}
|
|
14341
|
-
});
|
|
14342
|
-
}
|
|
14343
|
-
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
14344
|
-
const { mkdir: mkdir17, readFile: readFile18, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
14345
|
-
const { tmpdir: tmpdir3 } = await import("os");
|
|
14346
|
-
const path54 = await import("path");
|
|
14347
|
-
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
14348
|
-
const dir = path54.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
14349
|
-
await mkdir17(dir, { recursive: true });
|
|
14350
|
-
const stdinPath = path54.join(dir, "stdin.txt");
|
|
14351
|
-
const stdoutPath = path54.join(dir, "stdout.txt");
|
|
14352
|
-
const stderrPath = path54.join(dir, "stderr.txt");
|
|
14353
|
-
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
14354
|
-
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
14355
|
-
const { spawn: spawn5 } = await import("child_process");
|
|
14356
|
-
try {
|
|
14357
|
-
const exitCode = await new Promise((resolve, reject) => {
|
|
14358
|
-
const child = spawn5(wrappedCommand, {
|
|
14359
|
-
shell: true,
|
|
14360
|
-
cwd: options.cwd,
|
|
14361
|
-
stdio: ["ignore", "ignore", "ignore"],
|
|
14362
|
-
// Merge additional env vars with process.env
|
|
14363
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
14364
|
-
});
|
|
14365
|
-
const timeout = options.timeoutMs ? setTimeout(() => {
|
|
14366
|
-
child.kill();
|
|
14367
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
14368
|
-
}, options.timeoutMs) : void 0;
|
|
14369
|
-
child.on("error", (error) => {
|
|
14370
|
-
if (timeout !== void 0) {
|
|
14371
|
-
clearTimeout(timeout);
|
|
14372
|
-
}
|
|
14373
|
-
reject(error);
|
|
14374
|
-
});
|
|
14375
|
-
child.on("exit", (code) => {
|
|
14376
|
-
if (timeout !== void 0) {
|
|
14377
|
-
clearTimeout(timeout);
|
|
14378
|
-
}
|
|
14379
|
-
resolve(code ?? 0);
|
|
14380
|
-
});
|
|
14381
|
-
});
|
|
14382
|
-
const stdout = (await readFile18(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
14383
|
-
const stderr = (await readFile18(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
14384
|
-
return { stdout, stderr, exitCode };
|
|
14385
|
-
} finally {
|
|
14386
|
-
await rm6(dir, { recursive: true, force: true });
|
|
14387
|
-
}
|
|
14388
|
-
}
|
|
14623
|
+
var import_node_path39 = require("path");
|
|
14389
14624
|
|
|
14390
14625
|
// src/runtime/target-proxy.ts
|
|
14391
14626
|
init_cjs_shims();
|
|
@@ -14706,8 +14941,8 @@ async function materializeContentForGrader(messages, getWorkDir) {
|
|
|
14706
14941
|
const [, mediaType, base64Data] = match;
|
|
14707
14942
|
const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
|
|
14708
14943
|
const dir = await getWorkDir();
|
|
14709
|
-
const filePath = (0,
|
|
14710
|
-
await (0,
|
|
14944
|
+
const filePath = (0, import_node_path39.join)(dir, `img-${counter++}.${ext}`);
|
|
14945
|
+
await (0, import_promises29.writeFile)(filePath, Buffer.from(base64Data, "base64"));
|
|
14711
14946
|
blocks.push({ type: "image", media_type: img.media_type, path: filePath });
|
|
14712
14947
|
} else {
|
|
14713
14948
|
blocks.push({ type: "image", media_type: img.media_type, path: img.source });
|
|
@@ -14735,7 +14970,7 @@ var CodeEvaluator = class {
|
|
|
14735
14970
|
let imageTmpDir;
|
|
14736
14971
|
const getImageDir = async () => {
|
|
14737
14972
|
if (!imageTmpDir) {
|
|
14738
|
-
imageTmpDir = await (0,
|
|
14973
|
+
imageTmpDir = await (0, import_promises29.mkdtemp)((0, import_node_path39.join)((0, import_node_os7.tmpdir)(), "agentv-img-"));
|
|
14739
14974
|
}
|
|
14740
14975
|
return imageTmpDir;
|
|
14741
14976
|
};
|
|
@@ -14748,9 +14983,9 @@ var CodeEvaluator = class {
|
|
|
14748
14983
|
if (outputForPayload) {
|
|
14749
14984
|
const serialized = JSON.stringify(outputForPayload);
|
|
14750
14985
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
14751
|
-
const tmpDir = await (0,
|
|
14752
|
-
outputPath = (0,
|
|
14753
|
-
await (0,
|
|
14986
|
+
const tmpDir = await (0, import_promises29.mkdtemp)((0, import_node_path39.join)((0, import_node_os7.tmpdir)(), "agentv-grader-"));
|
|
14987
|
+
outputPath = (0, import_node_path39.join)(tmpDir, "output.json");
|
|
14988
|
+
await (0, import_promises29.writeFile)(outputPath, serialized);
|
|
14754
14989
|
outputForPayload = null;
|
|
14755
14990
|
}
|
|
14756
14991
|
}
|
|
@@ -14862,11 +15097,11 @@ var CodeEvaluator = class {
|
|
|
14862
15097
|
await proxyShutdown();
|
|
14863
15098
|
}
|
|
14864
15099
|
if (outputPath) {
|
|
14865
|
-
await (0,
|
|
15100
|
+
await (0, import_promises29.rm)((0, import_node_path39.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
14866
15101
|
});
|
|
14867
15102
|
}
|
|
14868
15103
|
if (imageTmpDir) {
|
|
14869
|
-
await (0,
|
|
15104
|
+
await (0, import_promises29.rm)(imageTmpDir, { recursive: true, force: true }).catch(() => {
|
|
14870
15105
|
});
|
|
14871
15106
|
}
|
|
14872
15107
|
}
|
|
@@ -14899,8 +15134,8 @@ var import_ai3 = require("ai");
|
|
|
14899
15134
|
|
|
14900
15135
|
// src/evaluation/evaluators/llm-grader.ts
|
|
14901
15136
|
init_cjs_shims();
|
|
14902
|
-
var
|
|
14903
|
-
var
|
|
15137
|
+
var import_promises30 = __toESM(require("fs/promises"), 1);
|
|
15138
|
+
var import_node_path40 = __toESM(require("path"), 1);
|
|
14904
15139
|
var import_ai2 = require("ai");
|
|
14905
15140
|
var import_zod4 = require("zod");
|
|
14906
15141
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -14984,6 +15219,15 @@ var scoreRangeEvaluationSchema = import_zod4.z.object({
|
|
|
14984
15219
|
checks: import_zod4.z.array(scoreRangeCheckResultSchema).describe("Scores for each rubric criterion"),
|
|
14985
15220
|
overall_reasoning: import_zod4.z.string().describe("Overall assessment summary (1-2 sentences)").optional()
|
|
14986
15221
|
});
|
|
15222
|
+
function resolveContentBasePath(context2) {
|
|
15223
|
+
if (context2.workspacePath) {
|
|
15224
|
+
return context2.workspacePath;
|
|
15225
|
+
}
|
|
15226
|
+
if ("config" in context2.target && context2.target.config && typeof context2.target.config === "object" && "cwd" in context2.target.config && typeof context2.target.config.cwd === "string") {
|
|
15227
|
+
return context2.target.config.cwd;
|
|
15228
|
+
}
|
|
15229
|
+
return void 0;
|
|
15230
|
+
}
|
|
14987
15231
|
var LlmGraderEvaluator = class {
|
|
14988
15232
|
kind = "llm-grader";
|
|
14989
15233
|
resolveGraderProvider;
|
|
@@ -15001,24 +15245,46 @@ var LlmGraderEvaluator = class {
|
|
|
15001
15245
|
this.graderTargetProvider = options.graderTargetProvider ?? options.judgeTargetProvider;
|
|
15002
15246
|
}
|
|
15003
15247
|
async evaluate(context2) {
|
|
15248
|
+
const preparedContext = await this.prepareContext(context2);
|
|
15004
15249
|
if (this.graderTargetProvider) {
|
|
15005
|
-
return this.evaluateWithGraderTarget(
|
|
15250
|
+
return this.evaluateWithGraderTarget(preparedContext);
|
|
15006
15251
|
}
|
|
15007
|
-
const graderProvider = await this.resolveGraderProvider(
|
|
15252
|
+
const graderProvider = await this.resolveGraderProvider(preparedContext);
|
|
15008
15253
|
if (!graderProvider) {
|
|
15009
15254
|
throw new Error("No grader provider available for LLM grading");
|
|
15010
15255
|
}
|
|
15011
15256
|
if (graderProvider.kind === "agentv") {
|
|
15012
|
-
return this.evaluateBuiltIn(
|
|
15257
|
+
return this.evaluateBuiltIn(preparedContext, graderProvider);
|
|
15013
15258
|
}
|
|
15014
15259
|
if (isAgentProvider(graderProvider)) {
|
|
15015
|
-
return this.evaluateWithDelegatedAgent(
|
|
15260
|
+
return this.evaluateWithDelegatedAgent(preparedContext, graderProvider);
|
|
15016
15261
|
}
|
|
15017
|
-
const config =
|
|
15262
|
+
const config = preparedContext.evaluator;
|
|
15018
15263
|
if (config?.type === "llm-grader" && config.rubrics && config.rubrics.length > 0) {
|
|
15019
|
-
return this.evaluateWithRubrics(
|
|
15264
|
+
return this.evaluateWithRubrics(preparedContext, graderProvider, config.rubrics);
|
|
15265
|
+
}
|
|
15266
|
+
return this.evaluateFreeform(preparedContext, graderProvider);
|
|
15267
|
+
}
|
|
15268
|
+
async prepareContext(context2) {
|
|
15269
|
+
const config = context2.evaluator;
|
|
15270
|
+
if (config?.type !== "llm-grader" || !context2.output) {
|
|
15271
|
+
return context2;
|
|
15272
|
+
}
|
|
15273
|
+
const lastAssistant = [...context2.output].reverse().find((message) => message.role === "assistant" && message.content !== void 0);
|
|
15274
|
+
if (!lastAssistant || typeof lastAssistant.content === "string") {
|
|
15275
|
+
return context2;
|
|
15020
15276
|
}
|
|
15021
|
-
|
|
15277
|
+
const extracted = await extractTextWithPreprocessors(
|
|
15278
|
+
lastAssistant.content,
|
|
15279
|
+
config.preprocessors,
|
|
15280
|
+
{
|
|
15281
|
+
basePath: resolveContentBasePath(context2)
|
|
15282
|
+
}
|
|
15283
|
+
);
|
|
15284
|
+
return {
|
|
15285
|
+
...context2,
|
|
15286
|
+
candidate: appendPreprocessingWarnings(extracted.text, extracted.warnings)
|
|
15287
|
+
};
|
|
15022
15288
|
}
|
|
15023
15289
|
// ---------------------------------------------------------------------------
|
|
15024
15290
|
// LLM mode (existing)
|
|
@@ -15903,8 +16169,8 @@ function toAiSdkImageParts(images) {
|
|
|
15903
16169
|
}));
|
|
15904
16170
|
}
|
|
15905
16171
|
function resolveSandboxed(basePath, relativePath) {
|
|
15906
|
-
const resolved =
|
|
15907
|
-
if (!resolved.startsWith(basePath +
|
|
16172
|
+
const resolved = import_node_path40.default.resolve(basePath, relativePath);
|
|
16173
|
+
if (!resolved.startsWith(basePath + import_node_path40.default.sep) && resolved !== basePath) {
|
|
15908
16174
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
15909
16175
|
}
|
|
15910
16176
|
return resolved;
|
|
@@ -15919,7 +16185,7 @@ function createFilesystemTools(workspacePath) {
|
|
|
15919
16185
|
execute: async (input) => {
|
|
15920
16186
|
try {
|
|
15921
16187
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
15922
|
-
const entries = await
|
|
16188
|
+
const entries = await import_promises30.default.readdir(resolved, { withFileTypes: true });
|
|
15923
16189
|
return entries.map((e) => ({
|
|
15924
16190
|
name: e.name,
|
|
15925
16191
|
type: e.isDirectory() ? "directory" : "file"
|
|
@@ -15937,12 +16203,12 @@ function createFilesystemTools(workspacePath) {
|
|
|
15937
16203
|
execute: async (input) => {
|
|
15938
16204
|
try {
|
|
15939
16205
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
15940
|
-
const stat11 = await
|
|
16206
|
+
const stat11 = await import_promises30.default.stat(resolved);
|
|
15941
16207
|
if (stat11.isDirectory()) {
|
|
15942
16208
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
15943
16209
|
}
|
|
15944
16210
|
const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
|
|
15945
|
-
const fd = await
|
|
16211
|
+
const fd = await import_promises30.default.open(resolved, "r");
|
|
15946
16212
|
try {
|
|
15947
16213
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
15948
16214
|
} finally {
|
|
@@ -15987,30 +16253,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
15987
16253
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
15988
16254
|
let entries;
|
|
15989
16255
|
try {
|
|
15990
|
-
entries = await
|
|
16256
|
+
entries = await import_promises30.default.readdir(dirPath, { withFileTypes: true });
|
|
15991
16257
|
} catch {
|
|
15992
16258
|
return;
|
|
15993
16259
|
}
|
|
15994
16260
|
for (const entry of entries) {
|
|
15995
16261
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
15996
16262
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
15997
|
-
const fullPath =
|
|
16263
|
+
const fullPath = import_node_path40.default.join(dirPath, entry.name);
|
|
15998
16264
|
if (entry.isDirectory()) {
|
|
15999
16265
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
16000
16266
|
} else if (entry.isFile()) {
|
|
16001
|
-
const ext =
|
|
16267
|
+
const ext = import_node_path40.default.extname(entry.name).toLowerCase();
|
|
16002
16268
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
16003
16269
|
try {
|
|
16004
|
-
const stat11 = await
|
|
16270
|
+
const stat11 = await import_promises30.default.stat(fullPath);
|
|
16005
16271
|
if (stat11.size > MAX_FILE_SIZE) continue;
|
|
16006
|
-
const content = await
|
|
16272
|
+
const content = await import_promises30.default.readFile(fullPath, "utf-8");
|
|
16007
16273
|
const lines = content.split("\n");
|
|
16008
16274
|
for (let i = 0; i < lines.length; i++) {
|
|
16009
16275
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
16010
16276
|
regex.lastIndex = 0;
|
|
16011
16277
|
if (regex.test(lines[i])) {
|
|
16012
16278
|
matches.push({
|
|
16013
|
-
file:
|
|
16279
|
+
file: import_node_path40.default.relative(workspacePath, fullPath),
|
|
16014
16280
|
line: i + 1,
|
|
16015
16281
|
text: lines[i].substring(0, 200)
|
|
16016
16282
|
});
|
|
@@ -16646,115 +16912,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
16646
16912
|
* Evaluate a single field against the expected value.
|
|
16647
16913
|
*/
|
|
16648
16914
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
16649
|
-
const { path:
|
|
16650
|
-
const candidateValue = resolvePath(candidateData,
|
|
16651
|
-
const expectedValue = resolvePath(expectedData,
|
|
16915
|
+
const { path: path55, match, required = true, weight = 1 } = fieldConfig;
|
|
16916
|
+
const candidateValue = resolvePath(candidateData, path55);
|
|
16917
|
+
const expectedValue = resolvePath(expectedData, path55);
|
|
16652
16918
|
if (expectedValue === void 0) {
|
|
16653
16919
|
return {
|
|
16654
|
-
path:
|
|
16920
|
+
path: path55,
|
|
16655
16921
|
score: 1,
|
|
16656
16922
|
// No expected value means no comparison needed
|
|
16657
16923
|
weight,
|
|
16658
16924
|
hit: true,
|
|
16659
|
-
message: `${
|
|
16925
|
+
message: `${path55}: no expected value`
|
|
16660
16926
|
};
|
|
16661
16927
|
}
|
|
16662
16928
|
if (candidateValue === void 0) {
|
|
16663
16929
|
if (required) {
|
|
16664
16930
|
return {
|
|
16665
|
-
path:
|
|
16931
|
+
path: path55,
|
|
16666
16932
|
score: 0,
|
|
16667
16933
|
weight,
|
|
16668
16934
|
hit: false,
|
|
16669
|
-
message: `${
|
|
16935
|
+
message: `${path55} (required, missing)`
|
|
16670
16936
|
};
|
|
16671
16937
|
}
|
|
16672
16938
|
return {
|
|
16673
|
-
path:
|
|
16939
|
+
path: path55,
|
|
16674
16940
|
score: 1,
|
|
16675
16941
|
// Don't penalize missing optional fields
|
|
16676
16942
|
weight: 0,
|
|
16677
16943
|
// Zero weight means it won't affect the score
|
|
16678
16944
|
hit: true,
|
|
16679
|
-
message: `${
|
|
16945
|
+
message: `${path55}: optional field missing`
|
|
16680
16946
|
};
|
|
16681
16947
|
}
|
|
16682
16948
|
switch (match) {
|
|
16683
16949
|
case "exact":
|
|
16684
|
-
return this.compareExact(
|
|
16950
|
+
return this.compareExact(path55, candidateValue, expectedValue, weight);
|
|
16685
16951
|
case "numeric_tolerance":
|
|
16686
16952
|
return this.compareNumericTolerance(
|
|
16687
|
-
|
|
16953
|
+
path55,
|
|
16688
16954
|
candidateValue,
|
|
16689
16955
|
expectedValue,
|
|
16690
16956
|
fieldConfig,
|
|
16691
16957
|
weight
|
|
16692
16958
|
);
|
|
16693
16959
|
case "date":
|
|
16694
|
-
return this.compareDate(
|
|
16960
|
+
return this.compareDate(path55, candidateValue, expectedValue, fieldConfig, weight);
|
|
16695
16961
|
default:
|
|
16696
16962
|
return {
|
|
16697
|
-
path:
|
|
16963
|
+
path: path55,
|
|
16698
16964
|
score: 0,
|
|
16699
16965
|
weight,
|
|
16700
16966
|
hit: false,
|
|
16701
|
-
message: `${
|
|
16967
|
+
message: `${path55}: unknown match type "${match}"`
|
|
16702
16968
|
};
|
|
16703
16969
|
}
|
|
16704
16970
|
}
|
|
16705
16971
|
/**
|
|
16706
16972
|
* Exact equality comparison.
|
|
16707
16973
|
*/
|
|
16708
|
-
compareExact(
|
|
16974
|
+
compareExact(path55, candidateValue, expectedValue, weight) {
|
|
16709
16975
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
16710
16976
|
return {
|
|
16711
|
-
path:
|
|
16977
|
+
path: path55,
|
|
16712
16978
|
score: 1,
|
|
16713
16979
|
weight,
|
|
16714
16980
|
hit: true,
|
|
16715
|
-
message:
|
|
16981
|
+
message: path55
|
|
16716
16982
|
};
|
|
16717
16983
|
}
|
|
16718
16984
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
16719
16985
|
return {
|
|
16720
|
-
path:
|
|
16986
|
+
path: path55,
|
|
16721
16987
|
score: 0,
|
|
16722
16988
|
weight,
|
|
16723
16989
|
hit: false,
|
|
16724
|
-
message: `${
|
|
16990
|
+
message: `${path55} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
16725
16991
|
};
|
|
16726
16992
|
}
|
|
16727
16993
|
return {
|
|
16728
|
-
path:
|
|
16994
|
+
path: path55,
|
|
16729
16995
|
score: 0,
|
|
16730
16996
|
weight,
|
|
16731
16997
|
hit: false,
|
|
16732
|
-
message: `${
|
|
16998
|
+
message: `${path55} (value mismatch)`
|
|
16733
16999
|
};
|
|
16734
17000
|
}
|
|
16735
17001
|
/**
|
|
16736
17002
|
* Numeric comparison with absolute or relative tolerance.
|
|
16737
17003
|
*/
|
|
16738
|
-
compareNumericTolerance(
|
|
17004
|
+
compareNumericTolerance(path55, candidateValue, expectedValue, fieldConfig, weight) {
|
|
16739
17005
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
16740
17006
|
const candidateNum = toNumber(candidateValue);
|
|
16741
17007
|
const expectedNum = toNumber(expectedValue);
|
|
16742
17008
|
if (candidateNum === null || expectedNum === null) {
|
|
16743
17009
|
return {
|
|
16744
|
-
path:
|
|
17010
|
+
path: path55,
|
|
16745
17011
|
score: 0,
|
|
16746
17012
|
weight,
|
|
16747
17013
|
hit: false,
|
|
16748
|
-
message: `${
|
|
17014
|
+
message: `${path55} (non-numeric value)`
|
|
16749
17015
|
};
|
|
16750
17016
|
}
|
|
16751
17017
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
16752
17018
|
return {
|
|
16753
|
-
path:
|
|
17019
|
+
path: path55,
|
|
16754
17020
|
score: 0,
|
|
16755
17021
|
weight,
|
|
16756
17022
|
hit: false,
|
|
16757
|
-
message: `${
|
|
17023
|
+
message: `${path55} (invalid numeric value)`
|
|
16758
17024
|
};
|
|
16759
17025
|
}
|
|
16760
17026
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -16767,61 +17033,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
16767
17033
|
}
|
|
16768
17034
|
if (withinTolerance) {
|
|
16769
17035
|
return {
|
|
16770
|
-
path:
|
|
17036
|
+
path: path55,
|
|
16771
17037
|
score: 1,
|
|
16772
17038
|
weight,
|
|
16773
17039
|
hit: true,
|
|
16774
|
-
message: `${
|
|
17040
|
+
message: `${path55} (within tolerance: diff=${diff.toFixed(2)})`
|
|
16775
17041
|
};
|
|
16776
17042
|
}
|
|
16777
17043
|
return {
|
|
16778
|
-
path:
|
|
17044
|
+
path: path55,
|
|
16779
17045
|
score: 0,
|
|
16780
17046
|
weight,
|
|
16781
17047
|
hit: false,
|
|
16782
|
-
message: `${
|
|
17048
|
+
message: `${path55} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
16783
17049
|
};
|
|
16784
17050
|
}
|
|
16785
17051
|
/**
|
|
16786
17052
|
* Date comparison with format normalization.
|
|
16787
17053
|
*/
|
|
16788
|
-
compareDate(
|
|
17054
|
+
compareDate(path55, candidateValue, expectedValue, fieldConfig, weight) {
|
|
16789
17055
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
16790
17056
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
16791
17057
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
16792
17058
|
if (candidateDate === null) {
|
|
16793
17059
|
return {
|
|
16794
|
-
path:
|
|
17060
|
+
path: path55,
|
|
16795
17061
|
score: 0,
|
|
16796
17062
|
weight,
|
|
16797
17063
|
hit: false,
|
|
16798
|
-
message: `${
|
|
17064
|
+
message: `${path55} (unparseable candidate date)`
|
|
16799
17065
|
};
|
|
16800
17066
|
}
|
|
16801
17067
|
if (expectedDate === null) {
|
|
16802
17068
|
return {
|
|
16803
|
-
path:
|
|
17069
|
+
path: path55,
|
|
16804
17070
|
score: 0,
|
|
16805
17071
|
weight,
|
|
16806
17072
|
hit: false,
|
|
16807
|
-
message: `${
|
|
17073
|
+
message: `${path55} (unparseable expected date)`
|
|
16808
17074
|
};
|
|
16809
17075
|
}
|
|
16810
17076
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
16811
17077
|
return {
|
|
16812
|
-
path:
|
|
17078
|
+
path: path55,
|
|
16813
17079
|
score: 1,
|
|
16814
17080
|
weight,
|
|
16815
17081
|
hit: true,
|
|
16816
|
-
message:
|
|
17082
|
+
message: path55
|
|
16817
17083
|
};
|
|
16818
17084
|
}
|
|
16819
17085
|
return {
|
|
16820
|
-
path:
|
|
17086
|
+
path: path55,
|
|
16821
17087
|
score: 0,
|
|
16822
17088
|
weight,
|
|
16823
17089
|
hit: false,
|
|
16824
|
-
message: `${
|
|
17090
|
+
message: `${path55} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
16825
17091
|
};
|
|
16826
17092
|
}
|
|
16827
17093
|
/**
|
|
@@ -16854,11 +17120,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
16854
17120
|
};
|
|
16855
17121
|
}
|
|
16856
17122
|
};
|
|
16857
|
-
function resolvePath(obj,
|
|
16858
|
-
if (!
|
|
17123
|
+
function resolvePath(obj, path55) {
|
|
17124
|
+
if (!path55 || !obj) {
|
|
16859
17125
|
return void 0;
|
|
16860
17126
|
}
|
|
16861
|
-
const parts =
|
|
17127
|
+
const parts = path55.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
16862
17128
|
let current = obj;
|
|
16863
17129
|
for (const part of parts) {
|
|
16864
17130
|
if (current === null || current === void 0) {
|
|
@@ -17355,8 +17621,8 @@ var TokenUsageEvaluator = class {
|
|
|
17355
17621
|
|
|
17356
17622
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
17357
17623
|
init_cjs_shims();
|
|
17358
|
-
function getNestedValue(obj,
|
|
17359
|
-
const parts =
|
|
17624
|
+
function getNestedValue(obj, path55) {
|
|
17625
|
+
const parts = path55.split(".");
|
|
17360
17626
|
let current = obj;
|
|
17361
17627
|
for (const part of parts) {
|
|
17362
17628
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -17979,8 +18245,9 @@ function runEqualsAssertion(output, value) {
|
|
|
17979
18245
|
// src/evaluation/orchestrator.ts
|
|
17980
18246
|
init_cjs_shims();
|
|
17981
18247
|
var import_node_crypto11 = require("crypto");
|
|
17982
|
-
var
|
|
17983
|
-
var
|
|
18248
|
+
var import_node_fs16 = require("fs");
|
|
18249
|
+
var import_promises34 = require("fs/promises");
|
|
18250
|
+
var import_node_path49 = __toESM(require("path"), 1);
|
|
17984
18251
|
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
17985
18252
|
|
|
17986
18253
|
// ../../node_modules/.bun/p-limit@6.2.0/node_modules/p-limit/index.js
|
|
@@ -18207,7 +18474,7 @@ var InlineAssertEvaluator = class {
|
|
|
18207
18474
|
|
|
18208
18475
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
18209
18476
|
init_cjs_shims();
|
|
18210
|
-
var
|
|
18477
|
+
var import_node_path41 = __toESM(require("path"), 1);
|
|
18211
18478
|
async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
18212
18479
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
18213
18480
|
if (!context2) {
|
|
@@ -18250,7 +18517,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
18250
18517
|
};
|
|
18251
18518
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
18252
18519
|
const scriptPath = script[script.length - 1];
|
|
18253
|
-
const cwd =
|
|
18520
|
+
const cwd = import_node_path41.default.dirname(scriptPath);
|
|
18254
18521
|
try {
|
|
18255
18522
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
18256
18523
|
const prompt = stdout.trim();
|
|
@@ -18523,16 +18790,16 @@ function createBuiltinRegistry() {
|
|
|
18523
18790
|
|
|
18524
18791
|
// src/evaluation/registry/assertion-discovery.ts
|
|
18525
18792
|
init_cjs_shims();
|
|
18526
|
-
var
|
|
18793
|
+
var import_node_path42 = __toESM(require("path"), 1);
|
|
18527
18794
|
var import_fast_glob3 = __toESM(require("fast-glob"), 1);
|
|
18528
18795
|
async function discoverAssertions(registry, baseDir) {
|
|
18529
18796
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
18530
18797
|
const candidateDirs = [];
|
|
18531
|
-
let dir =
|
|
18532
|
-
const root =
|
|
18798
|
+
let dir = import_node_path42.default.resolve(baseDir);
|
|
18799
|
+
const root = import_node_path42.default.parse(dir).root;
|
|
18533
18800
|
while (dir !== root) {
|
|
18534
|
-
candidateDirs.push(
|
|
18535
|
-
dir =
|
|
18801
|
+
candidateDirs.push(import_node_path42.default.join(dir, ".agentv", "assertions"));
|
|
18802
|
+
dir = import_node_path42.default.dirname(dir);
|
|
18536
18803
|
}
|
|
18537
18804
|
let files = [];
|
|
18538
18805
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -18548,7 +18815,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
18548
18815
|
}
|
|
18549
18816
|
const discoveredTypes = [];
|
|
18550
18817
|
for (const filePath of files) {
|
|
18551
|
-
const basename =
|
|
18818
|
+
const basename = import_node_path42.default.basename(filePath);
|
|
18552
18819
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
18553
18820
|
if (registry.has(typeName)) {
|
|
18554
18821
|
continue;
|
|
@@ -18567,17 +18834,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
18567
18834
|
|
|
18568
18835
|
// src/evaluation/registry/grader-discovery.ts
|
|
18569
18836
|
init_cjs_shims();
|
|
18570
|
-
var
|
|
18837
|
+
var import_node_path43 = __toESM(require("path"), 1);
|
|
18571
18838
|
var import_fast_glob4 = __toESM(require("fast-glob"), 1);
|
|
18572
18839
|
async function discoverGraders(registry, baseDir) {
|
|
18573
18840
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
18574
18841
|
const candidateDirs = [];
|
|
18575
|
-
let dir =
|
|
18576
|
-
const root =
|
|
18842
|
+
let dir = import_node_path43.default.resolve(baseDir);
|
|
18843
|
+
const root = import_node_path43.default.parse(dir).root;
|
|
18577
18844
|
while (dir !== root) {
|
|
18578
|
-
candidateDirs.push(
|
|
18579
|
-
candidateDirs.push(
|
|
18580
|
-
dir =
|
|
18845
|
+
candidateDirs.push(import_node_path43.default.join(dir, ".agentv", "graders"));
|
|
18846
|
+
candidateDirs.push(import_node_path43.default.join(dir, ".agentv", "judges"));
|
|
18847
|
+
dir = import_node_path43.default.dirname(dir);
|
|
18581
18848
|
}
|
|
18582
18849
|
let files = [];
|
|
18583
18850
|
for (const gradersDir of candidateDirs) {
|
|
@@ -18593,7 +18860,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
18593
18860
|
}
|
|
18594
18861
|
const discoveredTypes = [];
|
|
18595
18862
|
for (const filePath of files) {
|
|
18596
|
-
const basename =
|
|
18863
|
+
const basename = import_node_path43.default.basename(filePath);
|
|
18597
18864
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
18598
18865
|
if (registry.has(typeName)) {
|
|
18599
18866
|
continue;
|
|
@@ -18755,7 +19022,7 @@ function getTCritical(df) {
|
|
|
18755
19022
|
init_cjs_shims();
|
|
18756
19023
|
var import_node_child_process8 = require("child_process");
|
|
18757
19024
|
var import_node_fs13 = require("fs");
|
|
18758
|
-
var
|
|
19025
|
+
var import_node_path44 = __toESM(require("path"), 1);
|
|
18759
19026
|
var import_node_util4 = require("util");
|
|
18760
19027
|
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process8.exec);
|
|
18761
19028
|
function gitExecOpts(workspacePath) {
|
|
@@ -18789,10 +19056,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
18789
19056
|
}
|
|
18790
19057
|
for (const entry of entries) {
|
|
18791
19058
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
18792
|
-
const childPath =
|
|
19059
|
+
const childPath = import_node_path44.default.join(workspacePath, entry);
|
|
18793
19060
|
try {
|
|
18794
19061
|
if (!(0, import_node_fs13.statSync)(childPath).isDirectory()) continue;
|
|
18795
|
-
if (!(0, import_node_fs13.statSync)(
|
|
19062
|
+
if (!(0, import_node_fs13.statSync)(import_node_path44.default.join(childPath, ".git")).isDirectory()) continue;
|
|
18796
19063
|
} catch {
|
|
18797
19064
|
continue;
|
|
18798
19065
|
}
|
|
@@ -18803,8 +19070,8 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
18803
19070
|
|
|
18804
19071
|
// src/evaluation/workspace/manager.ts
|
|
18805
19072
|
init_cjs_shims();
|
|
18806
|
-
var
|
|
18807
|
-
var
|
|
19073
|
+
var import_promises31 = require("fs/promises");
|
|
19074
|
+
var import_node_path45 = __toESM(require("path"), 1);
|
|
18808
19075
|
var TemplateNotFoundError = class extends Error {
|
|
18809
19076
|
constructor(templatePath) {
|
|
18810
19077
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -18826,7 +19093,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
18826
19093
|
};
|
|
18827
19094
|
async function isDirectory(filePath) {
|
|
18828
19095
|
try {
|
|
18829
|
-
const stats = await (0,
|
|
19096
|
+
const stats = await (0, import_promises31.stat)(filePath);
|
|
18830
19097
|
return stats.isDirectory();
|
|
18831
19098
|
} catch {
|
|
18832
19099
|
return false;
|
|
@@ -18834,26 +19101,26 @@ async function isDirectory(filePath) {
|
|
|
18834
19101
|
}
|
|
18835
19102
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
18836
19103
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
18837
|
-
return
|
|
19104
|
+
return import_node_path45.default.join(root, evalRunId, caseId);
|
|
18838
19105
|
}
|
|
18839
19106
|
async function copyDirectoryRecursive(src, dest) {
|
|
18840
|
-
await (0,
|
|
18841
|
-
const entries = await (0,
|
|
19107
|
+
await (0, import_promises31.mkdir)(dest, { recursive: true });
|
|
19108
|
+
const entries = await (0, import_promises31.readdir)(src, { withFileTypes: true });
|
|
18842
19109
|
for (const entry of entries) {
|
|
18843
|
-
const srcPath =
|
|
18844
|
-
const destPath =
|
|
19110
|
+
const srcPath = import_node_path45.default.join(src, entry.name);
|
|
19111
|
+
const destPath = import_node_path45.default.join(dest, entry.name);
|
|
18845
19112
|
if (entry.name === ".git") {
|
|
18846
19113
|
continue;
|
|
18847
19114
|
}
|
|
18848
19115
|
if (entry.isDirectory()) {
|
|
18849
19116
|
await copyDirectoryRecursive(srcPath, destPath);
|
|
18850
19117
|
} else {
|
|
18851
|
-
await (0,
|
|
19118
|
+
await (0, import_promises31.cp)(srcPath, destPath, { preserveTimestamps: true });
|
|
18852
19119
|
}
|
|
18853
19120
|
}
|
|
18854
19121
|
}
|
|
18855
19122
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
18856
|
-
const resolvedTemplatePath =
|
|
19123
|
+
const resolvedTemplatePath = import_node_path45.default.resolve(templatePath);
|
|
18857
19124
|
if (!await fileExists2(resolvedTemplatePath)) {
|
|
18858
19125
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
18859
19126
|
}
|
|
@@ -18863,7 +19130,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
18863
19130
|
const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
|
|
18864
19131
|
try {
|
|
18865
19132
|
if (await fileExists2(workspacePath)) {
|
|
18866
|
-
await (0,
|
|
19133
|
+
await (0, import_promises31.rm)(workspacePath, { recursive: true, force: true });
|
|
18867
19134
|
}
|
|
18868
19135
|
await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
|
|
18869
19136
|
return workspacePath;
|
|
@@ -18897,14 +19164,14 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
18897
19164
|
}
|
|
18898
19165
|
async function cleanupWorkspace(workspacePath) {
|
|
18899
19166
|
if (await fileExists2(workspacePath)) {
|
|
18900
|
-
await (0,
|
|
19167
|
+
await (0, import_promises31.rm)(workspacePath, { recursive: true, force: true });
|
|
18901
19168
|
}
|
|
18902
19169
|
}
|
|
18903
19170
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
18904
19171
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
18905
|
-
const evalDir =
|
|
19172
|
+
const evalDir = import_node_path45.default.join(root, evalRunId);
|
|
18906
19173
|
if (await fileExists2(evalDir)) {
|
|
18907
|
-
await (0,
|
|
19174
|
+
await (0, import_promises31.rm)(evalDir, { recursive: true, force: true });
|
|
18908
19175
|
}
|
|
18909
19176
|
}
|
|
18910
19177
|
|
|
@@ -18913,8 +19180,8 @@ init_cjs_shims();
|
|
|
18913
19180
|
var import_node_child_process9 = require("child_process");
|
|
18914
19181
|
var import_node_crypto10 = require("crypto");
|
|
18915
19182
|
var import_node_fs14 = require("fs");
|
|
18916
|
-
var
|
|
18917
|
-
var
|
|
19183
|
+
var import_promises32 = require("fs/promises");
|
|
19184
|
+
var import_node_path46 = __toESM(require("path"), 1);
|
|
18918
19185
|
var import_node_util5 = require("util");
|
|
18919
19186
|
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process9.execFile);
|
|
18920
19187
|
function gitEnv() {
|
|
@@ -18965,11 +19232,11 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
18965
19232
|
return (0, import_node_crypto10.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
18966
19233
|
}
|
|
18967
19234
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
18968
|
-
await (0,
|
|
18969
|
-
const entries = await (0,
|
|
19235
|
+
await (0, import_promises32.mkdir)(dest, { recursive: true });
|
|
19236
|
+
const entries = await (0, import_promises32.readdir)(src, { withFileTypes: true });
|
|
18970
19237
|
for (const entry of entries) {
|
|
18971
|
-
const srcPath =
|
|
18972
|
-
const destPath =
|
|
19238
|
+
const srcPath = import_node_path46.default.join(src, entry.name);
|
|
19239
|
+
const destPath = import_node_path46.default.join(dest, entry.name);
|
|
18973
19240
|
if (entry.name === ".git") {
|
|
18974
19241
|
continue;
|
|
18975
19242
|
}
|
|
@@ -18979,7 +19246,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
18979
19246
|
}
|
|
18980
19247
|
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
18981
19248
|
} else {
|
|
18982
|
-
await (0,
|
|
19249
|
+
await (0, import_promises32.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
18983
19250
|
}
|
|
18984
19251
|
}
|
|
18985
19252
|
}
|
|
@@ -19002,8 +19269,8 @@ var WorkspacePoolManager = class {
|
|
|
19002
19269
|
async acquireWorkspace(options) {
|
|
19003
19270
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
19004
19271
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
19005
|
-
const poolDir =
|
|
19006
|
-
await (0,
|
|
19272
|
+
const poolDir = import_node_path46.default.join(this.poolRoot, fingerprint);
|
|
19273
|
+
await (0, import_promises32.mkdir)(poolDir, { recursive: true });
|
|
19007
19274
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
19008
19275
|
if (drifted) {
|
|
19009
19276
|
console.warn(
|
|
@@ -19012,7 +19279,7 @@ var WorkspacePoolManager = class {
|
|
|
19012
19279
|
await this.removeAllSlots(poolDir);
|
|
19013
19280
|
}
|
|
19014
19281
|
for (let i = 0; i < maxSlots; i++) {
|
|
19015
|
-
const slotPath =
|
|
19282
|
+
const slotPath = import_node_path46.default.join(poolDir, `slot-${i}`);
|
|
19016
19283
|
const lockPath = `${slotPath}.lock`;
|
|
19017
19284
|
const locked = await this.tryLock(lockPath);
|
|
19018
19285
|
if (!locked) {
|
|
@@ -19030,7 +19297,7 @@ var WorkspacePoolManager = class {
|
|
|
19030
19297
|
poolDir
|
|
19031
19298
|
};
|
|
19032
19299
|
}
|
|
19033
|
-
await (0,
|
|
19300
|
+
await (0, import_promises32.mkdir)(slotPath, { recursive: true });
|
|
19034
19301
|
if (templatePath) {
|
|
19035
19302
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
19036
19303
|
}
|
|
@@ -19054,7 +19321,7 @@ var WorkspacePoolManager = class {
|
|
|
19054
19321
|
/** Remove lock file to release a slot. */
|
|
19055
19322
|
async releaseSlot(slot) {
|
|
19056
19323
|
try {
|
|
19057
|
-
await (0,
|
|
19324
|
+
await (0, import_promises32.unlink)(slot.lockPath);
|
|
19058
19325
|
} catch {
|
|
19059
19326
|
}
|
|
19060
19327
|
}
|
|
@@ -19067,21 +19334,21 @@ var WorkspacePoolManager = class {
|
|
|
19067
19334
|
async tryLock(lockPath) {
|
|
19068
19335
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
19069
19336
|
try {
|
|
19070
|
-
await (0,
|
|
19337
|
+
await (0, import_promises32.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
19071
19338
|
return true;
|
|
19072
19339
|
} catch (err) {
|
|
19073
19340
|
if (err.code !== "EEXIST") {
|
|
19074
19341
|
throw err;
|
|
19075
19342
|
}
|
|
19076
19343
|
try {
|
|
19077
|
-
const pidStr = await (0,
|
|
19344
|
+
const pidStr = await (0, import_promises32.readFile)(lockPath, "utf-8");
|
|
19078
19345
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
19079
19346
|
if (!Number.isNaN(pid)) {
|
|
19080
19347
|
try {
|
|
19081
19348
|
process.kill(pid, 0);
|
|
19082
19349
|
return false;
|
|
19083
19350
|
} catch {
|
|
19084
|
-
await (0,
|
|
19351
|
+
await (0, import_promises32.unlink)(lockPath).catch(() => {
|
|
19085
19352
|
});
|
|
19086
19353
|
continue;
|
|
19087
19354
|
}
|
|
@@ -19099,9 +19366,9 @@ var WorkspacePoolManager = class {
|
|
|
19099
19366
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
19100
19367
|
*/
|
|
19101
19368
|
async checkDrift(poolDir, fingerprint) {
|
|
19102
|
-
const metadataPath =
|
|
19369
|
+
const metadataPath = import_node_path46.default.join(poolDir, "metadata.json");
|
|
19103
19370
|
try {
|
|
19104
|
-
const raw = await (0,
|
|
19371
|
+
const raw = await (0, import_promises32.readFile)(metadataPath, "utf-8");
|
|
19105
19372
|
const metadata = JSON.parse(raw);
|
|
19106
19373
|
return metadata.fingerprint !== fingerprint;
|
|
19107
19374
|
} catch {
|
|
@@ -19116,17 +19383,17 @@ var WorkspacePoolManager = class {
|
|
|
19116
19383
|
repos,
|
|
19117
19384
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
19118
19385
|
};
|
|
19119
|
-
await (0,
|
|
19386
|
+
await (0, import_promises32.writeFile)(import_node_path46.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
19120
19387
|
}
|
|
19121
19388
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
19122
19389
|
async removeAllSlots(poolDir) {
|
|
19123
|
-
const entries = await (0,
|
|
19390
|
+
const entries = await (0, import_promises32.readdir)(poolDir);
|
|
19124
19391
|
for (const entry of entries) {
|
|
19125
19392
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
19126
|
-
const lockPath =
|
|
19393
|
+
const lockPath = import_node_path46.default.join(poolDir, `${entry}.lock`);
|
|
19127
19394
|
if ((0, import_node_fs14.existsSync)(lockPath)) {
|
|
19128
19395
|
try {
|
|
19129
|
-
const pidStr = await (0,
|
|
19396
|
+
const pidStr = await (0, import_promises32.readFile)(lockPath, "utf-8");
|
|
19130
19397
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
19131
19398
|
if (!Number.isNaN(pid)) {
|
|
19132
19399
|
try {
|
|
@@ -19139,12 +19406,12 @@ var WorkspacePoolManager = class {
|
|
|
19139
19406
|
} catch {
|
|
19140
19407
|
}
|
|
19141
19408
|
}
|
|
19142
|
-
await (0,
|
|
19143
|
-
await (0,
|
|
19409
|
+
await (0, import_promises32.rm)(import_node_path46.default.join(poolDir, entry), { recursive: true, force: true });
|
|
19410
|
+
await (0, import_promises32.rm)(lockPath, { force: true }).catch(() => {
|
|
19144
19411
|
});
|
|
19145
19412
|
}
|
|
19146
19413
|
}
|
|
19147
|
-
await (0,
|
|
19414
|
+
await (0, import_promises32.rm)(import_node_path46.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
19148
19415
|
});
|
|
19149
19416
|
}
|
|
19150
19417
|
/**
|
|
@@ -19154,7 +19421,7 @@ var WorkspacePoolManager = class {
|
|
|
19154
19421
|
*/
|
|
19155
19422
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
19156
19423
|
for (const repo of repos) {
|
|
19157
|
-
const repoDir =
|
|
19424
|
+
const repoDir = import_node_path46.default.join(slotPath, repo.path);
|
|
19158
19425
|
if (!(0, import_node_fs14.existsSync)(repoDir)) {
|
|
19159
19426
|
continue;
|
|
19160
19427
|
}
|
|
@@ -19192,7 +19459,7 @@ var WorkspacePoolManager = class {
|
|
|
19192
19459
|
init_cjs_shims();
|
|
19193
19460
|
var import_node_child_process10 = require("child_process");
|
|
19194
19461
|
var import_node_fs15 = require("fs");
|
|
19195
|
-
var
|
|
19462
|
+
var import_node_path47 = __toESM(require("path"), 1);
|
|
19196
19463
|
var import_node_util6 = require("util");
|
|
19197
19464
|
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process10.execFile);
|
|
19198
19465
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -19292,7 +19559,7 @@ ${lines.join("\n")}`;
|
|
|
19292
19559
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
19293
19560
|
*/
|
|
19294
19561
|
async materialize(repo, workspacePath) {
|
|
19295
|
-
const targetDir =
|
|
19562
|
+
const targetDir = import_node_path47.default.join(workspacePath, repo.path);
|
|
19296
19563
|
const sourceUrl = getSourceUrl(repo.source);
|
|
19297
19564
|
const startedAt = Date.now();
|
|
19298
19565
|
if (this.verbose) {
|
|
@@ -19383,7 +19650,7 @@ ${lines.join("\n")}`;
|
|
|
19383
19650
|
async reset(repos, workspacePath, reset) {
|
|
19384
19651
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
19385
19652
|
for (const repo of repos) {
|
|
19386
|
-
const targetDir =
|
|
19653
|
+
const targetDir = import_node_path47.default.join(workspacePath, repo.path);
|
|
19387
19654
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
19388
19655
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
19389
19656
|
}
|
|
@@ -19392,36 +19659,36 @@ ${lines.join("\n")}`;
|
|
|
19392
19659
|
|
|
19393
19660
|
// src/evaluation/workspace/resolve.ts
|
|
19394
19661
|
init_cjs_shims();
|
|
19395
|
-
var
|
|
19396
|
-
var
|
|
19662
|
+
var import_promises33 = require("fs/promises");
|
|
19663
|
+
var import_node_path48 = __toESM(require("path"), 1);
|
|
19397
19664
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
19398
19665
|
if (!templatePath) {
|
|
19399
19666
|
return void 0;
|
|
19400
19667
|
}
|
|
19401
|
-
const resolved =
|
|
19402
|
-
const stats = await (0,
|
|
19668
|
+
const resolved = import_node_path48.default.resolve(templatePath);
|
|
19669
|
+
const stats = await (0, import_promises33.stat)(resolved);
|
|
19403
19670
|
if (stats.isFile()) {
|
|
19404
19671
|
return {
|
|
19405
|
-
dir:
|
|
19672
|
+
dir: import_node_path48.default.dirname(resolved),
|
|
19406
19673
|
workspaceFile: resolved
|
|
19407
19674
|
};
|
|
19408
19675
|
}
|
|
19409
19676
|
if (!stats.isDirectory()) {
|
|
19410
19677
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
19411
19678
|
}
|
|
19412
|
-
const entries = await (0,
|
|
19679
|
+
const entries = await (0, import_promises33.readdir)(resolved);
|
|
19413
19680
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
19414
19681
|
if (workspaceFiles.length === 1) {
|
|
19415
19682
|
return {
|
|
19416
19683
|
dir: resolved,
|
|
19417
|
-
workspaceFile:
|
|
19684
|
+
workspaceFile: import_node_path48.default.join(resolved, workspaceFiles[0])
|
|
19418
19685
|
};
|
|
19419
19686
|
}
|
|
19420
19687
|
if (workspaceFiles.length > 1) {
|
|
19421
19688
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
19422
19689
|
return {
|
|
19423
19690
|
dir: resolved,
|
|
19424
|
-
workspaceFile: conventionFile ?
|
|
19691
|
+
workspaceFile: conventionFile ? import_node_path48.default.join(resolved, conventionFile) : void 0
|
|
19425
19692
|
};
|
|
19426
19693
|
}
|
|
19427
19694
|
return { dir: resolved };
|
|
@@ -19641,7 +19908,7 @@ async function runEvaluation(options) {
|
|
|
19641
19908
|
];
|
|
19642
19909
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
19643
19910
|
const typeRegistry = createBuiltinRegistry();
|
|
19644
|
-
const discoveryBaseDir = evalFilePath ?
|
|
19911
|
+
const discoveryBaseDir = evalFilePath ? import_node_path49.default.dirname(import_node_path49.default.resolve(evalFilePath)) : process.cwd();
|
|
19645
19912
|
const evalDir = discoveryBaseDir;
|
|
19646
19913
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
19647
19914
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -19780,16 +20047,16 @@ async function runEvaluation(options) {
|
|
|
19780
20047
|
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
19781
20048
|
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
19782
20049
|
let staticMaterialised = false;
|
|
20050
|
+
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
19783
20051
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
19784
|
-
const
|
|
19785
|
-
const dirExists = await (0, import_promises33.stat)(configuredStaticPath).then(
|
|
20052
|
+
const dirExists = await (0, import_promises34.stat)(configuredStaticPath).then(
|
|
19786
20053
|
(s) => s.isDirectory(),
|
|
19787
20054
|
() => false
|
|
19788
20055
|
);
|
|
19789
|
-
const isEmpty = dirExists ? (await (0,
|
|
20056
|
+
const isEmpty = dirExists ? (await (0, import_promises34.readdir)(configuredStaticPath)).length === 0 : false;
|
|
19790
20057
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
19791
20058
|
if (!dirExists) {
|
|
19792
|
-
await (0,
|
|
20059
|
+
await (0, import_promises34.mkdir)(configuredStaticPath, { recursive: true });
|
|
19793
20060
|
}
|
|
19794
20061
|
if (workspaceTemplate) {
|
|
19795
20062
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -19834,26 +20101,40 @@ async function runEvaluation(options) {
|
|
|
19834
20101
|
}
|
|
19835
20102
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
19836
20103
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
19837
|
-
await (0,
|
|
20104
|
+
await (0, import_promises34.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
19838
20105
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
19839
20106
|
}
|
|
19840
20107
|
try {
|
|
19841
20108
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
19842
|
-
const copiedWorkspaceFile =
|
|
20109
|
+
const copiedWorkspaceFile = import_node_path49.default.join(sharedWorkspacePath, import_node_path49.default.basename(suiteWorkspaceFile));
|
|
19843
20110
|
try {
|
|
19844
|
-
await (0,
|
|
20111
|
+
await (0, import_promises34.stat)(copiedWorkspaceFile);
|
|
19845
20112
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
19846
20113
|
} catch {
|
|
19847
20114
|
}
|
|
19848
20115
|
}
|
|
19849
|
-
const
|
|
19850
|
-
const
|
|
19851
|
-
|
|
19852
|
-
|
|
19853
|
-
|
|
19854
|
-
);
|
|
20116
|
+
const hasReposToMaterialize = !!suiteWorkspace?.repos?.length && !usePool && !isPerTestIsolation;
|
|
20117
|
+
const needsRepoMaterialisation = hasReposToMaterialize && (!useStaticWorkspace || staticMaterialised);
|
|
20118
|
+
const needsPerRepoCheck = hasReposToMaterialize && useStaticWorkspace && !staticMaterialised && isYamlConfiguredPath;
|
|
20119
|
+
const repoManager = needsRepoMaterialisation || needsPerRepoCheck ? new RepoManager(verbose) : void 0;
|
|
20120
|
+
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos) {
|
|
19855
20121
|
try {
|
|
19856
|
-
|
|
20122
|
+
if (needsPerRepoCheck) {
|
|
20123
|
+
for (const repo of suiteWorkspace.repos) {
|
|
20124
|
+
const targetDir = import_node_path49.default.join(sharedWorkspacePath, repo.path);
|
|
20125
|
+
if ((0, import_node_fs16.existsSync)(targetDir)) {
|
|
20126
|
+
setupLog(`reusing existing repo at: ${targetDir}`);
|
|
20127
|
+
continue;
|
|
20128
|
+
}
|
|
20129
|
+
setupLog(`materializing missing repo: ${repo.path}`);
|
|
20130
|
+
await repoManager.materialize(repo, sharedWorkspacePath);
|
|
20131
|
+
}
|
|
20132
|
+
} else {
|
|
20133
|
+
setupLog(
|
|
20134
|
+
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
20135
|
+
);
|
|
20136
|
+
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
|
|
20137
|
+
}
|
|
19857
20138
|
setupLog("shared repo materialization complete");
|
|
19858
20139
|
} catch (error) {
|
|
19859
20140
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -20427,9 +20708,9 @@ async function runEvalCase(options) {
|
|
|
20427
20708
|
);
|
|
20428
20709
|
}
|
|
20429
20710
|
if (caseWorkspaceFile && workspacePath) {
|
|
20430
|
-
const copiedFile =
|
|
20711
|
+
const copiedFile = import_node_path49.default.join(workspacePath, import_node_path49.default.basename(caseWorkspaceFile));
|
|
20431
20712
|
try {
|
|
20432
|
-
await (0,
|
|
20713
|
+
await (0, import_promises34.stat)(copiedFile);
|
|
20433
20714
|
caseWorkspaceFile = copiedFile;
|
|
20434
20715
|
} catch {
|
|
20435
20716
|
}
|
|
@@ -20437,7 +20718,7 @@ async function runEvalCase(options) {
|
|
|
20437
20718
|
}
|
|
20438
20719
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
20439
20720
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
20440
|
-
await (0,
|
|
20721
|
+
await (0, import_promises34.mkdir)(workspacePath, { recursive: true });
|
|
20441
20722
|
}
|
|
20442
20723
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
20443
20724
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -20489,11 +20770,11 @@ async function runEvalCase(options) {
|
|
|
20489
20770
|
const files = evalCase.metadata.agent_skills_files;
|
|
20490
20771
|
if (baseDir && files.length > 0) {
|
|
20491
20772
|
for (const relPath of files) {
|
|
20492
|
-
const srcPath =
|
|
20493
|
-
const destPath =
|
|
20773
|
+
const srcPath = import_node_path49.default.resolve(baseDir, relPath);
|
|
20774
|
+
const destPath = import_node_path49.default.resolve(workspacePath, relPath);
|
|
20494
20775
|
try {
|
|
20495
|
-
await (0,
|
|
20496
|
-
await (0,
|
|
20776
|
+
await (0, import_promises34.mkdir)(import_node_path49.default.dirname(destPath), { recursive: true });
|
|
20777
|
+
await (0, import_promises34.copyFile)(srcPath, destPath);
|
|
20497
20778
|
} catch (error) {
|
|
20498
20779
|
const message = error instanceof Error ? error.message : String(error);
|
|
20499
20780
|
return buildErrorResult(
|
|
@@ -21088,6 +21369,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
21088
21369
|
if (!activeEvaluator) {
|
|
21089
21370
|
throw new Error(`No evaluator registered for kind '${evaluatorKind}'`);
|
|
21090
21371
|
}
|
|
21372
|
+
const implicitEvaluator = evaluatorKind === "llm-grader" && !evalCase.assertions ? buildImplicitLlmGraderConfig(evalCase) : void 0;
|
|
21091
21373
|
const score = await activeEvaluator.evaluate({
|
|
21092
21374
|
evalCase,
|
|
21093
21375
|
candidate,
|
|
@@ -21107,10 +21389,21 @@ async function runEvaluatorsForCase(options) {
|
|
|
21107
21389
|
targetResolver,
|
|
21108
21390
|
availableTargets,
|
|
21109
21391
|
fileChanges,
|
|
21110
|
-
workspacePath
|
|
21392
|
+
workspacePath,
|
|
21393
|
+
...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
|
|
21111
21394
|
});
|
|
21112
21395
|
return { score };
|
|
21113
21396
|
}
|
|
21397
|
+
function buildImplicitLlmGraderConfig(evalCase) {
|
|
21398
|
+
if (!evalCase.preprocessors || evalCase.preprocessors.length === 0) {
|
|
21399
|
+
return void 0;
|
|
21400
|
+
}
|
|
21401
|
+
return {
|
|
21402
|
+
name: "llm-grader",
|
|
21403
|
+
type: "llm-grader",
|
|
21404
|
+
preprocessors: evalCase.preprocessors
|
|
21405
|
+
};
|
|
21406
|
+
}
|
|
21114
21407
|
async function runEvaluatorList(options) {
|
|
21115
21408
|
const {
|
|
21116
21409
|
evalCase,
|
|
@@ -21160,7 +21453,7 @@ async function runEvaluatorList(options) {
|
|
|
21160
21453
|
fileChanges,
|
|
21161
21454
|
workspacePath
|
|
21162
21455
|
};
|
|
21163
|
-
const evalFileDir = evalCase.file_paths[0] ?
|
|
21456
|
+
const evalFileDir = evalCase.file_paths[0] ? import_node_path49.default.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
21164
21457
|
const dispatchContext = {
|
|
21165
21458
|
graderProvider,
|
|
21166
21459
|
targetResolver,
|
|
@@ -21528,8 +21821,8 @@ function computeWeightedMean(entries) {
|
|
|
21528
21821
|
|
|
21529
21822
|
// src/evaluation/evaluate.ts
|
|
21530
21823
|
init_cjs_shims();
|
|
21531
|
-
var
|
|
21532
|
-
var
|
|
21824
|
+
var import_node_fs17 = require("fs");
|
|
21825
|
+
var import_node_path50 = __toESM(require("path"), 1);
|
|
21533
21826
|
|
|
21534
21827
|
// src/evaluation/providers/function-provider.ts
|
|
21535
21828
|
init_cjs_shims();
|
|
@@ -21567,7 +21860,7 @@ async function evaluate(config) {
|
|
|
21567
21860
|
}
|
|
21568
21861
|
const gitRoot = await findGitRoot(process.cwd());
|
|
21569
21862
|
const repoRoot = gitRoot ?? process.cwd();
|
|
21570
|
-
const testFilePath = config.specFile ?
|
|
21863
|
+
const testFilePath = config.specFile ? import_node_path50.default.resolve(config.specFile) : import_node_path50.default.join(process.cwd(), "__programmatic__.yaml");
|
|
21571
21864
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
21572
21865
|
let resolvedTarget;
|
|
21573
21866
|
let taskProvider;
|
|
@@ -21682,11 +21975,11 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
|
|
|
21682
21975
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
21683
21976
|
async function discoverDefaultTarget(repoRoot) {
|
|
21684
21977
|
const cwd = process.cwd();
|
|
21685
|
-
const chain = buildDirectoryChain2(
|
|
21978
|
+
const chain = buildDirectoryChain2(import_node_path50.default.join(cwd, "_placeholder"), repoRoot);
|
|
21686
21979
|
for (const dir of chain) {
|
|
21687
21980
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
21688
|
-
const targetsPath =
|
|
21689
|
-
if (!(0,
|
|
21981
|
+
const targetsPath = import_node_path50.default.join(dir, candidate);
|
|
21982
|
+
if (!(0, import_node_fs17.existsSync)(targetsPath)) continue;
|
|
21690
21983
|
try {
|
|
21691
21984
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
21692
21985
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -21702,8 +21995,8 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
21702
21995
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
21703
21996
|
const envFiles = [];
|
|
21704
21997
|
for (const dir of chain) {
|
|
21705
|
-
const envPath =
|
|
21706
|
-
if ((0,
|
|
21998
|
+
const envPath = import_node_path50.default.join(dir, ".env");
|
|
21999
|
+
if ((0, import_node_fs17.existsSync)(envPath)) envFiles.push(envPath);
|
|
21707
22000
|
}
|
|
21708
22001
|
for (let i = 0; i < envFiles.length; i++) {
|
|
21709
22002
|
try {
|
|
@@ -21780,12 +22073,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
21780
22073
|
".agentv/config.js"
|
|
21781
22074
|
];
|
|
21782
22075
|
async function loadTsConfig(projectRoot) {
|
|
21783
|
-
const { existsSync:
|
|
22076
|
+
const { existsSync: existsSync8 } = await import("fs");
|
|
21784
22077
|
const { pathToFileURL: pathToFileURL2 } = await import("url");
|
|
21785
22078
|
const { join: join2 } = await import("path");
|
|
21786
22079
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
21787
22080
|
const filePath = join2(projectRoot, fileName);
|
|
21788
|
-
if (!
|
|
22081
|
+
if (!existsSync8(filePath)) {
|
|
21789
22082
|
continue;
|
|
21790
22083
|
}
|
|
21791
22084
|
try {
|
|
@@ -21890,8 +22183,8 @@ init_cjs_shims();
|
|
|
21890
22183
|
|
|
21891
22184
|
// src/evaluation/workspace/deps-scanner.ts
|
|
21892
22185
|
init_cjs_shims();
|
|
21893
|
-
var
|
|
21894
|
-
var
|
|
22186
|
+
var import_promises35 = require("fs/promises");
|
|
22187
|
+
var import_node_path51 = __toESM(require("path"), 1);
|
|
21895
22188
|
var import_yaml8 = require("yaml");
|
|
21896
22189
|
function normalizeGitUrl(url) {
|
|
21897
22190
|
let normalized = url.replace(/\.git$/, "");
|
|
@@ -21938,11 +22231,11 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
21938
22231
|
return { repos: [...seen.values()], errors };
|
|
21939
22232
|
}
|
|
21940
22233
|
async function extractReposFromEvalFile(filePath) {
|
|
21941
|
-
const content = await (0,
|
|
22234
|
+
const content = await (0, import_promises35.readFile)(filePath, "utf8");
|
|
21942
22235
|
const parsed = interpolateEnv((0, import_yaml8.parse)(content), process.env);
|
|
21943
22236
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
21944
22237
|
const obj = parsed;
|
|
21945
|
-
const evalFileDir =
|
|
22238
|
+
const evalFileDir = import_node_path51.default.dirname(import_node_path51.default.resolve(filePath));
|
|
21946
22239
|
const repos = [];
|
|
21947
22240
|
const suiteRepos = await extractReposFromWorkspaceRaw(obj.workspace, evalFileDir);
|
|
21948
22241
|
repos.push(...suiteRepos);
|
|
@@ -21958,8 +22251,8 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
21958
22251
|
}
|
|
21959
22252
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
21960
22253
|
if (typeof raw === "string") {
|
|
21961
|
-
const workspaceFilePath =
|
|
21962
|
-
const content = await (0,
|
|
22254
|
+
const workspaceFilePath = import_node_path51.default.resolve(evalFileDir, raw);
|
|
22255
|
+
const content = await (0, import_promises35.readFile)(workspaceFilePath, "utf8");
|
|
21963
22256
|
const parsed = interpolateEnv((0, import_yaml8.parse)(content), process.env);
|
|
21964
22257
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
21965
22258
|
return extractReposFromObject(parsed);
|
|
@@ -21988,8 +22281,8 @@ function extractReposFromObject(obj) {
|
|
|
21988
22281
|
|
|
21989
22282
|
// src/evaluation/cache/response-cache.ts
|
|
21990
22283
|
init_cjs_shims();
|
|
21991
|
-
var
|
|
21992
|
-
var
|
|
22284
|
+
var import_promises36 = require("fs/promises");
|
|
22285
|
+
var import_node_path52 = __toESM(require("path"), 1);
|
|
21993
22286
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
21994
22287
|
var ResponseCache = class {
|
|
21995
22288
|
cachePath;
|
|
@@ -21999,7 +22292,7 @@ var ResponseCache = class {
|
|
|
21999
22292
|
async get(key) {
|
|
22000
22293
|
const filePath = this.keyToPath(key);
|
|
22001
22294
|
try {
|
|
22002
|
-
const data = await (0,
|
|
22295
|
+
const data = await (0, import_promises36.readFile)(filePath, "utf8");
|
|
22003
22296
|
return JSON.parse(data);
|
|
22004
22297
|
} catch {
|
|
22005
22298
|
return void 0;
|
|
@@ -22007,13 +22300,13 @@ var ResponseCache = class {
|
|
|
22007
22300
|
}
|
|
22008
22301
|
async set(key, value) {
|
|
22009
22302
|
const filePath = this.keyToPath(key);
|
|
22010
|
-
const dir =
|
|
22011
|
-
await (0,
|
|
22012
|
-
await (0,
|
|
22303
|
+
const dir = import_node_path52.default.dirname(filePath);
|
|
22304
|
+
await (0, import_promises36.mkdir)(dir, { recursive: true });
|
|
22305
|
+
await (0, import_promises36.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
22013
22306
|
}
|
|
22014
22307
|
keyToPath(key) {
|
|
22015
22308
|
const prefix = key.slice(0, 2);
|
|
22016
|
-
return
|
|
22309
|
+
return import_node_path52.default.join(this.cachePath, prefix, `${key}.json`);
|
|
22017
22310
|
}
|
|
22018
22311
|
};
|
|
22019
22312
|
function shouldEnableCache(params) {
|
|
@@ -22030,19 +22323,19 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
22030
22323
|
|
|
22031
22324
|
// src/projects.ts
|
|
22032
22325
|
init_cjs_shims();
|
|
22033
|
-
var
|
|
22034
|
-
var
|
|
22326
|
+
var import_node_fs18 = require("fs");
|
|
22327
|
+
var import_node_path53 = __toESM(require("path"), 1);
|
|
22035
22328
|
var import_yaml9 = require("yaml");
|
|
22036
22329
|
function getProjectsRegistryPath() {
|
|
22037
|
-
return
|
|
22330
|
+
return import_node_path53.default.join(getAgentvHome(), "projects.yaml");
|
|
22038
22331
|
}
|
|
22039
22332
|
function loadProjectRegistry() {
|
|
22040
22333
|
const registryPath = getProjectsRegistryPath();
|
|
22041
|
-
if (!(0,
|
|
22334
|
+
if (!(0, import_node_fs18.existsSync)(registryPath)) {
|
|
22042
22335
|
return { projects: [] };
|
|
22043
22336
|
}
|
|
22044
22337
|
try {
|
|
22045
|
-
const raw = (0,
|
|
22338
|
+
const raw = (0, import_node_fs18.readFileSync)(registryPath, "utf-8");
|
|
22046
22339
|
const parsed = (0, import_yaml9.parse)(raw);
|
|
22047
22340
|
if (!parsed || !Array.isArray(parsed.projects)) {
|
|
22048
22341
|
return { projects: [] };
|
|
@@ -22054,14 +22347,14 @@ function loadProjectRegistry() {
|
|
|
22054
22347
|
}
|
|
22055
22348
|
function saveProjectRegistry(registry) {
|
|
22056
22349
|
const registryPath = getProjectsRegistryPath();
|
|
22057
|
-
const dir =
|
|
22058
|
-
if (!(0,
|
|
22059
|
-
(0,
|
|
22350
|
+
const dir = import_node_path53.default.dirname(registryPath);
|
|
22351
|
+
if (!(0, import_node_fs18.existsSync)(dir)) {
|
|
22352
|
+
(0, import_node_fs18.mkdirSync)(dir, { recursive: true });
|
|
22060
22353
|
}
|
|
22061
|
-
(0,
|
|
22354
|
+
(0, import_node_fs18.writeFileSync)(registryPath, (0, import_yaml9.stringify)(registry), "utf-8");
|
|
22062
22355
|
}
|
|
22063
22356
|
function deriveProjectId(dirPath, existingIds) {
|
|
22064
|
-
const base =
|
|
22357
|
+
const base = import_node_path53.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
22065
22358
|
let candidate = base || "project";
|
|
22066
22359
|
let suffix = 2;
|
|
22067
22360
|
while (existingIds.includes(candidate)) {
|
|
@@ -22071,11 +22364,11 @@ function deriveProjectId(dirPath, existingIds) {
|
|
|
22071
22364
|
return candidate;
|
|
22072
22365
|
}
|
|
22073
22366
|
function addProject(projectPath) {
|
|
22074
|
-
const absPath =
|
|
22075
|
-
if (!(0,
|
|
22367
|
+
const absPath = import_node_path53.default.resolve(projectPath);
|
|
22368
|
+
if (!(0, import_node_fs18.existsSync)(absPath)) {
|
|
22076
22369
|
throw new Error(`Directory not found: ${absPath}`);
|
|
22077
22370
|
}
|
|
22078
|
-
if (!(0,
|
|
22371
|
+
if (!(0, import_node_fs18.existsSync)(import_node_path53.default.join(absPath, ".agentv"))) {
|
|
22079
22372
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
22080
22373
|
}
|
|
22081
22374
|
const registry = loadProjectRegistry();
|
|
@@ -22089,7 +22382,7 @@ function addProject(projectPath) {
|
|
|
22089
22382
|
absPath,
|
|
22090
22383
|
registry.projects.map((p) => p.id)
|
|
22091
22384
|
),
|
|
22092
|
-
name:
|
|
22385
|
+
name: import_node_path53.default.basename(absPath),
|
|
22093
22386
|
path: absPath,
|
|
22094
22387
|
addedAt: now,
|
|
22095
22388
|
lastOpenedAt: now
|
|
@@ -22118,24 +22411,24 @@ function touchProject(projectId) {
|
|
|
22118
22411
|
}
|
|
22119
22412
|
}
|
|
22120
22413
|
function discoverProjects(rootDir, maxDepth = 2) {
|
|
22121
|
-
const absRoot =
|
|
22122
|
-
if (!(0,
|
|
22414
|
+
const absRoot = import_node_path53.default.resolve(rootDir);
|
|
22415
|
+
if (!(0, import_node_fs18.existsSync)(absRoot) || !(0, import_node_fs18.statSync)(absRoot).isDirectory()) {
|
|
22123
22416
|
return [];
|
|
22124
22417
|
}
|
|
22125
22418
|
const results = [];
|
|
22126
22419
|
function scan(dir, depth) {
|
|
22127
22420
|
if (depth > maxDepth) return;
|
|
22128
|
-
if ((0,
|
|
22421
|
+
if ((0, import_node_fs18.existsSync)(import_node_path53.default.join(dir, ".agentv"))) {
|
|
22129
22422
|
results.push(dir);
|
|
22130
22423
|
return;
|
|
22131
22424
|
}
|
|
22132
22425
|
if (depth === maxDepth) return;
|
|
22133
22426
|
try {
|
|
22134
|
-
const entries = (0,
|
|
22427
|
+
const entries = (0, import_node_fs18.readdirSync)(dir, { withFileTypes: true });
|
|
22135
22428
|
for (const entry of entries) {
|
|
22136
22429
|
if (!entry.isDirectory()) continue;
|
|
22137
22430
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
22138
|
-
scan(
|
|
22431
|
+
scan(import_node_path53.default.join(dir, entry.name), depth + 1);
|
|
22139
22432
|
}
|
|
22140
22433
|
} catch {
|
|
22141
22434
|
}
|
|
@@ -23061,33 +23354,33 @@ function extractResponseItemContent(content) {
|
|
|
23061
23354
|
|
|
23062
23355
|
// src/import/codex-session-discovery.ts
|
|
23063
23356
|
init_cjs_shims();
|
|
23064
|
-
var
|
|
23357
|
+
var import_promises38 = require("fs/promises");
|
|
23065
23358
|
var import_node_os8 = require("os");
|
|
23066
|
-
var
|
|
23067
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
23359
|
+
var import_node_path55 = __toESM(require("path"), 1);
|
|
23360
|
+
var DEFAULT_SESSIONS_DIR = () => import_node_path55.default.join((0, import_node_os8.homedir)(), ".codex", "sessions");
|
|
23068
23361
|
async function discoverCodexSessions(opts) {
|
|
23069
23362
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
23070
23363
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
23071
23364
|
const sessions = [];
|
|
23072
23365
|
let yearDirs;
|
|
23073
23366
|
try {
|
|
23074
|
-
yearDirs = await (0,
|
|
23367
|
+
yearDirs = await (0, import_promises38.readdir)(sessionsDir);
|
|
23075
23368
|
} catch {
|
|
23076
23369
|
return [];
|
|
23077
23370
|
}
|
|
23078
23371
|
for (const year of yearDirs) {
|
|
23079
|
-
const yearPath =
|
|
23372
|
+
const yearPath = import_node_path55.default.join(sessionsDir, year);
|
|
23080
23373
|
let monthDirs;
|
|
23081
23374
|
try {
|
|
23082
|
-
monthDirs = await (0,
|
|
23375
|
+
monthDirs = await (0, import_promises38.readdir)(yearPath);
|
|
23083
23376
|
} catch {
|
|
23084
23377
|
continue;
|
|
23085
23378
|
}
|
|
23086
23379
|
for (const month of monthDirs) {
|
|
23087
|
-
const monthPath =
|
|
23380
|
+
const monthPath = import_node_path55.default.join(yearPath, month);
|
|
23088
23381
|
let dayDirs;
|
|
23089
23382
|
try {
|
|
23090
|
-
dayDirs = await (0,
|
|
23383
|
+
dayDirs = await (0, import_promises38.readdir)(monthPath);
|
|
23091
23384
|
} catch {
|
|
23092
23385
|
continue;
|
|
23093
23386
|
}
|
|
@@ -23096,22 +23389,22 @@ async function discoverCodexSessions(opts) {
|
|
|
23096
23389
|
const dirDate = `${year}-${month}-${day}`;
|
|
23097
23390
|
if (dirDate !== opts.date) continue;
|
|
23098
23391
|
}
|
|
23099
|
-
const dayPath =
|
|
23392
|
+
const dayPath = import_node_path55.default.join(monthPath, day);
|
|
23100
23393
|
let files;
|
|
23101
23394
|
try {
|
|
23102
|
-
files = await (0,
|
|
23395
|
+
files = await (0, import_promises38.readdir)(dayPath);
|
|
23103
23396
|
} catch {
|
|
23104
23397
|
continue;
|
|
23105
23398
|
}
|
|
23106
23399
|
for (const file of files) {
|
|
23107
23400
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
23108
|
-
const filePath =
|
|
23401
|
+
const filePath = import_node_path55.default.join(dayPath, file);
|
|
23109
23402
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
23110
23403
|
const parts = nameWithoutExt.split("-");
|
|
23111
23404
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
23112
23405
|
let updatedAt;
|
|
23113
23406
|
try {
|
|
23114
|
-
const fileStat = await (0,
|
|
23407
|
+
const fileStat = await (0, import_promises38.stat)(filePath);
|
|
23115
23408
|
updatedAt = fileStat.mtime;
|
|
23116
23409
|
} catch {
|
|
23117
23410
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -23127,10 +23420,10 @@ async function discoverCodexSessions(opts) {
|
|
|
23127
23420
|
|
|
23128
23421
|
// src/import/session-discovery.ts
|
|
23129
23422
|
init_cjs_shims();
|
|
23130
|
-
var
|
|
23423
|
+
var import_promises39 = require("fs/promises");
|
|
23131
23424
|
var import_node_os9 = require("os");
|
|
23132
|
-
var
|
|
23133
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
23425
|
+
var import_node_path56 = __toESM(require("path"), 1);
|
|
23426
|
+
var DEFAULT_PROJECTS_DIR = () => import_node_path56.default.join((0, import_node_os9.homedir)(), ".claude", "projects");
|
|
23134
23427
|
function encodeProjectPath(projectPath) {
|
|
23135
23428
|
return projectPath.replace(/\//g, "-");
|
|
23136
23429
|
}
|
|
@@ -23139,7 +23432,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
23139
23432
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
23140
23433
|
let projectDirs;
|
|
23141
23434
|
try {
|
|
23142
|
-
projectDirs = await (0,
|
|
23435
|
+
projectDirs = await (0, import_promises39.readdir)(projectsDir);
|
|
23143
23436
|
} catch {
|
|
23144
23437
|
return [];
|
|
23145
23438
|
}
|
|
@@ -23149,10 +23442,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
23149
23442
|
}
|
|
23150
23443
|
const sessions = [];
|
|
23151
23444
|
for (const projectDir of projectDirs) {
|
|
23152
|
-
const dirPath =
|
|
23445
|
+
const dirPath = import_node_path56.default.join(projectsDir, projectDir);
|
|
23153
23446
|
let entries;
|
|
23154
23447
|
try {
|
|
23155
|
-
entries = await (0,
|
|
23448
|
+
entries = await (0, import_promises39.readdir)(dirPath);
|
|
23156
23449
|
} catch {
|
|
23157
23450
|
continue;
|
|
23158
23451
|
}
|
|
@@ -23160,10 +23453,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
23160
23453
|
if (!entry.endsWith(".jsonl")) continue;
|
|
23161
23454
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
23162
23455
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
23163
|
-
const filePath =
|
|
23456
|
+
const filePath = import_node_path56.default.join(dirPath, entry);
|
|
23164
23457
|
let updatedAt;
|
|
23165
23458
|
try {
|
|
23166
|
-
const fileStat = await (0,
|
|
23459
|
+
const fileStat = await (0, import_promises39.stat)(filePath);
|
|
23167
23460
|
updatedAt = fileStat.mtime;
|
|
23168
23461
|
} catch {
|
|
23169
23462
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -23185,7 +23478,7 @@ init_cjs_shims();
|
|
|
23185
23478
|
|
|
23186
23479
|
// src/import/types.ts
|
|
23187
23480
|
init_cjs_shims();
|
|
23188
|
-
var
|
|
23481
|
+
var import_promises40 = require("fs/promises");
|
|
23189
23482
|
function toTranscriptJsonLine(entry) {
|
|
23190
23483
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
23191
23484
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -23211,11 +23504,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
23211
23504
|
};
|
|
23212
23505
|
}
|
|
23213
23506
|
async function readTranscriptJsonl(filePath) {
|
|
23214
|
-
const text = await (0,
|
|
23507
|
+
const text = await (0, import_promises40.readFile)(filePath, "utf8");
|
|
23215
23508
|
return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
23216
23509
|
}
|
|
23217
23510
|
async function readTranscriptFile(filePath) {
|
|
23218
|
-
return (0,
|
|
23511
|
+
return (0, import_promises40.readFile)(filePath, "utf8");
|
|
23219
23512
|
}
|
|
23220
23513
|
|
|
23221
23514
|
// src/import/transcript-provider.ts
|