@agentv/core 4.10.0 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-3WGHC7LC.js +149 -0
- package/dist/chunk-3WGHC7LC.js.map +1 -0
- package/dist/{chunk-BWHUWLGW.js → chunk-5POFMJJ7.js} +1 -1
- package/dist/chunk-5POFMJJ7.js.map +1 -0
- package/dist/chunk-SDIANPEY.js +181 -0
- package/dist/chunk-SDIANPEY.js.map +1 -0
- package/dist/docker-workspace-RPPXBT27.js +9 -0
- package/dist/docker-workspace-RPPXBT27.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +70 -3
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +71 -4
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/exec-AR6JUUN5.js +9 -0
- package/dist/exec-AR6JUUN5.js.map +1 -0
- package/dist/index.cjs +1264 -468
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +191 -5
- package/dist/index.d.ts +191 -5
- package/dist/index.js +780 -342
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-BWHUWLGW.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -25,10 +25,17 @@ import {
|
|
|
25
25
|
resolveDelegatedTargetDefinition,
|
|
26
26
|
resolveFileReference,
|
|
27
27
|
resolveTargetDefinition
|
|
28
|
-
} from "./chunk-
|
|
28
|
+
} from "./chunk-5POFMJJ7.js";
|
|
29
|
+
import {
|
|
30
|
+
execFileWithStdin,
|
|
31
|
+
execShellWithStdin
|
|
32
|
+
} from "./chunk-3WGHC7LC.js";
|
|
29
33
|
import {
|
|
30
34
|
AgentvProvider
|
|
31
35
|
} from "./chunk-PRNXHNLF.js";
|
|
36
|
+
import {
|
|
37
|
+
DockerWorkspaceProvider
|
|
38
|
+
} from "./chunk-SDIANPEY.js";
|
|
32
39
|
import {
|
|
33
40
|
OtlpJsonFileExporter
|
|
34
41
|
} from "./chunk-KPSI5CSL.js";
|
|
@@ -152,10 +159,10 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
152
159
|
}
|
|
153
160
|
|
|
154
161
|
// src/evaluation/yaml-parser.ts
|
|
155
|
-
import { readFile as
|
|
162
|
+
import { readFile as readFile8 } from "node:fs/promises";
|
|
156
163
|
import path8 from "node:path";
|
|
157
164
|
import micromatch2 from "micromatch";
|
|
158
|
-
import { parse as
|
|
165
|
+
import { parse as parse3 } from "yaml";
|
|
159
166
|
|
|
160
167
|
// src/evaluation/input-message-utils.ts
|
|
161
168
|
function flattenInputMessages(messages) {
|
|
@@ -441,10 +448,12 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
441
448
|
parsed.execution,
|
|
442
449
|
configPath
|
|
443
450
|
);
|
|
451
|
+
const results = parseResultsConfig(parsed.results, configPath);
|
|
444
452
|
return {
|
|
445
453
|
required_version: requiredVersion,
|
|
446
454
|
eval_patterns: evalPatterns,
|
|
447
|
-
execution: executionDefaults
|
|
455
|
+
execution: executionDefaults,
|
|
456
|
+
results
|
|
448
457
|
};
|
|
449
458
|
} catch (error) {
|
|
450
459
|
logWarning(
|
|
@@ -679,163 +688,74 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
679
688
|
}
|
|
680
689
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
681
690
|
}
|
|
691
|
+
function parseResultsConfig(raw, configPath) {
|
|
692
|
+
if (raw === void 0 || raw === null) {
|
|
693
|
+
return void 0;
|
|
694
|
+
}
|
|
695
|
+
if (typeof raw !== "object" || Array.isArray(raw)) {
|
|
696
|
+
logWarning(`Invalid results in ${configPath}, expected object`);
|
|
697
|
+
return void 0;
|
|
698
|
+
}
|
|
699
|
+
const obj = raw;
|
|
700
|
+
const exportConfig = parseResultsExportConfig(obj.export, configPath);
|
|
701
|
+
if (!exportConfig) {
|
|
702
|
+
return void 0;
|
|
703
|
+
}
|
|
704
|
+
return { export: exportConfig };
|
|
705
|
+
}
|
|
706
|
+
function parseResultsExportConfig(raw, configPath) {
|
|
707
|
+
if (raw === void 0 || raw === null) {
|
|
708
|
+
return void 0;
|
|
709
|
+
}
|
|
710
|
+
if (typeof raw !== "object" || Array.isArray(raw)) {
|
|
711
|
+
logWarning(`Invalid results.export in ${configPath}, expected object`);
|
|
712
|
+
return void 0;
|
|
713
|
+
}
|
|
714
|
+
const obj = raw;
|
|
715
|
+
const repo = typeof obj.repo === "string" ? obj.repo.trim() : "";
|
|
716
|
+
const exportPath = typeof obj.path === "string" ? obj.path.trim() : "";
|
|
717
|
+
if (!repo) {
|
|
718
|
+
logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`);
|
|
719
|
+
return void 0;
|
|
720
|
+
}
|
|
721
|
+
if (!exportPath) {
|
|
722
|
+
logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`);
|
|
723
|
+
return void 0;
|
|
724
|
+
}
|
|
725
|
+
if (obj.auto_push !== void 0 && typeof obj.auto_push !== "boolean") {
|
|
726
|
+
logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`);
|
|
727
|
+
return void 0;
|
|
728
|
+
}
|
|
729
|
+
let branchPrefix;
|
|
730
|
+
if (obj.branch_prefix !== void 0) {
|
|
731
|
+
if (typeof obj.branch_prefix !== "string" || obj.branch_prefix.trim().length === 0) {
|
|
732
|
+
logWarning(
|
|
733
|
+
`Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`
|
|
734
|
+
);
|
|
735
|
+
return void 0;
|
|
736
|
+
}
|
|
737
|
+
branchPrefix = obj.branch_prefix.trim();
|
|
738
|
+
}
|
|
739
|
+
return {
|
|
740
|
+
repo,
|
|
741
|
+
path: exportPath,
|
|
742
|
+
...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
|
|
743
|
+
...branchPrefix && { branch_prefix: branchPrefix }
|
|
744
|
+
};
|
|
745
|
+
}
|
|
682
746
|
function logWarning(message) {
|
|
683
747
|
console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`);
|
|
684
748
|
}
|
|
685
749
|
|
|
686
750
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
751
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
687
752
|
import path5 from "node:path";
|
|
753
|
+
import { parse as parse2 } from "yaml";
|
|
688
754
|
|
|
689
755
|
// src/evaluation/content-preprocessor.ts
|
|
690
756
|
import { readFile as readFile3 } from "node:fs/promises";
|
|
691
757
|
import path4 from "node:path";
|
|
692
758
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
693
|
-
|
|
694
|
-
// src/runtime/exec.ts
|
|
695
|
-
function shellEscapePath(value) {
|
|
696
|
-
if (process.platform === "win32") {
|
|
697
|
-
return `"${value.replaceAll('"', '""')}"`;
|
|
698
|
-
}
|
|
699
|
-
return `'${value.replaceAll("'", `'"'"'`)}'`;
|
|
700
|
-
}
|
|
701
|
-
async function execFileWithStdin(argv, stdinPayload, options = {}) {
|
|
702
|
-
if (argv.length === 0) {
|
|
703
|
-
throw new Error("Executable argv must include at least one entry");
|
|
704
|
-
}
|
|
705
|
-
if (typeof Bun !== "undefined") {
|
|
706
|
-
return execFileWithStdinBun(argv, stdinPayload, options);
|
|
707
|
-
}
|
|
708
|
-
return execFileWithStdinNode(argv, stdinPayload, options);
|
|
709
|
-
}
|
|
710
|
-
async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
711
|
-
const command = [...argv];
|
|
712
|
-
const encoder = new TextEncoder();
|
|
713
|
-
const proc = Bun.spawn(command, {
|
|
714
|
-
cwd: options.cwd,
|
|
715
|
-
stdin: encoder.encode(stdinPayload),
|
|
716
|
-
stdout: "pipe",
|
|
717
|
-
stderr: "pipe",
|
|
718
|
-
// Merge additional env vars with process.env
|
|
719
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
720
|
-
});
|
|
721
|
-
let timedOut = false;
|
|
722
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
723
|
-
timedOut = true;
|
|
724
|
-
proc.kill("SIGKILL");
|
|
725
|
-
}, options.timeoutMs) : void 0;
|
|
726
|
-
try {
|
|
727
|
-
const stdoutPromise = proc.stdout ? new Response(proc.stdout).text() : Promise.resolve("");
|
|
728
|
-
const stderrPromise = proc.stderr ? new Response(proc.stderr).text() : Promise.resolve("");
|
|
729
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
730
|
-
stdoutPromise,
|
|
731
|
-
stderrPromise,
|
|
732
|
-
proc.exited
|
|
733
|
-
]);
|
|
734
|
-
if (timedOut) {
|
|
735
|
-
throw new Error(`Process timed out after ${options.timeoutMs}ms`);
|
|
736
|
-
}
|
|
737
|
-
return {
|
|
738
|
-
stdout: stdout.replace(/\r\n/g, "\n"),
|
|
739
|
-
stderr: stderr.replace(/\r\n/g, "\n"),
|
|
740
|
-
exitCode
|
|
741
|
-
};
|
|
742
|
-
} finally {
|
|
743
|
-
if (timeout !== void 0) {
|
|
744
|
-
clearTimeout(timeout);
|
|
745
|
-
}
|
|
746
|
-
}
|
|
747
|
-
}
|
|
748
|
-
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
749
|
-
const { spawn: spawn5 } = await import("node:child_process");
|
|
750
|
-
return new Promise((resolve, reject) => {
|
|
751
|
-
const [cmd, ...args] = argv;
|
|
752
|
-
const child = spawn5(cmd, args, {
|
|
753
|
-
cwd: options.cwd,
|
|
754
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
755
|
-
// Merge additional env vars with process.env
|
|
756
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
757
|
-
});
|
|
758
|
-
const stdoutChunks = [];
|
|
759
|
-
const stderrChunks = [];
|
|
760
|
-
child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
|
|
761
|
-
child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
|
|
762
|
-
let timedOut = false;
|
|
763
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
764
|
-
timedOut = true;
|
|
765
|
-
child.kill("SIGKILL");
|
|
766
|
-
}, options.timeoutMs) : void 0;
|
|
767
|
-
child.on("error", (error) => {
|
|
768
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
769
|
-
reject(error);
|
|
770
|
-
});
|
|
771
|
-
child.on("close", (code) => {
|
|
772
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
773
|
-
if (timedOut) {
|
|
774
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
775
|
-
return;
|
|
776
|
-
}
|
|
777
|
-
const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
778
|
-
const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
779
|
-
resolve({
|
|
780
|
-
stdout,
|
|
781
|
-
stderr,
|
|
782
|
-
exitCode: code ?? 0
|
|
783
|
-
});
|
|
784
|
-
});
|
|
785
|
-
if (child.stdin) {
|
|
786
|
-
child.stdin.write(stdinPayload);
|
|
787
|
-
child.stdin.end();
|
|
788
|
-
}
|
|
789
|
-
});
|
|
790
|
-
}
|
|
791
|
-
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
792
|
-
const { mkdir: mkdir16, readFile: readFile17, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
793
|
-
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
794
|
-
const path52 = await import("node:path");
|
|
795
|
-
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
796
|
-
const dir = path52.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
797
|
-
await mkdir16(dir, { recursive: true });
|
|
798
|
-
const stdinPath = path52.join(dir, "stdin.txt");
|
|
799
|
-
const stdoutPath = path52.join(dir, "stdout.txt");
|
|
800
|
-
const stderrPath = path52.join(dir, "stderr.txt");
|
|
801
|
-
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
802
|
-
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
803
|
-
const { spawn: spawn5 } = await import("node:child_process");
|
|
804
|
-
try {
|
|
805
|
-
const exitCode = await new Promise((resolve, reject) => {
|
|
806
|
-
const child = spawn5(wrappedCommand, {
|
|
807
|
-
shell: true,
|
|
808
|
-
cwd: options.cwd,
|
|
809
|
-
stdio: ["ignore", "ignore", "ignore"],
|
|
810
|
-
// Merge additional env vars with process.env
|
|
811
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
812
|
-
});
|
|
813
|
-
const timeout = options.timeoutMs ? setTimeout(() => {
|
|
814
|
-
child.kill();
|
|
815
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
816
|
-
}, options.timeoutMs) : void 0;
|
|
817
|
-
child.on("error", (error) => {
|
|
818
|
-
if (timeout !== void 0) {
|
|
819
|
-
clearTimeout(timeout);
|
|
820
|
-
}
|
|
821
|
-
reject(error);
|
|
822
|
-
});
|
|
823
|
-
child.on("exit", (code) => {
|
|
824
|
-
if (timeout !== void 0) {
|
|
825
|
-
clearTimeout(timeout);
|
|
826
|
-
}
|
|
827
|
-
resolve(code ?? 0);
|
|
828
|
-
});
|
|
829
|
-
});
|
|
830
|
-
const stdout = (await readFile17(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
831
|
-
const stderr = (await readFile17(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
832
|
-
return { stdout, stderr, exitCode };
|
|
833
|
-
} finally {
|
|
834
|
-
await rm6(dir, { recursive: true, force: true });
|
|
835
|
-
}
|
|
836
|
-
}
|
|
837
|
-
|
|
838
|
-
// src/evaluation/content-preprocessor.ts
|
|
839
759
|
var MIME_TYPE_ALIASES = {
|
|
840
760
|
csv: "text/csv",
|
|
841
761
|
docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
@@ -1076,6 +996,7 @@ function validateTemplateVariables(content, source) {
|
|
|
1076
996
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
1077
997
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
1078
998
|
var ANSI_RESET4 = "\x1B[0m";
|
|
999
|
+
var MAX_ASSERTION_INCLUDE_DEPTH = 3;
|
|
1079
1000
|
var PROMPT_FILE_PREFIX = "file://";
|
|
1080
1001
|
function normalizeEvaluatorType(type) {
|
|
1081
1002
|
return type.replace(/_/g, "-");
|
|
@@ -1108,7 +1029,79 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
|
|
|
1108
1029
|
const evaluators = [...parsedCase ?? [], ...parsedRoot ?? []];
|
|
1109
1030
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
1110
1031
|
}
|
|
1111
|
-
|
|
1032
|
+
function isIncludeEntry(value) {
|
|
1033
|
+
return isJsonObject2(value) && typeof value.include === "string" && Object.keys(value).length === 1;
|
|
1034
|
+
}
|
|
1035
|
+
function isTemplateReference(value) {
|
|
1036
|
+
return !value.startsWith(".") && !value.includes("/") && !value.includes("\\");
|
|
1037
|
+
}
|
|
1038
|
+
async function resolveAssertionTemplateReference(include, searchRoots) {
|
|
1039
|
+
const templateCandidates = isTemplateReference(include) ? [
|
|
1040
|
+
path5.join(".agentv", "templates", `${include}.yaml`),
|
|
1041
|
+
path5.join(".agentv", "templates", `${include}.yml`)
|
|
1042
|
+
] : [include];
|
|
1043
|
+
const attempted = [];
|
|
1044
|
+
for (const candidate of templateCandidates) {
|
|
1045
|
+
const resolved = await resolveFileReference2(candidate, searchRoots);
|
|
1046
|
+
attempted.push(...resolved.attempted);
|
|
1047
|
+
if (resolved.resolvedPath) {
|
|
1048
|
+
return {
|
|
1049
|
+
displayPath: resolved.displayPath,
|
|
1050
|
+
resolvedPath: resolved.resolvedPath,
|
|
1051
|
+
attempted
|
|
1052
|
+
};
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
return {
|
|
1056
|
+
displayPath: templateCandidates[0] ?? include,
|
|
1057
|
+
resolvedPath: "",
|
|
1058
|
+
attempted
|
|
1059
|
+
};
|
|
1060
|
+
}
|
|
1061
|
+
async function loadAssertionTemplateEntries(include, searchRoots, evalId, includeContext) {
|
|
1062
|
+
const nextDepth = includeContext.depth + 1;
|
|
1063
|
+
if (nextDepth > MAX_ASSERTION_INCLUDE_DEPTH) {
|
|
1064
|
+
const chain = [...includeContext.chain, include].join(" -> ");
|
|
1065
|
+
throw new Error(
|
|
1066
|
+
`Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${chain}`
|
|
1067
|
+
);
|
|
1068
|
+
}
|
|
1069
|
+
const resolved = await resolveAssertionTemplateReference(include, searchRoots);
|
|
1070
|
+
if (!resolved.resolvedPath) {
|
|
1071
|
+
const attempted = resolved.attempted.length > 0 ? `
|
|
1072
|
+
${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
1073
|
+
throw new Error(
|
|
1074
|
+
`Assertion template not found in '${evalId}': ${resolved.displayPath}${attempted}`
|
|
1075
|
+
);
|
|
1076
|
+
}
|
|
1077
|
+
if (includeContext.chain.includes(resolved.resolvedPath)) {
|
|
1078
|
+
const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
|
|
1079
|
+
throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
|
|
1080
|
+
}
|
|
1081
|
+
const content = await readFile5(resolved.resolvedPath, "utf8");
|
|
1082
|
+
const parsed = interpolateEnv(parse2(content), process.env);
|
|
1083
|
+
if (!isJsonObject2(parsed)) {
|
|
1084
|
+
throw new Error(
|
|
1085
|
+
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} (expected a YAML object with an assertions array)`
|
|
1086
|
+
);
|
|
1087
|
+
}
|
|
1088
|
+
const assertions = parsed.assertions;
|
|
1089
|
+
if (!Array.isArray(assertions)) {
|
|
1090
|
+
throw new Error(
|
|
1091
|
+
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
|
|
1092
|
+
);
|
|
1093
|
+
}
|
|
1094
|
+
const templateDir = path5.dirname(resolved.resolvedPath);
|
|
1095
|
+
const nestedSearchRoots = [
|
|
1096
|
+
templateDir,
|
|
1097
|
+
...searchRoots.filter((root) => path5.resolve(root) !== templateDir)
|
|
1098
|
+
];
|
|
1099
|
+
return await expandEvaluatorEntries(assertions, nestedSearchRoots, evalId, {
|
|
1100
|
+
depth: nextDepth,
|
|
1101
|
+
chain: [...includeContext.chain, resolved.resolvedPath]
|
|
1102
|
+
}) ?? [];
|
|
1103
|
+
}
|
|
1104
|
+
async function expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
|
|
1112
1105
|
if (candidateEvaluators === void 0) {
|
|
1113
1106
|
return void 0;
|
|
1114
1107
|
}
|
|
@@ -1116,13 +1109,34 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
|
|
|
1116
1109
|
logWarning2(`Skipping evaluators for '${evalId}': expected array`);
|
|
1117
1110
|
return void 0;
|
|
1118
1111
|
}
|
|
1119
|
-
const
|
|
1120
|
-
const
|
|
1112
|
+
const expanded = [];
|
|
1113
|
+
for (const rawEvaluator of candidateEvaluators) {
|
|
1114
|
+
if (isIncludeEntry(rawEvaluator)) {
|
|
1115
|
+
const included = await loadAssertionTemplateEntries(
|
|
1116
|
+
rawEvaluator.include,
|
|
1117
|
+
searchRoots,
|
|
1118
|
+
evalId,
|
|
1119
|
+
includeContext
|
|
1120
|
+
);
|
|
1121
|
+
expanded.push(...included);
|
|
1122
|
+
continue;
|
|
1123
|
+
}
|
|
1124
|
+
expanded.push(rawEvaluator);
|
|
1125
|
+
}
|
|
1126
|
+
return expanded;
|
|
1127
|
+
}
|
|
1128
|
+
async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
|
|
1129
|
+
const expandedEvaluators = await expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId);
|
|
1130
|
+
if (!expandedEvaluators) {
|
|
1131
|
+
return void 0;
|
|
1132
|
+
}
|
|
1133
|
+
const firstStringIndex = expandedEvaluators.findIndex((e) => typeof e === "string");
|
|
1134
|
+
const processedEvaluators = firstStringIndex === -1 ? [...expandedEvaluators] : (() => {
|
|
1121
1135
|
const PLACEHOLDER = Symbol("rubric-placeholder");
|
|
1122
1136
|
const strings = [];
|
|
1123
1137
|
const result = [];
|
|
1124
1138
|
let rubricInserted = false;
|
|
1125
|
-
for (const item of
|
|
1139
|
+
for (const item of expandedEvaluators) {
|
|
1126
1140
|
if (typeof item === "string") {
|
|
1127
1141
|
const trimmed = item.trim();
|
|
1128
1142
|
if (trimmed.length === 0) {
|
|
@@ -1337,8 +1351,16 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
|
|
|
1337
1351
|
);
|
|
1338
1352
|
continue;
|
|
1339
1353
|
}
|
|
1354
|
+
const expandedMembers = await expandEvaluatorEntries(
|
|
1355
|
+
rawMembers,
|
|
1356
|
+
searchRoots,
|
|
1357
|
+
`${evalId}:${name}`
|
|
1358
|
+
);
|
|
1359
|
+
if (!expandedMembers) {
|
|
1360
|
+
continue;
|
|
1361
|
+
}
|
|
1340
1362
|
const memberEvaluators = [];
|
|
1341
|
-
for (const rawMember of
|
|
1363
|
+
for (const rawMember of expandedMembers) {
|
|
1342
1364
|
if (!isJsonObject2(rawMember)) {
|
|
1343
1365
|
logWarning2(`Skipping invalid member evaluator in composite '${name}' (expected object)`);
|
|
1344
1366
|
continue;
|
|
@@ -2664,13 +2686,13 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
2664
2686
|
}
|
|
2665
2687
|
|
|
2666
2688
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2667
|
-
import { readFile as
|
|
2689
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
2668
2690
|
import path7 from "node:path";
|
|
2669
2691
|
import micromatch from "micromatch";
|
|
2670
2692
|
import { parse as parseYaml } from "yaml";
|
|
2671
2693
|
|
|
2672
2694
|
// src/evaluation/loaders/message-processor.ts
|
|
2673
|
-
import { readFile as
|
|
2695
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
2674
2696
|
import path6 from "node:path";
|
|
2675
2697
|
|
|
2676
2698
|
// src/evaluation/formatting/segment-formatter.ts
|
|
@@ -2787,7 +2809,7 @@ async function processMessages(options) {
|
|
|
2787
2809
|
continue;
|
|
2788
2810
|
}
|
|
2789
2811
|
try {
|
|
2790
|
-
const fileContent = (await
|
|
2812
|
+
const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2791
2813
|
processedContent.push({
|
|
2792
2814
|
...cloneJsonObject(rawSegment),
|
|
2793
2815
|
path: displayPath,
|
|
@@ -2828,7 +2850,7 @@ async function processMessages(options) {
|
|
|
2828
2850
|
continue;
|
|
2829
2851
|
}
|
|
2830
2852
|
try {
|
|
2831
|
-
const imageBuffer = await
|
|
2853
|
+
const imageBuffer = await readFile6(resolvedPath);
|
|
2832
2854
|
const base64 = imageBuffer.toString("base64");
|
|
2833
2855
|
processedContent.push({
|
|
2834
2856
|
type: "image",
|
|
@@ -2905,7 +2927,7 @@ async function processExpectedMessages(options) {
|
|
|
2905
2927
|
continue;
|
|
2906
2928
|
}
|
|
2907
2929
|
try {
|
|
2908
|
-
const fileContent = (await
|
|
2930
|
+
const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2909
2931
|
processedContent.push({
|
|
2910
2932
|
type: "file",
|
|
2911
2933
|
path: displayPath,
|
|
@@ -2945,7 +2967,7 @@ async function processExpectedMessages(options) {
|
|
|
2945
2967
|
continue;
|
|
2946
2968
|
}
|
|
2947
2969
|
try {
|
|
2948
|
-
const imageBuffer = await
|
|
2970
|
+
const imageBuffer = await readFile6(resolvedPath);
|
|
2949
2971
|
const base64 = imageBuffer.toString("base64");
|
|
2950
2972
|
processedContent.push({
|
|
2951
2973
|
type: "image",
|
|
@@ -3073,7 +3095,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
3073
3095
|
return {};
|
|
3074
3096
|
}
|
|
3075
3097
|
try {
|
|
3076
|
-
const content = await
|
|
3098
|
+
const content = await readFile7(sidecarPath, "utf8");
|
|
3077
3099
|
const parsed = interpolateEnv(parseYaml(content), process.env);
|
|
3078
3100
|
if (!isJsonObject(parsed)) {
|
|
3079
3101
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
@@ -3118,7 +3140,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
3118
3140
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3119
3141
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
3120
3142
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
3121
|
-
const rawFile = await
|
|
3143
|
+
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
3122
3144
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
3123
3145
|
const fallbackSuiteName = path7.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
3124
3146
|
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
@@ -3300,11 +3322,13 @@ function parseRepoCheckout(raw) {
|
|
|
3300
3322
|
if (!isJsonObject(raw)) return void 0;
|
|
3301
3323
|
const obj = raw;
|
|
3302
3324
|
const ref = typeof obj.ref === "string" ? obj.ref : void 0;
|
|
3325
|
+
const baseCommit = typeof obj.base_commit === "string" ? obj.base_commit : void 0;
|
|
3303
3326
|
const resolve = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
|
|
3304
3327
|
const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
|
|
3305
|
-
if (!ref && !resolve && ancestor === void 0) return void 0;
|
|
3328
|
+
if (!ref && !baseCommit && !resolve && ancestor === void 0) return void 0;
|
|
3306
3329
|
return {
|
|
3307
3330
|
...ref !== void 0 && { ref },
|
|
3331
|
+
...baseCommit !== void 0 && { base_commit: baseCommit },
|
|
3308
3332
|
...resolve !== void 0 && { resolve },
|
|
3309
3333
|
...ancestor !== void 0 && { ancestor }
|
|
3310
3334
|
};
|
|
@@ -3327,12 +3351,12 @@ function parseRepoConfig(raw) {
|
|
|
3327
3351
|
const obj = raw;
|
|
3328
3352
|
const repoPath = typeof obj.path === "string" ? obj.path : void 0;
|
|
3329
3353
|
const source = parseRepoSource(obj.source);
|
|
3330
|
-
if (!repoPath || !source) return void 0;
|
|
3331
3354
|
const checkout = parseRepoCheckout(obj.checkout);
|
|
3332
3355
|
const clone = parseRepoClone(obj.clone);
|
|
3356
|
+
if (!repoPath && !source && !checkout && !clone) return void 0;
|
|
3333
3357
|
return {
|
|
3334
|
-
path: repoPath,
|
|
3335
|
-
source,
|
|
3358
|
+
...repoPath !== void 0 && { path: repoPath },
|
|
3359
|
+
...source !== void 0 && { source },
|
|
3336
3360
|
...checkout !== void 0 && { checkout },
|
|
3337
3361
|
...clone !== void 0 && { clone }
|
|
3338
3362
|
};
|
|
@@ -3383,7 +3407,8 @@ ${messageContent}`);
|
|
|
3383
3407
|
segmentsByMessage,
|
|
3384
3408
|
mode
|
|
3385
3409
|
}) : void 0;
|
|
3386
|
-
|
|
3410
|
+
const systemMessage = extractSystemMessage(testCase.input, segmentsByMessage, mode);
|
|
3411
|
+
return { question, chatPrompt, systemMessage };
|
|
3387
3412
|
}
|
|
3388
3413
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
3389
3414
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -3397,6 +3422,26 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
3397
3422
|
}
|
|
3398
3423
|
return messagesWithContent > 1;
|
|
3399
3424
|
}
|
|
3425
|
+
function extractSystemMessage(messages, segmentsByMessage, mode) {
|
|
3426
|
+
const systemParts = [];
|
|
3427
|
+
for (let i = 0; i < messages.length; i++) {
|
|
3428
|
+
if (messages[i].role !== "system") {
|
|
3429
|
+
break;
|
|
3430
|
+
}
|
|
3431
|
+
const segments = segmentsByMessage[i];
|
|
3432
|
+
const contentParts = [];
|
|
3433
|
+
for (const segment of segments) {
|
|
3434
|
+
const formatted = formatSegment(segment, mode);
|
|
3435
|
+
if (formatted) {
|
|
3436
|
+
contentParts.push(formatted);
|
|
3437
|
+
}
|
|
3438
|
+
}
|
|
3439
|
+
if (contentParts.length > 0) {
|
|
3440
|
+
systemParts.push(contentParts.join("\n"));
|
|
3441
|
+
}
|
|
3442
|
+
}
|
|
3443
|
+
return systemParts.length > 0 ? systemParts.join("\n\n") : void 0;
|
|
3444
|
+
}
|
|
3400
3445
|
function buildChatPromptFromSegments(options) {
|
|
3401
3446
|
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
3402
3447
|
if (messages.length === 0) {
|
|
@@ -3480,8 +3525,8 @@ function resolveTests(suite) {
|
|
|
3480
3525
|
async function readTestSuiteMetadata(testFilePath) {
|
|
3481
3526
|
try {
|
|
3482
3527
|
const absolutePath = path8.resolve(testFilePath);
|
|
3483
|
-
const content = await
|
|
3484
|
-
const parsed = interpolateEnv(
|
|
3528
|
+
const content = await readFile8(absolutePath, "utf8");
|
|
3529
|
+
const parsed = interpolateEnv(parse3(content), process.env);
|
|
3485
3530
|
if (!isJsonObject(parsed)) {
|
|
3486
3531
|
return {};
|
|
3487
3532
|
}
|
|
@@ -3538,8 +3583,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3538
3583
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3539
3584
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
3540
3585
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
3541
|
-
const rawFile = await
|
|
3542
|
-
const interpolated = interpolateEnv(
|
|
3586
|
+
const rawFile = await readFile8(absoluteTestPath, "utf8");
|
|
3587
|
+
const interpolated = interpolateEnv(parse3(rawFile), process.env);
|
|
3543
3588
|
if (!isJsonObject(interpolated)) {
|
|
3544
3589
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
3545
3590
|
}
|
|
@@ -3680,7 +3725,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3680
3725
|
const testCase = {
|
|
3681
3726
|
id,
|
|
3682
3727
|
suite: suiteName,
|
|
3683
|
-
category: options?.category,
|
|
3728
|
+
category: suite.category ?? options?.category,
|
|
3684
3729
|
conversation_id: conversationId,
|
|
3685
3730
|
question,
|
|
3686
3731
|
input: inputMessages,
|
|
@@ -3773,11 +3818,11 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
3773
3818
|
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
3774
3819
|
let content;
|
|
3775
3820
|
try {
|
|
3776
|
-
content = await
|
|
3821
|
+
content = await readFile8(workspaceFilePath, "utf8");
|
|
3777
3822
|
} catch {
|
|
3778
3823
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3779
3824
|
}
|
|
3780
|
-
const parsed = interpolateEnv(
|
|
3825
|
+
const parsed = interpolateEnv(parse3(content), process.env);
|
|
3781
3826
|
if (!isJsonObject(parsed)) {
|
|
3782
3827
|
throw new Error(
|
|
3783
3828
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -3812,14 +3857,28 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3812
3857
|
const explicitMode = obj.mode === "pooled" || obj.mode === "temp" || obj.mode === "static" ? obj.mode : void 0;
|
|
3813
3858
|
const workspacePath = typeof obj.path === "string" ? obj.path : void 0;
|
|
3814
3859
|
const mode = explicitMode ?? (workspacePath ? "static" : void 0);
|
|
3815
|
-
|
|
3860
|
+
const docker = parseDockerWorkspaceConfig(obj.docker);
|
|
3861
|
+
if (!template && !isolation && !repos && !hooks && !mode && !workspacePath && !docker)
|
|
3862
|
+
return void 0;
|
|
3816
3863
|
return {
|
|
3817
3864
|
...template !== void 0 && { template },
|
|
3818
3865
|
...isolation !== void 0 && { isolation },
|
|
3819
3866
|
...repos !== void 0 && { repos },
|
|
3820
3867
|
...hooks !== void 0 && { hooks },
|
|
3821
3868
|
...mode !== void 0 && { mode },
|
|
3822
|
-
...workspacePath !== void 0 && { path: workspacePath }
|
|
3869
|
+
...workspacePath !== void 0 && { path: workspacePath },
|
|
3870
|
+
...docker !== void 0 && { docker }
|
|
3871
|
+
};
|
|
3872
|
+
}
|
|
3873
|
+
function parseDockerWorkspaceConfig(raw) {
|
|
3874
|
+
if (!isJsonObject(raw)) return void 0;
|
|
3875
|
+
const obj = raw;
|
|
3876
|
+
if (typeof obj.image !== "string") return void 0;
|
|
3877
|
+
return {
|
|
3878
|
+
image: obj.image,
|
|
3879
|
+
...typeof obj.timeout === "number" && { timeout: obj.timeout },
|
|
3880
|
+
...typeof obj.memory === "string" && { memory: obj.memory },
|
|
3881
|
+
...typeof obj.cpus === "number" && { cpus: obj.cpus }
|
|
3823
3882
|
};
|
|
3824
3883
|
}
|
|
3825
3884
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
@@ -3848,7 +3907,8 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3848
3907
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
3849
3908
|
...hasHooks && { hooks: mergedHooks },
|
|
3850
3909
|
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
3851
|
-
path: caseLevel.path ?? suiteLevel.path
|
|
3910
|
+
path: caseLevel.path ?? suiteLevel.path,
|
|
3911
|
+
docker: caseLevel.docker ?? suiteLevel.docker
|
|
3852
3912
|
};
|
|
3853
3913
|
}
|
|
3854
3914
|
function asString5(value) {
|
|
@@ -3876,7 +3936,7 @@ ${detailBlock}${ANSI_RESET7}`);
|
|
|
3876
3936
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3877
3937
|
import { readFileSync } from "node:fs";
|
|
3878
3938
|
import path9 from "node:path";
|
|
3879
|
-
import { parse as
|
|
3939
|
+
import { parse as parse4 } from "yaml";
|
|
3880
3940
|
function codeGraderInstruction(graderName, description) {
|
|
3881
3941
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
3882
3942
|
return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
|
|
@@ -4115,7 +4175,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
4115
4175
|
}
|
|
4116
4176
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
4117
4177
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
4118
|
-
const parsed =
|
|
4178
|
+
const parsed = parse4(content);
|
|
4119
4179
|
return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
|
|
4120
4180
|
}
|
|
4121
4181
|
function getOutputFilenames(result) {
|
|
@@ -6596,7 +6656,7 @@ import { arch, platform } from "node:os";
|
|
|
6596
6656
|
import path15 from "node:path";
|
|
6597
6657
|
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
6598
6658
|
function resolvePlatformCliPath() {
|
|
6599
|
-
const
|
|
6659
|
+
const os4 = platform();
|
|
6600
6660
|
const cpu = arch();
|
|
6601
6661
|
const platformMap = {
|
|
6602
6662
|
linux: "linux",
|
|
@@ -6607,13 +6667,13 @@ function resolvePlatformCliPath() {
|
|
|
6607
6667
|
x64: "x64",
|
|
6608
6668
|
arm64: "arm64"
|
|
6609
6669
|
};
|
|
6610
|
-
const osPart = platformMap[
|
|
6670
|
+
const osPart = platformMap[os4];
|
|
6611
6671
|
const archPart = archMap[cpu];
|
|
6612
6672
|
if (!osPart || !archPart) {
|
|
6613
6673
|
return void 0;
|
|
6614
6674
|
}
|
|
6615
6675
|
const packageName = `@github/copilot-${osPart}-${archPart}`;
|
|
6616
|
-
const binaryName =
|
|
6676
|
+
const binaryName = os4 === "win32" ? "copilot.exe" : "copilot";
|
|
6617
6677
|
try {
|
|
6618
6678
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6619
6679
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath3(resolved) : resolved;
|
|
@@ -7130,7 +7190,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
7130
7190
|
}
|
|
7131
7191
|
|
|
7132
7192
|
// src/evaluation/providers/copilot-log.ts
|
|
7133
|
-
import { readFile as
|
|
7193
|
+
import { readFile as readFile10 } from "node:fs/promises";
|
|
7134
7194
|
import { homedir as homedir2 } from "node:os";
|
|
7135
7195
|
import path18 from "node:path";
|
|
7136
7196
|
|
|
@@ -7264,7 +7324,7 @@ function parseCopilotEvents(eventsJsonl) {
|
|
|
7264
7324
|
}
|
|
7265
7325
|
|
|
7266
7326
|
// src/evaluation/providers/copilot-session-discovery.ts
|
|
7267
|
-
import { readFile as
|
|
7327
|
+
import { readFile as readFile9, readdir, stat } from "node:fs/promises";
|
|
7268
7328
|
import { homedir } from "node:os";
|
|
7269
7329
|
import path17 from "node:path";
|
|
7270
7330
|
import { parse as parseYaml2 } from "yaml";
|
|
@@ -7284,7 +7344,7 @@ async function discoverCopilotSessions(opts) {
|
|
|
7284
7344
|
const workspacePath = path17.join(sessionDir, "workspace.yaml");
|
|
7285
7345
|
const eventsPath = path17.join(sessionDir, "events.jsonl");
|
|
7286
7346
|
try {
|
|
7287
|
-
const workspaceContent = await
|
|
7347
|
+
const workspaceContent = await readFile9(workspacePath, "utf8");
|
|
7288
7348
|
const workspace = parseYaml2(workspaceContent) ?? {};
|
|
7289
7349
|
const cwd = String(workspace.cwd ?? "");
|
|
7290
7350
|
let updatedAt;
|
|
@@ -7346,7 +7406,7 @@ var CopilotLogProvider = class {
|
|
|
7346
7406
|
const eventsPath = path18.join(sessionDir, "events.jsonl");
|
|
7347
7407
|
let eventsContent;
|
|
7348
7408
|
try {
|
|
7349
|
-
eventsContent = await
|
|
7409
|
+
eventsContent = await readFile10(eventsPath, "utf8");
|
|
7350
7410
|
} catch (err) {
|
|
7351
7411
|
throw new Error(
|
|
7352
7412
|
`Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -9632,7 +9692,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
9632
9692
|
}
|
|
9633
9693
|
|
|
9634
9694
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
9635
|
-
import { readFile as
|
|
9695
|
+
import { readFile as readFile11 } from "node:fs/promises";
|
|
9636
9696
|
import path26 from "node:path";
|
|
9637
9697
|
|
|
9638
9698
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
@@ -9671,7 +9731,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
9671
9731
|
const maxAttempts = 10;
|
|
9672
9732
|
while (attempts < maxAttempts) {
|
|
9673
9733
|
try {
|
|
9674
|
-
const content = await
|
|
9734
|
+
const content = await readFile11(responseFileFinal, { encoding: "utf8" });
|
|
9675
9735
|
if (!silent) {
|
|
9676
9736
|
process.stdout.write(`${content}
|
|
9677
9737
|
`);
|
|
@@ -9728,7 +9788,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9728
9788
|
const maxAttempts = 10;
|
|
9729
9789
|
while (attempts < maxAttempts) {
|
|
9730
9790
|
try {
|
|
9731
|
-
const content = await
|
|
9791
|
+
const content = await readFile11(file, { encoding: "utf8" });
|
|
9732
9792
|
if (!silent) {
|
|
9733
9793
|
process.stdout.write(`${content}
|
|
9734
9794
|
`);
|
|
@@ -9913,7 +9973,7 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
9913
9973
|
}
|
|
9914
9974
|
|
|
9915
9975
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
9916
|
-
import { copyFile, mkdir as mkdir10, readFile as
|
|
9976
|
+
import { copyFile, mkdir as mkdir10, readFile as readFile12, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
|
|
9917
9977
|
import path30 from "node:path";
|
|
9918
9978
|
|
|
9919
9979
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
@@ -10030,7 +10090,7 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
10030
10090
|
if (!stats.isFile()) {
|
|
10031
10091
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
10032
10092
|
}
|
|
10033
|
-
const templateText = await
|
|
10093
|
+
const templateText = await readFile12(workspaceSrc, "utf8");
|
|
10034
10094
|
workspaceContent = JSON.parse(templateText);
|
|
10035
10095
|
} else {
|
|
10036
10096
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
@@ -10893,9 +10953,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
10893
10953
|
|
|
10894
10954
|
// src/evaluation/providers/targets-file.ts
|
|
10895
10955
|
import { constants as constants4 } from "node:fs";
|
|
10896
|
-
import { access as access4, readFile as
|
|
10956
|
+
import { access as access4, readFile as readFile13 } from "node:fs/promises";
|
|
10897
10957
|
import path34 from "node:path";
|
|
10898
|
-
import { parse as
|
|
10958
|
+
import { parse as parse5 } from "yaml";
|
|
10899
10959
|
function isRecord(value) {
|
|
10900
10960
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
10901
10961
|
}
|
|
@@ -10938,8 +10998,8 @@ async function readTargetDefinitions(filePath) {
|
|
|
10938
10998
|
if (!await fileExists3(absolutePath)) {
|
|
10939
10999
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
10940
11000
|
}
|
|
10941
|
-
const raw = await
|
|
10942
|
-
const parsed =
|
|
11001
|
+
const raw = await readFile13(absolutePath, "utf8");
|
|
11002
|
+
const parsed = parse5(raw);
|
|
10943
11003
|
if (!isRecord(parsed)) {
|
|
10944
11004
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
10945
11005
|
}
|
|
@@ -11381,6 +11441,18 @@ function toCamelCaseDeep(obj) {
|
|
|
11381
11441
|
return obj;
|
|
11382
11442
|
}
|
|
11383
11443
|
|
|
11444
|
+
// src/evaluation/workspace/repo-checkout.ts
|
|
11445
|
+
function getRepoCheckoutRef(checkout) {
|
|
11446
|
+
return checkout?.base_commit ?? checkout?.ref ?? "HEAD";
|
|
11447
|
+
}
|
|
11448
|
+
function getRepoCheckoutTargets(repos) {
|
|
11449
|
+
if (!repos) return [];
|
|
11450
|
+
return repos.filter((repo) => repo.checkout?.base_commit || repo.checkout?.ref).map((repo) => ({
|
|
11451
|
+
path: repo.path,
|
|
11452
|
+
ref: getRepoCheckoutRef(repo.checkout)
|
|
11453
|
+
}));
|
|
11454
|
+
}
|
|
11455
|
+
|
|
11384
11456
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
11385
11457
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
11386
11458
|
var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
|
|
@@ -11515,13 +11587,31 @@ var CodeEvaluator = class {
|
|
|
11515
11587
|
const workspaceEnv = context.workspacePath ? { AGENTV_WORKSPACE_PATH: context.workspacePath } : void 0;
|
|
11516
11588
|
const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
|
|
11517
11589
|
try {
|
|
11518
|
-
|
|
11519
|
-
|
|
11520
|
-
|
|
11521
|
-
|
|
11522
|
-
|
|
11523
|
-
|
|
11524
|
-
|
|
11590
|
+
let stdout;
|
|
11591
|
+
if (context.dockerConfig) {
|
|
11592
|
+
const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27.js");
|
|
11593
|
+
const dockerProvider = new DockerWorkspaceProvider2(context.dockerConfig);
|
|
11594
|
+
const result = await dockerProvider.runGraderInContainer({
|
|
11595
|
+
command: [...this.command],
|
|
11596
|
+
stdin: inputPayload,
|
|
11597
|
+
repoCheckouts: getRepoCheckoutTargets(context.evalCase.workspace?.repos)
|
|
11598
|
+
});
|
|
11599
|
+
if (result.exitCode !== 0) {
|
|
11600
|
+
const trimmedErr = result.stderr.trim();
|
|
11601
|
+
throw new Error(
|
|
11602
|
+
trimmedErr.length > 0 ? `Code evaluator exited with code ${result.exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${result.exitCode}`
|
|
11603
|
+
);
|
|
11604
|
+
}
|
|
11605
|
+
stdout = result.stdout.trim();
|
|
11606
|
+
} else {
|
|
11607
|
+
stdout = await executeScript(
|
|
11608
|
+
this.command,
|
|
11609
|
+
inputPayload,
|
|
11610
|
+
this.agentTimeoutMs,
|
|
11611
|
+
this.cwd,
|
|
11612
|
+
env
|
|
11613
|
+
);
|
|
11614
|
+
}
|
|
11525
11615
|
const parsed = parseJsonSafe(stdout);
|
|
11526
11616
|
const score = clampScore(typeof parsed?.score === "number" ? parsed.score : 0);
|
|
11527
11617
|
const assertions = Array.isArray(parsed?.assertions) ? parsed.assertions.filter(
|
|
@@ -12682,11 +12772,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
12682
12772
|
execute: async (input) => {
|
|
12683
12773
|
try {
|
|
12684
12774
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
12685
|
-
const
|
|
12686
|
-
if (
|
|
12775
|
+
const stat12 = await fs2.stat(resolved);
|
|
12776
|
+
if (stat12.isDirectory()) {
|
|
12687
12777
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
12688
12778
|
}
|
|
12689
|
-
const buffer = Buffer.alloc(Math.min(
|
|
12779
|
+
const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
|
|
12690
12780
|
const fd = await fs2.open(resolved, "r");
|
|
12691
12781
|
try {
|
|
12692
12782
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -12694,8 +12784,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
12694
12784
|
await fd.close();
|
|
12695
12785
|
}
|
|
12696
12786
|
const content = buffer.toString("utf-8");
|
|
12697
|
-
const truncated =
|
|
12698
|
-
return { content, truncated, size:
|
|
12787
|
+
const truncated = stat12.size > MAX_FILE_SIZE;
|
|
12788
|
+
return { content, truncated, size: stat12.size };
|
|
12699
12789
|
} catch (error) {
|
|
12700
12790
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
12701
12791
|
}
|
|
@@ -12746,8 +12836,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
12746
12836
|
const ext = path36.extname(entry.name).toLowerCase();
|
|
12747
12837
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
12748
12838
|
try {
|
|
12749
|
-
const
|
|
12750
|
-
if (
|
|
12839
|
+
const stat12 = await fs2.stat(fullPath);
|
|
12840
|
+
if (stat12.size > MAX_FILE_SIZE) continue;
|
|
12751
12841
|
const content = await fs2.readFile(fullPath, "utf-8");
|
|
12752
12842
|
const lines = content.split("\n");
|
|
12753
12843
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -13388,115 +13478,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
13388
13478
|
* Evaluate a single field against the expected value.
|
|
13389
13479
|
*/
|
|
13390
13480
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
13391
|
-
const { path:
|
|
13392
|
-
const candidateValue = resolvePath(candidateData,
|
|
13393
|
-
const expectedValue = resolvePath(expectedData,
|
|
13481
|
+
const { path: path53, match, required = true, weight = 1 } = fieldConfig;
|
|
13482
|
+
const candidateValue = resolvePath(candidateData, path53);
|
|
13483
|
+
const expectedValue = resolvePath(expectedData, path53);
|
|
13394
13484
|
if (expectedValue === void 0) {
|
|
13395
13485
|
return {
|
|
13396
|
-
path:
|
|
13486
|
+
path: path53,
|
|
13397
13487
|
score: 1,
|
|
13398
13488
|
// No expected value means no comparison needed
|
|
13399
13489
|
weight,
|
|
13400
13490
|
hit: true,
|
|
13401
|
-
message: `${
|
|
13491
|
+
message: `${path53}: no expected value`
|
|
13402
13492
|
};
|
|
13403
13493
|
}
|
|
13404
13494
|
if (candidateValue === void 0) {
|
|
13405
13495
|
if (required) {
|
|
13406
13496
|
return {
|
|
13407
|
-
path:
|
|
13497
|
+
path: path53,
|
|
13408
13498
|
score: 0,
|
|
13409
13499
|
weight,
|
|
13410
13500
|
hit: false,
|
|
13411
|
-
message: `${
|
|
13501
|
+
message: `${path53} (required, missing)`
|
|
13412
13502
|
};
|
|
13413
13503
|
}
|
|
13414
13504
|
return {
|
|
13415
|
-
path:
|
|
13505
|
+
path: path53,
|
|
13416
13506
|
score: 1,
|
|
13417
13507
|
// Don't penalize missing optional fields
|
|
13418
13508
|
weight: 0,
|
|
13419
13509
|
// Zero weight means it won't affect the score
|
|
13420
13510
|
hit: true,
|
|
13421
|
-
message: `${
|
|
13511
|
+
message: `${path53}: optional field missing`
|
|
13422
13512
|
};
|
|
13423
13513
|
}
|
|
13424
13514
|
switch (match) {
|
|
13425
13515
|
case "exact":
|
|
13426
|
-
return this.compareExact(
|
|
13516
|
+
return this.compareExact(path53, candidateValue, expectedValue, weight);
|
|
13427
13517
|
case "numeric_tolerance":
|
|
13428
13518
|
return this.compareNumericTolerance(
|
|
13429
|
-
|
|
13519
|
+
path53,
|
|
13430
13520
|
candidateValue,
|
|
13431
13521
|
expectedValue,
|
|
13432
13522
|
fieldConfig,
|
|
13433
13523
|
weight
|
|
13434
13524
|
);
|
|
13435
13525
|
case "date":
|
|
13436
|
-
return this.compareDate(
|
|
13526
|
+
return this.compareDate(path53, candidateValue, expectedValue, fieldConfig, weight);
|
|
13437
13527
|
default:
|
|
13438
13528
|
return {
|
|
13439
|
-
path:
|
|
13529
|
+
path: path53,
|
|
13440
13530
|
score: 0,
|
|
13441
13531
|
weight,
|
|
13442
13532
|
hit: false,
|
|
13443
|
-
message: `${
|
|
13533
|
+
message: `${path53}: unknown match type "${match}"`
|
|
13444
13534
|
};
|
|
13445
13535
|
}
|
|
13446
13536
|
}
|
|
13447
13537
|
/**
|
|
13448
13538
|
* Exact equality comparison.
|
|
13449
13539
|
*/
|
|
13450
|
-
compareExact(
|
|
13540
|
+
compareExact(path53, candidateValue, expectedValue, weight) {
|
|
13451
13541
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
13452
13542
|
return {
|
|
13453
|
-
path:
|
|
13543
|
+
path: path53,
|
|
13454
13544
|
score: 1,
|
|
13455
13545
|
weight,
|
|
13456
13546
|
hit: true,
|
|
13457
|
-
message:
|
|
13547
|
+
message: path53
|
|
13458
13548
|
};
|
|
13459
13549
|
}
|
|
13460
13550
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
13461
13551
|
return {
|
|
13462
|
-
path:
|
|
13552
|
+
path: path53,
|
|
13463
13553
|
score: 0,
|
|
13464
13554
|
weight,
|
|
13465
13555
|
hit: false,
|
|
13466
|
-
message: `${
|
|
13556
|
+
message: `${path53} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
13467
13557
|
};
|
|
13468
13558
|
}
|
|
13469
13559
|
return {
|
|
13470
|
-
path:
|
|
13560
|
+
path: path53,
|
|
13471
13561
|
score: 0,
|
|
13472
13562
|
weight,
|
|
13473
13563
|
hit: false,
|
|
13474
|
-
message: `${
|
|
13564
|
+
message: `${path53} (value mismatch)`
|
|
13475
13565
|
};
|
|
13476
13566
|
}
|
|
13477
13567
|
/**
|
|
13478
13568
|
* Numeric comparison with absolute or relative tolerance.
|
|
13479
13569
|
*/
|
|
13480
|
-
compareNumericTolerance(
|
|
13570
|
+
compareNumericTolerance(path53, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13481
13571
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
13482
13572
|
const candidateNum = toNumber(candidateValue);
|
|
13483
13573
|
const expectedNum = toNumber(expectedValue);
|
|
13484
13574
|
if (candidateNum === null || expectedNum === null) {
|
|
13485
13575
|
return {
|
|
13486
|
-
path:
|
|
13576
|
+
path: path53,
|
|
13487
13577
|
score: 0,
|
|
13488
13578
|
weight,
|
|
13489
13579
|
hit: false,
|
|
13490
|
-
message: `${
|
|
13580
|
+
message: `${path53} (non-numeric value)`
|
|
13491
13581
|
};
|
|
13492
13582
|
}
|
|
13493
13583
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
13494
13584
|
return {
|
|
13495
|
-
path:
|
|
13585
|
+
path: path53,
|
|
13496
13586
|
score: 0,
|
|
13497
13587
|
weight,
|
|
13498
13588
|
hit: false,
|
|
13499
|
-
message: `${
|
|
13589
|
+
message: `${path53} (invalid numeric value)`
|
|
13500
13590
|
};
|
|
13501
13591
|
}
|
|
13502
13592
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -13509,61 +13599,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
13509
13599
|
}
|
|
13510
13600
|
if (withinTolerance) {
|
|
13511
13601
|
return {
|
|
13512
|
-
path:
|
|
13602
|
+
path: path53,
|
|
13513
13603
|
score: 1,
|
|
13514
13604
|
weight,
|
|
13515
13605
|
hit: true,
|
|
13516
|
-
message: `${
|
|
13606
|
+
message: `${path53} (within tolerance: diff=${diff.toFixed(2)})`
|
|
13517
13607
|
};
|
|
13518
13608
|
}
|
|
13519
13609
|
return {
|
|
13520
|
-
path:
|
|
13610
|
+
path: path53,
|
|
13521
13611
|
score: 0,
|
|
13522
13612
|
weight,
|
|
13523
13613
|
hit: false,
|
|
13524
|
-
message: `${
|
|
13614
|
+
message: `${path53} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
13525
13615
|
};
|
|
13526
13616
|
}
|
|
13527
13617
|
/**
|
|
13528
13618
|
* Date comparison with format normalization.
|
|
13529
13619
|
*/
|
|
13530
|
-
compareDate(
|
|
13620
|
+
compareDate(path53, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13531
13621
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
13532
13622
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
13533
13623
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
13534
13624
|
if (candidateDate === null) {
|
|
13535
13625
|
return {
|
|
13536
|
-
path:
|
|
13626
|
+
path: path53,
|
|
13537
13627
|
score: 0,
|
|
13538
13628
|
weight,
|
|
13539
13629
|
hit: false,
|
|
13540
|
-
message: `${
|
|
13630
|
+
message: `${path53} (unparseable candidate date)`
|
|
13541
13631
|
};
|
|
13542
13632
|
}
|
|
13543
13633
|
if (expectedDate === null) {
|
|
13544
13634
|
return {
|
|
13545
|
-
path:
|
|
13635
|
+
path: path53,
|
|
13546
13636
|
score: 0,
|
|
13547
13637
|
weight,
|
|
13548
13638
|
hit: false,
|
|
13549
|
-
message: `${
|
|
13639
|
+
message: `${path53} (unparseable expected date)`
|
|
13550
13640
|
};
|
|
13551
13641
|
}
|
|
13552
13642
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
13553
13643
|
return {
|
|
13554
|
-
path:
|
|
13644
|
+
path: path53,
|
|
13555
13645
|
score: 1,
|
|
13556
13646
|
weight,
|
|
13557
13647
|
hit: true,
|
|
13558
|
-
message:
|
|
13648
|
+
message: path53
|
|
13559
13649
|
};
|
|
13560
13650
|
}
|
|
13561
13651
|
return {
|
|
13562
|
-
path:
|
|
13652
|
+
path: path53,
|
|
13563
13653
|
score: 0,
|
|
13564
13654
|
weight,
|
|
13565
13655
|
hit: false,
|
|
13566
|
-
message: `${
|
|
13656
|
+
message: `${path53} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
13567
13657
|
};
|
|
13568
13658
|
}
|
|
13569
13659
|
/**
|
|
@@ -13596,11 +13686,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
13596
13686
|
};
|
|
13597
13687
|
}
|
|
13598
13688
|
};
|
|
13599
|
-
function resolvePath(obj,
|
|
13600
|
-
if (!
|
|
13689
|
+
function resolvePath(obj, path53) {
|
|
13690
|
+
if (!path53 || !obj) {
|
|
13601
13691
|
return void 0;
|
|
13602
13692
|
}
|
|
13603
|
-
const parts =
|
|
13693
|
+
const parts = path53.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
13604
13694
|
let current = obj;
|
|
13605
13695
|
for (const part of parts) {
|
|
13606
13696
|
if (current === null || current === void 0) {
|
|
@@ -14092,8 +14182,8 @@ var TokenUsageEvaluator = class {
|
|
|
14092
14182
|
};
|
|
14093
14183
|
|
|
14094
14184
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
14095
|
-
function getNestedValue(obj,
|
|
14096
|
-
const parts =
|
|
14185
|
+
function getNestedValue(obj, path53) {
|
|
14186
|
+
const parts = path53.split(".");
|
|
14097
14187
|
let current = obj;
|
|
14098
14188
|
for (const part of parts) {
|
|
14099
14189
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -14959,6 +15049,15 @@ async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
|
14959
15049
|
}
|
|
14960
15050
|
return void 0;
|
|
14961
15051
|
}
|
|
15052
|
+
function containsTemplateVariables(text) {
|
|
15053
|
+
const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
|
|
15054
|
+
for (const match of text.matchAll(variablePattern)) {
|
|
15055
|
+
if (VALID_TEMPLATE_VARIABLES.has(match[1])) {
|
|
15056
|
+
return true;
|
|
15057
|
+
}
|
|
15058
|
+
}
|
|
15059
|
+
return false;
|
|
15060
|
+
}
|
|
14962
15061
|
async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
14963
15062
|
const payload = {
|
|
14964
15063
|
criteria: context.evalCase.criteria,
|
|
@@ -15031,9 +15130,20 @@ var llmGraderFactory = (config, context) => {
|
|
|
15031
15130
|
},
|
|
15032
15131
|
agentTimeoutMs
|
|
15033
15132
|
);
|
|
15133
|
+
const isFromInlinePrompt = !c.resolvedPromptScript?.length && !c.resolvedPromptPath && !c.promptPath;
|
|
15134
|
+
let evaluatorTemplateOverride;
|
|
15135
|
+
let evalCase = evalContext.evalCase;
|
|
15136
|
+
if (customPrompt) {
|
|
15137
|
+
if (!isFromInlinePrompt || containsTemplateVariables(customPrompt)) {
|
|
15138
|
+
evaluatorTemplateOverride = customPrompt;
|
|
15139
|
+
} else {
|
|
15140
|
+
evalCase = { ...evalCase, criteria: customPrompt };
|
|
15141
|
+
}
|
|
15142
|
+
}
|
|
15034
15143
|
return evaluator.evaluate({
|
|
15035
15144
|
...evalContext,
|
|
15036
|
-
|
|
15145
|
+
evalCase,
|
|
15146
|
+
evaluatorTemplateOverride,
|
|
15037
15147
|
evaluator: c
|
|
15038
15148
|
});
|
|
15039
15149
|
}
|
|
@@ -15630,7 +15740,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
15630
15740
|
import { execFile } from "node:child_process";
|
|
15631
15741
|
import { createHash } from "node:crypto";
|
|
15632
15742
|
import { existsSync as existsSync3 } from "node:fs";
|
|
15633
|
-
import { cp as cp2, mkdir as mkdir13, readFile as
|
|
15743
|
+
import { cp as cp2, mkdir as mkdir13, readFile as readFile14, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
15634
15744
|
import path42 from "node:path";
|
|
15635
15745
|
import { promisify as promisify5 } from "node:util";
|
|
15636
15746
|
var execFileAsync = promisify5(execFile);
|
|
@@ -15658,12 +15768,14 @@ async function git(args, opts) {
|
|
|
15658
15768
|
return stdout.trim();
|
|
15659
15769
|
}
|
|
15660
15770
|
function normalizeRepoForFingerprint(repo) {
|
|
15661
|
-
const
|
|
15662
|
-
|
|
15663
|
-
path
|
|
15664
|
-
|
|
15665
|
-
|
|
15666
|
-
|
|
15771
|
+
const result = {};
|
|
15772
|
+
if (repo.path) {
|
|
15773
|
+
result.path = repo.path;
|
|
15774
|
+
}
|
|
15775
|
+
if (repo.source) {
|
|
15776
|
+
result.source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
15777
|
+
}
|
|
15778
|
+
result.ref = getRepoCheckoutRef(repo.checkout);
|
|
15667
15779
|
if (repo.clone?.depth !== void 0) {
|
|
15668
15780
|
result.depth = repo.clone.depth;
|
|
15669
15781
|
}
|
|
@@ -15677,7 +15789,7 @@ function normalizeRepoForFingerprint(repo) {
|
|
|
15677
15789
|
}
|
|
15678
15790
|
function computeWorkspaceFingerprint(repos) {
|
|
15679
15791
|
const canonical = {
|
|
15680
|
-
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
15792
|
+
repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
|
|
15681
15793
|
};
|
|
15682
15794
|
return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
15683
15795
|
}
|
|
@@ -15791,7 +15903,7 @@ var WorkspacePoolManager = class {
|
|
|
15791
15903
|
throw err;
|
|
15792
15904
|
}
|
|
15793
15905
|
try {
|
|
15794
|
-
const pidStr = await
|
|
15906
|
+
const pidStr = await readFile14(lockPath, "utf-8");
|
|
15795
15907
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15796
15908
|
if (!Number.isNaN(pid)) {
|
|
15797
15909
|
try {
|
|
@@ -15818,7 +15930,7 @@ var WorkspacePoolManager = class {
|
|
|
15818
15930
|
async checkDrift(poolDir, fingerprint) {
|
|
15819
15931
|
const metadataPath = path42.join(poolDir, "metadata.json");
|
|
15820
15932
|
try {
|
|
15821
|
-
const raw = await
|
|
15933
|
+
const raw = await readFile14(metadataPath, "utf-8");
|
|
15822
15934
|
const metadata = JSON.parse(raw);
|
|
15823
15935
|
return metadata.fingerprint !== fingerprint;
|
|
15824
15936
|
} catch {
|
|
@@ -15843,7 +15955,7 @@ var WorkspacePoolManager = class {
|
|
|
15843
15955
|
const lockPath = path42.join(poolDir, `${entry}.lock`);
|
|
15844
15956
|
if (existsSync3(lockPath)) {
|
|
15845
15957
|
try {
|
|
15846
|
-
const pidStr = await
|
|
15958
|
+
const pidStr = await readFile14(lockPath, "utf-8");
|
|
15847
15959
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15848
15960
|
if (!Number.isNaN(pid)) {
|
|
15849
15961
|
try {
|
|
@@ -15871,6 +15983,7 @@ var WorkspacePoolManager = class {
|
|
|
15871
15983
|
*/
|
|
15872
15984
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
15873
15985
|
for (const repo of repos) {
|
|
15986
|
+
if (!repo.path || !repo.source) continue;
|
|
15874
15987
|
const repoDir = path42.join(slotPath, repo.path);
|
|
15875
15988
|
if (!existsSync3(repoDir)) {
|
|
15876
15989
|
continue;
|
|
@@ -15878,7 +15991,7 @@ var WorkspacePoolManager = class {
|
|
|
15878
15991
|
if (poolReset === "none") {
|
|
15879
15992
|
continue;
|
|
15880
15993
|
}
|
|
15881
|
-
const ref = repo.checkout
|
|
15994
|
+
const ref = getRepoCheckoutRef(repo.checkout);
|
|
15882
15995
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
15883
15996
|
if (resolve === "remote") {
|
|
15884
15997
|
const fetchArgs = ["fetch", "origin", ref];
|
|
@@ -15895,8 +16008,8 @@ var WorkspacePoolManager = class {
|
|
|
15895
16008
|
}
|
|
15896
16009
|
if (templatePath) {
|
|
15897
16010
|
const repoDirNames = new Set(
|
|
15898
|
-
repos.map((r) => {
|
|
15899
|
-
const normalized = r.path.replace(/^\.\//, "");
|
|
16011
|
+
repos.filter((r) => r.path).map((r) => {
|
|
16012
|
+
const normalized = (r.path ?? "").replace(/^\.\//, "");
|
|
15900
16013
|
return normalized.split("/")[0];
|
|
15901
16014
|
})
|
|
15902
16015
|
);
|
|
@@ -15951,17 +16064,17 @@ var RepoManager = class {
|
|
|
15951
16064
|
static validateLocalPaths(repos) {
|
|
15952
16065
|
const errors = [];
|
|
15953
16066
|
for (const repo of repos) {
|
|
15954
|
-
if (repo.source.type !== "local") continue;
|
|
16067
|
+
if (!repo.source || repo.source.type !== "local") continue;
|
|
15955
16068
|
const sourcePath = repo.source.path;
|
|
15956
16069
|
if (!sourcePath || sourcePath.trim() === "") {
|
|
15957
16070
|
errors.push({
|
|
15958
|
-
repoPath: repo.path,
|
|
16071
|
+
repoPath: repo.path ?? "(none)",
|
|
15959
16072
|
resolvedSourcePath: sourcePath ?? "",
|
|
15960
16073
|
reason: "empty_path"
|
|
15961
16074
|
});
|
|
15962
16075
|
} else if (!existsSync4(sourcePath)) {
|
|
15963
16076
|
errors.push({
|
|
15964
|
-
repoPath: repo.path,
|
|
16077
|
+
repoPath: repo.path ?? "(none)",
|
|
15965
16078
|
resolvedSourcePath: sourcePath,
|
|
15966
16079
|
reason: "not_found"
|
|
15967
16080
|
});
|
|
@@ -16008,6 +16121,12 @@ ${lines.join("\n")}`;
|
|
|
16008
16121
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
16009
16122
|
*/
|
|
16010
16123
|
async materialize(repo, workspacePath) {
|
|
16124
|
+
if (!repo.source || !repo.path) {
|
|
16125
|
+
if (this.verbose) {
|
|
16126
|
+
console.log(`[repo] materialize skip path=${repo.path ?? "(none)"} (no source or path)`);
|
|
16127
|
+
}
|
|
16128
|
+
return;
|
|
16129
|
+
}
|
|
16011
16130
|
const targetDir = path43.join(workspacePath, repo.path);
|
|
16012
16131
|
const sourceUrl = getSourceUrl(repo.source);
|
|
16013
16132
|
const startedAt = Date.now();
|
|
@@ -16031,7 +16150,7 @@ ${lines.join("\n")}`;
|
|
|
16031
16150
|
await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
|
|
16032
16151
|
await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
|
|
16033
16152
|
}
|
|
16034
|
-
const ref = repo.checkout
|
|
16153
|
+
const ref = getRepoCheckoutRef(repo.checkout);
|
|
16035
16154
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
16036
16155
|
let resolvedSha;
|
|
16037
16156
|
if (resolve === "remote" && repo.source.type === "git") {
|
|
@@ -16083,22 +16202,26 @@ ${lines.join("\n")}`;
|
|
|
16083
16202
|
);
|
|
16084
16203
|
}
|
|
16085
16204
|
}
|
|
16086
|
-
/** Materialize all repos into the workspace. */
|
|
16205
|
+
/** Materialize all repos into the workspace. Skips repos without source (Docker-only repos). */
|
|
16087
16206
|
async materializeAll(repos, workspacePath) {
|
|
16207
|
+
const materializableRepos = repos.filter((r) => r.source);
|
|
16088
16208
|
if (this.verbose) {
|
|
16089
|
-
console.log(
|
|
16209
|
+
console.log(
|
|
16210
|
+
`[repo] materializeAll count=${materializableRepos.length} (${repos.length - materializableRepos.length} skipped, no source) workspace=${workspacePath}`
|
|
16211
|
+
);
|
|
16090
16212
|
}
|
|
16091
|
-
for (const repo of
|
|
16213
|
+
for (const repo of materializableRepos) {
|
|
16092
16214
|
await this.materialize(repo, workspacePath);
|
|
16093
16215
|
}
|
|
16094
16216
|
if (this.verbose) {
|
|
16095
16217
|
console.log("[repo] materializeAll complete");
|
|
16096
16218
|
}
|
|
16097
16219
|
}
|
|
16098
|
-
/** Reset repos in workspace to their checkout state. */
|
|
16220
|
+
/** Reset repos in workspace to their checkout state. Skips repos without path or source. */
|
|
16099
16221
|
async reset(repos, workspacePath, reset) {
|
|
16100
16222
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
16101
16223
|
for (const repo of repos) {
|
|
16224
|
+
if (!repo.path || !repo.source) continue;
|
|
16102
16225
|
const targetDir = path43.join(workspacePath, repo.path);
|
|
16103
16226
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
16104
16227
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
@@ -16422,7 +16545,8 @@ async function runEvaluation(options) {
|
|
|
16422
16545
|
for (const ec of filteredEvalCases) {
|
|
16423
16546
|
if (ec.workspace?.repos) {
|
|
16424
16547
|
for (const repo of ec.workspace.repos) {
|
|
16425
|
-
|
|
16548
|
+
if (!repo.source) continue;
|
|
16549
|
+
const key = `${repo.path ?? ""}::${repo.source.type === "local" ? repo.source.path : ""}`;
|
|
16426
16550
|
if (!allRepos.has(key)) {
|
|
16427
16551
|
allRepos.set(key, repo);
|
|
16428
16552
|
}
|
|
@@ -16435,7 +16559,7 @@ async function runEvaluation(options) {
|
|
|
16435
16559
|
const message = RepoManager.formatValidationErrors(localPathErrors);
|
|
16436
16560
|
console.warn(`Warning: ${message}`);
|
|
16437
16561
|
const invalidLocalRepoPaths = new Set(localPathErrors.map((e) => e.repoPath));
|
|
16438
|
-
if (suiteWorkspace?.repos?.some((r) => invalidLocalRepoPaths.has(r.path))) {
|
|
16562
|
+
if (suiteWorkspace?.repos?.some((r) => r.path && invalidLocalRepoPaths.has(r.path))) {
|
|
16439
16563
|
throw new Error(message);
|
|
16440
16564
|
}
|
|
16441
16565
|
}
|
|
@@ -16568,6 +16692,7 @@ async function runEvaluation(options) {
|
|
|
16568
16692
|
try {
|
|
16569
16693
|
if (needsPerRepoCheck) {
|
|
16570
16694
|
for (const repo of suiteWorkspace.repos) {
|
|
16695
|
+
if (!repo.path || !repo.source) continue;
|
|
16571
16696
|
const targetDir = path45.join(sharedWorkspacePath, repo.path);
|
|
16572
16697
|
if (existsSync5(targetDir)) {
|
|
16573
16698
|
setupLog(`reusing existing repo at: ${targetDir}`);
|
|
@@ -16592,6 +16717,19 @@ async function runEvaluation(options) {
|
|
|
16592
16717
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
16593
16718
|
}
|
|
16594
16719
|
}
|
|
16720
|
+
const suiteDockerConfig = suiteWorkspace?.docker;
|
|
16721
|
+
if (suiteDockerConfig) {
|
|
16722
|
+
setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
|
|
16723
|
+
const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27.js");
|
|
16724
|
+
const dockerSetup = new DockerWorkspaceProvider2(suiteDockerConfig);
|
|
16725
|
+
if (!await dockerSetup.isDockerAvailable()) {
|
|
16726
|
+
throw new Error(
|
|
16727
|
+
"Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
|
|
16728
|
+
);
|
|
16729
|
+
}
|
|
16730
|
+
await dockerSetup.pullImage();
|
|
16731
|
+
setupLog("Docker image pull complete");
|
|
16732
|
+
}
|
|
16595
16733
|
const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
|
|
16596
16734
|
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
16597
16735
|
if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
|
|
@@ -16952,11 +17090,9 @@ async function runBatchEvaluation(options) {
|
|
|
16952
17090
|
const promptInputs = promptInputsList[index];
|
|
16953
17091
|
return {
|
|
16954
17092
|
question: promptInputs.question,
|
|
17093
|
+
systemPrompt: promptInputs.systemMessage,
|
|
16955
17094
|
inputFiles: evalCase.file_paths,
|
|
16956
|
-
evalCaseId: evalCase.id
|
|
16957
|
-
metadata: {
|
|
16958
|
-
systemPrompt: promptInputs.systemMessage ?? ""
|
|
16959
|
-
}
|
|
17095
|
+
evalCaseId: evalCase.id
|
|
16960
17096
|
};
|
|
16961
17097
|
});
|
|
16962
17098
|
const batchResponse = await provider.invokeBatch?.(batchRequests);
|
|
@@ -17487,6 +17623,7 @@ async function runEvalCase(options) {
|
|
|
17487
17623
|
availableTargets,
|
|
17488
17624
|
fileChanges,
|
|
17489
17625
|
workspacePath,
|
|
17626
|
+
dockerConfig: evalCase.workspace?.docker,
|
|
17490
17627
|
verbose,
|
|
17491
17628
|
threshold: evalCase.threshold ?? caseThreshold
|
|
17492
17629
|
});
|
|
@@ -17680,6 +17817,7 @@ async function evaluateCandidate(options) {
|
|
|
17680
17817
|
availableTargets,
|
|
17681
17818
|
fileChanges,
|
|
17682
17819
|
workspacePath,
|
|
17820
|
+
dockerConfig,
|
|
17683
17821
|
threshold: evalThreshold
|
|
17684
17822
|
} = options;
|
|
17685
17823
|
const gradeTimestamp = nowFn();
|
|
@@ -17706,6 +17844,7 @@ async function evaluateCandidate(options) {
|
|
|
17706
17844
|
availableTargets,
|
|
17707
17845
|
fileChanges,
|
|
17708
17846
|
workspacePath,
|
|
17847
|
+
dockerConfig,
|
|
17709
17848
|
threshold: evalThreshold
|
|
17710
17849
|
});
|
|
17711
17850
|
const completedAt = nowFn();
|
|
@@ -17781,6 +17920,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
17781
17920
|
availableTargets,
|
|
17782
17921
|
fileChanges,
|
|
17783
17922
|
workspacePath,
|
|
17923
|
+
dockerConfig,
|
|
17784
17924
|
threshold
|
|
17785
17925
|
} = options;
|
|
17786
17926
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
@@ -17808,6 +17948,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
17808
17948
|
availableTargets,
|
|
17809
17949
|
fileChanges,
|
|
17810
17950
|
workspacePath,
|
|
17951
|
+
dockerConfig,
|
|
17811
17952
|
threshold
|
|
17812
17953
|
});
|
|
17813
17954
|
}
|
|
@@ -17837,6 +17978,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
17837
17978
|
availableTargets,
|
|
17838
17979
|
fileChanges,
|
|
17839
17980
|
workspacePath,
|
|
17981
|
+
dockerConfig,
|
|
17840
17982
|
...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
|
|
17841
17983
|
});
|
|
17842
17984
|
return { score };
|
|
@@ -17875,7 +18017,8 @@ async function runEvaluatorList(options) {
|
|
|
17875
18017
|
targetResolver,
|
|
17876
18018
|
availableTargets,
|
|
17877
18019
|
fileChanges,
|
|
17878
|
-
workspacePath
|
|
18020
|
+
workspacePath,
|
|
18021
|
+
dockerConfig
|
|
17879
18022
|
} = options;
|
|
17880
18023
|
const scored = [];
|
|
17881
18024
|
const scores = [];
|
|
@@ -17898,7 +18041,8 @@ async function runEvaluatorList(options) {
|
|
|
17898
18041
|
targetResolver,
|
|
17899
18042
|
availableTargets,
|
|
17900
18043
|
fileChanges,
|
|
17901
|
-
workspacePath
|
|
18044
|
+
workspacePath,
|
|
18045
|
+
dockerConfig
|
|
17902
18046
|
};
|
|
17903
18047
|
const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
17904
18048
|
const dispatchContext = {
|
|
@@ -18060,13 +18204,11 @@ async function invokeProvider(provider, options) {
|
|
|
18060
18204
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
18061
18205
|
return await provider.invoke({
|
|
18062
18206
|
question: promptInputs.question,
|
|
18207
|
+
systemPrompt: promptInputs.systemMessage,
|
|
18063
18208
|
chatPrompt: promptInputs.chatPrompt,
|
|
18064
18209
|
inputFiles: evalCase.file_paths,
|
|
18065
18210
|
evalCaseId: evalCase.id,
|
|
18066
18211
|
attempt,
|
|
18067
|
-
metadata: {
|
|
18068
|
-
systemPrompt: promptInputs.systemMessage ?? ""
|
|
18069
|
-
},
|
|
18070
18212
|
signal: controller.signal,
|
|
18071
18213
|
cwd,
|
|
18072
18214
|
workspaceFile,
|
|
@@ -18436,7 +18578,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
18436
18578
|
return null;
|
|
18437
18579
|
}
|
|
18438
18580
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
18439
|
-
const { readFileSync:
|
|
18581
|
+
const { readFileSync: readFileSync5 } = await import("node:fs");
|
|
18440
18582
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
18441
18583
|
const envFiles = [];
|
|
18442
18584
|
for (const dir of chain) {
|
|
@@ -18445,7 +18587,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
18445
18587
|
}
|
|
18446
18588
|
for (let i = 0; i < envFiles.length; i++) {
|
|
18447
18589
|
try {
|
|
18448
|
-
const content =
|
|
18590
|
+
const content = readFileSync5(envFiles[i], "utf8");
|
|
18449
18591
|
for (const line of content.split("\n")) {
|
|
18450
18592
|
const trimmed = line.trim();
|
|
18451
18593
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -18517,12 +18659,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
18517
18659
|
".agentv/config.js"
|
|
18518
18660
|
];
|
|
18519
18661
|
async function loadTsConfig(projectRoot) {
|
|
18520
|
-
const { existsSync:
|
|
18662
|
+
const { existsSync: existsSync9 } = await import("node:fs");
|
|
18521
18663
|
const { pathToFileURL: pathToFileURL2 } = await import("node:url");
|
|
18522
18664
|
const { join: join2 } = await import("node:path");
|
|
18523
18665
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
18524
18666
|
const filePath = join2(projectRoot, fileName);
|
|
18525
|
-
if (!
|
|
18667
|
+
if (!existsSync9(filePath)) {
|
|
18526
18668
|
continue;
|
|
18527
18669
|
}
|
|
18528
18670
|
try {
|
|
@@ -18619,9 +18761,9 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18619
18761
|
}
|
|
18620
18762
|
|
|
18621
18763
|
// src/evaluation/workspace/deps-scanner.ts
|
|
18622
|
-
import { readFile as
|
|
18764
|
+
import { readFile as readFile15 } from "node:fs/promises";
|
|
18623
18765
|
import path47 from "node:path";
|
|
18624
|
-
import { parse as
|
|
18766
|
+
import { parse as parse6 } from "yaml";
|
|
18625
18767
|
function normalizeGitUrl(url) {
|
|
18626
18768
|
let normalized = url.replace(/\.git$/, "");
|
|
18627
18769
|
try {
|
|
@@ -18639,7 +18781,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
18639
18781
|
try {
|
|
18640
18782
|
const repos = await extractReposFromEvalFile(filePath);
|
|
18641
18783
|
for (const repo of repos) {
|
|
18642
|
-
if (repo.source.type !== "git") continue;
|
|
18784
|
+
if (!repo.source || repo.source.type !== "git") continue;
|
|
18643
18785
|
const ref = repo.checkout?.ref;
|
|
18644
18786
|
const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
|
|
18645
18787
|
const existing = seen.get(key);
|
|
@@ -18667,8 +18809,8 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
18667
18809
|
return { repos: [...seen.values()], errors };
|
|
18668
18810
|
}
|
|
18669
18811
|
async function extractReposFromEvalFile(filePath) {
|
|
18670
|
-
const content = await
|
|
18671
|
-
const parsed = interpolateEnv(
|
|
18812
|
+
const content = await readFile15(filePath, "utf8");
|
|
18813
|
+
const parsed = interpolateEnv(parse6(content), process.env);
|
|
18672
18814
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
18673
18815
|
const obj = parsed;
|
|
18674
18816
|
const evalFileDir = path47.dirname(path47.resolve(filePath));
|
|
@@ -18688,8 +18830,8 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
18688
18830
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
18689
18831
|
if (typeof raw === "string") {
|
|
18690
18832
|
const workspaceFilePath = path47.resolve(evalFileDir, raw);
|
|
18691
|
-
const content = await
|
|
18692
|
-
const parsed = interpolateEnv(
|
|
18833
|
+
const content = await readFile15(workspaceFilePath, "utf8");
|
|
18834
|
+
const parsed = interpolateEnv(parse6(content), process.env);
|
|
18693
18835
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
18694
18836
|
return extractReposFromObject(parsed);
|
|
18695
18837
|
}
|
|
@@ -18716,7 +18858,7 @@ function extractReposFromObject(obj) {
|
|
|
18716
18858
|
}
|
|
18717
18859
|
|
|
18718
18860
|
// src/evaluation/cache/response-cache.ts
|
|
18719
|
-
import { mkdir as mkdir15, readFile as
|
|
18861
|
+
import { mkdir as mkdir15, readFile as readFile16, writeFile as writeFile8 } from "node:fs/promises";
|
|
18720
18862
|
import path48 from "node:path";
|
|
18721
18863
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18722
18864
|
var ResponseCache = class {
|
|
@@ -18727,7 +18869,7 @@ var ResponseCache = class {
|
|
|
18727
18869
|
async get(key) {
|
|
18728
18870
|
const filePath = this.keyToPath(key);
|
|
18729
18871
|
try {
|
|
18730
|
-
const data = await
|
|
18872
|
+
const data = await readFile16(filePath, "utf8");
|
|
18731
18873
|
return JSON.parse(data);
|
|
18732
18874
|
} catch {
|
|
18733
18875
|
return void 0;
|
|
@@ -18756,20 +18898,301 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
18756
18898
|
return false;
|
|
18757
18899
|
}
|
|
18758
18900
|
|
|
18759
|
-
// src/
|
|
18760
|
-
import {
|
|
18901
|
+
// src/evaluation/results-repo.ts
|
|
18902
|
+
import { execFile as execFile3 } from "node:child_process";
|
|
18903
|
+
import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync } from "node:fs";
|
|
18904
|
+
import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir8, rm as rm6, stat as stat9 } from "node:fs/promises";
|
|
18905
|
+
import os3 from "node:os";
|
|
18761
18906
|
import path49 from "node:path";
|
|
18907
|
+
import { promisify as promisify7 } from "node:util";
|
|
18908
|
+
var execFileAsync3 = promisify7(execFile3);
|
|
18909
|
+
function sanitizeRepoSlug(repo) {
|
|
18910
|
+
return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
|
|
18911
|
+
}
|
|
18912
|
+
function withFriendlyGitHubAuthError(error) {
|
|
18913
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
18914
|
+
const lower = message.toLowerCase();
|
|
18915
|
+
if (lower.includes("authentication failed") || lower.includes("could not read username") || lower.includes("permission denied") || lower.includes("not logged into any github hosts")) {
|
|
18916
|
+
return new Error(`${message}. Run 'gh auth login' to authenticate.`);
|
|
18917
|
+
}
|
|
18918
|
+
return new Error(message);
|
|
18919
|
+
}
|
|
18920
|
+
function normalizeResultsExportConfig(config) {
|
|
18921
|
+
return {
|
|
18922
|
+
repo: config.repo.trim(),
|
|
18923
|
+
path: config.path.trim().replace(/^\/+|\/+$/g, ""),
|
|
18924
|
+
auto_push: config.auto_push === true,
|
|
18925
|
+
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
18926
|
+
};
|
|
18927
|
+
}
|
|
18928
|
+
function resolveResultsRepoUrl(repo) {
|
|
18929
|
+
if (repo.includes("://") || repo.startsWith("git@")) {
|
|
18930
|
+
return repo;
|
|
18931
|
+
}
|
|
18932
|
+
return `https://github.com/${repo}.git`;
|
|
18933
|
+
}
|
|
18934
|
+
function getResultsRepoCachePaths(repo) {
|
|
18935
|
+
const rootDir = path49.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
18936
|
+
return {
|
|
18937
|
+
rootDir,
|
|
18938
|
+
repoDir: path49.join(rootDir, "repo"),
|
|
18939
|
+
statusFile: path49.join(rootDir, "status.json")
|
|
18940
|
+
};
|
|
18941
|
+
}
|
|
18942
|
+
function readPersistedStatus(statusFile) {
|
|
18943
|
+
if (!existsSync7(statusFile)) {
|
|
18944
|
+
return {};
|
|
18945
|
+
}
|
|
18946
|
+
try {
|
|
18947
|
+
return JSON.parse(readFileSync3(statusFile, "utf8"));
|
|
18948
|
+
} catch {
|
|
18949
|
+
return {};
|
|
18950
|
+
}
|
|
18951
|
+
}
|
|
18952
|
+
function writePersistedStatus(statusFile, status) {
|
|
18953
|
+
mkdirSync2(path49.dirname(statusFile), { recursive: true });
|
|
18954
|
+
writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
|
|
18955
|
+
`, "utf8");
|
|
18956
|
+
}
|
|
18957
|
+
async function runCommand(executable, args, options) {
|
|
18958
|
+
try {
|
|
18959
|
+
const { stdout, stderr } = await execFileAsync3(executable, [...args], {
|
|
18960
|
+
cwd: options?.cwd,
|
|
18961
|
+
env: process.env
|
|
18962
|
+
});
|
|
18963
|
+
return { stdout, stderr };
|
|
18964
|
+
} catch (error) {
|
|
18965
|
+
if (options?.check === false && error && typeof error === "object") {
|
|
18966
|
+
const execError = error;
|
|
18967
|
+
return {
|
|
18968
|
+
stdout: execError.stdout ?? "",
|
|
18969
|
+
stderr: execError.stderr ?? ""
|
|
18970
|
+
};
|
|
18971
|
+
}
|
|
18972
|
+
throw withFriendlyGitHubAuthError(error);
|
|
18973
|
+
}
|
|
18974
|
+
}
|
|
18975
|
+
async function runGit(args, options) {
|
|
18976
|
+
return runCommand("git", args, options);
|
|
18977
|
+
}
|
|
18978
|
+
async function runGh(args, options) {
|
|
18979
|
+
return runCommand("gh", args, options);
|
|
18980
|
+
}
|
|
18981
|
+
async function resolveDefaultBranch(repoDir) {
|
|
18982
|
+
try {
|
|
18983
|
+
const { stdout } = await runGit(["symbolic-ref", "refs/remotes/origin/HEAD"], { cwd: repoDir });
|
|
18984
|
+
const ref = stdout.trim();
|
|
18985
|
+
const prefix = "refs/remotes/origin/";
|
|
18986
|
+
if (ref.startsWith(prefix)) {
|
|
18987
|
+
return ref.slice(prefix.length);
|
|
18988
|
+
}
|
|
18989
|
+
} catch {
|
|
18990
|
+
}
|
|
18991
|
+
for (const candidate of ["main", "master"]) {
|
|
18992
|
+
try {
|
|
18993
|
+
await runGit(["rev-parse", "--verify", `origin/${candidate}`], { cwd: repoDir });
|
|
18994
|
+
return candidate;
|
|
18995
|
+
} catch {
|
|
18996
|
+
}
|
|
18997
|
+
}
|
|
18998
|
+
return "main";
|
|
18999
|
+
}
|
|
19000
|
+
async function updateCacheRepo(repoDir, baseBranch) {
|
|
19001
|
+
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
19002
|
+
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
19003
|
+
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
19004
|
+
}
|
|
19005
|
+
function updateStatusFile(config, patch) {
|
|
19006
|
+
const cachePaths = getResultsRepoCachePaths(config.repo);
|
|
19007
|
+
const current = readPersistedStatus(cachePaths.statusFile);
|
|
19008
|
+
writePersistedStatus(cachePaths.statusFile, {
|
|
19009
|
+
...current,
|
|
19010
|
+
...patch
|
|
19011
|
+
});
|
|
19012
|
+
}
|
|
19013
|
+
async function ensureResultsRepoClone(config) {
|
|
19014
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19015
|
+
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
19016
|
+
mkdirSync2(cachePaths.rootDir, { recursive: true });
|
|
19017
|
+
if (!existsSync7(cachePaths.repoDir)) {
|
|
19018
|
+
try {
|
|
19019
|
+
await runGit([
|
|
19020
|
+
"clone",
|
|
19021
|
+
"--filter=blob:none",
|
|
19022
|
+
resolveResultsRepoUrl(normalized.repo),
|
|
19023
|
+
cachePaths.repoDir
|
|
19024
|
+
]);
|
|
19025
|
+
return cachePaths.repoDir;
|
|
19026
|
+
} catch (error) {
|
|
19027
|
+
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
19028
|
+
throw withFriendlyGitHubAuthError(error);
|
|
19029
|
+
}
|
|
19030
|
+
}
|
|
19031
|
+
if (!existsSync7(path49.join(cachePaths.repoDir, ".git"))) {
|
|
19032
|
+
throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
|
|
19033
|
+
}
|
|
19034
|
+
return cachePaths.repoDir;
|
|
19035
|
+
}
|
|
19036
|
+
function getResultsRepoStatus(config) {
|
|
19037
|
+
if (!config) {
|
|
19038
|
+
return {
|
|
19039
|
+
configured: false,
|
|
19040
|
+
available: false,
|
|
19041
|
+
repo: "",
|
|
19042
|
+
cache_dir: ""
|
|
19043
|
+
};
|
|
19044
|
+
}
|
|
19045
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19046
|
+
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
19047
|
+
const persisted = readPersistedStatus(cachePaths.statusFile);
|
|
19048
|
+
return {
|
|
19049
|
+
configured: true,
|
|
19050
|
+
available: existsSync7(cachePaths.repoDir),
|
|
19051
|
+
repo: normalized.repo,
|
|
19052
|
+
path: normalized.path,
|
|
19053
|
+
auto_push: normalized.auto_push,
|
|
19054
|
+
branch_prefix: normalized.branch_prefix,
|
|
19055
|
+
cache_dir: cachePaths.repoDir,
|
|
19056
|
+
last_synced_at: persisted.last_synced_at,
|
|
19057
|
+
last_error: persisted.last_error
|
|
19058
|
+
};
|
|
19059
|
+
}
|
|
19060
|
+
async function syncResultsRepo(config) {
|
|
19061
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19062
|
+
try {
|
|
19063
|
+
const repoDir = await ensureResultsRepoClone(normalized);
|
|
19064
|
+
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
19065
|
+
await updateCacheRepo(repoDir, baseBranch);
|
|
19066
|
+
updateStatusFile(normalized, {
|
|
19067
|
+
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
19068
|
+
last_error: void 0
|
|
19069
|
+
});
|
|
19070
|
+
} catch (error) {
|
|
19071
|
+
updateStatusFile(normalized, {
|
|
19072
|
+
last_error: withFriendlyGitHubAuthError(error).message
|
|
19073
|
+
});
|
|
19074
|
+
throw withFriendlyGitHubAuthError(error);
|
|
19075
|
+
}
|
|
19076
|
+
return getResultsRepoStatus(normalized);
|
|
19077
|
+
}
|
|
19078
|
+
async function checkoutResultsRepoBranch(config, branchName) {
|
|
19079
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19080
|
+
const repoDir = await ensureResultsRepoClone(normalized);
|
|
19081
|
+
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
19082
|
+
await updateCacheRepo(repoDir, baseBranch);
|
|
19083
|
+
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
19084
|
+
updateStatusFile(normalized, { last_error: void 0 });
|
|
19085
|
+
return {
|
|
19086
|
+
branchName,
|
|
19087
|
+
baseBranch,
|
|
19088
|
+
repoDir
|
|
19089
|
+
};
|
|
19090
|
+
}
|
|
19091
|
+
async function prepareResultsRepoBranch(config, branchName) {
|
|
19092
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19093
|
+
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
19094
|
+
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
19095
|
+
await updateCacheRepo(cloneDir, baseBranch);
|
|
19096
|
+
const worktreeRoot = await mkdtemp3(path49.join(os3.tmpdir(), "agentv-results-repo-"));
|
|
19097
|
+
const worktreeDir = path49.join(worktreeRoot, "repo");
|
|
19098
|
+
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
19099
|
+
cwd: cloneDir
|
|
19100
|
+
});
|
|
19101
|
+
return {
|
|
19102
|
+
branchName,
|
|
19103
|
+
baseBranch,
|
|
19104
|
+
repoDir: worktreeDir,
|
|
19105
|
+
cleanup: async () => {
|
|
19106
|
+
try {
|
|
19107
|
+
await runGit(["worktree", "remove", "--force", worktreeDir], { cwd: cloneDir });
|
|
19108
|
+
} finally {
|
|
19109
|
+
await rm6(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
|
|
19110
|
+
}
|
|
19111
|
+
}
|
|
19112
|
+
};
|
|
19113
|
+
}
|
|
19114
|
+
async function stageResultsArtifacts(params) {
|
|
19115
|
+
rmSync(params.destinationDir, { recursive: true, force: true });
|
|
19116
|
+
mkdirSync2(path49.dirname(params.destinationDir), { recursive: true });
|
|
19117
|
+
await cp3(params.sourceDir, params.destinationDir, { recursive: true });
|
|
19118
|
+
}
|
|
19119
|
+
function resolveResultsRepoRunsDir(config) {
|
|
19120
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19121
|
+
return path49.join(
|
|
19122
|
+
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
19123
|
+
...normalized.path.split("/")
|
|
19124
|
+
);
|
|
19125
|
+
}
|
|
19126
|
+
async function directorySizeBytes(targetPath) {
|
|
19127
|
+
const entry = await stat9(targetPath);
|
|
19128
|
+
if (entry.isFile()) {
|
|
19129
|
+
return entry.size;
|
|
19130
|
+
}
|
|
19131
|
+
let total = 0;
|
|
19132
|
+
for (const child of await readdir8(targetPath, { withFileTypes: true })) {
|
|
19133
|
+
total += await directorySizeBytes(path49.join(targetPath, child.name));
|
|
19134
|
+
}
|
|
19135
|
+
return total;
|
|
19136
|
+
}
|
|
19137
|
+
async function commitAndPushResultsBranch(params) {
|
|
19138
|
+
await runGit(["add", "--all"], { cwd: params.repoDir });
|
|
19139
|
+
const { stdout: diffStdout } = await runGit(["status", "--porcelain"], {
|
|
19140
|
+
cwd: params.repoDir,
|
|
19141
|
+
check: false
|
|
19142
|
+
});
|
|
19143
|
+
if (diffStdout.trim().length === 0) {
|
|
19144
|
+
return false;
|
|
19145
|
+
}
|
|
19146
|
+
await runGit(["commit", "-m", params.commitMessage], { cwd: params.repoDir });
|
|
19147
|
+
await runGit(["push", "-u", "origin", params.branchName], { cwd: params.repoDir });
|
|
19148
|
+
return true;
|
|
19149
|
+
}
|
|
19150
|
+
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
19151
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19152
|
+
await runGit(["push", "-u", "origin", branchName], {
|
|
19153
|
+
cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir
|
|
19154
|
+
});
|
|
19155
|
+
updateStatusFile(normalized, {
|
|
19156
|
+
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
19157
|
+
last_error: void 0
|
|
19158
|
+
});
|
|
19159
|
+
}
|
|
19160
|
+
async function createDraftResultsPr(params) {
|
|
19161
|
+
const { stdout } = await runGh(
|
|
19162
|
+
[
|
|
19163
|
+
"pr",
|
|
19164
|
+
"create",
|
|
19165
|
+
"--draft",
|
|
19166
|
+
"--repo",
|
|
19167
|
+
params.repo,
|
|
19168
|
+
"--base",
|
|
19169
|
+
params.baseBranch,
|
|
19170
|
+
"--head",
|
|
19171
|
+
params.branchName,
|
|
19172
|
+
"--title",
|
|
19173
|
+
params.title,
|
|
19174
|
+
"--body",
|
|
19175
|
+
params.body
|
|
19176
|
+
],
|
|
19177
|
+
{ cwd: params.repoDir }
|
|
19178
|
+
);
|
|
19179
|
+
return stdout.trim();
|
|
19180
|
+
}
|
|
19181
|
+
|
|
19182
|
+
// src/projects.ts
|
|
19183
|
+
import { existsSync as existsSync8, mkdirSync as mkdirSync3, readFileSync as readFileSync4, readdirSync as readdirSync3, statSync as statSync2, writeFileSync as writeFileSync2 } from "node:fs";
|
|
19184
|
+
import path50 from "node:path";
|
|
18762
19185
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
18763
19186
|
function getProjectsRegistryPath() {
|
|
18764
|
-
return
|
|
19187
|
+
return path50.join(getAgentvHome(), "projects.yaml");
|
|
18765
19188
|
}
|
|
18766
19189
|
function loadProjectRegistry() {
|
|
18767
19190
|
const registryPath = getProjectsRegistryPath();
|
|
18768
|
-
if (!
|
|
19191
|
+
if (!existsSync8(registryPath)) {
|
|
18769
19192
|
return { projects: [] };
|
|
18770
19193
|
}
|
|
18771
19194
|
try {
|
|
18772
|
-
const raw =
|
|
19195
|
+
const raw = readFileSync4(registryPath, "utf-8");
|
|
18773
19196
|
const parsed = parseYaml3(raw);
|
|
18774
19197
|
if (!parsed || !Array.isArray(parsed.projects)) {
|
|
18775
19198
|
return { projects: [] };
|
|
@@ -18781,14 +19204,14 @@ function loadProjectRegistry() {
|
|
|
18781
19204
|
}
|
|
18782
19205
|
function saveProjectRegistry(registry) {
|
|
18783
19206
|
const registryPath = getProjectsRegistryPath();
|
|
18784
|
-
const dir =
|
|
18785
|
-
if (!
|
|
18786
|
-
|
|
19207
|
+
const dir = path50.dirname(registryPath);
|
|
19208
|
+
if (!existsSync8(dir)) {
|
|
19209
|
+
mkdirSync3(dir, { recursive: true });
|
|
18787
19210
|
}
|
|
18788
|
-
|
|
19211
|
+
writeFileSync2(registryPath, stringifyYaml(registry), "utf-8");
|
|
18789
19212
|
}
|
|
18790
19213
|
function deriveProjectId(dirPath, existingIds) {
|
|
18791
|
-
const base =
|
|
19214
|
+
const base = path50.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
18792
19215
|
let candidate = base || "project";
|
|
18793
19216
|
let suffix = 2;
|
|
18794
19217
|
while (existingIds.includes(candidate)) {
|
|
@@ -18798,11 +19221,11 @@ function deriveProjectId(dirPath, existingIds) {
|
|
|
18798
19221
|
return candidate;
|
|
18799
19222
|
}
|
|
18800
19223
|
function addProject(projectPath) {
|
|
18801
|
-
const absPath =
|
|
18802
|
-
if (!
|
|
19224
|
+
const absPath = path50.resolve(projectPath);
|
|
19225
|
+
if (!existsSync8(absPath)) {
|
|
18803
19226
|
throw new Error(`Directory not found: ${absPath}`);
|
|
18804
19227
|
}
|
|
18805
|
-
if (!
|
|
19228
|
+
if (!existsSync8(path50.join(absPath, ".agentv"))) {
|
|
18806
19229
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
18807
19230
|
}
|
|
18808
19231
|
const registry = loadProjectRegistry();
|
|
@@ -18816,7 +19239,7 @@ function addProject(projectPath) {
|
|
|
18816
19239
|
absPath,
|
|
18817
19240
|
registry.projects.map((p) => p.id)
|
|
18818
19241
|
),
|
|
18819
|
-
name:
|
|
19242
|
+
name: path50.basename(absPath),
|
|
18820
19243
|
path: absPath,
|
|
18821
19244
|
addedAt: now,
|
|
18822
19245
|
lastOpenedAt: now
|
|
@@ -18845,14 +19268,14 @@ function touchProject(projectId) {
|
|
|
18845
19268
|
}
|
|
18846
19269
|
}
|
|
18847
19270
|
function discoverProjects(rootDir, maxDepth = 2) {
|
|
18848
|
-
const absRoot =
|
|
18849
|
-
if (!
|
|
19271
|
+
const absRoot = path50.resolve(rootDir);
|
|
19272
|
+
if (!existsSync8(absRoot) || !statSync2(absRoot).isDirectory()) {
|
|
18850
19273
|
return [];
|
|
18851
19274
|
}
|
|
18852
19275
|
const results = [];
|
|
18853
19276
|
function scan(dir, depth) {
|
|
18854
19277
|
if (depth > maxDepth) return;
|
|
18855
|
-
if (
|
|
19278
|
+
if (existsSync8(path50.join(dir, ".agentv"))) {
|
|
18856
19279
|
results.push(dir);
|
|
18857
19280
|
return;
|
|
18858
19281
|
}
|
|
@@ -18862,7 +19285,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
|
|
|
18862
19285
|
for (const entry of entries) {
|
|
18863
19286
|
if (!entry.isDirectory()) continue;
|
|
18864
19287
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
18865
|
-
scan(
|
|
19288
|
+
scan(path50.join(dir, entry.name), depth + 1);
|
|
18866
19289
|
}
|
|
18867
19290
|
} catch {
|
|
18868
19291
|
}
|
|
@@ -19773,33 +20196,33 @@ function extractResponseItemContent(content) {
|
|
|
19773
20196
|
}
|
|
19774
20197
|
|
|
19775
20198
|
// src/import/codex-session-discovery.ts
|
|
19776
|
-
import { readdir as
|
|
20199
|
+
import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
|
|
19777
20200
|
import { homedir as homedir3 } from "node:os";
|
|
19778
|
-
import
|
|
19779
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
20201
|
+
import path51 from "node:path";
|
|
20202
|
+
var DEFAULT_SESSIONS_DIR = () => path51.join(homedir3(), ".codex", "sessions");
|
|
19780
20203
|
async function discoverCodexSessions(opts) {
|
|
19781
20204
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
19782
20205
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
19783
20206
|
const sessions = [];
|
|
19784
20207
|
let yearDirs;
|
|
19785
20208
|
try {
|
|
19786
|
-
yearDirs = await
|
|
20209
|
+
yearDirs = await readdir9(sessionsDir);
|
|
19787
20210
|
} catch {
|
|
19788
20211
|
return [];
|
|
19789
20212
|
}
|
|
19790
20213
|
for (const year of yearDirs) {
|
|
19791
|
-
const yearPath =
|
|
20214
|
+
const yearPath = path51.join(sessionsDir, year);
|
|
19792
20215
|
let monthDirs;
|
|
19793
20216
|
try {
|
|
19794
|
-
monthDirs = await
|
|
20217
|
+
monthDirs = await readdir9(yearPath);
|
|
19795
20218
|
} catch {
|
|
19796
20219
|
continue;
|
|
19797
20220
|
}
|
|
19798
20221
|
for (const month of monthDirs) {
|
|
19799
|
-
const monthPath =
|
|
20222
|
+
const monthPath = path51.join(yearPath, month);
|
|
19800
20223
|
let dayDirs;
|
|
19801
20224
|
try {
|
|
19802
|
-
dayDirs = await
|
|
20225
|
+
dayDirs = await readdir9(monthPath);
|
|
19803
20226
|
} catch {
|
|
19804
20227
|
continue;
|
|
19805
20228
|
}
|
|
@@ -19808,22 +20231,22 @@ async function discoverCodexSessions(opts) {
|
|
|
19808
20231
|
const dirDate = `${year}-${month}-${day}`;
|
|
19809
20232
|
if (dirDate !== opts.date) continue;
|
|
19810
20233
|
}
|
|
19811
|
-
const dayPath =
|
|
20234
|
+
const dayPath = path51.join(monthPath, day);
|
|
19812
20235
|
let files;
|
|
19813
20236
|
try {
|
|
19814
|
-
files = await
|
|
20237
|
+
files = await readdir9(dayPath);
|
|
19815
20238
|
} catch {
|
|
19816
20239
|
continue;
|
|
19817
20240
|
}
|
|
19818
20241
|
for (const file of files) {
|
|
19819
20242
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
19820
|
-
const filePath =
|
|
20243
|
+
const filePath = path51.join(dayPath, file);
|
|
19821
20244
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
19822
20245
|
const parts = nameWithoutExt.split("-");
|
|
19823
20246
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
19824
20247
|
let updatedAt;
|
|
19825
20248
|
try {
|
|
19826
|
-
const fileStat = await
|
|
20249
|
+
const fileStat = await stat10(filePath);
|
|
19827
20250
|
updatedAt = fileStat.mtime;
|
|
19828
20251
|
} catch {
|
|
19829
20252
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -19838,10 +20261,10 @@ async function discoverCodexSessions(opts) {
|
|
|
19838
20261
|
}
|
|
19839
20262
|
|
|
19840
20263
|
// src/import/session-discovery.ts
|
|
19841
|
-
import { readdir as
|
|
20264
|
+
import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
|
|
19842
20265
|
import { homedir as homedir4 } from "node:os";
|
|
19843
|
-
import
|
|
19844
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
20266
|
+
import path52 from "node:path";
|
|
20267
|
+
var DEFAULT_PROJECTS_DIR = () => path52.join(homedir4(), ".claude", "projects");
|
|
19845
20268
|
function encodeProjectPath(projectPath) {
|
|
19846
20269
|
return projectPath.replace(/\//g, "-");
|
|
19847
20270
|
}
|
|
@@ -19850,7 +20273,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19850
20273
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
19851
20274
|
let projectDirs;
|
|
19852
20275
|
try {
|
|
19853
|
-
projectDirs = await
|
|
20276
|
+
projectDirs = await readdir10(projectsDir);
|
|
19854
20277
|
} catch {
|
|
19855
20278
|
return [];
|
|
19856
20279
|
}
|
|
@@ -19860,10 +20283,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
19860
20283
|
}
|
|
19861
20284
|
const sessions = [];
|
|
19862
20285
|
for (const projectDir of projectDirs) {
|
|
19863
|
-
const dirPath =
|
|
20286
|
+
const dirPath = path52.join(projectsDir, projectDir);
|
|
19864
20287
|
let entries;
|
|
19865
20288
|
try {
|
|
19866
|
-
entries = await
|
|
20289
|
+
entries = await readdir10(dirPath);
|
|
19867
20290
|
} catch {
|
|
19868
20291
|
continue;
|
|
19869
20292
|
}
|
|
@@ -19871,10 +20294,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
19871
20294
|
if (!entry.endsWith(".jsonl")) continue;
|
|
19872
20295
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
19873
20296
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
19874
|
-
const filePath =
|
|
20297
|
+
const filePath = path52.join(dirPath, entry);
|
|
19875
20298
|
let updatedAt;
|
|
19876
20299
|
try {
|
|
19877
|
-
const fileStat = await
|
|
20300
|
+
const fileStat = await stat11(filePath);
|
|
19878
20301
|
updatedAt = fileStat.mtime;
|
|
19879
20302
|
} catch {
|
|
19880
20303
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -19892,7 +20315,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19892
20315
|
}
|
|
19893
20316
|
|
|
19894
20317
|
// src/import/types.ts
|
|
19895
|
-
import { readFile as
|
|
20318
|
+
import { readFile as readFile17 } from "node:fs/promises";
|
|
19896
20319
|
function toTranscriptJsonLine(entry) {
|
|
19897
20320
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
19898
20321
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -19918,11 +20341,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
19918
20341
|
};
|
|
19919
20342
|
}
|
|
19920
20343
|
async function readTranscriptJsonl(filePath) {
|
|
19921
|
-
const text = await
|
|
20344
|
+
const text = await readFile17(filePath, "utf8");
|
|
19922
20345
|
return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
19923
20346
|
}
|
|
19924
20347
|
async function readTranscriptFile(filePath) {
|
|
19925
|
-
return
|
|
20348
|
+
return readFile17(filePath, "utf8");
|
|
19926
20349
|
}
|
|
19927
20350
|
|
|
19928
20351
|
// src/import/transcript-provider.ts
|
|
@@ -19987,6 +20410,7 @@ export {
|
|
|
19987
20410
|
DEFAULT_EXPLORATION_TOOLS,
|
|
19988
20411
|
DEFAULT_THRESHOLD,
|
|
19989
20412
|
DeterministicAssertionEvaluator,
|
|
20413
|
+
DockerWorkspaceProvider,
|
|
19990
20414
|
EvaluatorRegistry,
|
|
19991
20415
|
ExecutionMetricsEvaluator,
|
|
19992
20416
|
FieldAccuracyEvaluator,
|
|
@@ -20022,9 +20446,11 @@ export {
|
|
|
20022
20446
|
buildSearchRoots,
|
|
20023
20447
|
calculateRubricScore,
|
|
20024
20448
|
captureFileChanges,
|
|
20449
|
+
checkoutResultsRepoBranch,
|
|
20025
20450
|
clampScore,
|
|
20026
20451
|
cleanupEvalWorkspaces,
|
|
20027
20452
|
cleanupWorkspace,
|
|
20453
|
+
commitAndPushResultsBranch,
|
|
20028
20454
|
computeTraceSummary,
|
|
20029
20455
|
computeWorkspaceFingerprint,
|
|
20030
20456
|
consumeClaudeLogEntries,
|
|
@@ -20035,6 +20461,7 @@ export {
|
|
|
20035
20461
|
createAgentKernel,
|
|
20036
20462
|
createBuiltinProviderRegistry,
|
|
20037
20463
|
createBuiltinRegistry,
|
|
20464
|
+
createDraftResultsPr,
|
|
20038
20465
|
createProvider,
|
|
20039
20466
|
createTempWorkspace,
|
|
20040
20467
|
deepEqual,
|
|
@@ -20042,6 +20469,7 @@ export {
|
|
|
20042
20469
|
deriveCategory,
|
|
20043
20470
|
deriveProjectId,
|
|
20044
20471
|
detectFormat,
|
|
20472
|
+
directorySizeBytes,
|
|
20045
20473
|
discoverAssertions,
|
|
20046
20474
|
discoverClaudeSessions,
|
|
20047
20475
|
discoverCodexSessions,
|
|
@@ -20050,6 +20478,7 @@ export {
|
|
|
20050
20478
|
discoverGraders as discoverJudges,
|
|
20051
20479
|
discoverProjects,
|
|
20052
20480
|
discoverProviders,
|
|
20481
|
+
ensureResultsRepoClone,
|
|
20053
20482
|
ensureVSCodeSubagents,
|
|
20054
20483
|
evaluate,
|
|
20055
20484
|
executeScript,
|
|
@@ -20074,6 +20503,8 @@ export {
|
|
|
20074
20503
|
getOutputFilenames,
|
|
20075
20504
|
getProject,
|
|
20076
20505
|
getProjectsRegistryPath,
|
|
20506
|
+
getResultsRepoCachePaths,
|
|
20507
|
+
getResultsRepoStatus,
|
|
20077
20508
|
getSubagentsRoot,
|
|
20078
20509
|
getTextContent,
|
|
20079
20510
|
getTraceStateRoot,
|
|
@@ -20103,12 +20534,15 @@ export {
|
|
|
20103
20534
|
mergeExecutionMetrics,
|
|
20104
20535
|
negateScore,
|
|
20105
20536
|
normalizeLineEndings,
|
|
20537
|
+
normalizeResultsExportConfig,
|
|
20106
20538
|
parseAgentSkillsEvals,
|
|
20107
20539
|
parseClaudeSession,
|
|
20108
20540
|
parseCodexSession,
|
|
20109
20541
|
parseCopilotEvents,
|
|
20110
20542
|
parseJsonFromText,
|
|
20111
20543
|
parseJsonSafe,
|
|
20544
|
+
prepareResultsRepoBranch,
|
|
20545
|
+
pushResultsRepoBranch,
|
|
20112
20546
|
readJsonFile,
|
|
20113
20547
|
readTargetDefinitions,
|
|
20114
20548
|
readTestSuiteMetadata,
|
|
@@ -20119,6 +20553,8 @@ export {
|
|
|
20119
20553
|
resolveAndCreateProvider,
|
|
20120
20554
|
resolveDelegatedTargetDefinition,
|
|
20121
20555
|
resolveFileReference,
|
|
20556
|
+
resolveResultsRepoRunsDir,
|
|
20557
|
+
resolveResultsRepoUrl,
|
|
20122
20558
|
resolveTargetDefinition,
|
|
20123
20559
|
resolveWorkspaceTemplate,
|
|
20124
20560
|
rubricEvaluationSchema,
|
|
@@ -20140,12 +20576,14 @@ export {
|
|
|
20140
20576
|
scoreToVerdict,
|
|
20141
20577
|
shouldEnableCache,
|
|
20142
20578
|
shouldSkipCacheForTemperature,
|
|
20579
|
+
stageResultsArtifacts,
|
|
20143
20580
|
subscribeToClaudeLogEntries,
|
|
20144
20581
|
subscribeToCodexLogEntries,
|
|
20145
20582
|
subscribeToCopilotCliLogEntries,
|
|
20146
20583
|
subscribeToCopilotSdkLogEntries,
|
|
20147
20584
|
subscribeToPiLogEntries,
|
|
20148
20585
|
substituteVariables,
|
|
20586
|
+
syncResultsRepo,
|
|
20149
20587
|
toCamelCaseDeep,
|
|
20150
20588
|
toSnakeCaseDeep,
|
|
20151
20589
|
toTranscriptJsonLine,
|