@agentv/core 4.30.0 → 4.31.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-Z2BBOGE4.js → chunk-A27NE3R7.js} +28 -27
- package/dist/chunk-A27NE3R7.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +42 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +42 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +297 -76
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +34 -19
- package/dist/index.d.ts +34 -19
- package/dist/index.js +277 -51
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-JL5DGTJL.js → ts-eval-loader-XR6DNOZ3.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-Z2BBOGE4.js.map +0 -1
- /package/dist/{ts-eval-loader-JL5DGTJL.js.map → ts-eval-loader-XR6DNOZ3.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -1168,6 +1168,9 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
1168
1168
|
}
|
|
1169
1169
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
1170
1170
|
}
|
|
1171
|
+
function isFilesystemPath(p) {
|
|
1172
|
+
return p.startsWith("/") || p.startsWith("~/") || p.startsWith("~\\") || p === "~" || /^[A-Za-z]:[/\\]/.test(p);
|
|
1173
|
+
}
|
|
1171
1174
|
function parseResultsConfig(raw, configPath) {
|
|
1172
1175
|
if (raw === void 0 || raw === null) {
|
|
1173
1176
|
return void 0;
|
|
@@ -1177,48 +1180,46 @@ function parseResultsConfig(raw, configPath) {
|
|
|
1177
1180
|
return void 0;
|
|
1178
1181
|
}
|
|
1179
1182
|
const obj = raw;
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
return void 0;
|
|
1183
|
-
}
|
|
1184
|
-
return { export: exportConfig };
|
|
1185
|
-
}
|
|
1186
|
-
function parseResultsExportConfig(raw, configPath) {
|
|
1187
|
-
if (raw === void 0 || raw === null) {
|
|
1188
|
-
return void 0;
|
|
1189
|
-
}
|
|
1190
|
-
if (typeof raw !== "object" || Array.isArray(raw)) {
|
|
1191
|
-
logWarning(`Invalid results.export in ${configPath}, expected object`);
|
|
1183
|
+
if (obj.mode !== "github") {
|
|
1184
|
+
logWarning(`Invalid results.mode in ${configPath}, expected 'github'`);
|
|
1192
1185
|
return void 0;
|
|
1193
1186
|
}
|
|
1194
|
-
const obj = raw;
|
|
1195
1187
|
const repo = typeof obj.repo === "string" ? obj.repo.trim() : "";
|
|
1196
|
-
const exportPath = typeof obj.path === "string" ? obj.path.trim() : "";
|
|
1197
1188
|
if (!repo) {
|
|
1198
|
-
logWarning(`Invalid results.
|
|
1189
|
+
logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
|
|
1199
1190
|
return void 0;
|
|
1200
1191
|
}
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1192
|
+
let resultsPath;
|
|
1193
|
+
if (obj.path !== void 0) {
|
|
1194
|
+
if (typeof obj.path !== "string" || obj.path.trim().length === 0) {
|
|
1195
|
+
logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
|
|
1196
|
+
return void 0;
|
|
1197
|
+
}
|
|
1198
|
+
const trimmedPath = obj.path.trim();
|
|
1199
|
+
if (!isFilesystemPath(trimmedPath)) {
|
|
1200
|
+
logWarning(
|
|
1201
|
+
`Invalid results.path in ${configPath}: '${trimmedPath}' looks like a repo subdirectory. results.path now specifies the local filesystem directory for the clone (e.g., ~/data/agentv-results). Remove 'path' to use the default or set an absolute/home-relative path.`
|
|
1202
|
+
);
|
|
1203
|
+
return void 0;
|
|
1204
|
+
}
|
|
1205
|
+
resultsPath = trimmedPath;
|
|
1204
1206
|
}
|
|
1205
1207
|
if (obj.auto_push !== void 0 && typeof obj.auto_push !== "boolean") {
|
|
1206
|
-
logWarning(`Invalid results.
|
|
1208
|
+
logWarning(`Invalid results.auto_push in ${configPath}, expected boolean`);
|
|
1207
1209
|
return void 0;
|
|
1208
1210
|
}
|
|
1209
1211
|
let branchPrefix;
|
|
1210
1212
|
if (obj.branch_prefix !== void 0) {
|
|
1211
1213
|
if (typeof obj.branch_prefix !== "string" || obj.branch_prefix.trim().length === 0) {
|
|
1212
|
-
logWarning(
|
|
1213
|
-
`Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`
|
|
1214
|
-
);
|
|
1214
|
+
logWarning(`Invalid results.branch_prefix in ${configPath}, expected non-empty string`);
|
|
1215
1215
|
return void 0;
|
|
1216
1216
|
}
|
|
1217
1217
|
branchPrefix = obj.branch_prefix.trim();
|
|
1218
1218
|
}
|
|
1219
1219
|
return {
|
|
1220
|
+
mode: "github",
|
|
1220
1221
|
repo,
|
|
1221
|
-
path:
|
|
1222
|
+
...resultsPath !== void 0 && { path: resultsPath },
|
|
1222
1223
|
...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
|
|
1223
1224
|
...branchPrefix && { branch_prefix: branchPrefix }
|
|
1224
1225
|
};
|
|
@@ -1331,10 +1332,10 @@ async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
|
1331
1332
|
}
|
|
1332
1333
|
}
|
|
1333
1334
|
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
1334
|
-
const { spawn:
|
|
1335
|
+
const { spawn: spawn6 } = await import("child_process");
|
|
1335
1336
|
return new Promise((resolve, reject) => {
|
|
1336
1337
|
const [cmd, ...args] = argv;
|
|
1337
|
-
const child =
|
|
1338
|
+
const child = spawn6(cmd, args, {
|
|
1338
1339
|
cwd: options.cwd,
|
|
1339
1340
|
stdio: ["pipe", "pipe", "pipe"],
|
|
1340
1341
|
// Merge additional env vars with process.env
|
|
@@ -1385,10 +1386,10 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
1385
1386
|
const stderrPath = path57.join(dir, "stderr.txt");
|
|
1386
1387
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
1387
1388
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
1388
|
-
const { spawn:
|
|
1389
|
+
const { spawn: spawn6 } = await import("child_process");
|
|
1389
1390
|
try {
|
|
1390
1391
|
const exitCode = await new Promise((resolve, reject) => {
|
|
1391
|
-
const child =
|
|
1392
|
+
const child = spawn6(wrappedCommand, {
|
|
1392
1393
|
shell: true,
|
|
1393
1394
|
cwd: options.cwd,
|
|
1394
1395
|
stdio: ["ignore", "ignore", "ignore"],
|
|
@@ -4668,7 +4669,7 @@ function getAgentvHome() {
|
|
|
4668
4669
|
if (envHome && envHome !== "undefined") {
|
|
4669
4670
|
if (!logged) {
|
|
4670
4671
|
logged = true;
|
|
4671
|
-
console.
|
|
4672
|
+
console.log(`Using AGENTV_HOME: ${envHome}`);
|
|
4672
4673
|
}
|
|
4673
4674
|
return envHome;
|
|
4674
4675
|
}
|
|
@@ -24831,7 +24832,7 @@ __export(index_exports, {
|
|
|
24831
24832
|
getOutputFilenames: () => getOutputFilenames,
|
|
24832
24833
|
getProject: () => getProject,
|
|
24833
24834
|
getProjectsRegistryPath: () => getProjectsRegistryPath,
|
|
24834
|
-
|
|
24835
|
+
getResultsRepoLocalPaths: () => getResultsRepoLocalPaths,
|
|
24835
24836
|
getResultsRepoStatus: () => getResultsRepoStatus,
|
|
24836
24837
|
getSubagentsRoot: () => getSubagentsRoot,
|
|
24837
24838
|
getTextContent: () => getTextContent,
|
|
@@ -24851,6 +24852,7 @@ __export(index_exports, {
|
|
|
24851
24852
|
isTestMessage: () => isTestMessage,
|
|
24852
24853
|
isTestMessageRole: () => isTestMessageRole,
|
|
24853
24854
|
killAllTrackedChildren: () => killAllTrackedChildren,
|
|
24855
|
+
listGitRuns: () => listGitRuns,
|
|
24854
24856
|
listTargetNames: () => listTargetNames,
|
|
24855
24857
|
loadConfig: () => loadConfig,
|
|
24856
24858
|
loadEvalCaseById: () => loadEvalCaseById,
|
|
@@ -24862,10 +24864,11 @@ __export(index_exports, {
|
|
|
24862
24864
|
loadTests: () => loadTests,
|
|
24863
24865
|
loadTsConfig: () => loadTsConfig,
|
|
24864
24866
|
loadTsEvalFile: () => loadTsEvalFile,
|
|
24867
|
+
materializeGitRun: () => materializeGitRun,
|
|
24865
24868
|
mergeExecutionMetrics: () => mergeExecutionMetrics,
|
|
24866
24869
|
negateScore: () => negateScore,
|
|
24867
24870
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
24868
|
-
|
|
24871
|
+
normalizeResultsConfig: () => normalizeResultsConfig,
|
|
24869
24872
|
parseAgentSkillsEvals: () => parseAgentSkillsEvals,
|
|
24870
24873
|
parseClaudeSession: () => parseClaudeSession,
|
|
24871
24874
|
parseCodexSession: () => parseCodexSession,
|
|
@@ -25554,10 +25557,19 @@ function withFriendlyGitHubAuthError(error) {
|
|
|
25554
25557
|
}
|
|
25555
25558
|
return new Error(message);
|
|
25556
25559
|
}
|
|
25557
|
-
function
|
|
25560
|
+
function expandHome(p) {
|
|
25561
|
+
if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
|
|
25562
|
+
return import_node_path54.default.join(import_node_os10.default.homedir(), p.slice(1));
|
|
25563
|
+
}
|
|
25564
|
+
return p;
|
|
25565
|
+
}
|
|
25566
|
+
function normalizeResultsConfig(config) {
|
|
25567
|
+
const repo = config.repo.trim();
|
|
25568
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : import_node_path54.default.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
|
|
25558
25569
|
return {
|
|
25559
|
-
|
|
25560
|
-
|
|
25570
|
+
mode: "github",
|
|
25571
|
+
repo,
|
|
25572
|
+
path: resolvedPath,
|
|
25561
25573
|
auto_push: config.auto_push === true,
|
|
25562
25574
|
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
25563
25575
|
};
|
|
@@ -25568,7 +25580,7 @@ function resolveResultsRepoUrl(repo) {
|
|
|
25568
25580
|
}
|
|
25569
25581
|
return `https://github.com/${repo}.git`;
|
|
25570
25582
|
}
|
|
25571
|
-
function
|
|
25583
|
+
function getResultsRepoLocalPaths(repo) {
|
|
25572
25584
|
const rootDir = import_node_path54.default.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
25573
25585
|
return {
|
|
25574
25586
|
rootDir,
|
|
@@ -25595,7 +25607,7 @@ async function runCommand(executable, args, options) {
|
|
|
25595
25607
|
try {
|
|
25596
25608
|
const { stdout, stderr } = await execFileAsync4(executable, [...args], {
|
|
25597
25609
|
cwd: options?.cwd,
|
|
25598
|
-
env: process.env
|
|
25610
|
+
env: options?.env ?? process.env
|
|
25599
25611
|
});
|
|
25600
25612
|
return { stdout, stderr };
|
|
25601
25613
|
} catch (error) {
|
|
@@ -25609,8 +25621,17 @@ async function runCommand(executable, args, options) {
|
|
|
25609
25621
|
throw withFriendlyGitHubAuthError(error);
|
|
25610
25622
|
}
|
|
25611
25623
|
}
|
|
25624
|
+
function getGitEnv() {
|
|
25625
|
+
const env = {};
|
|
25626
|
+
for (const [key, value] of Object.entries(process.env)) {
|
|
25627
|
+
if (value !== void 0 && !(key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND")) {
|
|
25628
|
+
env[key] = value;
|
|
25629
|
+
}
|
|
25630
|
+
}
|
|
25631
|
+
return env;
|
|
25632
|
+
}
|
|
25612
25633
|
async function runGit(args, options) {
|
|
25613
|
-
return runCommand("git", args, options);
|
|
25634
|
+
return runCommand("git", args, { ...options, env: getGitEnv() });
|
|
25614
25635
|
}
|
|
25615
25636
|
async function runGh(args, options) {
|
|
25616
25637
|
return runCommand("gh", args, options);
|
|
@@ -25634,13 +25655,11 @@ async function resolveDefaultBranch(repoDir) {
|
|
|
25634
25655
|
}
|
|
25635
25656
|
return "main";
|
|
25636
25657
|
}
|
|
25637
|
-
async function
|
|
25658
|
+
async function fetchResultsRepo(repoDir) {
|
|
25638
25659
|
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
25639
|
-
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
25640
|
-
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
25641
25660
|
}
|
|
25642
25661
|
function updateStatusFile(config, patch) {
|
|
25643
|
-
const cachePaths =
|
|
25662
|
+
const cachePaths = getResultsRepoLocalPaths(config.repo);
|
|
25644
25663
|
const current = readPersistedStatus(cachePaths.statusFile);
|
|
25645
25664
|
writePersistedStatus(cachePaths.statusFile, {
|
|
25646
25665
|
...current,
|
|
@@ -25648,27 +25667,32 @@ function updateStatusFile(config, patch) {
|
|
|
25648
25667
|
});
|
|
25649
25668
|
}
|
|
25650
25669
|
async function ensureResultsRepoClone(config) {
|
|
25651
|
-
const normalized =
|
|
25652
|
-
const cachePaths =
|
|
25670
|
+
const normalized = normalizeResultsConfig(config);
|
|
25671
|
+
const cachePaths = getResultsRepoLocalPaths(normalized.repo);
|
|
25672
|
+
const cloneDir = normalized.path;
|
|
25653
25673
|
(0, import_node_fs19.mkdirSync)(cachePaths.rootDir, { recursive: true });
|
|
25654
|
-
|
|
25674
|
+
(0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(cloneDir), { recursive: true });
|
|
25675
|
+
const cloneMissing = !(0, import_node_fs19.existsSync)(cloneDir);
|
|
25676
|
+
const gitDir = import_node_path54.default.join(cloneDir, ".git");
|
|
25677
|
+
const cloneEmpty = !cloneMissing && !(0, import_node_fs19.existsSync)(gitDir) && (await (0, import_promises39.readdir)(cloneDir)).length === 0;
|
|
25678
|
+
if (cloneMissing || cloneEmpty) {
|
|
25655
25679
|
try {
|
|
25656
25680
|
await runGit([
|
|
25657
25681
|
"clone",
|
|
25658
25682
|
"--filter=blob:none",
|
|
25659
25683
|
resolveResultsRepoUrl(normalized.repo),
|
|
25660
|
-
|
|
25684
|
+
cloneDir
|
|
25661
25685
|
]);
|
|
25662
|
-
return
|
|
25686
|
+
return cloneDir;
|
|
25663
25687
|
} catch (error) {
|
|
25664
25688
|
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
25665
25689
|
throw withFriendlyGitHubAuthError(error);
|
|
25666
25690
|
}
|
|
25667
25691
|
}
|
|
25668
|
-
if (!(0, import_node_fs19.existsSync)(
|
|
25669
|
-
throw new Error(`Results repo
|
|
25692
|
+
if (!(0, import_node_fs19.existsSync)(gitDir)) {
|
|
25693
|
+
throw new Error(`Results repo clone path is not a git repository: ${cloneDir}`);
|
|
25670
25694
|
}
|
|
25671
|
-
return
|
|
25695
|
+
return cloneDir;
|
|
25672
25696
|
}
|
|
25673
25697
|
function getResultsRepoStatus(config) {
|
|
25674
25698
|
if (!config) {
|
|
@@ -25676,30 +25700,29 @@ function getResultsRepoStatus(config) {
|
|
|
25676
25700
|
configured: false,
|
|
25677
25701
|
available: false,
|
|
25678
25702
|
repo: "",
|
|
25679
|
-
|
|
25703
|
+
local_dir: ""
|
|
25680
25704
|
};
|
|
25681
25705
|
}
|
|
25682
|
-
const normalized =
|
|
25683
|
-
const
|
|
25684
|
-
const persisted = readPersistedStatus(
|
|
25706
|
+
const normalized = normalizeResultsConfig(config);
|
|
25707
|
+
const localPaths = getResultsRepoLocalPaths(normalized.repo);
|
|
25708
|
+
const persisted = readPersistedStatus(localPaths.statusFile);
|
|
25685
25709
|
return {
|
|
25686
25710
|
configured: true,
|
|
25687
|
-
available: (0, import_node_fs19.existsSync)(
|
|
25711
|
+
available: (0, import_node_fs19.existsSync)(normalized.path),
|
|
25688
25712
|
repo: normalized.repo,
|
|
25689
25713
|
path: normalized.path,
|
|
25690
25714
|
auto_push: normalized.auto_push,
|
|
25691
25715
|
branch_prefix: normalized.branch_prefix,
|
|
25692
|
-
|
|
25716
|
+
local_dir: normalized.path,
|
|
25693
25717
|
last_synced_at: persisted.last_synced_at,
|
|
25694
25718
|
last_error: persisted.last_error
|
|
25695
25719
|
};
|
|
25696
25720
|
}
|
|
25697
25721
|
async function syncResultsRepo(config) {
|
|
25698
|
-
const normalized =
|
|
25722
|
+
const normalized = normalizeResultsConfig(config);
|
|
25699
25723
|
try {
|
|
25700
25724
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25701
|
-
|
|
25702
|
-
await updateCacheRepo(repoDir, baseBranch);
|
|
25725
|
+
await fetchResultsRepo(repoDir);
|
|
25703
25726
|
updateStatusFile(normalized, {
|
|
25704
25727
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
25705
25728
|
last_error: void 0
|
|
@@ -25713,10 +25736,10 @@ async function syncResultsRepo(config) {
|
|
|
25713
25736
|
return getResultsRepoStatus(normalized);
|
|
25714
25737
|
}
|
|
25715
25738
|
async function checkoutResultsRepoBranch(config, branchName) {
|
|
25716
|
-
const normalized =
|
|
25739
|
+
const normalized = normalizeResultsConfig(config);
|
|
25717
25740
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25718
25741
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
25719
|
-
await
|
|
25742
|
+
await fetchResultsRepo(repoDir);
|
|
25720
25743
|
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
25721
25744
|
updateStatusFile(normalized, { last_error: void 0 });
|
|
25722
25745
|
return {
|
|
@@ -25726,10 +25749,10 @@ async function checkoutResultsRepoBranch(config, branchName) {
|
|
|
25726
25749
|
};
|
|
25727
25750
|
}
|
|
25728
25751
|
async function prepareResultsRepoBranch(config, branchName) {
|
|
25729
|
-
const normalized =
|
|
25752
|
+
const normalized = normalizeResultsConfig(config);
|
|
25730
25753
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
25731
25754
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
25732
|
-
await
|
|
25755
|
+
await fetchResultsRepo(cloneDir);
|
|
25733
25756
|
const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path54.default.join(import_node_os10.default.tmpdir(), "agentv-results-repo-"));
|
|
25734
25757
|
const worktreeDir = import_node_path54.default.join(worktreeRoot, "repo");
|
|
25735
25758
|
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
@@ -25754,11 +25777,8 @@ async function stageResultsArtifacts(params) {
|
|
|
25754
25777
|
await (0, import_promises39.cp)(params.sourceDir, params.destinationDir, { recursive: true });
|
|
25755
25778
|
}
|
|
25756
25779
|
function resolveResultsRepoRunsDir(config) {
|
|
25757
|
-
const normalized =
|
|
25758
|
-
return import_node_path54.default.join(
|
|
25759
|
-
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
25760
|
-
...normalized.path.split("/")
|
|
25761
|
-
);
|
|
25780
|
+
const normalized = normalizeResultsConfig(config);
|
|
25781
|
+
return import_node_path54.default.join(normalized.path, "runs");
|
|
25762
25782
|
}
|
|
25763
25783
|
async function directorySizeBytes(targetPath) {
|
|
25764
25784
|
const entry = await (0, import_promises39.stat)(targetPath);
|
|
@@ -25785,9 +25805,9 @@ async function commitAndPushResultsBranch(params) {
|
|
|
25785
25805
|
return true;
|
|
25786
25806
|
}
|
|
25787
25807
|
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
25788
|
-
const normalized =
|
|
25808
|
+
const normalized = normalizeResultsConfig(config);
|
|
25789
25809
|
await runGit(["push", "-u", "origin", branchName], {
|
|
25790
|
-
cwd: cwd ??
|
|
25810
|
+
cwd: cwd ?? normalized.path
|
|
25791
25811
|
});
|
|
25792
25812
|
updateStatusFile(normalized, {
|
|
25793
25813
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -25817,11 +25837,11 @@ async function createDraftResultsPr(params) {
|
|
|
25817
25837
|
}
|
|
25818
25838
|
var DIRECT_PUSH_MAX_RETRIES = 3;
|
|
25819
25839
|
async function directPushResults(params) {
|
|
25820
|
-
const normalized =
|
|
25840
|
+
const normalized = normalizeResultsConfig(params.config);
|
|
25821
25841
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25822
25842
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
25823
|
-
await
|
|
25824
|
-
const destinationDir = import_node_path54.default.join(repoDir,
|
|
25843
|
+
await fetchResultsRepo(repoDir);
|
|
25844
|
+
const destinationDir = import_node_path54.default.join(repoDir, "runs", params.destinationPath);
|
|
25825
25845
|
await stageResultsArtifacts({
|
|
25826
25846
|
repoDir,
|
|
25827
25847
|
sourceDir: params.sourceDir,
|
|
@@ -25835,10 +25855,19 @@ async function directPushResults(params) {
|
|
|
25835
25855
|
if (status.trim().length === 0) {
|
|
25836
25856
|
return false;
|
|
25837
25857
|
}
|
|
25838
|
-
await runGit(
|
|
25858
|
+
await runGit(
|
|
25859
|
+
[
|
|
25860
|
+
"commit",
|
|
25861
|
+
"-m",
|
|
25862
|
+
params.commitMessage,
|
|
25863
|
+
"-m",
|
|
25864
|
+
`Agentv-Run: ${buildGitRunId(params.destinationPath)}`
|
|
25865
|
+
],
|
|
25866
|
+
{ cwd: repoDir }
|
|
25867
|
+
);
|
|
25839
25868
|
for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
|
|
25840
25869
|
try {
|
|
25841
|
-
await runGit(["push", "origin", baseBranch], { cwd: repoDir });
|
|
25870
|
+
await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
|
|
25842
25871
|
updateStatusFile(normalized, {
|
|
25843
25872
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
25844
25873
|
last_error: void 0
|
|
@@ -25847,7 +25876,8 @@ async function directPushResults(params) {
|
|
|
25847
25876
|
} catch (error) {
|
|
25848
25877
|
const message = error instanceof Error ? error.message : String(error);
|
|
25849
25878
|
if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
|
|
25850
|
-
await
|
|
25879
|
+
await fetchResultsRepo(repoDir);
|
|
25880
|
+
await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
|
|
25851
25881
|
} else {
|
|
25852
25882
|
throw error;
|
|
25853
25883
|
}
|
|
@@ -25855,6 +25885,195 @@ async function directPushResults(params) {
|
|
|
25855
25885
|
}
|
|
25856
25886
|
return false;
|
|
25857
25887
|
}
|
|
25888
|
+
function buildGitRunId(relativeRunPath) {
|
|
25889
|
+
const normalized = relativeRunPath.split(import_node_path54.default.sep).join("/");
|
|
25890
|
+
const segments = normalized.split("/").filter(Boolean);
|
|
25891
|
+
if (segments.length >= 2) {
|
|
25892
|
+
const experiment = segments.slice(0, -1).join("/");
|
|
25893
|
+
const timestamp = segments.at(-1);
|
|
25894
|
+
if (experiment === "default") {
|
|
25895
|
+
return timestamp ?? normalized;
|
|
25896
|
+
}
|
|
25897
|
+
return `${experiment}::${timestamp}`;
|
|
25898
|
+
}
|
|
25899
|
+
return segments[0] ?? relativeRunPath;
|
|
25900
|
+
}
|
|
25901
|
+
function getRunExperiment(runId, benchmark) {
|
|
25902
|
+
const experiment = benchmark.metadata?.experiment?.trim();
|
|
25903
|
+
if (experiment) {
|
|
25904
|
+
return experiment;
|
|
25905
|
+
}
|
|
25906
|
+
const separatorIndex = runId.lastIndexOf("::");
|
|
25907
|
+
return separatorIndex === -1 ? "default" : runId.slice(0, separatorIndex);
|
|
25908
|
+
}
|
|
25909
|
+
function computeAveragePassRate(runSummary) {
|
|
25910
|
+
if (!runSummary) {
|
|
25911
|
+
return void 0;
|
|
25912
|
+
}
|
|
25913
|
+
const passRates = Object.values(runSummary).map((summary) => summary.pass_rate?.mean).filter((value) => typeof value === "number" && Number.isFinite(value));
|
|
25914
|
+
if (passRates.length === 0) {
|
|
25915
|
+
return void 0;
|
|
25916
|
+
}
|
|
25917
|
+
return passRates.reduce((sum, value) => sum + value, 0) / passRates.length;
|
|
25918
|
+
}
|
|
25919
|
+
async function runGitBatch(repoDir, input) {
|
|
25920
|
+
return new Promise((resolve, reject) => {
|
|
25921
|
+
const child = (0, import_node_child_process12.spawn)("git", ["cat-file", "--batch"], {
|
|
25922
|
+
cwd: repoDir,
|
|
25923
|
+
env: getGitEnv(),
|
|
25924
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
25925
|
+
});
|
|
25926
|
+
const stdoutChunks = [];
|
|
25927
|
+
const stderrChunks = [];
|
|
25928
|
+
child.stdout.on("data", (chunk) => {
|
|
25929
|
+
stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
25930
|
+
});
|
|
25931
|
+
child.stderr.on("data", (chunk) => {
|
|
25932
|
+
stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
25933
|
+
});
|
|
25934
|
+
child.on("error", (error) => reject(withFriendlyGitHubAuthError(error)));
|
|
25935
|
+
child.on("close", (code) => {
|
|
25936
|
+
if (code === 0) {
|
|
25937
|
+
resolve(Buffer.concat(stdoutChunks));
|
|
25938
|
+
return;
|
|
25939
|
+
}
|
|
25940
|
+
const stderr = Buffer.concat(stderrChunks).toString("utf8").trim();
|
|
25941
|
+
reject(
|
|
25942
|
+
withFriendlyGitHubAuthError(
|
|
25943
|
+
stderr.length > 0 ? new Error(stderr) : new Error("git cat-file failed")
|
|
25944
|
+
)
|
|
25945
|
+
);
|
|
25946
|
+
});
|
|
25947
|
+
child.stdin.end(input);
|
|
25948
|
+
});
|
|
25949
|
+
}
|
|
25950
|
+
function parseGitBatchBlobs(output) {
|
|
25951
|
+
const blobs = [];
|
|
25952
|
+
let offset = 0;
|
|
25953
|
+
while (offset < output.length) {
|
|
25954
|
+
const headerEnd = output.indexOf(10, offset);
|
|
25955
|
+
if (headerEnd === -1) {
|
|
25956
|
+
throw new Error("Malformed git cat-file output: missing header terminator");
|
|
25957
|
+
}
|
|
25958
|
+
const header = output.subarray(offset, headerEnd).toString("utf8");
|
|
25959
|
+
offset = headerEnd + 1;
|
|
25960
|
+
if (header.length === 0) {
|
|
25961
|
+
continue;
|
|
25962
|
+
}
|
|
25963
|
+
const missingMatch = /^(.*) missing$/.exec(header);
|
|
25964
|
+
if (missingMatch) {
|
|
25965
|
+
continue;
|
|
25966
|
+
}
|
|
25967
|
+
const headerMatch = /^(.*) (\w+) (\d+)$/.exec(header);
|
|
25968
|
+
if (!headerMatch) {
|
|
25969
|
+
throw new Error(`Malformed git cat-file header: ${header}`);
|
|
25970
|
+
}
|
|
25971
|
+
const [, objectRef, objectType, sizeText] = headerMatch;
|
|
25972
|
+
if (objectType !== "blob") {
|
|
25973
|
+
throw new Error(`Unsupported git object type for ${objectRef}: ${objectType}`);
|
|
25974
|
+
}
|
|
25975
|
+
const size = Number.parseInt(sizeText, 10);
|
|
25976
|
+
const contentEnd = offset + size;
|
|
25977
|
+
if (contentEnd > output.length) {
|
|
25978
|
+
throw new Error(`Malformed git cat-file output for ${objectRef}: truncated blob content`);
|
|
25979
|
+
}
|
|
25980
|
+
blobs.push({
|
|
25981
|
+
size,
|
|
25982
|
+
content: output.subarray(offset, contentEnd)
|
|
25983
|
+
});
|
|
25984
|
+
offset = contentEnd;
|
|
25985
|
+
if (offset < output.length && output[offset] === 10) {
|
|
25986
|
+
offset += 1;
|
|
25987
|
+
}
|
|
25988
|
+
}
|
|
25989
|
+
return blobs;
|
|
25990
|
+
}
|
|
25991
|
+
async function listGitRuns(repoDir, ref = "origin/main") {
|
|
25992
|
+
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, "runs"], {
|
|
25993
|
+
cwd: repoDir
|
|
25994
|
+
});
|
|
25995
|
+
const benchmarkPaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.endsWith("/benchmark.json"));
|
|
25996
|
+
if (benchmarkPaths.length === 0) {
|
|
25997
|
+
return [];
|
|
25998
|
+
}
|
|
25999
|
+
const batchInput = `${benchmarkPaths.map((benchmarkPath) => `${ref}:${benchmarkPath}`).join("\n")}
|
|
26000
|
+
`;
|
|
26001
|
+
const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
|
|
26002
|
+
if (blobs.length !== benchmarkPaths.length) {
|
|
26003
|
+
throw new Error(
|
|
26004
|
+
`Expected ${benchmarkPaths.length} git blobs but received ${blobs.length} while listing results runs`
|
|
26005
|
+
);
|
|
26006
|
+
}
|
|
26007
|
+
const runs = blobs.flatMap((blob, index) => {
|
|
26008
|
+
const benchmarkPath = benchmarkPaths[index];
|
|
26009
|
+
const benchmark = JSON.parse(blob.content.toString("utf8"));
|
|
26010
|
+
const runDir = import_node_path54.default.posix.dirname(benchmarkPath);
|
|
26011
|
+
const relativeRunPath = import_node_path54.default.posix.relative("runs", runDir);
|
|
26012
|
+
const runId = buildGitRunId(relativeRunPath);
|
|
26013
|
+
const timestamp = benchmark.metadata?.timestamp?.trim() || import_node_path54.default.posix.basename(runDir);
|
|
26014
|
+
const targets = benchmark.metadata?.targets ?? [];
|
|
26015
|
+
const passRate = computeAveragePassRate(benchmark.run_summary);
|
|
26016
|
+
return [
|
|
26017
|
+
{
|
|
26018
|
+
run_id: runId,
|
|
26019
|
+
experiment: getRunExperiment(runId, benchmark),
|
|
26020
|
+
timestamp,
|
|
26021
|
+
...passRate !== void 0 && { pass_rate: passRate },
|
|
26022
|
+
...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
|
|
26023
|
+
manifest_path: import_node_path54.default.posix.join(runDir, "index.jsonl"),
|
|
26024
|
+
benchmark_path: benchmarkPath,
|
|
26025
|
+
display_name: import_node_path54.default.posix.basename(runDir),
|
|
26026
|
+
test_count: benchmark.metadata?.tests_run?.length ?? 0,
|
|
26027
|
+
avg_score: 0,
|
|
26028
|
+
size_bytes: blob.size
|
|
26029
|
+
}
|
|
26030
|
+
];
|
|
26031
|
+
});
|
|
26032
|
+
runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
|
|
26033
|
+
return runs;
|
|
26034
|
+
}
|
|
26035
|
+
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
|
|
26036
|
+
const normalizedRunPath = relativeRunPath.split(import_node_path54.default.sep).join("/");
|
|
26037
|
+
const runTreePath = import_node_path54.default.posix.join("runs", normalizedRunPath);
|
|
26038
|
+
const targetRunDir = import_node_path54.default.join(repoDir, ...runTreePath.split("/"));
|
|
26039
|
+
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
|
|
26040
|
+
cwd: repoDir
|
|
26041
|
+
});
|
|
26042
|
+
const filePaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
26043
|
+
if (filePaths.length === 0) {
|
|
26044
|
+
return;
|
|
26045
|
+
}
|
|
26046
|
+
const batchInput = `${filePaths.map((filePath) => `${ref}:${filePath}`).join("\n")}
|
|
26047
|
+
`;
|
|
26048
|
+
const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
|
|
26049
|
+
if (blobs.length !== filePaths.length) {
|
|
26050
|
+
throw new Error(
|
|
26051
|
+
`Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
|
|
26052
|
+
);
|
|
26053
|
+
}
|
|
26054
|
+
const tempRoot = (0, import_node_fs19.mkdtempSync)(import_node_path54.default.join(repoDir, ".agentv-run-"));
|
|
26055
|
+
const tempRunDir = import_node_path54.default.join(tempRoot, "run");
|
|
26056
|
+
try {
|
|
26057
|
+
for (const [index, filePath] of filePaths.entries()) {
|
|
26058
|
+
const relativeFilePath = import_node_path54.default.posix.relative(runTreePath, filePath);
|
|
26059
|
+
const absolutePath = import_node_path54.default.join(tempRunDir, ...relativeFilePath.split("/"));
|
|
26060
|
+
(0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(absolutePath), { recursive: true });
|
|
26061
|
+
(0, import_node_fs19.writeFileSync)(absolutePath, blobs[index].content);
|
|
26062
|
+
}
|
|
26063
|
+
(0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(targetRunDir), { recursive: true });
|
|
26064
|
+
try {
|
|
26065
|
+
(0, import_node_fs19.renameSync)(tempRunDir, targetRunDir);
|
|
26066
|
+
} catch (error) {
|
|
26067
|
+
const code = typeof error === "object" && error !== null && "code" in error ? error.code : void 0;
|
|
26068
|
+
if ((code === "EEXIST" || code === "ENOTEMPTY") && (0, import_node_fs19.existsSync)(targetRunDir)) {
|
|
26069
|
+
return;
|
|
26070
|
+
}
|
|
26071
|
+
throw error;
|
|
26072
|
+
}
|
|
26073
|
+
} finally {
|
|
26074
|
+
(0, import_node_fs19.rmSync)(tempRoot, { recursive: true, force: true });
|
|
26075
|
+
}
|
|
26076
|
+
}
|
|
25858
26077
|
|
|
25859
26078
|
// src/index.ts
|
|
25860
26079
|
init_paths();
|
|
@@ -27494,7 +27713,7 @@ function createAgentKernel() {
|
|
|
27494
27713
|
getOutputFilenames,
|
|
27495
27714
|
getProject,
|
|
27496
27715
|
getProjectsRegistryPath,
|
|
27497
|
-
|
|
27716
|
+
getResultsRepoLocalPaths,
|
|
27498
27717
|
getResultsRepoStatus,
|
|
27499
27718
|
getSubagentsRoot,
|
|
27500
27719
|
getTextContent,
|
|
@@ -27514,6 +27733,7 @@ function createAgentKernel() {
|
|
|
27514
27733
|
isTestMessage,
|
|
27515
27734
|
isTestMessageRole,
|
|
27516
27735
|
killAllTrackedChildren,
|
|
27736
|
+
listGitRuns,
|
|
27517
27737
|
listTargetNames,
|
|
27518
27738
|
loadConfig,
|
|
27519
27739
|
loadEvalCaseById,
|
|
@@ -27525,10 +27745,11 @@ function createAgentKernel() {
|
|
|
27525
27745
|
loadTests,
|
|
27526
27746
|
loadTsConfig,
|
|
27527
27747
|
loadTsEvalFile,
|
|
27748
|
+
materializeGitRun,
|
|
27528
27749
|
mergeExecutionMetrics,
|
|
27529
27750
|
negateScore,
|
|
27530
27751
|
normalizeLineEndings,
|
|
27531
|
-
|
|
27752
|
+
normalizeResultsConfig,
|
|
27532
27753
|
parseAgentSkillsEvals,
|
|
27533
27754
|
parseClaudeSession,
|
|
27534
27755
|
parseCodexSession,
|