@agentv/core 4.31.0-next.1 → 4.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-QY6BS2V4.js → chunk-A27NE3R7.js} +27 -9
- package/dist/chunk-A27NE3R7.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +27 -6
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +27 -6
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +285 -47
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +23 -6
- package/dist/index.d.ts +23 -6
- package/dist/index.js +267 -41
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-TWWSN6OX.js → ts-eval-loader-XR6DNOZ3.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-QY6BS2V4.js.map +0 -1
- /package/dist/{ts-eval-loader-TWWSN6OX.js.map → ts-eval-loader-XR6DNOZ3.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -1168,6 +1168,9 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
1168
1168
|
}
|
|
1169
1169
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
1170
1170
|
}
|
|
1171
|
+
/**
 * Reports whether a path string names a local filesystem location.
 *
 * Accepted forms are POSIX absolute paths ("/..."), the bare home shorthand
 * "~", home-relative paths ("~/..." or "~\..."), and Windows drive-letter
 * paths ("C:\..." or "C:/..."). Anything else yields false.
 *
 * @param {string} p - Path string to classify.
 * @returns {boolean} True when the path matches one of the accepted forms.
 */
function isFilesystemPath(p) {
  if (p.startsWith("/")) {
    return true;
  }
  const homePrefixed = ["~/", "~\\"].some((prefix) => p.startsWith(prefix));
  if (homePrefixed || p === "~") {
    return true;
  }
  // Windows drive letter followed by either separator, e.g. "C:\" or "d:/".
  return /^[A-Za-z]:[/\\]/.test(p);
}
|
|
1171
1174
|
function parseResultsConfig(raw, configPath) {
|
|
1172
1175
|
if (raw === void 0 || raw === null) {
|
|
1173
1176
|
return void 0;
|
|
@@ -1177,15 +1180,29 @@ function parseResultsConfig(raw, configPath) {
|
|
|
1177
1180
|
return void 0;
|
|
1178
1181
|
}
|
|
1179
1182
|
const obj = raw;
|
|
1183
|
+
if (obj.mode !== "github") {
|
|
1184
|
+
logWarning(`Invalid results.mode in ${configPath}, expected 'github'`);
|
|
1185
|
+
return void 0;
|
|
1186
|
+
}
|
|
1180
1187
|
const repo = typeof obj.repo === "string" ? obj.repo.trim() : "";
|
|
1181
|
-
const resultsPath = typeof obj.path === "string" ? obj.path.trim() : "";
|
|
1182
1188
|
if (!repo) {
|
|
1183
1189
|
logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
|
|
1184
1190
|
return void 0;
|
|
1185
1191
|
}
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1192
|
+
let resultsPath;
|
|
1193
|
+
if (obj.path !== void 0) {
|
|
1194
|
+
if (typeof obj.path !== "string" || obj.path.trim().length === 0) {
|
|
1195
|
+
logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
|
|
1196
|
+
return void 0;
|
|
1197
|
+
}
|
|
1198
|
+
const trimmedPath = obj.path.trim();
|
|
1199
|
+
if (!isFilesystemPath(trimmedPath)) {
|
|
1200
|
+
logWarning(
|
|
1201
|
+
`Invalid results.path in ${configPath}: '${trimmedPath}' looks like a repo subdirectory. results.path now specifies the local filesystem directory for the clone (e.g., ~/data/agentv-results). Remove 'path' to use the default or set an absolute/home-relative path.`
|
|
1202
|
+
);
|
|
1203
|
+
return void 0;
|
|
1204
|
+
}
|
|
1205
|
+
resultsPath = trimmedPath;
|
|
1189
1206
|
}
|
|
1190
1207
|
if (obj.auto_push !== void 0 && typeof obj.auto_push !== "boolean") {
|
|
1191
1208
|
logWarning(`Invalid results.auto_push in ${configPath}, expected boolean`);
|
|
@@ -1200,8 +1217,9 @@ function parseResultsConfig(raw, configPath) {
|
|
|
1200
1217
|
branchPrefix = obj.branch_prefix.trim();
|
|
1201
1218
|
}
|
|
1202
1219
|
return {
|
|
1220
|
+
mode: "github",
|
|
1203
1221
|
repo,
|
|
1204
|
-
path: resultsPath,
|
|
1222
|
+
...resultsPath !== void 0 && { path: resultsPath },
|
|
1205
1223
|
...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
|
|
1206
1224
|
...branchPrefix && { branch_prefix: branchPrefix }
|
|
1207
1225
|
};
|
|
@@ -1314,10 +1332,10 @@ async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
|
1314
1332
|
}
|
|
1315
1333
|
}
|
|
1316
1334
|
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
1317
|
-
const { spawn:
|
|
1335
|
+
const { spawn: spawn6 } = await import("child_process");
|
|
1318
1336
|
return new Promise((resolve, reject) => {
|
|
1319
1337
|
const [cmd, ...args] = argv;
|
|
1320
|
-
const child =
|
|
1338
|
+
const child = spawn6(cmd, args, {
|
|
1321
1339
|
cwd: options.cwd,
|
|
1322
1340
|
stdio: ["pipe", "pipe", "pipe"],
|
|
1323
1341
|
// Merge additional env vars with process.env
|
|
@@ -1368,10 +1386,10 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
1368
1386
|
const stderrPath = path57.join(dir, "stderr.txt");
|
|
1369
1387
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
1370
1388
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
1371
|
-
const { spawn:
|
|
1389
|
+
const { spawn: spawn6 } = await import("child_process");
|
|
1372
1390
|
try {
|
|
1373
1391
|
const exitCode = await new Promise((resolve, reject) => {
|
|
1374
|
-
const child =
|
|
1392
|
+
const child = spawn6(wrappedCommand, {
|
|
1375
1393
|
shell: true,
|
|
1376
1394
|
cwd: options.cwd,
|
|
1377
1395
|
stdio: ["ignore", "ignore", "ignore"],
|
|
@@ -4651,7 +4669,7 @@ function getAgentvHome() {
|
|
|
4651
4669
|
if (envHome && envHome !== "undefined") {
|
|
4652
4670
|
if (!logged) {
|
|
4653
4671
|
logged = true;
|
|
4654
|
-
console.
|
|
4672
|
+
console.log(`Using AGENTV_HOME: ${envHome}`);
|
|
4655
4673
|
}
|
|
4656
4674
|
return envHome;
|
|
4657
4675
|
}
|
|
@@ -24814,7 +24832,7 @@ __export(index_exports, {
|
|
|
24814
24832
|
getOutputFilenames: () => getOutputFilenames,
|
|
24815
24833
|
getProject: () => getProject,
|
|
24816
24834
|
getProjectsRegistryPath: () => getProjectsRegistryPath,
|
|
24817
|
-
|
|
24835
|
+
getResultsRepoLocalPaths: () => getResultsRepoLocalPaths,
|
|
24818
24836
|
getResultsRepoStatus: () => getResultsRepoStatus,
|
|
24819
24837
|
getSubagentsRoot: () => getSubagentsRoot,
|
|
24820
24838
|
getTextContent: () => getTextContent,
|
|
@@ -24834,6 +24852,7 @@ __export(index_exports, {
|
|
|
24834
24852
|
isTestMessage: () => isTestMessage,
|
|
24835
24853
|
isTestMessageRole: () => isTestMessageRole,
|
|
24836
24854
|
killAllTrackedChildren: () => killAllTrackedChildren,
|
|
24855
|
+
listGitRuns: () => listGitRuns,
|
|
24837
24856
|
listTargetNames: () => listTargetNames,
|
|
24838
24857
|
loadConfig: () => loadConfig,
|
|
24839
24858
|
loadEvalCaseById: () => loadEvalCaseById,
|
|
@@ -24845,6 +24864,7 @@ __export(index_exports, {
|
|
|
24845
24864
|
loadTests: () => loadTests,
|
|
24846
24865
|
loadTsConfig: () => loadTsConfig,
|
|
24847
24866
|
loadTsEvalFile: () => loadTsEvalFile,
|
|
24867
|
+
materializeGitRun: () => materializeGitRun,
|
|
24848
24868
|
mergeExecutionMetrics: () => mergeExecutionMetrics,
|
|
24849
24869
|
negateScore: () => negateScore,
|
|
24850
24870
|
normalizeLineEndings: () => normalizeLineEndings,
|
|
@@ -25537,10 +25557,19 @@ function withFriendlyGitHubAuthError(error) {
|
|
|
25537
25557
|
}
|
|
25538
25558
|
return new Error(message);
|
|
25539
25559
|
}
|
|
25560
|
+
/**
 * Expands a leading "~" into the current user's home directory.
 *
 * Only the bare "~" and the "~/..." / "~\..." forms are expanded; every other
 * input (including "~user/...") is returned unchanged.
 *
 * @param {string} p - Path that may start with the home shorthand.
 * @returns {string} The path with the home directory substituted, or `p` as-is.
 */
function expandHome(p) {
  const isHomeRelative = p === "~" || p.startsWith("~/") || p.startsWith("~\\");
  if (!isHomeRelative) {
    return p;
  }
  // Drop only the "~"; the remaining separator is absorbed by path.join.
  const remainder = p.slice(1);
  return import_node_path54.default.join(import_node_os10.default.homedir(), remainder);
}
|
|
25540
25566
|
function normalizeResultsConfig(config) {
|
|
25567
|
+
const repo = config.repo.trim();
|
|
25568
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : import_node_path54.default.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
|
|
25541
25569
|
return {
|
|
25542
|
-
|
|
25543
|
-
|
|
25570
|
+
mode: "github",
|
|
25571
|
+
repo,
|
|
25572
|
+
path: resolvedPath,
|
|
25544
25573
|
auto_push: config.auto_push === true,
|
|
25545
25574
|
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
25546
25575
|
};
|
|
@@ -25551,7 +25580,7 @@ function resolveResultsRepoUrl(repo) {
|
|
|
25551
25580
|
}
|
|
25552
25581
|
return `https://github.com/${repo}.git`;
|
|
25553
25582
|
}
|
|
25554
|
-
function
|
|
25583
|
+
function getResultsRepoLocalPaths(repo) {
|
|
25555
25584
|
const rootDir = import_node_path54.default.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
25556
25585
|
return {
|
|
25557
25586
|
rootDir,
|
|
@@ -25578,7 +25607,7 @@ async function runCommand(executable, args, options) {
|
|
|
25578
25607
|
try {
|
|
25579
25608
|
const { stdout, stderr } = await execFileAsync4(executable, [...args], {
|
|
25580
25609
|
cwd: options?.cwd,
|
|
25581
|
-
env: process.env
|
|
25610
|
+
env: options?.env ?? process.env
|
|
25582
25611
|
});
|
|
25583
25612
|
return { stdout, stderr };
|
|
25584
25613
|
} catch (error) {
|
|
@@ -25592,8 +25621,17 @@ async function runCommand(executable, args, options) {
|
|
|
25592
25621
|
throw withFriendlyGitHubAuthError(error);
|
|
25593
25622
|
}
|
|
25594
25623
|
}
|
|
25624
|
+
/**
 * Builds the environment object handed to spawned git processes.
 *
 * Copies every defined variable from `process.env` except those whose name
 * starts with "GIT_"; `GIT_SSH_COMMAND` is the single GIT_-prefixed variable
 * that is kept.
 * NOTE(review): presumably this stops inherited variables such as GIT_DIR from
 * redirecting git away from the intended repository — confirm with the author.
 *
 * @returns {Record<string, string>} A fresh object; `process.env` is not mutated.
 */
function getGitEnv() {
  const sanitized = {};
  for (const name of Object.keys(process.env)) {
    const value = process.env[name];
    if (value === void 0) {
      continue;
    }
    if (name.startsWith("GIT_") && name !== "GIT_SSH_COMMAND") {
      continue;
    }
    sanitized[name] = value;
  }
  return sanitized;
}
|
|
25595
25633
|
async function runGit(args, options) {
|
|
25596
|
-
return runCommand("git", args, options);
|
|
25634
|
+
return runCommand("git", args, { ...options, env: getGitEnv() });
|
|
25597
25635
|
}
|
|
25598
25636
|
async function runGh(args, options) {
|
|
25599
25637
|
return runCommand("gh", args, options);
|
|
@@ -25617,13 +25655,11 @@ async function resolveDefaultBranch(repoDir) {
|
|
|
25617
25655
|
}
|
|
25618
25656
|
return "main";
|
|
25619
25657
|
}
|
|
25620
|
-
async function
|
|
25658
|
+
async function fetchResultsRepo(repoDir) {
|
|
25621
25659
|
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
25622
|
-
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
25623
|
-
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
25624
25660
|
}
|
|
25625
25661
|
function updateStatusFile(config, patch) {
|
|
25626
|
-
const cachePaths =
|
|
25662
|
+
const cachePaths = getResultsRepoLocalPaths(config.repo);
|
|
25627
25663
|
const current = readPersistedStatus(cachePaths.statusFile);
|
|
25628
25664
|
writePersistedStatus(cachePaths.statusFile, {
|
|
25629
25665
|
...current,
|
|
@@ -25632,26 +25668,31 @@ function updateStatusFile(config, patch) {
|
|
|
25632
25668
|
}
|
|
25633
25669
|
async function ensureResultsRepoClone(config) {
|
|
25634
25670
|
const normalized = normalizeResultsConfig(config);
|
|
25635
|
-
const cachePaths =
|
|
25671
|
+
const cachePaths = getResultsRepoLocalPaths(normalized.repo);
|
|
25672
|
+
const cloneDir = normalized.path;
|
|
25636
25673
|
(0, import_node_fs19.mkdirSync)(cachePaths.rootDir, { recursive: true });
|
|
25637
|
-
|
|
25674
|
+
(0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(cloneDir), { recursive: true });
|
|
25675
|
+
const cloneMissing = !(0, import_node_fs19.existsSync)(cloneDir);
|
|
25676
|
+
const gitDir = import_node_path54.default.join(cloneDir, ".git");
|
|
25677
|
+
const cloneEmpty = !cloneMissing && !(0, import_node_fs19.existsSync)(gitDir) && (await (0, import_promises39.readdir)(cloneDir)).length === 0;
|
|
25678
|
+
if (cloneMissing || cloneEmpty) {
|
|
25638
25679
|
try {
|
|
25639
25680
|
await runGit([
|
|
25640
25681
|
"clone",
|
|
25641
25682
|
"--filter=blob:none",
|
|
25642
25683
|
resolveResultsRepoUrl(normalized.repo),
|
|
25643
|
-
|
|
25684
|
+
cloneDir
|
|
25644
25685
|
]);
|
|
25645
|
-
return
|
|
25686
|
+
return cloneDir;
|
|
25646
25687
|
} catch (error) {
|
|
25647
25688
|
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
25648
25689
|
throw withFriendlyGitHubAuthError(error);
|
|
25649
25690
|
}
|
|
25650
25691
|
}
|
|
25651
|
-
if (!(0, import_node_fs19.existsSync)(
|
|
25652
|
-
throw new Error(`Results repo
|
|
25692
|
+
if (!(0, import_node_fs19.existsSync)(gitDir)) {
|
|
25693
|
+
throw new Error(`Results repo clone path is not a git repository: ${cloneDir}`);
|
|
25653
25694
|
}
|
|
25654
|
-
return
|
|
25695
|
+
return cloneDir;
|
|
25655
25696
|
}
|
|
25656
25697
|
function getResultsRepoStatus(config) {
|
|
25657
25698
|
if (!config) {
|
|
@@ -25659,20 +25700,20 @@ function getResultsRepoStatus(config) {
|
|
|
25659
25700
|
configured: false,
|
|
25660
25701
|
available: false,
|
|
25661
25702
|
repo: "",
|
|
25662
|
-
|
|
25703
|
+
local_dir: ""
|
|
25663
25704
|
};
|
|
25664
25705
|
}
|
|
25665
25706
|
const normalized = normalizeResultsConfig(config);
|
|
25666
|
-
const
|
|
25667
|
-
const persisted = readPersistedStatus(
|
|
25707
|
+
const localPaths = getResultsRepoLocalPaths(normalized.repo);
|
|
25708
|
+
const persisted = readPersistedStatus(localPaths.statusFile);
|
|
25668
25709
|
return {
|
|
25669
25710
|
configured: true,
|
|
25670
|
-
available: (0, import_node_fs19.existsSync)(
|
|
25711
|
+
available: (0, import_node_fs19.existsSync)(normalized.path),
|
|
25671
25712
|
repo: normalized.repo,
|
|
25672
25713
|
path: normalized.path,
|
|
25673
25714
|
auto_push: normalized.auto_push,
|
|
25674
25715
|
branch_prefix: normalized.branch_prefix,
|
|
25675
|
-
|
|
25716
|
+
local_dir: normalized.path,
|
|
25676
25717
|
last_synced_at: persisted.last_synced_at,
|
|
25677
25718
|
last_error: persisted.last_error
|
|
25678
25719
|
};
|
|
@@ -25681,8 +25722,7 @@ async function syncResultsRepo(config) {
|
|
|
25681
25722
|
const normalized = normalizeResultsConfig(config);
|
|
25682
25723
|
try {
|
|
25683
25724
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25684
|
-
|
|
25685
|
-
await updateCacheRepo(repoDir, baseBranch);
|
|
25725
|
+
await fetchResultsRepo(repoDir);
|
|
25686
25726
|
updateStatusFile(normalized, {
|
|
25687
25727
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
25688
25728
|
last_error: void 0
|
|
@@ -25699,7 +25739,7 @@ async function checkoutResultsRepoBranch(config, branchName) {
|
|
|
25699
25739
|
const normalized = normalizeResultsConfig(config);
|
|
25700
25740
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25701
25741
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
25702
|
-
await
|
|
25742
|
+
await fetchResultsRepo(repoDir);
|
|
25703
25743
|
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
25704
25744
|
updateStatusFile(normalized, { last_error: void 0 });
|
|
25705
25745
|
return {
|
|
@@ -25712,7 +25752,7 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
25712
25752
|
const normalized = normalizeResultsConfig(config);
|
|
25713
25753
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
25714
25754
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
25715
|
-
await
|
|
25755
|
+
await fetchResultsRepo(cloneDir);
|
|
25716
25756
|
const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path54.default.join(import_node_os10.default.tmpdir(), "agentv-results-repo-"));
|
|
25717
25757
|
const worktreeDir = import_node_path54.default.join(worktreeRoot, "repo");
|
|
25718
25758
|
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
@@ -25738,10 +25778,7 @@ async function stageResultsArtifacts(params) {
|
|
|
25738
25778
|
}
|
|
25739
25779
|
function resolveResultsRepoRunsDir(config) {
|
|
25740
25780
|
const normalized = normalizeResultsConfig(config);
|
|
25741
|
-
return import_node_path54.default.join(
|
|
25742
|
-
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
25743
|
-
...normalized.path.split("/")
|
|
25744
|
-
);
|
|
25781
|
+
return import_node_path54.default.join(normalized.path, "runs");
|
|
25745
25782
|
}
|
|
25746
25783
|
async function directorySizeBytes(targetPath) {
|
|
25747
25784
|
const entry = await (0, import_promises39.stat)(targetPath);
|
|
@@ -25770,7 +25807,7 @@ async function commitAndPushResultsBranch(params) {
|
|
|
25770
25807
|
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
25771
25808
|
const normalized = normalizeResultsConfig(config);
|
|
25772
25809
|
await runGit(["push", "-u", "origin", branchName], {
|
|
25773
|
-
cwd: cwd ??
|
|
25810
|
+
cwd: cwd ?? normalized.path
|
|
25774
25811
|
});
|
|
25775
25812
|
updateStatusFile(normalized, {
|
|
25776
25813
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -25803,8 +25840,8 @@ async function directPushResults(params) {
|
|
|
25803
25840
|
const normalized = normalizeResultsConfig(params.config);
|
|
25804
25841
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25805
25842
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
25806
|
-
await
|
|
25807
|
-
const destinationDir = import_node_path54.default.join(repoDir,
|
|
25843
|
+
await fetchResultsRepo(repoDir);
|
|
25844
|
+
const destinationDir = import_node_path54.default.join(repoDir, "runs", params.destinationPath);
|
|
25808
25845
|
await stageResultsArtifacts({
|
|
25809
25846
|
repoDir,
|
|
25810
25847
|
sourceDir: params.sourceDir,
|
|
@@ -25818,10 +25855,19 @@ async function directPushResults(params) {
|
|
|
25818
25855
|
if (status.trim().length === 0) {
|
|
25819
25856
|
return false;
|
|
25820
25857
|
}
|
|
25821
|
-
await runGit(
|
|
25858
|
+
await runGit(
|
|
25859
|
+
[
|
|
25860
|
+
"commit",
|
|
25861
|
+
"-m",
|
|
25862
|
+
params.commitMessage,
|
|
25863
|
+
"-m",
|
|
25864
|
+
`Agentv-Run: ${buildGitRunId(params.destinationPath)}`
|
|
25865
|
+
],
|
|
25866
|
+
{ cwd: repoDir }
|
|
25867
|
+
);
|
|
25822
25868
|
for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
|
|
25823
25869
|
try {
|
|
25824
|
-
await runGit(["push", "origin", baseBranch], { cwd: repoDir });
|
|
25870
|
+
await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
|
|
25825
25871
|
updateStatusFile(normalized, {
|
|
25826
25872
|
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
25827
25873
|
last_error: void 0
|
|
@@ -25830,7 +25876,8 @@ async function directPushResults(params) {
|
|
|
25830
25876
|
} catch (error) {
|
|
25831
25877
|
const message = error instanceof Error ? error.message : String(error);
|
|
25832
25878
|
if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
|
|
25833
|
-
await
|
|
25879
|
+
await fetchResultsRepo(repoDir);
|
|
25880
|
+
await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
|
|
25834
25881
|
} else {
|
|
25835
25882
|
throw error;
|
|
25836
25883
|
}
|
|
@@ -25838,6 +25885,195 @@ async function directPushResults(params) {
|
|
|
25838
25885
|
}
|
|
25839
25886
|
return false;
|
|
25840
25887
|
}
|
|
25888
|
+
/**
 * Derives a run identifier from a run path relative to the "runs" directory.
 *
 * The path is normalised to "/" separators and empty segments are dropped.
 * - Two or more segments: the last segment is the timestamp and the preceding
 *   segments (joined with "/") are the experiment. The id is
 *   "<experiment>::<timestamp>", or just the timestamp when the experiment is
 *   exactly "default".
 * - Fewer than two segments: the single segment, or the input itself when no
 *   segment remains.
 *
 * @param {string} relativeRunPath - Run path relative to the runs directory.
 * @returns {string} The run identifier.
 */
function buildGitRunId(relativeRunPath) {
  const normalized = relativeRunPath.split(import_node_path54.default.sep).join("/");
  const segments = normalized.split("/").filter(Boolean);
  if (segments.length < 2) {
    return segments[0] ?? relativeRunPath;
  }
  const timestamp = segments.at(-1);
  const experiment = segments.slice(0, -1).join("/");
  return experiment === "default" ? (timestamp ?? normalized) : `${experiment}::${timestamp}`;
}
|
|
25901
|
+
/**
 * Resolves the experiment name for a run.
 *
 * Prefers a non-blank string at `benchmark.metadata.experiment` (trimmed).
 * Otherwise the name is taken from the run id: everything before the last
 * "::" separator, or "default" when the id has no separator.
 *
 * The benchmark object comes from parsed JSON, so a missing benchmark or a
 * non-string `experiment` value is tolerated and treated as "not declared"
 * instead of throwing.
 *
 * @param {string} runId - Run identifier, optionally "<experiment>::<timestamp>".
 * @param {{ metadata?: { experiment?: unknown } } | null | undefined} benchmark
 *   Parsed benchmark document.
 * @returns {string} The experiment name.
 */
function getRunExperiment(runId, benchmark) {
  const declared = benchmark?.metadata?.experiment;
  if (typeof declared === "string") {
    const experiment = declared.trim();
    if (experiment) {
      return experiment;
    }
  }
  const separatorIndex = runId.lastIndexOf("::");
  return separatorIndex === -1 ? "default" : runId.slice(0, separatorIndex);
}
|
|
25909
|
+
/**
 * Averages the `pass_rate.mean` values found in a run summary.
 *
 * Entries whose `pass_rate.mean` is absent, non-numeric, or non-finite are
 * ignored. Null or otherwise empty entries are skipped as well, because the
 * summary originates from parsed JSON and may contain `null` values.
 *
 * @param {Record<string, { pass_rate?: { mean?: unknown } } | null> | null | undefined} runSummary
 *   Summary object whose values may carry a `pass_rate.mean`.
 * @returns {number | undefined} The arithmetic mean of the usable pass rates,
 *   or undefined when there is no summary or no usable value.
 */
function computeAveragePassRate(runSummary) {
  if (!runSummary) {
    return void 0;
  }
  const passRates = Object.values(runSummary)
    .map((summary) => summary?.pass_rate?.mean)
    .filter((value) => typeof value === "number" && Number.isFinite(value));
  if (passRates.length === 0) {
    return void 0;
  }
  return passRates.reduce((sum, value) => sum + value, 0) / passRates.length;
}
|
|
25919
|
+
/**
 * Runs `git cat-file --batch` in `repoDir`, feeds it `input` on stdin, and
 * resolves with everything git wrote to stdout.
 *
 * @param {string} repoDir - Working directory for the git process.
 * @param {string} input - Text written to git's stdin before it is closed.
 * @returns {Promise<Buffer>} Raw stdout bytes when git exits with code 0.
 *   Rejects (via withFriendlyGitHubAuthError) when the process cannot be
 *   spawned, exits non-zero, or stdin fails with anything other than EPIPE.
 */
async function runGitBatch(repoDir, input) {
  return new Promise((resolve, reject) => {
    const child = (0, import_node_child_process12.spawn)("git", ["cat-file", "--batch"], {
      cwd: repoDir,
      env: getGitEnv(),
      stdio: ["pipe", "pipe", "pipe"]
    });
    const stdoutChunks = [];
    const stderrChunks = [];
    child.stdout.on("data", (chunk) => {
      stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    child.stderr.on("data", (chunk) => {
      stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    child.on("error", (error) => reject(withFriendlyGitHubAuthError(error)));
    child.on("close", (code) => {
      if (code === 0) {
        resolve(Buffer.concat(stdoutChunks));
        return;
      }
      const stderr = Buffer.concat(stderrChunks).toString("utf8").trim();
      reject(
        withFriendlyGitHubAuthError(
          stderr.length > 0 ? new Error(stderr) : new Error("git cat-file failed")
        )
      );
    });
    // Without a listener, a write error on stdin (typically EPIPE when git
    // exits before consuming its input) is emitted as an unhandled stream
    // error. EPIPE is ignored here because the "close" handler reports the
    // real failure from the exit code and stderr; other errors reject.
    child.stdin.on("error", (error) => {
      if (error?.code !== "EPIPE") {
        reject(withFriendlyGitHubAuthError(error));
      }
    });
    child.stdin.end(input);
  });
}
|
|
25950
|
+
/**
 * Splits the raw stdout of a batch `git cat-file` call into blob records.
 *
 * The buffer is read as a sequence of newline-terminated header lines. A
 * header of the form "<ref> <type> <size>" is followed by exactly <size>
 * content bytes and an optional trailing newline. Empty header lines and
 * headers ending in " missing" are skipped without producing a record.
 *
 * @param {Buffer} output - Raw bytes produced by git.
 * @returns {{ size: number, content: Buffer }[]} One record per blob, in
 *   output order; `content` is a view into `output`, not a copy.
 * @throws {Error} When a header has no terminating newline, does not match
 *   the expected shape, names a non-blob object type, or declares more bytes
 *   than remain in the buffer.
 */
function parseGitBatchBlobs(output) {
  const LINE_FEED = 10;
  const blobs = [];
  let cursor = 0;
  while (cursor < output.length) {
    const newlineAt = output.indexOf(LINE_FEED, cursor);
    if (newlineAt === -1) {
      throw new Error("Malformed git cat-file output: missing header terminator");
    }
    const header = output.subarray(cursor, newlineAt).toString("utf8");
    cursor = newlineAt + 1;
    // Blank separators and "<ref> missing" lines carry no content bytes.
    if (header.length === 0 || /^(.*) missing$/.test(header)) {
      continue;
    }
    const parsed = /^(.*) (\w+) (\d+)$/.exec(header);
    if (!parsed) {
      throw new Error(`Malformed git cat-file header: ${header}`);
    }
    const objectRef = parsed[1];
    const objectType = parsed[2];
    if (objectType !== "blob") {
      throw new Error(`Unsupported git object type for ${objectRef}: ${objectType}`);
    }
    const size = Number.parseInt(parsed[3], 10);
    const contentEnd = cursor + size;
    if (contentEnd > output.length) {
      throw new Error(`Malformed git cat-file output for ${objectRef}: truncated blob content`);
    }
    blobs.push({ size, content: output.subarray(cursor, contentEnd) });
    cursor = contentEnd;
    // Step over the newline that follows the content, when present.
    if (cursor < output.length && output[cursor] === LINE_FEED) {
      cursor += 1;
    }
  }
  return blobs;
}
|
|
25991
|
+
/**
 * Lists the runs stored under "runs/" at a git ref, without checking files out.
 *
 * Every "benchmark.json" below "runs" is located with `git ls-tree`, all of
 * them are read in a single batch `git cat-file` call, and one summary record
 * is built per benchmark file.
 *
 * `ls-tree` is invoked with `-z` so that paths are NUL-separated and never
 * quoted; with the default newline output git C-quotes paths containing
 * non-ASCII bytes, which would make the subsequent `ref:path` lookup miss.
 *
 * @param {string} repoDir - Local clone to query.
 * @param {string} [ref="origin/main"] - Ref whose tree is inspected.
 * @returns {Promise<object[]>} Run records sorted by `timestamp`, newest
 *   first. `avg_score` is always 0 and `size_bytes` is the size of the
 *   benchmark.json blob only.
 * @throws {Error} When the number of blobs returned differs from the number of
 *   benchmark paths, when a benchmark file is not valid JSON, or when git fails.
 */
async function listGitRuns(repoDir, ref = "origin/main") {
  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", "-z", ref, "runs"], {
    cwd: repoDir
  });
  const benchmarkPaths = treeOut.split("\0").filter((entry) => entry.endsWith("/benchmark.json"));
  if (benchmarkPaths.length === 0) {
    return [];
  }
  // One "<ref>:<path>" request per line, newline-terminated.
  const batchInput = `${benchmarkPaths.map((benchmarkPath) => `${ref}:${benchmarkPath}`).join("\n")}\n`;
  const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
  if (blobs.length !== benchmarkPaths.length) {
    throw new Error(
      `Expected ${benchmarkPaths.length} git blobs but received ${blobs.length} while listing results runs`
    );
  }
  const runs = blobs.map((blob, index) => {
    const benchmarkPath = benchmarkPaths[index];
    const benchmark = JSON.parse(blob.content.toString("utf8"));
    const runDir = import_node_path54.default.posix.dirname(benchmarkPath);
    const relativeRunPath = import_node_path54.default.posix.relative("runs", runDir);
    const runId = buildGitRunId(relativeRunPath);
    // Fall back to the run directory name when the benchmark has no timestamp.
    const timestamp = benchmark.metadata?.timestamp?.trim() || import_node_path54.default.posix.basename(runDir);
    const targets = benchmark.metadata?.targets ?? [];
    const passRate = computeAveragePassRate(benchmark.run_summary);
    return {
      run_id: runId,
      experiment: getRunExperiment(runId, benchmark),
      timestamp,
      ...passRate !== void 0 && { pass_rate: passRate },
      // A target is reported only when the run has exactly one.
      ...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
      manifest_path: import_node_path54.default.posix.join(runDir, "index.jsonl"),
      benchmark_path: benchmarkPath,
      display_name: import_node_path54.default.posix.basename(runDir),
      test_count: benchmark.metadata?.tests_run?.length ?? 0,
      avg_score: 0,
      size_bytes: blob.size
    };
  });
  runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
  return runs;
}
|
|
26035
|
+
/**
 * Writes the files of one run from a git ref into the clone's working tree at
 * "<repoDir>/runs/<relativeRunPath>", without performing a checkout.
 *
 * Files are listed with `git ls-tree`, read in one batch `git cat-file` call,
 * written into a temporary directory inside `repoDir`, and then moved into
 * place with a single rename. The temporary directory is always removed.
 *
 * `ls-tree` is invoked with `-z` so that paths are NUL-separated and never
 * quoted; with the default newline output git C-quotes paths containing
 * non-ASCII bytes, which would make the subsequent `ref:path` lookup miss.
 *
 * @param {string} repoDir - Local clone to read from and write into.
 * @param {string} relativeRunPath - Run path relative to "runs"; platform
 *   separators are accepted.
 * @param {string} [ref="origin/main"] - Ref whose tree is read.
 * @returns {Promise<void>} Resolves without writing anything when the ref has
 *   no files under the run path.
 * @throws {Error} When the number of blobs returned differs from the number of
 *   listed files, or when git or a filesystem operation fails.
 */
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
  const normalizedRunPath = relativeRunPath.split(import_node_path54.default.sep).join("/");
  const runTreePath = import_node_path54.default.posix.join("runs", normalizedRunPath);
  const targetRunDir = import_node_path54.default.join(repoDir, ...runTreePath.split("/"));
  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", "-z", ref, runTreePath], {
    cwd: repoDir
  });
  const filePaths = treeOut.split("\0").filter((entry) => entry.length > 0);
  if (filePaths.length === 0) {
    return;
  }
  // One "<ref>:<path>" request per line, newline-terminated.
  const batchInput = `${filePaths.map((filePath) => `${ref}:${filePath}`).join("\n")}\n`;
  const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
  if (blobs.length !== filePaths.length) {
    throw new Error(
      `Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
    );
  }
  const tempRoot = (0, import_node_fs19.mkdtempSync)(import_node_path54.default.join(repoDir, ".agentv-run-"));
  const tempRunDir = import_node_path54.default.join(tempRoot, "run");
  try {
    for (const [index, filePath] of filePaths.entries()) {
      const relativeFilePath = import_node_path54.default.posix.relative(runTreePath, filePath);
      const absolutePath = import_node_path54.default.join(tempRunDir, ...relativeFilePath.split("/"));
      (0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(absolutePath), { recursive: true });
      (0, import_node_fs19.writeFileSync)(absolutePath, blobs[index].content);
    }
    (0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(targetRunDir), { recursive: true });
    try {
      (0, import_node_fs19.renameSync)(tempRunDir, targetRunDir);
    } catch (error) {
      const code = typeof error === "object" && error !== null && "code" in error ? error.code : void 0;
      // The target already exists: keep it as-is and discard the temp copy.
      // NOTE(review): presumably a concurrent materialization won the race — confirm.
      if ((code === "EEXIST" || code === "ENOTEMPTY") && (0, import_node_fs19.existsSync)(targetRunDir)) {
        return;
      }
      throw error;
    }
  } finally {
    (0, import_node_fs19.rmSync)(tempRoot, { recursive: true, force: true });
  }
}
|
|
25841
26077
|
|
|
25842
26078
|
// src/index.ts
|
|
25843
26079
|
init_paths();
|
|
@@ -27477,7 +27713,7 @@ function createAgentKernel() {
|
|
|
27477
27713
|
getOutputFilenames,
|
|
27478
27714
|
getProject,
|
|
27479
27715
|
getProjectsRegistryPath,
|
|
27480
|
-
|
|
27716
|
+
getResultsRepoLocalPaths,
|
|
27481
27717
|
getResultsRepoStatus,
|
|
27482
27718
|
getSubagentsRoot,
|
|
27483
27719
|
getTextContent,
|
|
@@ -27497,6 +27733,7 @@ function createAgentKernel() {
|
|
|
27497
27733
|
isTestMessage,
|
|
27498
27734
|
isTestMessageRole,
|
|
27499
27735
|
killAllTrackedChildren,
|
|
27736
|
+
listGitRuns,
|
|
27500
27737
|
listTargetNames,
|
|
27501
27738
|
loadConfig,
|
|
27502
27739
|
loadEvalCaseById,
|
|
@@ -27508,6 +27745,7 @@ function createAgentKernel() {
|
|
|
27508
27745
|
loadTests,
|
|
27509
27746
|
loadTsConfig,
|
|
27510
27747
|
loadTsEvalFile,
|
|
27748
|
+
materializeGitRun,
|
|
27511
27749
|
mergeExecutionMetrics,
|
|
27512
27750
|
negateScore,
|
|
27513
27751
|
normalizeLineEndings,
|