@agentv/core 4.31.0-next.1 → 4.31.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1168,6 +1168,9 @@ function parseExecutionDefaults(raw, configPath) {
1168
1168
  }
1169
1169
  return Object.keys(result).length > 0 ? result : void 0;
1170
1170
  }
1171
/**
 * Reports whether `p` is written as a local filesystem location rather than
 * a bare relative name.
 *
 * Recognised forms: a POSIX absolute path (`/...`), a home-relative path
 * (`~`, `~/...`, `~\...`), or a Windows drive path (`C:\...` / `C:/...`).
 *
 * @param {unknown} p - Candidate path; non-string values are never paths.
 * @returns {boolean} `true` when `p` matches one of the recognised forms.
 */
function isFilesystemPath(p) {
  // Config values come from user-authored files; answer "no" for anything
  // that is not a string instead of throwing on `.startsWith`.
  if (typeof p !== "string") {
    return false;
  }
  const isPosixAbsolute = p.startsWith("/");
  const isHomeRelative = p === "~" || p.startsWith("~/") || p.startsWith("~\\");
  const isWindowsDrive = /^[A-Za-z]:[/\\]/.test(p);
  return isPosixAbsolute || isHomeRelative || isWindowsDrive;
}
1171
1174
  function parseResultsConfig(raw, configPath) {
1172
1175
  if (raw === void 0 || raw === null) {
1173
1176
  return void 0;
@@ -1177,15 +1180,29 @@ function parseResultsConfig(raw, configPath) {
1177
1180
  return void 0;
1178
1181
  }
1179
1182
  const obj = raw;
1183
+ if (obj.mode !== "github") {
1184
+ logWarning(`Invalid results.mode in ${configPath}, expected 'github'`);
1185
+ return void 0;
1186
+ }
1180
1187
  const repo = typeof obj.repo === "string" ? obj.repo.trim() : "";
1181
- const resultsPath = typeof obj.path === "string" ? obj.path.trim() : "";
1182
1188
  if (!repo) {
1183
1189
  logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
1184
1190
  return void 0;
1185
1191
  }
1186
- if (!resultsPath) {
1187
- logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
1188
- return void 0;
1192
+ let resultsPath;
1193
+ if (obj.path !== void 0) {
1194
+ if (typeof obj.path !== "string" || obj.path.trim().length === 0) {
1195
+ logWarning(`Invalid results.path in ${configPath}, expected non-empty string`);
1196
+ return void 0;
1197
+ }
1198
+ const trimmedPath = obj.path.trim();
1199
+ if (!isFilesystemPath(trimmedPath)) {
1200
+ logWarning(
1201
+ `Invalid results.path in ${configPath}: '${trimmedPath}' looks like a repo subdirectory. results.path now specifies the local filesystem directory for the clone (e.g., ~/data/agentv-results). Remove 'path' to use the default or set an absolute/home-relative path.`
1202
+ );
1203
+ return void 0;
1204
+ }
1205
+ resultsPath = trimmedPath;
1189
1206
  }
1190
1207
  if (obj.auto_push !== void 0 && typeof obj.auto_push !== "boolean") {
1191
1208
  logWarning(`Invalid results.auto_push in ${configPath}, expected boolean`);
@@ -1200,8 +1217,9 @@ function parseResultsConfig(raw, configPath) {
1200
1217
  branchPrefix = obj.branch_prefix.trim();
1201
1218
  }
1202
1219
  return {
1220
+ mode: "github",
1203
1221
  repo,
1204
- path: resultsPath,
1222
+ ...resultsPath !== void 0 && { path: resultsPath },
1205
1223
  ...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
1206
1224
  ...branchPrefix && { branch_prefix: branchPrefix }
1207
1225
  };
@@ -1314,10 +1332,10 @@ async function execFileWithStdinBun(argv, stdinPayload, options) {
1314
1332
  }
1315
1333
  }
1316
1334
  async function execFileWithStdinNode(argv, stdinPayload, options) {
1317
- const { spawn: spawn5 } = await import("child_process");
1335
+ const { spawn: spawn6 } = await import("child_process");
1318
1336
  return new Promise((resolve, reject) => {
1319
1337
  const [cmd, ...args] = argv;
1320
- const child = spawn5(cmd, args, {
1338
+ const child = spawn6(cmd, args, {
1321
1339
  cwd: options.cwd,
1322
1340
  stdio: ["pipe", "pipe", "pipe"],
1323
1341
  // Merge additional env vars with process.env
@@ -1368,10 +1386,10 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
1368
1386
  const stderrPath = path57.join(dir, "stderr.txt");
1369
1387
  await writeFile9(stdinPath, stdinPayload, "utf8");
1370
1388
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
1371
- const { spawn: spawn5 } = await import("child_process");
1389
+ const { spawn: spawn6 } = await import("child_process");
1372
1390
  try {
1373
1391
  const exitCode = await new Promise((resolve, reject) => {
1374
- const child = spawn5(wrappedCommand, {
1392
+ const child = spawn6(wrappedCommand, {
1375
1393
  shell: true,
1376
1394
  cwd: options.cwd,
1377
1395
  stdio: ["ignore", "ignore", "ignore"],
@@ -4651,7 +4669,7 @@ function getAgentvHome() {
4651
4669
  if (envHome && envHome !== "undefined") {
4652
4670
  if (!logged) {
4653
4671
  logged = true;
4654
- console.warn(`Using AGENTV_HOME: ${envHome}`);
4672
+ console.log(`Using AGENTV_HOME: ${envHome}`);
4655
4673
  }
4656
4674
  return envHome;
4657
4675
  }
@@ -24814,7 +24832,7 @@ __export(index_exports, {
24814
24832
  getOutputFilenames: () => getOutputFilenames,
24815
24833
  getProject: () => getProject,
24816
24834
  getProjectsRegistryPath: () => getProjectsRegistryPath,
24817
- getResultsRepoCachePaths: () => getResultsRepoCachePaths,
24835
+ getResultsRepoLocalPaths: () => getResultsRepoLocalPaths,
24818
24836
  getResultsRepoStatus: () => getResultsRepoStatus,
24819
24837
  getSubagentsRoot: () => getSubagentsRoot,
24820
24838
  getTextContent: () => getTextContent,
@@ -24834,6 +24852,7 @@ __export(index_exports, {
24834
24852
  isTestMessage: () => isTestMessage,
24835
24853
  isTestMessageRole: () => isTestMessageRole,
24836
24854
  killAllTrackedChildren: () => killAllTrackedChildren,
24855
+ listGitRuns: () => listGitRuns,
24837
24856
  listTargetNames: () => listTargetNames,
24838
24857
  loadConfig: () => loadConfig,
24839
24858
  loadEvalCaseById: () => loadEvalCaseById,
@@ -24845,6 +24864,7 @@ __export(index_exports, {
24845
24864
  loadTests: () => loadTests,
24846
24865
  loadTsConfig: () => loadTsConfig,
24847
24866
  loadTsEvalFile: () => loadTsEvalFile,
24867
+ materializeGitRun: () => materializeGitRun,
24848
24868
  mergeExecutionMetrics: () => mergeExecutionMetrics,
24849
24869
  negateScore: () => negateScore,
24850
24870
  normalizeLineEndings: () => normalizeLineEndings,
@@ -25537,10 +25557,19 @@ function withFriendlyGitHubAuthError(error) {
25537
25557
  }
25538
25558
  return new Error(message);
25539
25559
  }
25560
/**
 * Expands a leading home-directory shorthand in `p`.
 *
 * Only a bare `~`, or a `~` immediately followed by `/` or `\`, is expanded.
 * Any other value (including `~user/...`) is returned unchanged.
 *
 * @param {string} p - Path that may begin with `~`.
 * @returns {string} `p` with the leading `~` replaced by `os.homedir()`,
 *   or `p` itself when it does not start with a recognised shorthand.
 */
function expandHome(p) {
  if (p !== "~" && !p.startsWith("~/") && !p.startsWith("~\\")) {
    return p;
  }
  // Replace whichever separator followed the tilde with "/", which path.join
  // accepts on every platform. Forwarding a leading "\" unchanged would, on
  // POSIX, create a literal "\name" entry under the home directory.
  const remainder = p === "~" ? "" : `/${p.slice(2)}`;
  return import_node_path54.default.join(import_node_os10.default.homedir(), remainder);
}
25540
25566
  function normalizeResultsConfig(config) {
25567
+ const repo = config.repo.trim();
25568
+ const resolvedPath = config.path ? expandHome(config.path.trim()) : import_node_path54.default.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
25541
25569
  return {
25542
- repo: config.repo.trim(),
25543
- path: config.path.trim().replace(/^\/+|\/+$/g, ""),
25570
+ mode: "github",
25571
+ repo,
25572
+ path: resolvedPath,
25544
25573
  auto_push: config.auto_push === true,
25545
25574
  branch_prefix: config.branch_prefix?.trim() || "eval-results"
25546
25575
  };
@@ -25551,7 +25580,7 @@ function resolveResultsRepoUrl(repo) {
25551
25580
  }
25552
25581
  return `https://github.com/${repo}.git`;
25553
25582
  }
25554
- function getResultsRepoCachePaths(repo) {
25583
+ function getResultsRepoLocalPaths(repo) {
25555
25584
  const rootDir = import_node_path54.default.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
25556
25585
  return {
25557
25586
  rootDir,
@@ -25578,7 +25607,7 @@ async function runCommand(executable, args, options) {
25578
25607
  try {
25579
25608
  const { stdout, stderr } = await execFileAsync4(executable, [...args], {
25580
25609
  cwd: options?.cwd,
25581
- env: process.env
25610
+ env: options?.env ?? process.env
25582
25611
  });
25583
25612
  return { stdout, stderr };
25584
25613
  } catch (error) {
@@ -25592,8 +25621,17 @@ async function runCommand(executable, args, options) {
25592
25621
  throw withFriendlyGitHubAuthError(error);
25593
25622
  }
25594
25623
  }
25624
/**
 * Builds the environment handed to spawned git processes.
 *
 * Copies every defined entry of `process.env` except variables whose name
 * starts with `GIT_`; `GIT_SSH_COMMAND` is the single `GIT_*` variable that
 * is kept.
 *
 * @returns {Record<string, string>} A fresh object; `process.env` is not modified.
 */
function getGitEnv() {
  const forwarded = {};
  Object.keys(process.env).forEach((name) => {
    const value = process.env[name];
    if (value === void 0) {
      return;
    }
    const isStrippedGitVar = name.startsWith("GIT_") && name !== "GIT_SSH_COMMAND";
    if (isStrippedGitVar) {
      return;
    }
    forwarded[name] = value;
  });
  return forwarded;
}
25595
25633
  async function runGit(args, options) {
25596
- return runCommand("git", args, options);
25634
+ return runCommand("git", args, { ...options, env: getGitEnv() });
25597
25635
  }
25598
25636
  async function runGh(args, options) {
25599
25637
  return runCommand("gh", args, options);
@@ -25617,13 +25655,11 @@ async function resolveDefaultBranch(repoDir) {
25617
25655
  }
25618
25656
  return "main";
25619
25657
  }
25620
- async function updateCacheRepo(repoDir, baseBranch) {
25658
+ async function fetchResultsRepo(repoDir) {
25621
25659
  await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
25622
- await runGit(["checkout", baseBranch], { cwd: repoDir });
25623
- await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
25624
25660
  }
25625
25661
  function updateStatusFile(config, patch) {
25626
- const cachePaths = getResultsRepoCachePaths(config.repo);
25662
+ const cachePaths = getResultsRepoLocalPaths(config.repo);
25627
25663
  const current = readPersistedStatus(cachePaths.statusFile);
25628
25664
  writePersistedStatus(cachePaths.statusFile, {
25629
25665
  ...current,
@@ -25632,26 +25668,31 @@ function updateStatusFile(config, patch) {
25632
25668
  }
25633
25669
  async function ensureResultsRepoClone(config) {
25634
25670
  const normalized = normalizeResultsConfig(config);
25635
- const cachePaths = getResultsRepoCachePaths(normalized.repo);
25671
+ const cachePaths = getResultsRepoLocalPaths(normalized.repo);
25672
+ const cloneDir = normalized.path;
25636
25673
  (0, import_node_fs19.mkdirSync)(cachePaths.rootDir, { recursive: true });
25637
- if (!(0, import_node_fs19.existsSync)(cachePaths.repoDir)) {
25674
+ (0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(cloneDir), { recursive: true });
25675
+ const cloneMissing = !(0, import_node_fs19.existsSync)(cloneDir);
25676
+ const gitDir = import_node_path54.default.join(cloneDir, ".git");
25677
+ const cloneEmpty = !cloneMissing && !(0, import_node_fs19.existsSync)(gitDir) && (await (0, import_promises39.readdir)(cloneDir)).length === 0;
25678
+ if (cloneMissing || cloneEmpty) {
25638
25679
  try {
25639
25680
  await runGit([
25640
25681
  "clone",
25641
25682
  "--filter=blob:none",
25642
25683
  resolveResultsRepoUrl(normalized.repo),
25643
- cachePaths.repoDir
25684
+ cloneDir
25644
25685
  ]);
25645
- return cachePaths.repoDir;
25686
+ return cloneDir;
25646
25687
  } catch (error) {
25647
25688
  updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
25648
25689
  throw withFriendlyGitHubAuthError(error);
25649
25690
  }
25650
25691
  }
25651
- if (!(0, import_node_fs19.existsSync)(import_node_path54.default.join(cachePaths.repoDir, ".git"))) {
25652
- throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
25692
+ if (!(0, import_node_fs19.existsSync)(gitDir)) {
25693
+ throw new Error(`Results repo clone path is not a git repository: ${cloneDir}`);
25653
25694
  }
25654
- return cachePaths.repoDir;
25695
+ return cloneDir;
25655
25696
  }
25656
25697
  function getResultsRepoStatus(config) {
25657
25698
  if (!config) {
@@ -25659,20 +25700,20 @@ function getResultsRepoStatus(config) {
25659
25700
  configured: false,
25660
25701
  available: false,
25661
25702
  repo: "",
25662
- cache_dir: ""
25703
+ local_dir: ""
25663
25704
  };
25664
25705
  }
25665
25706
  const normalized = normalizeResultsConfig(config);
25666
- const cachePaths = getResultsRepoCachePaths(normalized.repo);
25667
- const persisted = readPersistedStatus(cachePaths.statusFile);
25707
+ const localPaths = getResultsRepoLocalPaths(normalized.repo);
25708
+ const persisted = readPersistedStatus(localPaths.statusFile);
25668
25709
  return {
25669
25710
  configured: true,
25670
- available: (0, import_node_fs19.existsSync)(cachePaths.repoDir),
25711
+ available: (0, import_node_fs19.existsSync)(normalized.path),
25671
25712
  repo: normalized.repo,
25672
25713
  path: normalized.path,
25673
25714
  auto_push: normalized.auto_push,
25674
25715
  branch_prefix: normalized.branch_prefix,
25675
- cache_dir: cachePaths.repoDir,
25716
+ local_dir: normalized.path,
25676
25717
  last_synced_at: persisted.last_synced_at,
25677
25718
  last_error: persisted.last_error
25678
25719
  };
@@ -25681,8 +25722,7 @@ async function syncResultsRepo(config) {
25681
25722
  const normalized = normalizeResultsConfig(config);
25682
25723
  try {
25683
25724
  const repoDir = await ensureResultsRepoClone(normalized);
25684
- const baseBranch = await resolveDefaultBranch(repoDir);
25685
- await updateCacheRepo(repoDir, baseBranch);
25725
+ await fetchResultsRepo(repoDir);
25686
25726
  updateStatusFile(normalized, {
25687
25727
  last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
25688
25728
  last_error: void 0
@@ -25699,7 +25739,7 @@ async function checkoutResultsRepoBranch(config, branchName) {
25699
25739
  const normalized = normalizeResultsConfig(config);
25700
25740
  const repoDir = await ensureResultsRepoClone(normalized);
25701
25741
  const baseBranch = await resolveDefaultBranch(repoDir);
25702
- await updateCacheRepo(repoDir, baseBranch);
25742
+ await fetchResultsRepo(repoDir);
25703
25743
  await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
25704
25744
  updateStatusFile(normalized, { last_error: void 0 });
25705
25745
  return {
@@ -25712,7 +25752,7 @@ async function prepareResultsRepoBranch(config, branchName) {
25712
25752
  const normalized = normalizeResultsConfig(config);
25713
25753
  const cloneDir = await ensureResultsRepoClone(normalized);
25714
25754
  const baseBranch = await resolveDefaultBranch(cloneDir);
25715
- await updateCacheRepo(cloneDir, baseBranch);
25755
+ await fetchResultsRepo(cloneDir);
25716
25756
  const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path54.default.join(import_node_os10.default.tmpdir(), "agentv-results-repo-"));
25717
25757
  const worktreeDir = import_node_path54.default.join(worktreeRoot, "repo");
25718
25758
  await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
@@ -25738,10 +25778,7 @@ async function stageResultsArtifacts(params) {
25738
25778
  }
25739
25779
  function resolveResultsRepoRunsDir(config) {
25740
25780
  const normalized = normalizeResultsConfig(config);
25741
- return import_node_path54.default.join(
25742
- getResultsRepoCachePaths(normalized.repo).repoDir,
25743
- ...normalized.path.split("/")
25744
- );
25781
+ return import_node_path54.default.join(normalized.path, "runs");
25745
25782
  }
25746
25783
  async function directorySizeBytes(targetPath) {
25747
25784
  const entry = await (0, import_promises39.stat)(targetPath);
@@ -25770,7 +25807,7 @@ async function commitAndPushResultsBranch(params) {
25770
25807
  async function pushResultsRepoBranch(config, branchName, cwd) {
25771
25808
  const normalized = normalizeResultsConfig(config);
25772
25809
  await runGit(["push", "-u", "origin", branchName], {
25773
- cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir
25810
+ cwd: cwd ?? normalized.path
25774
25811
  });
25775
25812
  updateStatusFile(normalized, {
25776
25813
  last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
@@ -25803,8 +25840,8 @@ async function directPushResults(params) {
25803
25840
  const normalized = normalizeResultsConfig(params.config);
25804
25841
  const repoDir = await ensureResultsRepoClone(normalized);
25805
25842
  const baseBranch = await resolveDefaultBranch(repoDir);
25806
- await updateCacheRepo(repoDir, baseBranch);
25807
- const destinationDir = import_node_path54.default.join(repoDir, normalized.path, params.destinationPath);
25843
+ await fetchResultsRepo(repoDir);
25844
+ const destinationDir = import_node_path54.default.join(repoDir, "runs", params.destinationPath);
25808
25845
  await stageResultsArtifacts({
25809
25846
  repoDir,
25810
25847
  sourceDir: params.sourceDir,
@@ -25818,10 +25855,19 @@ async function directPushResults(params) {
25818
25855
  if (status.trim().length === 0) {
25819
25856
  return false;
25820
25857
  }
25821
- await runGit(["commit", "-m", params.commitMessage], { cwd: repoDir });
25858
+ await runGit(
25859
+ [
25860
+ "commit",
25861
+ "-m",
25862
+ params.commitMessage,
25863
+ "-m",
25864
+ `Agentv-Run: ${buildGitRunId(params.destinationPath)}`
25865
+ ],
25866
+ { cwd: repoDir }
25867
+ );
25822
25868
  for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
25823
25869
  try {
25824
- await runGit(["push", "origin", baseBranch], { cwd: repoDir });
25870
+ await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
25825
25871
  updateStatusFile(normalized, {
25826
25872
  last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
25827
25873
  last_error: void 0
@@ -25830,7 +25876,8 @@ async function directPushResults(params) {
25830
25876
  } catch (error) {
25831
25877
  const message = error instanceof Error ? error.message : String(error);
25832
25878
  if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
25833
- await runGit(["pull", "--rebase", "origin", baseBranch], { cwd: repoDir });
25879
+ await fetchResultsRepo(repoDir);
25880
+ await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
25834
25881
  } else {
25835
25882
  throw error;
25836
25883
  }
@@ -25838,6 +25885,195 @@ async function directPushResults(params) {
25838
25885
  }
25839
25886
  return false;
25840
25887
  }
25888
/**
 * Derives a run identifier from a run directory path relative to `runs/`.
 *
 * The path is split into non-empty segments. The last segment is treated as
 * the timestamp and everything before it as the experiment name:
 *   - `default/<timestamp>`    -> `<timestamp>`
 *   - `<experiment>/<timestamp>` -> `<experiment>::<timestamp>`
 *   - a single segment          -> that segment
 *   - no segments               -> the input unchanged
 *
 * @param {string} relativeRunPath - Run path using the platform separator or `/`.
 * @returns {string} The run identifier.
 */
function buildGitRunId(relativeRunPath) {
  const normalized = relativeRunPath.split(import_node_path54.default.sep).join("/");
  const segments = normalized.split("/").filter(Boolean);
  if (segments.length < 2) {
    return segments[0] ?? relativeRunPath;
  }
  // With at least two segments the last one always exists, so no nullish
  // fallback is needed for the timestamp.
  const timestamp = segments[segments.length - 1];
  const experiment = segments.slice(0, -1).join("/");
  return experiment === "default" ? timestamp : `${experiment}::${timestamp}`;
}
25901
/**
 * Determines the experiment name for a run.
 *
 * Prefers a non-blank `benchmark.metadata.experiment` string. Otherwise the
 * name is taken from the part of `runId` before its last `::`, or
 * `"default"` when `runId` contains no `::`.
 *
 * @param {string} runId - Run identifier, optionally `<experiment>::<timestamp>`.
 * @param {object | null | undefined} benchmark - Parsed benchmark document.
 * @returns {string} The experiment name.
 */
function getRunExperiment(runId, benchmark) {
  // The benchmark is parsed JSON, so `experiment` may be missing or of the
  // wrong type; only trust it when it really is a string.
  const declared = benchmark?.metadata?.experiment;
  const experiment = typeof declared === "string" ? declared.trim() : "";
  if (experiment) {
    return experiment;
  }
  const separatorIndex = runId.lastIndexOf("::");
  return separatorIndex === -1 ? "default" : runId.slice(0, separatorIndex);
}
25909
/**
 * Averages the `pass_rate.mean` values found in a run summary.
 *
 * Entries without a finite numeric `pass_rate.mean` are ignored.
 *
 * @param {Record<string, any> | null | undefined} runSummary - Map of summary
 *   entries, each optionally carrying `pass_rate.mean`.
 * @returns {number | undefined} The arithmetic mean of the usable values, or
 *   `undefined` when `runSummary` is falsy or has no usable value.
 */
function computeAveragePassRate(runSummary) {
  if (!runSummary) {
    return void 0;
  }
  const passRates = Object.values(runSummary)
    // Summaries come from parsed JSON and may be null; skip those entries
    // instead of throwing while reading `pass_rate`.
    .map((summary) => summary?.pass_rate?.mean)
    .filter((value) => typeof value === "number" && Number.isFinite(value));
  if (passRates.length === 0) {
    return void 0;
  }
  const total = passRates.reduce((sum, value) => sum + value, 0);
  return total / passRates.length;
}
25919
/**
 * Runs `git cat-file --batch` in `repoDir`, feeds `input` to its stdin and
 * resolves with everything the process wrote to stdout.
 *
 * @param {string} repoDir - Working directory for the git process.
 * @param {string} input - Text written to git's stdin before it is closed.
 * @returns {Promise<Buffer>} Concatenated stdout when git exits with code 0.
 *   Rejects (via `withFriendlyGitHubAuthError`) when the process cannot be
 *   spawned or exits non-zero; the error message is git's stderr when it
 *   produced any, otherwise "git cat-file failed".
 */
async function runGitBatch(repoDir, input) {
  return new Promise((resolve, reject) => {
    const child = (0, import_node_child_process12.spawn)("git", ["cat-file", "--batch"], {
      cwd: repoDir,
      env: getGitEnv(),
      stdio: ["pipe", "pipe", "pipe"]
    });
    const stdoutChunks = [];
    const stderrChunks = [];
    const toBuffer = (chunk) => (Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    child.stdout.on("data", (chunk) => {
      stdoutChunks.push(toBuffer(chunk));
    });
    child.stderr.on("data", (chunk) => {
      stderrChunks.push(toBuffer(chunk));
    });
    child.on("error", (error) => reject(withFriendlyGitHubAuthError(error)));
    // If git exits before consuming all of its input, the write fails with
    // EPIPE and is emitted as an "error" event on stdin. Without a listener
    // that event would be thrown as an unhandled stream error. EPIPE itself
    // is ignored because the "close" handler reports the real failure.
    child.stdin.on("error", (error) => {
      if (error?.code !== "EPIPE") {
        reject(withFriendlyGitHubAuthError(error));
      }
    });
    child.on("close", (code) => {
      if (code === 0) {
        resolve(Buffer.concat(stdoutChunks));
        return;
      }
      const stderr = Buffer.concat(stderrChunks).toString("utf8").trim();
      reject(
        withFriendlyGitHubAuthError(
          stderr.length > 0 ? new Error(stderr) : new Error("git cat-file failed")
        )
      );
    });
    child.stdin.end(input);
  });
}
25950
/**
 * Splits the raw stdout of `git cat-file --batch` into blob records.
 *
 * Each record starts with a header line `<ref> <type> <size>` followed by
 * `<size>` bytes of content and, optionally, one line feed. Empty header
 * lines and `<ref> missing` lines are skipped without producing a record.
 *
 * @param {Buffer} output - Bytes captured from the batch process.
 * @returns {{ size: number, content: Buffer }[]} One entry per blob, in
 *   output order; `content` is a view into `output`, not a copy.
 * @throws {Error} When a header line is unterminated or unparseable, when an
 *   object is not a blob, or when the content is shorter than announced.
 */
function parseGitBatchBlobs(output) {
  const LINE_FEED = 10;
  const total = output.length;
  const parsed = [];
  let cursor = 0;
  while (cursor < total) {
    const lineEnd = output.indexOf(LINE_FEED, cursor);
    if (lineEnd === -1) {
      throw new Error("Malformed git cat-file output: missing header terminator");
    }
    const headerLine = output.subarray(cursor, lineEnd).toString("utf8");
    cursor = lineEnd + 1;
    // Blank separator lines and "missing" notices carry no content.
    if (headerLine.length === 0 || /^(.*) missing$/.test(headerLine)) {
      continue;
    }
    const fields = /^(.*) (\w+) (\d+)$/.exec(headerLine);
    if (fields === null) {
      throw new Error(`Malformed git cat-file header: ${headerLine}`);
    }
    const objectRef = fields[1];
    const objectType = fields[2];
    if (objectType !== "blob") {
      throw new Error(`Unsupported git object type for ${objectRef}: ${objectType}`);
    }
    const size = Number.parseInt(fields[3], 10);
    const bodyEnd = cursor + size;
    if (bodyEnd > total) {
      throw new Error(`Malformed git cat-file output for ${objectRef}: truncated blob content`);
    }
    parsed.push({ size, content: output.subarray(cursor, bodyEnd) });
    // Step over the single line feed that follows the content, when present.
    const hasTrailingLineFeed = bodyEnd < total && output[bodyEnd] === LINE_FEED;
    cursor = hasTrailingLineFeed ? bodyEnd + 1 : bodyEnd;
  }
  return parsed;
}
25991
/**
 * Lists the runs stored under `runs/` at a git ref without checking files out.
 *
 * Every `benchmark.json` below `runs/` in the tree of `ref` is read through
 * one `git cat-file --batch` call and turned into a run record. Records are
 * sorted by `timestamp`, newest first (string comparison).
 *
 * @param {string} repoDir - Local clone in which git commands are executed.
 * @param {string} [ref="origin/main"] - Tree-ish to read from.
 * @returns {Promise<object[]>} Run records; empty when no benchmark file exists.
 * @throws {Error} When the number of blobs returned does not match the number
 *   of benchmark files requested, when a benchmark file is not valid JSON, or
 *   when a git command fails.
 */
async function listGitRuns(repoDir, ref = "origin/main") {
  // `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths
  // containing non-ASCII or special characters are C-quoted and would never
  // match the "/benchmark.json" suffix below.
  const { stdout: treeOut } = await runGit(
    ["ls-tree", "-r", "-z", "--name-only", ref, "runs"],
    { cwd: repoDir }
  );
  const benchmarkPaths = treeOut
    .split("\0")
    // The batch protocol below is line based, so a path containing a line
    // feed cannot be requested through it.
    .filter((entry) => entry.endsWith("/benchmark.json") && !entry.includes("\n"));
  if (benchmarkPaths.length === 0) {
    return [];
  }
  const requests = benchmarkPaths.map((benchmarkPath) => `${ref}:${benchmarkPath}`);
  const batchInput = `${requests.join("\n")}\n`;
  const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
  if (blobs.length !== benchmarkPaths.length) {
    throw new Error(
      `Expected ${benchmarkPaths.length} git blobs but received ${blobs.length} while listing results runs`
    );
  }
  const runs = blobs.map((blob, index) => {
    const benchmarkPath = benchmarkPaths[index];
    const benchmark = JSON.parse(blob.content.toString("utf8"));
    const runDir = import_node_path54.default.posix.dirname(benchmarkPath);
    const relativeRunPath = import_node_path54.default.posix.relative("runs", runDir);
    const runId = buildGitRunId(relativeRunPath);
    // The timestamp comes from parsed JSON; fall back to the directory name
    // unless it is a non-blank string, so the sort below always sees strings.
    const declaredTimestamp = benchmark.metadata?.timestamp;
    const timestamp =
      (typeof declaredTimestamp === "string" && declaredTimestamp.trim()) ||
      import_node_path54.default.posix.basename(runDir);
    const targets = benchmark.metadata?.targets ?? [];
    const passRate = computeAveragePassRate(benchmark.run_summary);
    return {
      run_id: runId,
      experiment: getRunExperiment(runId, benchmark),
      timestamp,
      ...(passRate !== void 0 && { pass_rate: passRate }),
      // A target is reported only when the run names exactly one.
      ...(targets.length === 1 && targets[0] ? { target: targets[0] } : {}),
      manifest_path: import_node_path54.default.posix.join(runDir, "index.jsonl"),
      benchmark_path: benchmarkPath,
      display_name: import_node_path54.default.posix.basename(runDir),
      test_count: benchmark.metadata?.tests_run?.length ?? 0,
      // Always 0 here; this function does not compute a score.
      avg_score: 0,
      // Size of the benchmark.json blob, not of the whole run directory.
      size_bytes: blob.size
    };
  });
  runs.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
  return runs;
}
26035
/**
 * Writes the files of one run, as stored at a git ref, into the working
 * directory of the clone at `<repoDir>/runs/<relativeRunPath>`.
 *
 * File contents are read with a single `git cat-file --batch` call, written
 * to a temporary directory inside `repoDir`, and then moved into place with
 * one rename. If the rename fails with `EEXIST`/`ENOTEMPTY` and the target
 * exists, the existing directory is kept. The temporary directory is always
 * removed. Nothing is written when the tree has no files under the run path.
 *
 * NOTE(review): `relativeRunPath` is joined onto `runs/` without checking for
 * `..` segments — confirm callers only pass trusted run paths.
 *
 * @param {string} repoDir - Local clone in which git commands are executed.
 * @param {string} relativeRunPath - Run directory relative to `runs/`.
 * @param {string} [ref="origin/main"] - Tree-ish to read from.
 * @returns {Promise<void>}
 * @throws {Error} When the number of blobs returned does not match the number
 *   of files listed, or when a git or filesystem operation fails.
 */
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
  const normalizedRunPath = relativeRunPath.split(import_node_path54.default.sep).join("/");
  const runTreePath = import_node_path54.default.posix.join("runs", normalizedRunPath);
  const targetRunDir = import_node_path54.default.join(repoDir, ...runTreePath.split("/"));
  // `-z` yields NUL-terminated, unquoted paths, so names with non-ASCII
  // characters or surrounding spaces are passed to cat-file exactly as stored.
  const { stdout: treeOut } = await runGit(
    ["ls-tree", "-r", "-z", "--name-only", ref, runTreePath],
    { cwd: repoDir }
  );
  const filePaths = treeOut.split("\0").filter((entry) => entry.length > 0);
  if (filePaths.length === 0) {
    return;
  }
  const batchInput = `${filePaths.map((filePath) => `${ref}:${filePath}`).join("\n")}\n`;
  const blobs = parseGitBatchBlobs(await runGitBatch(repoDir, batchInput));
  if (blobs.length !== filePaths.length) {
    throw new Error(
      `Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
    );
  }
  const tempRoot = (0, import_node_fs19.mkdtempSync)(import_node_path54.default.join(repoDir, ".agentv-run-"));
  const tempRunDir = import_node_path54.default.join(tempRoot, "run");
  try {
    for (const [index, filePath] of filePaths.entries()) {
      const relativeFilePath = import_node_path54.default.posix.relative(runTreePath, filePath);
      const absolutePath = import_node_path54.default.join(tempRunDir, ...relativeFilePath.split("/"));
      (0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(absolutePath), { recursive: true });
      (0, import_node_fs19.writeFileSync)(absolutePath, blobs[index].content);
    }
    (0, import_node_fs19.mkdirSync)(import_node_path54.default.dirname(targetRunDir), { recursive: true });
    try {
      (0, import_node_fs19.renameSync)(tempRunDir, targetRunDir);
    } catch (error) {
      const code = typeof error === "object" && error !== null && "code" in error ? error.code : void 0;
      // The target already exists (for example, materialized by an earlier
      // call): keep it and discard the temporary copy.
      if ((code === "EEXIST" || code === "ENOTEMPTY") && (0, import_node_fs19.existsSync)(targetRunDir)) {
        return;
      }
      throw error;
    }
  } finally {
    (0, import_node_fs19.rmSync)(tempRoot, { recursive: true, force: true });
  }
}
25841
26077
 
25842
26078
  // src/index.ts
25843
26079
  init_paths();
@@ -27477,7 +27713,7 @@ function createAgentKernel() {
27477
27713
  getOutputFilenames,
27478
27714
  getProject,
27479
27715
  getProjectsRegistryPath,
27480
- getResultsRepoCachePaths,
27716
+ getResultsRepoLocalPaths,
27481
27717
  getResultsRepoStatus,
27482
27718
  getSubagentsRoot,
27483
27719
  getTextContent,
@@ -27497,6 +27733,7 @@ function createAgentKernel() {
27497
27733
  isTestMessage,
27498
27734
  isTestMessageRole,
27499
27735
  killAllTrackedChildren,
27736
+ listGitRuns,
27500
27737
  listTargetNames,
27501
27738
  loadConfig,
27502
27739
  loadEvalCaseById,
@@ -27508,6 +27745,7 @@ function createAgentKernel() {
27508
27745
  loadTests,
27509
27746
  loadTsConfig,
27510
27747
  loadTsEvalFile,
27748
+ materializeGitRun,
27511
27749
  mergeExecutionMetrics,
27512
27750
  negateScore,
27513
27751
  normalizeLineEndings,