@agentv/core 4.32.0-next.1 → 4.34.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,9 +15,27 @@ import {
15
15
  GraderRegistry,
16
16
  LatencyGrader,
17
17
  LlmGrader,
18
+ NORMALIZED_REDACTION_LEVELS,
19
+ NORMALIZED_TOOL_STATUSES,
20
+ NORMALIZED_TRACE_EVENT_TYPES,
21
+ NORMALIZED_TRACE_SOURCE_KINDS,
22
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
23
+ NormalizedRawEvidenceWireSchema,
24
+ NormalizedRedactionStateWireSchema,
25
+ NormalizedTraceBranchWireSchema,
26
+ NormalizedTraceErrorWireSchema,
27
+ NormalizedTraceEventWireSchema,
28
+ NormalizedTraceMessageWireSchema,
29
+ NormalizedTraceModelWireSchema,
30
+ NormalizedTraceSessionWireSchema,
31
+ NormalizedTraceSourceRefWireSchema,
32
+ NormalizedTraceSourceWireSchema,
33
+ NormalizedTraceToolWireSchema,
34
+ NormalizedTrajectoryWireSchema,
18
35
  PASS_THRESHOLD,
19
36
  ProviderRegistry,
20
37
  RepoManager,
38
+ ResponseCache,
21
39
  SkillTriggerGrader,
22
40
  TemplateNotDirectoryError,
23
41
  TemplateNotFoundError,
@@ -37,6 +55,7 @@ import {
37
55
  cleanupEvalWorkspaces,
38
56
  cleanupWorkspace,
39
57
  computeTraceSummary,
58
+ computeTraceSummaryFromTrajectory,
40
59
  computeWorkspaceFingerprint,
41
60
  consumeClaudeLogEntries,
42
61
  consumeCodexLogEntries,
@@ -71,14 +90,9 @@ import {
71
90
  extractWorkersFromSuite,
72
91
  formatToolCalls,
73
92
  freeformEvaluationSchema,
74
- getAgentvConfigDir,
75
- getAgentvDataDir,
76
- getAgentvHome,
77
- getSubagentsRoot,
78
- getTraceStateRoot,
93
+ fromNormalizedTrajectoryWire,
94
+ getSelectedTrajectoryEvents,
79
95
  getWorkspacePath,
80
- getWorkspacePoolRoot,
81
- getWorkspacesRoot,
82
96
  initializeBaseline,
83
97
  isAgentSkillsFormat,
84
98
  isNonEmptyString,
@@ -105,6 +119,7 @@ import {
105
119
  readTargetDefinitions,
106
120
  readTestSuiteMetadata,
107
121
  resolveAndCreateProvider,
122
+ resolveResultsConfigForProject,
108
123
  resolveWorkspaceTemplate,
109
124
  rubricEvaluationSchema,
110
125
  runContainsAllAssertion,
@@ -122,6 +137,8 @@ import {
122
137
  runStartsWithAssertion,
123
138
  scoreRangeEvaluationSchema,
124
139
  scoreToVerdict,
140
+ shouldEnableCache,
141
+ shouldSkipCacheForTemperature,
125
142
  subscribeToClaudeLogEntries,
126
143
  subscribeToCodexLogEntries,
127
144
  subscribeToCopilotCliLogEntries,
@@ -129,20 +146,29 @@ import {
129
146
  subscribeToPiLogEntries,
130
147
  substituteVariables,
131
148
  toCamelCaseDeep,
149
+ toNormalizedTrajectoryWire,
132
150
  toSnakeCaseDeep,
133
151
  tokensPerTool,
134
152
  trackChild,
135
153
  trackedChildCount
136
- } from "./chunk-N5EU446L.js";
154
+ } from "./chunk-7QB53OPK.js";
137
155
  import {
138
156
  COMMON_TARGET_SETTINGS,
157
+ RUBRIC_OPERATOR_VALUES,
139
158
  TEST_MESSAGE_ROLES,
140
159
  buildDirectoryChain,
141
160
  buildSearchRoots,
142
161
  extractLastAssistantContent,
143
162
  fileExists,
144
163
  findGitRoot,
164
+ getAgentvConfigDir,
165
+ getAgentvDataDir,
166
+ getAgentvHome,
167
+ getSubagentsRoot,
145
168
  getTextContent,
169
+ getTraceStateRoot,
170
+ getWorkspacePoolRoot,
171
+ getWorkspacesRoot,
146
172
  interpolateEnv,
147
173
  isContent,
148
174
  isContentArray,
@@ -158,7 +184,7 @@ import {
158
184
  resolveDelegatedTargetDefinition,
159
185
  resolveFileReference,
160
186
  resolveTargetDefinition
161
- } from "./chunk-5RQMJZDJ.js";
187
+ } from "./chunk-EW5X2RGJ.js";
162
188
  import "./chunk-3WGHC7LC.js";
163
189
  import "./chunk-YDFZ7XN3.js";
164
190
  import {
@@ -444,16 +470,14 @@ var AgentVConfigSchema = z.object({
444
470
  }).optional(),
445
471
  /** Output settings */
446
472
  output: z.object({
447
- /** Output format */
448
- format: z.enum(["jsonl", "yaml", "json", "xml"]).optional(),
449
- /** Output directory */
473
+ /** Default eval run artifact directory */
450
474
  dir: z.string().optional()
451
- }).optional(),
475
+ }).strict().optional(),
452
476
  /** Response caching */
453
477
  cache: z.object({
454
478
  /** Enable response caching */
455
479
  enabled: z.boolean().optional(),
456
- /** Cache file path */
480
+ /** Response cache directory */
457
481
  path: z.string().optional()
458
482
  }).optional(),
459
483
  /** Cost and duration limits */
@@ -679,47 +703,6 @@ function extractReposFromObject(obj) {
679
703
  return result;
680
704
  }
681
705
 
682
- // src/evaluation/cache/response-cache.ts
683
- import { mkdir, readFile as readFile2, writeFile } from "node:fs/promises";
684
- import path3 from "node:path";
685
- var DEFAULT_CACHE_PATH = ".agentv/cache";
686
- var ResponseCache = class {
687
- cachePath;
688
- constructor(cachePath) {
689
- this.cachePath = cachePath ?? DEFAULT_CACHE_PATH;
690
- }
691
- async get(key) {
692
- const filePath = this.keyToPath(key);
693
- try {
694
- const data = await readFile2(filePath, "utf8");
695
- return JSON.parse(data);
696
- } catch {
697
- return void 0;
698
- }
699
- }
700
- async set(key, value) {
701
- const filePath = this.keyToPath(key);
702
- const dir = path3.dirname(filePath);
703
- await mkdir(dir, { recursive: true });
704
- await writeFile(filePath, JSON.stringify(value, null, 2), "utf8");
705
- }
706
- keyToPath(key) {
707
- const prefix = key.slice(0, 2);
708
- return path3.join(this.cachePath, prefix, `${key}.json`);
709
- }
710
- };
711
- function shouldEnableCache(params) {
712
- if (params.cliNoCache) return false;
713
- return params.cliCache || params.yamlCache === true;
714
- }
715
- function shouldSkipCacheForTemperature(targetConfig) {
716
- const temp = targetConfig.temperature;
717
- if (typeof temp === "number" && temp > 0) {
718
- return true;
719
- }
720
- return false;
721
- }
722
-
723
706
  // src/evaluation/results-repo.ts
724
707
  import { execFile, spawn } from "node:child_process";
725
708
  import {
@@ -733,11 +716,12 @@ import {
733
716
  } from "node:fs";
734
717
  import { cp, mkdtemp, readdir, rm, stat } from "node:fs/promises";
735
718
  import os from "node:os";
736
- import path4 from "node:path";
719
+ import path3 from "node:path";
737
720
  import { promisify } from "node:util";
738
721
  var execFileAsync = promisify(execFile);
739
722
  var RESULTS_REPO_RESULTS_DIR = ".agentv/results";
740
723
  var RESULTS_REPO_RUNS_DIR = `${RESULTS_REPO_RESULTS_DIR}/runs`;
724
+ var activeResultsRepoSyncs = /* @__PURE__ */ new Set();
741
725
  function sanitizeRepoSlug(repo) {
742
726
  return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
743
727
  }
@@ -751,13 +735,13 @@ function withFriendlyGitHubAuthError(error) {
751
735
  }
752
736
  function expandHome(p) {
753
737
  if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
754
- return path4.join(os.homedir(), p.slice(1));
738
+ return path3.join(os.homedir(), p.slice(1));
755
739
  }
756
740
  return p;
757
741
  }
758
742
  function normalizeResultsConfig(config) {
759
743
  const repo = config.repo.trim();
760
- const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
744
+ const resolvedPath = config.path ? expandHome(config.path.trim()) : path3.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
761
745
  return {
762
746
  mode: "github",
763
747
  repo,
@@ -773,11 +757,11 @@ function resolveResultsRepoUrl(repo) {
773
757
  return `https://github.com/${repo}.git`;
774
758
  }
775
759
  function getResultsRepoLocalPaths(repo) {
776
- const rootDir = path4.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
760
+ const rootDir = path3.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
777
761
  return {
778
762
  rootDir,
779
- repoDir: path4.join(rootDir, "repo"),
780
- statusFile: path4.join(rootDir, "status.json")
763
+ repoDir: path3.join(rootDir, "repo"),
764
+ statusFile: path3.join(rootDir, "status.json")
781
765
  };
782
766
  }
783
767
  function readPersistedStatus(statusFile) {
@@ -791,7 +775,7 @@ function readPersistedStatus(statusFile) {
791
775
  }
792
776
  }
793
777
  function writePersistedStatus(statusFile, status) {
794
- mkdirSync(path4.dirname(statusFile), { recursive: true });
778
+ mkdirSync(path3.dirname(statusFile), { recursive: true });
795
779
  writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
796
780
  `, "utf8");
797
781
  }
@@ -850,6 +834,14 @@ async function resolveDefaultBranch(repoDir) {
850
834
  async function fetchResultsRepo(repoDir) {
851
835
  await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
852
836
  }
837
/**
 * Reports whether `repoDir` is inside a git work tree.
 *
 * @param {string} repoDir - Directory in which the git probe is executed.
 * @returns {Promise<boolean>} `true` only when git prints exactly "true";
 *   any error raised while probing is treated as "not a repository".
 */
async function isGitRepository(repoDir) {
  try {
    const probe = await runGit(["rev-parse", "--is-inside-work-tree"], { cwd: repoDir });
    const answer = probe.stdout.trim();
    return answer === "true";
  } catch {
    // Deliberate best-effort: a failing probe simply means "no usable repo here".
    return false;
  }
}
853
845
  function updateStatusFile(config, patch) {
854
846
  const cachePaths = getResultsRepoLocalPaths(config.repo);
855
847
  const current = readPersistedStatus(cachePaths.statusFile);
@@ -863,9 +855,9 @@ async function ensureResultsRepoClone(config) {
863
855
  const cachePaths = getResultsRepoLocalPaths(normalized.repo);
864
856
  const cloneDir = normalized.path;
865
857
  mkdirSync(cachePaths.rootDir, { recursive: true });
866
- mkdirSync(path4.dirname(cloneDir), { recursive: true });
858
+ mkdirSync(path3.dirname(cloneDir), { recursive: true });
867
859
  const cloneMissing = !existsSync(cloneDir);
868
- const gitDir = path4.join(cloneDir, ".git");
860
+ const gitDir = path3.join(cloneDir, ".git");
869
861
  const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
870
862
  if (cloneMissing || cloneEmpty) {
871
863
  try {
@@ -892,7 +884,8 @@ function getResultsRepoStatus(config) {
892
884
  configured: false,
893
885
  available: false,
894
886
  repo: "",
895
- local_dir: ""
887
+ local_dir: "",
888
+ sync_status: "unavailable"
896
889
  };
897
890
  }
898
891
  const normalized = normalizeResultsConfig(config);
@@ -907,9 +900,246 @@ function getResultsRepoStatus(config) {
907
900
  branch_prefix: normalized.branch_prefix,
908
901
  local_dir: normalized.path,
909
902
  last_synced_at: persisted.last_synced_at,
910
- last_error: persisted.last_error
903
+ last_error: persisted.last_error,
904
+ sync_status: existsSync(normalized.path) ? "clean" : "unavailable"
911
905
  };
912
906
  }
907
/**
 * Decodes a single path field of `git status --porcelain=v1` output.
 *
 * Git wraps paths that contain spaces, quotes, backslashes, control characters
 * or (by default) non-ASCII bytes in double quotes and C-escapes them; octal
 * escapes such as `\303\251` are raw bytes of the UTF-8 encoded name.
 *
 * @param {string} field - Raw path field, possibly double-quoted.
 * @returns {string} The decoded path; unquoted fields are returned unchanged.
 */
function decodeGitQuotedPath(field) {
  if (field.length < 2 || !field.startsWith('"') || !field.endsWith('"')) {
    return field;
  }
  const controlEscapes = new Map([
    ["a", 0x07],
    ["b", 0x08],
    ["t", 0x09],
    ["n", 0x0a],
    ["v", 0x0b],
    ["f", 0x0c],
    ["r", 0x0d]
  ]);
  const chunks = [];
  for (const [token, escaped] of field.slice(1, -1).matchAll(/\\([0-7]{1,3}|[\s\S])|[^\\]+/gu)) {
    if (escaped === undefined) {
      chunks.push(Buffer.from(token, "utf8"));
    } else if (/^[0-7]/.test(escaped)) {
      // Octal escapes are bytes, not code points; collect them and decode as UTF-8 at the end.
      chunks.push(Buffer.from([Number.parseInt(escaped, 8) & 0xff]));
    } else if (controlEscapes.has(escaped)) {
      chunks.push(Buffer.from([controlEscapes.get(escaped)]));
    } else {
      // `\"` and `\\` (and anything unrecognised) stand for the character itself.
      chunks.push(Buffer.from(escaped, "utf8"));
    }
  }
  return Buffer.concat(chunks).toString("utf8");
}

/**
 * Splits the path portion of a porcelain line into its fields (one field, or
 * `old -> new` for renames/copies) without splitting inside a quoted field.
 *
 * @param {string} rawPath - Text following the two status characters and separator.
 * @returns {string[]} Raw (still quoted) path fields in order of appearance.
 */
function splitGitPorcelainPathFields(rawPath) {
  const fields = [];
  let rest = rawPath;
  while (rest.length > 0) {
    const quoted = rest.startsWith('"') ? /^"(?:[^"\\]|\\[\s\S])*"/.exec(rest) : null;
    let field;
    if (quoted) {
      field = quoted[0];
    } else {
      const arrowAt = rest.indexOf(" -> ");
      field = arrowAt === -1 ? rest : rest.slice(0, arrowAt);
    }
    rest = rest.slice(field.length);
    if (rest.startsWith(" -> ")) {
      rest = rest.slice(" -> ".length);
    } else if (rest.length > 0) {
      // Unexpected trailing text after a quoted field: keep it rather than drop data.
      field += rest;
      rest = "";
    }
    fields.push(field);
  }
  return fields;
}

/**
 * Extracts changed and conflicted paths from `git status --porcelain=v1` output.
 *
 * Each non-blank line is read as a two-character status code, a separator and
 * the path text. Rename/copy lines contribute both the old and the new path.
 * Quoted paths are unquoted so callers can compare them against plain
 * repository-relative prefixes.
 *
 * @param {string} status - Raw porcelain v1 output (LF or CRLF separated).
 * @returns {{dirtyPaths: string[], conflictedPaths: string[]}} De-duplicated,
 *   sorted path lists; `conflictedPaths` holds the paths whose status code is
 *   one of the unmerged codes (DD, AU, UD, UA, DU, AA, UU).
 */
function parseGitPorcelainPaths(status) {
  const dirtyPaths = new Set();
  const conflictedPaths = new Set();
  const conflictCodes = new Set(["DD", "AU", "UD", "UA", "DU", "AA", "UU"]);
  for (const line of status.split(/\r?\n/)) {
    if (!line.trim()) continue;
    const code = line.slice(0, 2);
    const rawPath = line.slice(3).trim();
    for (const field of splitGitPorcelainPathFields(rawPath)) {
      const p = decodeGitQuotedPath(field.trim());
      if (!p) continue;
      dirtyPaths.add(p);
      if (conflictCodes.has(code)) {
        conflictedPaths.add(p);
      }
    }
  }
  return {
    dirtyPaths: [...dirtyPaths].sort(),
    conflictedPaths: [...conflictedPaths].sort()
  };
}
928
/**
 * Returns a label for what is currently checked out in `repoDir`.
 *
 * @param {string} repoDir - Repository directory the git commands run in.
 * @returns {Promise<string|undefined>} The branch name printed by
 *   `git branch --show-current`; when that is empty, `HEAD@<short sha>`;
 *   `undefined` when neither command prints anything.
 */
async function getCurrentBranch(repoDir) {
  const branchResult = await runGit(["branch", "--show-current"], { cwd: repoDir, check: false });
  const branchName = branchResult.stdout.trim();
  if (branchName) {
    return branchName;
  }
  // No branch name was printed (presumably a detached HEAD); fall back to the commit.
  const shaResult = await runGit(["rev-parse", "--short", "HEAD"], {
    cwd: repoDir,
    check: false
  });
  const shortSha = shaResult.stdout.trim();
  if (!shortSha) {
    return void 0;
  }
  return `HEAD@${shortSha}`;
}
940
/**
 * Picks the ref that local history is compared against.
 *
 * Preference order: the configured upstream of the current branch, then
 * `origin/<default branch>` if that ref can be verified.
 *
 * @param {string} repoDir - Repository directory the git commands run in.
 * @returns {Promise<string|undefined>} The comparison ref, or `undefined`
 *   when neither candidate is available.
 */
async function resolveComparisonRef(repoDir) {
  const upstreamProbe = await runGit(
    ["rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{upstream}"],
    { cwd: repoDir, check: false }
  );
  const trackedRef = upstreamProbe.stdout.trim();
  // Guard against git error text being captured in stdout.
  const hasTrackedRef = trackedRef !== "" && !trackedRef.includes("fatal:");
  if (hasTrackedRef) {
    return trackedRef;
  }
  const defaultBranch = await resolveDefaultBranch(repoDir);
  const candidate = `origin/${defaultBranch}`;
  const verifyProbe = await runGit(["rev-parse", "--verify", candidate], {
    cwd: repoDir,
    check: false
  });
  if (verifyProbe.stdout.trim()) {
    return candidate;
  }
  return void 0;
}
957
/**
 * Counts commits that exist only locally (`ahead`) or only on `upstream` (`behind`).
 *
 * @param {string} repoDir - Repository directory the git command runs in.
 * @param {string|undefined} upstream - Ref to compare `HEAD` against.
 * @returns {Promise<{ahead?: number, behind?: number}>} Only the counts that
 *   could be parsed as numbers; an empty object when `upstream` is falsy.
 */
async function getAheadBehind(repoDir, upstream) {
  const counts = {};
  if (!upstream) {
    return counts;
  }
  const result = await runGit(["rev-list", "--left-right", "--count", `HEAD...${upstream}`], {
    cwd: repoDir,
    check: false
  });
  // Output is "<left> <right>": left = commits only on HEAD, right = only on upstream.
  const columns = result.stdout.trim().split(/\s+/);
  const ahead = Number.parseInt(columns[0] ?? "", 10);
  const behind = Number.parseInt(columns[1] ?? "", 10);
  if (Number.isFinite(ahead)) {
    counts.ahead = ahead;
  }
  if (Number.isFinite(behind)) {
    counts.behind = behind;
  }
  return counts;
}
973
/**
 * Detects an unfinished merge, cherry-pick, revert or rebase by looking for
 * the corresponding `*_HEAD` marker file of the repository.
 *
 * @param {string} repoDir - Repository directory the git commands run in.
 * @returns {Promise<boolean>} `true` as soon as one marker file exists.
 */
async function hasInProgressGitConflict(repoDir) {
  for (const marker of ["MERGE_HEAD", "CHERRY_PICK_HEAD", "REVERT_HEAD", "REBASE_HEAD"]) {
    const located = await runGit(["rev-parse", "--git-path", marker], {
      cwd: repoDir,
      check: false
    });
    const markerPath = located.stdout.trim();
    if (!markerPath) {
      continue;
    }
    // git may print a path relative to the working directory; anchor it at repoDir.
    const absoluteMarkerPath = path3.isAbsolute(markerPath)
      ? markerPath
      : path3.join(repoDir, markerPath);
    if (existsSync(absoluteMarkerPath)) {
      return true;
    }
  }
  return false;
}
988
/**
 * Collects `git diff --stat` summaries for unstaged changes, staged changes
 * and (when an upstream is known) commits between `upstream` and `HEAD`.
 *
 * @param {string} repoDir - Repository directory the git commands run in.
 * @param {string|undefined} upstream - Comparison ref; skipped when falsy.
 * @returns {Promise<string|undefined>} Non-empty summaries joined by a
 *   newline, or `undefined` when every diff is empty.
 */
async function buildGitDiffSummary(repoDir, upstream) {
  const diffCommands = [
    ["diff", "--stat"],
    ["diff", "--cached", "--stat"]
  ];
  if (upstream) {
    diffCommands.push(["diff", "--stat", `${upstream}..HEAD`]);
  }
  const sections = [];
  for (const command of diffCommands) {
    const result = await runGit(command, { cwd: repoDir, check: false });
    const text = result.stdout.trim();
    if (text) {
      sections.push(text);
    }
  }
  if (sections.length === 0) {
    return void 0;
  }
  return sections.join("\n");
}
1003
/**
 * Gathers the git state of the results repo clone and classifies it.
 *
 * Classification precedence: "conflicted" (unmerged paths or an operation in
 * progress), "dirty" (changed paths under the results directory), "diverged",
 * "behind", "ahead", otherwise "clean". Changed paths outside the results
 * directory are not reported in `dirtyPaths`.
 *
 * @param {string} repoDir - Repository directory to inspect.
 * @returns {Promise<object>} `{ syncStatus, branch, upstream, ahead, behind,
 *   dirtyPaths, conflictedPaths, gitStatus, gitDiffSummary }`.
 */
async function inspectResultsRepoGit(repoDir) {
  const branch = await getCurrentBranch(repoDir);
  const upstream = await resolveComparisonRef(repoDir);
  const porcelainResult = await runGit(["status", "--porcelain=v1", "--untracked-files=all"], {
    cwd: repoDir,
    check: false
  });
  const shortResult = await runGit(["status", "--short", "--branch"], {
    cwd: repoDir,
    check: false
  });
  const parsed = parseGitPorcelainPaths(porcelainResult.stdout);
  const conflictedPaths = parsed.conflictedPaths;
  const dirtyPaths = parsed.dirtyPaths.filter(isSafeResultsRepoPath);
  const counts = await getAheadBehind(repoDir, upstream);
  // Missing counts (no upstream / unparsable output) are treated as zero.
  const ahead = counts.ahead === void 0 ? 0 : counts.ahead;
  const behind = counts.behind === void 0 ? 0 : counts.behind;
  const inProgressConflict = await hasInProgressGitConflict(repoDir);
  const classify = () => {
    if (conflictedPaths.length > 0 || inProgressConflict) return "conflicted";
    if (dirtyPaths.length > 0) return "dirty";
    if (ahead > 0 && behind > 0) return "diverged";
    if (behind > 0) return "behind";
    if (ahead > 0) return "ahead";
    return "clean";
  };
  const syncStatus = classify();
  const gitStatus = shortResult.stdout.trim() || void 0;
  const gitDiffSummary = await buildGitDiffSummary(repoDir, upstream);
  return {
    syncStatus,
    branch,
    upstream,
    ahead,
    behind,
    dirtyPaths,
    conflictedPaths,
    gitStatus,
    gitDiffSummary
  };
}
1045
/**
 * Overlays a git inspection onto a status object, converting the camelCase
 * inspection fields to the snake_case keys used by the status payload.
 * `last_error` is always replaced by the value derived from the inspection.
 *
 * @param {object} status - Base status object (copied, not mutated).
 * @param {object} inspection - Result of `inspectResultsRepoGit`.
 * @returns {object} New status object with the git fields applied.
 */
function withGitInspection(status, inspection) {
  const {
    syncStatus,
    branch,
    upstream,
    ahead,
    behind,
    dirtyPaths,
    conflictedPaths,
    gitStatus,
    gitDiffSummary
  } = inspection;
  const lastError = lastErrorForGitInspection(status, inspection);
  return {
    ...status,
    sync_status: syncStatus,
    branch,
    upstream,
    ahead,
    behind,
    dirty_paths: dirtyPaths,
    conflicted_paths: conflictedPaths,
    git_status: gitStatus,
    git_diff_summary: gitDiffSummary,
    last_error: lastError
  };
}
1060
/**
 * Derives the user-facing error message implied by an inspection result.
 *
 * @param {object} status - Status object; only `auto_push` is read.
 * @param {object} inspection - Inspection result; only `syncStatus` is read.
 * @returns {string|undefined} A message for "conflicted", "diverged", or
 *   "dirty" with `auto_push === false`; otherwise `undefined`.
 */
function lastErrorForGitInspection(status, inspection) {
  switch (inspection.syncStatus) {
    case "conflicted":
      return "Results repo has unresolved git conflicts";
    case "diverged":
      return "Results repo local and remote histories have diverged";
    case "dirty":
      // Dirty is only an error when nothing will commit and push the changes.
      return status.auto_push === false
        ? "Results repo has uncommitted changes and auto_push is disabled"
        : void 0;
    default:
      return void 0;
  }
}
1074
/**
 * Marks a status object as blocked and records why.
 *
 * @param {object} status - Base status object (copied, not mutated).
 * @param {string} blockReason - Explanation stored as `block_reason`.
 * @param {{pullPerformed?: boolean, pushPerformed?: boolean, commitCreated?: boolean}} [flags]
 *   Optional action flags; only the ones that are defined are copied to
 *   `pull_performed` / `push_performed` / `commit_created`.
 * @returns {object} New status object with `blocked: true`.
 */
function withBlockedStatus(status, blockReason, flags) {
  const blockedStatus = { ...status, blocked: true, block_reason: blockReason };
  if (flags?.pullPerformed !== void 0) {
    blockedStatus.pull_performed = flags.pullPerformed;
  }
  if (flags?.pushPerformed !== void 0) {
    blockedStatus.push_performed = flags.pushPerformed;
  }
  if (flags?.commitCreated !== void 0) {
    blockedStatus.commit_created = flags.commitCreated;
  }
  return blockedStatus;
}
1084
/**
 * Marks a status object as not blocked and records which sync actions ran.
 *
 * @param {object} status - Base status object (copied, not mutated).
 * @param {{pullPerformed: boolean, pushPerformed: boolean, commitCreated: boolean}} flags
 *   Action flags copied to `pull_performed`, `push_performed`, `commit_created`.
 * @returns {object} New status object with `blocked: false`.
 */
function withActionFlags(status, flags) {
  const { pullPerformed, pushPerformed, commitCreated } = flags;
  const result = { ...status };
  result.blocked = false;
  result.pull_performed = pullPerformed;
  result.push_performed = pushPerformed;
  result.commit_created = commitCreated;
  return result;
}
1093
/**
 * Tells whether a repository-relative path is the results directory itself
 * or lies beneath it (forward-slash separated, as git prints paths).
 *
 * @param {string} p - Repository-relative path.
 * @returns {boolean} `true` when the path belongs to the results directory.
 */
function isSafeResultsRepoPath(p) {
  if (p === RESULTS_REPO_RESULTS_DIR) {
    return true;
  }
  // The trailing slash prevents sibling names such as ".agentv/results-old" from matching.
  return p.startsWith(`${RESULTS_REPO_RESULTS_DIR}/`);
}
1096
/**
 * Checks that a non-empty list of paths lies entirely inside the results directory.
 *
 * @param {string[]} paths - Repository-relative paths.
 * @returns {boolean} `false` for an empty list or when any path is outside
 *   the results directory; `true` otherwise.
 */
function areSafeResultsRepoPaths(paths) {
  if (paths.length === 0) {
    return false;
  }
  for (const candidate of paths) {
    if (!isSafeResultsRepoPath(candidate)) {
      return false;
    }
  }
  return true;
}
1099
/**
 * Lists the paths that differ between `upstream` and `HEAD`.
 *
 * The diff is requested with `-z` so git emits NUL-terminated, unquoted path
 * names. Without it, paths containing non-ASCII or other unusual characters
 * are printed quoted and escaped, which makes them fail prefix checks against
 * the results directory.
 *
 * @param {string} repoDir - Repository directory the git command runs in.
 * @param {string|undefined} upstream - Comparison ref.
 * @returns {Promise<string[]>} Sorted repository-relative paths; empty when
 *   `upstream` is falsy or nothing differs.
 */
async function getAheadPaths(repoDir, upstream) {
  if (!upstream) {
    return [];
  }
  const { stdout } = await runGit(["diff", "--name-only", "-z", `${upstream}..HEAD`], {
    cwd: repoDir,
    check: false
  });
  // Every name is followed by a NUL, so the final split element is empty and gets filtered out.
  return stdout.split("\0").filter(Boolean).sort();
}
1109
/**
 * Chooses the remote branch name to push to.
 *
 * @param {string|undefined} upstream - Comparison ref such as "origin/main".
 * @param {string} baseBranch - Fallback branch name.
 * @returns {string} `upstream` without its "origin/" prefix when it has one,
 *   otherwise `baseBranch`.
 */
function getPushTargetBranch(upstream, baseBranch) {
  const remotePrefix = "origin/";
  if (upstream?.startsWith(remotePrefix)) {
    return upstream.slice(remotePrefix.length);
  }
  return baseBranch;
}
1112
/**
 * Builds the full status payload for a results repo: the persisted/base
 * status combined with a fresh git inspection of `repoDir`.
 *
 * @param {object} normalized - Normalized results config.
 * @param {string} repoDir - Repository directory to inspect.
 * @returns {Promise<object>} Status object including the git fields.
 */
async function statusFromInspection(normalized, repoDir) {
  const baseStatus = getResultsRepoStatus(normalized);
  const inspection = await inspectResultsRepoGit(repoDir);
  return withGitInspection(baseStatus, inspection);
}
1115
/**
 * Reports the sync status of the configured results repo without changing it.
 *
 * @param {object|undefined} config - Results config; when falsy the base
 *   status from `getResultsRepoStatus` is returned as-is.
 * @returns {Promise<object>} Status object whose `sync_status` is "syncing"
 *   while a sync for the same path is registered as active, "unavailable"
 *   when the clone is missing, is not a git repository or cannot be
 *   inspected (then `last_error` carries the message), or the status derived
 *   from a git inspection.
 */
async function getResultsRepoSyncStatus(config) {
  const baseStatus = getResultsRepoStatus(config);
  if (!config) {
    return baseStatus;
  }
  const { path: repoPath } = normalizeResultsConfig(config);
  const withSyncStatus = (syncStatus, extra) => ({
    ...baseStatus,
    sync_status: syncStatus,
    ...extra
  });
  if (activeResultsRepoSyncs.has(repoPath)) {
    return withSyncStatus("syncing");
  }
  const repoPresent = existsSync(repoPath) && await isGitRepository(repoPath);
  if (!repoPresent) {
    return withSyncStatus("unavailable");
  }
  try {
    const inspection = await inspectResultsRepoGit(repoPath);
    return withGitInspection(baseStatus, inspection);
  } catch (error) {
    return withSyncStatus("unavailable", { last_error: getStatusMessage(error) });
  }
}
913
1143
  async function syncResultsRepo(config) {
914
1144
  const normalized = normalizeResultsConfig(config);
915
1145
  try {
@@ -927,6 +1157,198 @@ async function syncResultsRepo(config) {
927
1157
  }
928
1158
  return getResultsRepoStatus(normalized);
929
1159
  }
1160
/**
 * Converts a thrown value into a message string.
 *
 * @param {unknown} error - Any thrown value.
 * @returns {string} `error.message` for `Error` instances, otherwise `String(error)`.
 */
function getStatusMessage(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
1163
/**
 * Synchronizes the local results repo clone with its remote.
 *
 * Steps, in order: ensure the clone exists and fetch; stop on conflicts;
 * when result files are dirty (and `auto_push` is enabled) fast-forward if
 * behind, then commit the results directory; stop when histories diverged;
 * fast-forward when only behind; push when ahead (only if `auto_push` is
 * enabled, an upstream exists and every committed change is under the
 * results directory). Situations that need user attention are returned as a
 * blocked status and recorded as `last_error` in the status file.
 *
 * Only one sync per clone path runs at a time within this process; a
 * concurrent call returns a blocked "syncing" status immediately.
 *
 * Only the `git merge` / `git push` commands themselves are treated as
 * "could not be fast-forwarded" / "push was rejected". A failure while
 * refreshing state after a successful merge or push propagates as an error
 * instead of being misreported as a rejected merge or push.
 *
 * @param {object} config - Results repo configuration.
 * @returns {Promise<object>} Status object with `blocked` and, where known,
 *   `pull_performed` / `push_performed` / `commit_created`.
 * @throws {Error} Unexpected failures, passed through
 *   `withFriendlyGitHubAuthError` after being recorded as `last_error`.
 */
async function syncResultsRepoForProject(config) {
  const normalized = normalizeResultsConfig(config);
  const syncKey = normalized.path;
  if (activeResultsRepoSyncs.has(syncKey)) {
    return {
      ...await getResultsRepoSyncStatus(normalized),
      sync_status: "syncing",
      blocked: true,
      block_reason: "Results repo sync is already in progress"
    };
  }
  activeResultsRepoSyncs.add(syncKey);
  const flags = { pullPerformed: false, pushPerformed: false, commitCreated: false };
  let repoDir;
  let inspection;

  // Builds a blocked status from the latest inspection and records the reason.
  const blockWith = (reason) => {
    const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
    updateStatusFile(normalized, { last_error: reason });
    return withBlockedStatus(status, reason, { ...flags });
  };

  // Fast-forwards to the upstream ref; returns a blocked status on failure, otherwise undefined.
  const fastForward = async () => {
    if (!inspection.upstream) {
      return blockWith("Results repo has no upstream branch to pull from");
    }
    try {
      await runGit(["merge", "--ff-only", inspection.upstream], { cwd: repoDir });
    } catch (error) {
      inspection = await inspectResultsRepoGit(repoDir);
      return blockWith(`Results repo could not be fast-forwarded: ${getStatusMessage(error)}`);
    }
    flags.pullPerformed = true;
    inspection = await inspectResultsRepoGit(repoDir);
    return void 0;
  };

  try {
    repoDir = await ensureResultsRepoClone(normalized);
    await fetchResultsRepo(repoDir);
    inspection = await inspectResultsRepoGit(repoDir);

    if (inspection.syncStatus === "conflicted") {
      return blockWith("Results repo has unresolved git conflicts");
    }

    if (inspection.syncStatus === "dirty") {
      if (!normalized.auto_push) {
        return blockWith("Results repo has uncommitted changes and auto_push is disabled");
      }
      if ((inspection.behind ?? 0) > 0) {
        const blocked = await fastForward();
        if (blocked) {
          return blocked;
        }
      }
      // Re-check: the fast-forward above refreshed the inspection.
      if (inspection.syncStatus === "dirty") {
        await runGit(["add", "--all", "--", RESULTS_REPO_RESULTS_DIR], { cwd: repoDir });
        await runGit(
          [
            "commit",
            "-m",
            "chore(results): sync local result metadata",
            "--",
            RESULTS_REPO_RESULTS_DIR
          ],
          { cwd: repoDir }
        );
        flags.commitCreated = true;
        inspection = await inspectResultsRepoGit(repoDir);
      }
    }

    if (inspection.syncStatus === "diverged") {
      return blockWith("Results repo local and remote histories have diverged");
    }

    if ((inspection.behind ?? 0) > 0 && (inspection.ahead ?? 0) === 0) {
      const blocked = await fastForward();
      if (blocked) {
        return blocked;
      }
    }

    if ((inspection.ahead ?? 0) > 0) {
      if (!normalized.auto_push) {
        // Local commits stay local; this is reported as a normal (unblocked) outcome.
        const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
        return withActionFlags(status, { ...flags });
      }
      const aheadPaths = await getAheadPaths(repoDir, inspection.upstream);
      if (!inspection.upstream || !areSafeResultsRepoPaths(aheadPaths)) {
        return blockWith(
          !inspection.upstream
            ? "Results repo has no upstream branch to push to"
            : "Results repo has non-results committed changes"
        );
      }
      const baseBranch = await resolveDefaultBranch(repoDir);
      const targetBranch = getPushTargetBranch(inspection.upstream, baseBranch);
      try {
        await runGit(["push", "origin", `HEAD:${targetBranch}`], { cwd: repoDir });
      } catch (error) {
        // Best-effort refresh so the reported state reflects the remote that rejected the push.
        await fetchResultsRepo(repoDir).catch(() => void 0);
        inspection = await inspectResultsRepoGit(repoDir);
        return blockWith(`Results repo push was rejected: ${getStatusMessage(error)}`);
      }
      flags.pushPerformed = true;
      await fetchResultsRepo(repoDir);
      inspection = await inspectResultsRepoGit(repoDir);
    }

    updateStatusFile(normalized, {
      last_synced_at: new Date().toISOString(),
      last_error: void 0
    });
    return withActionFlags(await statusFromInspection(normalized, repoDir), { ...flags });
  } catch (error) {
    const friendlyError = withFriendlyGitHubAuthError(error);
    updateStatusFile(normalized, { last_error: friendlyError.message });
    throw friendlyError;
  } finally {
    activeResultsRepoSyncs.delete(syncKey);
  }
}
930
1352
  async function checkoutResultsRepoBranch(config, branchName) {
931
1353
  const normalized = normalizeResultsConfig(config);
932
1354
  const repoDir = await ensureResultsRepoClone(normalized);
@@ -945,8 +1367,8 @@ async function prepareResultsRepoBranch(config, branchName) {
945
1367
  const cloneDir = await ensureResultsRepoClone(normalized);
946
1368
  const baseBranch = await resolveDefaultBranch(cloneDir);
947
1369
  await fetchResultsRepo(cloneDir);
948
- const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
949
- const worktreeDir = path4.join(worktreeRoot, "repo");
1370
+ const worktreeRoot = await mkdtemp(path3.join(os.tmpdir(), "agentv-results-repo-"));
1371
+ const worktreeDir = path3.join(worktreeRoot, "repo");
950
1372
  await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
951
1373
  cwd: cloneDir
952
1374
  });
@@ -965,12 +1387,12 @@ async function prepareResultsRepoBranch(config, branchName) {
965
1387
  }
966
1388
  async function stageResultsArtifacts(params) {
967
1389
  rmSync(params.destinationDir, { recursive: true, force: true });
968
- mkdirSync(path4.dirname(params.destinationDir), { recursive: true });
1390
+ mkdirSync(path3.dirname(params.destinationDir), { recursive: true });
969
1391
  await cp(params.sourceDir, params.destinationDir, { recursive: true });
970
1392
  }
971
1393
  function resolveResultsRepoRunsDir(config) {
972
1394
  const normalized = normalizeResultsConfig(config);
973
- return path4.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
1395
+ return path3.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
974
1396
  }
975
1397
  async function directorySizeBytes(targetPath) {
976
1398
  const entry = await stat(targetPath);
@@ -979,7 +1401,7 @@ async function directorySizeBytes(targetPath) {
979
1401
  }
980
1402
  let total = 0;
981
1403
  for (const child of await readdir(targetPath, { withFileTypes: true })) {
982
- total += await directorySizeBytes(path4.join(targetPath, child.name));
1404
+ total += await directorySizeBytes(path3.join(targetPath, child.name));
983
1405
  }
984
1406
  return total;
985
1407
  }
@@ -1028,12 +1450,40 @@ async function createDraftResultsPr(params) {
1028
1450
  return stdout.trim();
1029
1451
  }
1030
1452
  var DIRECT_PUSH_MAX_RETRIES = 3;
1453
/**
 * Tells whether `HEAD` contains commits that `origin/<baseBranch>` lacks.
 *
 * @param {string} repoDir - Repository directory the git command runs in.
 * @param {string} baseBranch - Branch name on `origin` to compare against.
 * @returns {Promise<boolean>} `true` when the commit count is greater than
 *   zero; unparsable output counts as "no unpushed commits".
 */
async function hasUnpushedCommits(repoDir, baseBranch) {
  const range = `origin/${baseBranch}..HEAD`;
  const result = await runGit(["rev-list", "--count", range], {
    cwd: repoDir,
    check: false
  });
  const unpushedCount = Number.parseInt(result.stdout.trim(), 10);
  // NaN > 0 is false, so empty or garbled output yields false.
  return unpushedCount > 0;
}
1460
/**
 * Pushes `HEAD` of the results clone to `origin/<baseBranch>`, retrying up to
 * `DIRECT_PUSH_MAX_RETRIES` times when the push is rejected as
 * non-fast-forward. Each retry fetches and rebases onto the remote branch.
 *
 * If the rebase itself fails, it is aborted (best effort) before the error is
 * rethrown so the shared clone is not left in the middle of a rebase.
 *
 * @param {{normalized: object, repoDir: string, baseBranch: string}} params
 *   Normalized results config, clone directory and target branch.
 * @returns {Promise<void>} Resolves after a successful push; the status file
 *   is then updated with `last_synced_at` and a cleared `last_error`.
 * @throws The push error when it is not retryable or retries are exhausted,
 *   or the rebase error when rebasing fails.
 */
async function pushDirectResultsToBase(params) {
  const { normalized, repoDir, baseBranch } = params;
  for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
    try {
      await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const canRetry = attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward");
      if (!canRetry) {
        throw error;
      }
      await fetchResultsRepo(repoDir);
      try {
        await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
      } catch (rebaseError) {
        try {
          await runGit(["rebase", "--abort"], { cwd: repoDir, check: false });
        } catch {
          // Best effort only: the rebase failure below is the error worth reporting.
        }
        throw rebaseError;
      }
      continue;
    }
    updateStatusFile(normalized, {
      last_synced_at: new Date().toISOString(),
      last_error: void 0
    });
    return;
  }
}
1031
1480
  async function directPushResults(params) {
1032
1481
  const normalized = normalizeResultsConfig(params.config);
1033
1482
  const repoDir = await ensureResultsRepoClone(normalized);
1034
1483
  const baseBranch = await resolveDefaultBranch(repoDir);
1035
1484
  await fetchResultsRepo(repoDir);
1036
- const destinationDir = path4.join(
1485
+ const targetRunId = buildGitRunId(params.destinationPath);
1486
+ const destinationDir = path3.join(
1037
1487
  repoDir,
1038
1488
  RESULTS_REPO_RESULTS_DIR,
1039
1489
  "runs",
@@ -1050,40 +1500,24 @@ async function directPushResults(params) {
1050
1500
  check: false
1051
1501
  });
1052
1502
  if (status.trim().length === 0) {
1053
- return false;
1054
- }
1055
- await runGit(
1056
- [
1057
- "commit",
1058
- "-m",
1059
- params.commitMessage,
1060
- "-m",
1061
- `Agentv-Run: ${buildGitRunId(params.destinationPath)}`
1062
- ],
1063
- { cwd: repoDir }
1064
- );
1065
- for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
1066
- try {
1067
- await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
1068
- updateStatusFile(normalized, {
1069
- last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1070
- last_error: void 0
1071
- });
1072
- return true;
1073
- } catch (error) {
1074
- const message = error instanceof Error ? error.message : String(error);
1075
- if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
1076
- await fetchResultsRepo(repoDir);
1077
- await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
1078
- } else {
1079
- throw error;
1503
+ if (await hasUnpushedCommits(repoDir, baseBranch)) {
1504
+ const aheadPaths = await getAheadPaths(repoDir, `origin/${baseBranch}`);
1505
+ if (!areSafeResultsRepoPaths(aheadPaths)) {
1506
+ throw new Error("Results repo has non-results committed changes");
1080
1507
  }
1508
+ await pushDirectResultsToBase({ normalized, repoDir, baseBranch });
1509
+ return true;
1081
1510
  }
1511
+ return false;
1082
1512
  }
1083
- return false;
1513
+ await runGit(["commit", "-m", params.commitMessage, "-m", `Agentv-Run: ${targetRunId}`], {
1514
+ cwd: repoDir
1515
+ });
1516
+ await pushDirectResultsToBase({ normalized, repoDir, baseBranch });
1517
+ return true;
1084
1518
  }
1085
1519
  function buildGitRunId(relativeRunPath) {
1086
- const normalized = relativeRunPath.split(path4.sep).join("/");
1520
+ const normalized = relativeRunPath.split(path3.sep).join("/");
1087
1521
  const segments = normalized.split("/").filter(Boolean);
1088
1522
  if (segments.length >= 2) {
1089
1523
  const experiment = segments.slice(0, -1).join("/");
@@ -1207,10 +1641,11 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1207
1641
  const runs = blobs.flatMap((blob, index) => {
1208
1642
  const benchmarkPath = benchmarkPaths[index];
1209
1643
  const benchmark = JSON.parse(blob.content.toString("utf8"));
1210
- const runDir = path4.posix.dirname(benchmarkPath);
1211
- const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1644
+ const runDir = path3.posix.dirname(benchmarkPath);
1645
+ const relativeRunPath = path3.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1212
1646
  const runId = buildGitRunId(relativeRunPath);
1213
- const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
1647
+ const timestamp = benchmark.metadata?.timestamp?.trim() || path3.posix.basename(runDir);
1648
+ const displayName = benchmark.metadata?.display_name?.trim() || path3.posix.basename(runDir);
1214
1649
  const targets = benchmark.metadata?.targets ?? [];
1215
1650
  const passRate = computeAveragePassRate(benchmark.run_summary);
1216
1651
  return [
@@ -1220,9 +1655,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1220
1655
  timestamp,
1221
1656
  ...passRate !== void 0 && { pass_rate: passRate },
1222
1657
  ...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
1223
- manifest_path: path4.posix.join(runDir, "index.jsonl"),
1658
+ manifest_path: path3.posix.join(runDir, "index.jsonl"),
1224
1659
  benchmark_path: benchmarkPath,
1225
- display_name: path4.posix.basename(runDir),
1660
+ display_name: displayName,
1226
1661
  test_count: benchmark.metadata?.tests_run?.length ?? 0,
1227
1662
  avg_score: 0,
1228
1663
  size_bytes: blob.size
@@ -1233,9 +1668,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1233
1668
  return runs;
1234
1669
  }
1235
1670
  async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
1236
- const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
1237
- const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
1238
- const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
1671
+ const normalizedRunPath = relativeRunPath.split(path3.sep).join("/");
1672
+ const runTreePath = path3.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
1673
+ const targetRunDir = path3.join(repoDir, ...runTreePath.split("/"));
1239
1674
  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
1240
1675
  cwd: repoDir
1241
1676
  });
@@ -1251,16 +1686,16 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
1251
1686
  `Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
1252
1687
  );
1253
1688
  }
1254
- const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
1255
- const tempRunDir = path4.join(tempRoot, "run");
1689
+ const tempRoot = mkdtempSync(path3.join(repoDir, ".agentv-run-"));
1690
+ const tempRunDir = path3.join(tempRoot, "run");
1256
1691
  try {
1257
1692
  for (const [index, filePath] of filePaths.entries()) {
1258
- const relativeFilePath = path4.posix.relative(runTreePath, filePath);
1259
- const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
1260
- mkdirSync(path4.dirname(absolutePath), { recursive: true });
1693
+ const relativeFilePath = path3.posix.relative(runTreePath, filePath);
1694
+ const absolutePath = path3.join(tempRunDir, ...relativeFilePath.split("/"));
1695
+ mkdirSync(path3.dirname(absolutePath), { recursive: true });
1261
1696
  writeFileSync(absolutePath, blobs[index].content);
1262
1697
  }
1263
- mkdirSync(path4.dirname(targetRunDir), { recursive: true });
1698
+ mkdirSync(path3.dirname(targetRunDir), { recursive: true });
1264
1699
  try {
1265
1700
  renameSync(tempRunDir, targetRunDir);
1266
1701
  } catch (error) {
@@ -1276,67 +1711,11 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
1276
1711
  }
1277
1712
 
1278
1713
  // src/projects.ts
1279
- import {
1280
- existsSync as existsSync2,
1281
- mkdirSync as mkdirSync2,
1282
- readFileSync as readFileSync3,
1283
- readdirSync,
1284
- renameSync as renameSync2,
1285
- statSync,
1286
- unlinkSync,
1287
- writeFileSync as writeFileSync2
1288
- } from "node:fs";
1289
- import path5 from "node:path";
1714
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync, statSync, writeFileSync as writeFileSync2 } from "node:fs";
1715
+ import path4 from "node:path";
1290
1716
  import { stringify as stringifyYaml } from "yaml";
1291
1717
  function getProjectsRegistryPath() {
1292
- return path5.join(getAgentvConfigDir(), "projects.yaml");
1293
- }
1294
- function getLegacyBenchmarksRegistryPath() {
1295
- return path5.join(getAgentvConfigDir(), "benchmarks.yaml");
1296
- }
1297
- function migrateLegacyBenchmarksFile() {
1298
- const newPath = getProjectsRegistryPath();
1299
- const oldPath = getLegacyBenchmarksRegistryPath();
1300
- const newExists = existsSync2(newPath);
1301
- const oldExists = existsSync2(oldPath);
1302
- if (!oldExists) return;
1303
- if (newExists) {
1304
- console.warn(
1305
- `[agentv] Both ${oldPath} and ${newPath} exist. Using ${path5.basename(newPath)}; delete ${path5.basename(oldPath)} when you've confirmed the new file is correct.`
1306
- );
1307
- return;
1308
- }
1309
- let parsed = null;
1310
- try {
1311
- const raw = readFileSync3(oldPath, "utf-8");
1312
- parsed = parseYamlValue(raw);
1313
- } catch (err) {
1314
- console.warn(
1315
- `[agentv] Failed to read legacy ${path5.basename(oldPath)} for migration: ${err.message}. Leaving the file in place; you may need to migrate it manually.`
1316
- );
1317
- return;
1318
- }
1319
- const entries = parsed && typeof parsed === "object" && Array.isArray(parsed.benchmarks) ? parsed.benchmarks : [];
1320
- const newContent = stringifyYaml({ projects: entries });
1321
- const tempPath = `${newPath}.migrating`;
1322
- try {
1323
- mkdirSync2(path5.dirname(newPath), { recursive: true });
1324
- writeFileSync2(tempPath, newContent, "utf-8");
1325
- renameSync2(tempPath, newPath);
1326
- unlinkSync(oldPath);
1327
- } catch (err) {
1328
- try {
1329
- if (existsSync2(tempPath)) unlinkSync(tempPath);
1330
- } catch {
1331
- }
1332
- console.warn(
1333
- `[agentv] Failed to migrate ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)}: ${err.message}. Legacy file left in place.`
1334
- );
1335
- return;
1336
- }
1337
- console.log(
1338
- `[agentv] Migrated registry: ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)} (${entries.length} entr${entries.length === 1 ? "y" : "ies"})`
1339
- );
1718
+ return path4.join(getAgentvConfigDir(), "config.yaml");
1340
1719
  }
1341
1720
  function fromYaml(raw) {
1342
1721
  if (!raw || typeof raw !== "object") return null;
@@ -1357,6 +1736,18 @@ function fromYaml(raw) {
1357
1736
  entry.source = { url: s.url, ref: s.ref };
1358
1737
  }
1359
1738
  }
1739
+ if (e.results && typeof e.results === "object") {
1740
+ const r = e.results;
1741
+ if (r.mode === "github" && typeof r.repo === "string" && r.repo.trim().length > 0) {
1742
+ entry.results = {
1743
+ mode: "github",
1744
+ repo: r.repo.trim(),
1745
+ ...typeof r.path === "string" && r.path.trim().length > 0 ? { path: r.path.trim() } : {},
1746
+ ...typeof r.auto_push === "boolean" ? { autoPush: r.auto_push } : {},
1747
+ ...typeof r.branch_prefix === "string" && r.branch_prefix.trim().length > 0 ? { branchPrefix: r.branch_prefix.trim() } : {}
1748
+ };
1749
+ }
1750
+ }
1360
1751
  return entry;
1361
1752
  }
1362
1753
  function toYaml(entry) {
@@ -1370,10 +1761,20 @@ function toYaml(entry) {
1370
1761
  if (entry.source) {
1371
1762
  yaml.source = { url: entry.source.url, ref: entry.source.ref };
1372
1763
  }
1764
+ if (entry.results) {
1765
+ yaml.results = {
1766
+ mode: entry.results.mode,
1767
+ repo: entry.results.repo,
1768
+ ...entry.results.path !== void 0 && { path: entry.results.path },
1769
+ ...entry.results.autoPush !== void 0 && { auto_push: entry.results.autoPush },
1770
+ ...entry.results.branchPrefix !== void 0 && {
1771
+ branch_prefix: entry.results.branchPrefix
1772
+ }
1773
+ };
1774
+ }
1373
1775
  return yaml;
1374
1776
  }
1375
1777
  function loadProjectRegistry() {
1376
- migrateLegacyBenchmarksFile();
1377
1778
  const registryPath = getProjectsRegistryPath();
1378
1779
  if (!existsSync2(registryPath)) {
1379
1780
  return { projects: [] };
@@ -1393,15 +1794,24 @@ function loadProjectRegistry() {
1393
1794
  }
1394
1795
  function saveProjectRegistry(registry) {
1395
1796
  const registryPath = getProjectsRegistryPath();
1396
- const dir = path5.dirname(registryPath);
1797
+ const dir = path4.dirname(registryPath);
1397
1798
  if (!existsSync2(dir)) {
1398
1799
  mkdirSync2(dir, { recursive: true });
1399
1800
  }
1400
- const payload = { projects: registry.projects.map(toYaml) };
1801
+ const payload = { ...readHomeConfig(registryPath), projects: registry.projects.map(toYaml) };
1401
1802
  writeFileSync2(registryPath, stringifyYaml(payload), "utf-8");
1402
1803
  }
1804
+ function readHomeConfig(configPath) {
1805
+ if (!existsSync2(configPath)) return {};
1806
+ try {
1807
+ const parsed = parseYamlValue(readFileSync3(configPath, "utf-8"));
1808
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
1809
+ } catch {
1810
+ return {};
1811
+ }
1812
+ }
1403
1813
  function deriveProjectId(dirPath, existingIds) {
1404
- const base = path5.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1814
+ const base = path4.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1405
1815
  let candidate = base || "project";
1406
1816
  let suffix = 2;
1407
1817
  while (existingIds.includes(candidate)) {
@@ -1411,11 +1821,11 @@ function deriveProjectId(dirPath, existingIds) {
1411
1821
  return candidate;
1412
1822
  }
1413
1823
  function addProject(projectPath) {
1414
- const absPath = path5.resolve(projectPath);
1824
+ const absPath = path4.resolve(projectPath);
1415
1825
  if (!existsSync2(absPath)) {
1416
1826
  throw new Error(`Directory not found: ${absPath}`);
1417
1827
  }
1418
- if (!existsSync2(path5.join(absPath, ".agentv"))) {
1828
+ if (!existsSync2(path4.join(absPath, ".agentv"))) {
1419
1829
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
1420
1830
  }
1421
1831
  const registry = loadProjectRegistry();
@@ -1429,7 +1839,7 @@ function addProject(projectPath) {
1429
1839
  absPath,
1430
1840
  registry.projects.map((p) => p.id)
1431
1841
  ),
1432
- name: path5.basename(absPath),
1842
+ name: path4.basename(absPath),
1433
1843
  path: absPath,
1434
1844
  addedAt: now,
1435
1845
  lastOpenedAt: now
@@ -1449,6 +1859,14 @@ function removeProject(projectId) {
1449
1859
  function getProject(projectId) {
1450
1860
  return loadProjectRegistry().projects.find((p) => p.id === projectId);
1451
1861
  }
1862
+ function getProjectForPath(fsPath) {
1863
+ const absPath = path4.resolve(fsPath);
1864
+ return loadProjectRegistry().projects.filter((p) => {
1865
+ const projectPath = path4.resolve(p.path);
1866
+ const relative = path4.relative(projectPath, absPath);
1867
+ return relative === "" || !relative.startsWith("..") && !path4.isAbsolute(relative);
1868
+ }).sort((a, b) => path4.resolve(b.path).length - path4.resolve(a.path).length)[0];
1869
+ }
1452
1870
  function touchProject(projectId) {
1453
1871
  const registry = loadProjectRegistry();
1454
1872
  const entry = registry.projects.find((p) => p.id === projectId);
@@ -1458,14 +1876,14 @@ function touchProject(projectId) {
1458
1876
  }
1459
1877
  }
1460
1878
  function discoverProjects(rootDir, maxDepth = 2) {
1461
- const absRoot = path5.resolve(rootDir);
1879
+ const absRoot = path4.resolve(rootDir);
1462
1880
  if (!existsSync2(absRoot) || !statSync(absRoot).isDirectory()) {
1463
1881
  return [];
1464
1882
  }
1465
1883
  const results = [];
1466
1884
  function scan(dir, depth) {
1467
1885
  if (depth > maxDepth) return;
1468
- if (existsSync2(path5.join(dir, ".agentv"))) {
1886
+ if (existsSync2(path4.join(dir, ".agentv"))) {
1469
1887
  results.push(dir);
1470
1888
  return;
1471
1889
  }
@@ -1475,7 +1893,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
1475
1893
  for (const entry of entries) {
1476
1894
  if (!entry.isDirectory()) continue;
1477
1895
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
1478
- scan(path5.join(dir, entry.name), depth + 1);
1896
+ scan(path4.join(dir, entry.name), depth + 1);
1479
1897
  }
1480
1898
  } catch {
1481
1899
  }
@@ -2504,8 +2922,8 @@ function extractResponseItemContent(content) {
2504
2922
  // src/import/codex-session-discovery.ts
2505
2923
  import { readdir as readdir2, stat as stat2 } from "node:fs/promises";
2506
2924
  import { homedir } from "node:os";
2507
- import path6 from "node:path";
2508
- var DEFAULT_SESSIONS_DIR = () => path6.join(homedir(), ".codex", "sessions");
2925
+ import path5 from "node:path";
2926
+ var DEFAULT_SESSIONS_DIR = () => path5.join(homedir(), ".codex", "sessions");
2509
2927
  async function discoverCodexSessions(opts) {
2510
2928
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
2511
2929
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
@@ -2517,7 +2935,7 @@ async function discoverCodexSessions(opts) {
2517
2935
  return [];
2518
2936
  }
2519
2937
  for (const year of yearDirs) {
2520
- const yearPath = path6.join(sessionsDir, year);
2938
+ const yearPath = path5.join(sessionsDir, year);
2521
2939
  let monthDirs;
2522
2940
  try {
2523
2941
  monthDirs = await readdir2(yearPath);
@@ -2525,7 +2943,7 @@ async function discoverCodexSessions(opts) {
2525
2943
  continue;
2526
2944
  }
2527
2945
  for (const month of monthDirs) {
2528
- const monthPath = path6.join(yearPath, month);
2946
+ const monthPath = path5.join(yearPath, month);
2529
2947
  let dayDirs;
2530
2948
  try {
2531
2949
  dayDirs = await readdir2(monthPath);
@@ -2537,7 +2955,7 @@ async function discoverCodexSessions(opts) {
2537
2955
  const dirDate = `${year}-${month}-${day}`;
2538
2956
  if (dirDate !== opts.date) continue;
2539
2957
  }
2540
- const dayPath = path6.join(monthPath, day);
2958
+ const dayPath = path5.join(monthPath, day);
2541
2959
  let files;
2542
2960
  try {
2543
2961
  files = await readdir2(dayPath);
@@ -2546,7 +2964,7 @@ async function discoverCodexSessions(opts) {
2546
2964
  }
2547
2965
  for (const file of files) {
2548
2966
  if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
2549
- const filePath = path6.join(dayPath, file);
2967
+ const filePath = path5.join(dayPath, file);
2550
2968
  const nameWithoutExt = file.replace(/\.jsonl$/, "");
2551
2969
  const parts = nameWithoutExt.split("-");
2552
2970
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
@@ -2569,8 +2987,8 @@ async function discoverCodexSessions(opts) {
2569
2987
  // src/import/session-discovery.ts
2570
2988
  import { readdir as readdir3, stat as stat3 } from "node:fs/promises";
2571
2989
  import { homedir as homedir2 } from "node:os";
2572
- import path7 from "node:path";
2573
- var DEFAULT_PROJECTS_DIR = () => path7.join(homedir2(), ".claude", "projects");
2990
+ import path6 from "node:path";
2991
+ var DEFAULT_PROJECTS_DIR = () => path6.join(homedir2(), ".claude", "projects");
2574
2992
  function encodeProjectPath(projectPath) {
2575
2993
  return projectPath.replace(/\//g, "-");
2576
2994
  }
@@ -2589,7 +3007,7 @@ async function discoverClaudeSessions(opts) {
2589
3007
  }
2590
3008
  const sessions = [];
2591
3009
  for (const projectDir of projectDirs) {
2592
- const dirPath = path7.join(projectsDir, projectDir);
3010
+ const dirPath = path6.join(projectsDir, projectDir);
2593
3011
  let entries;
2594
3012
  try {
2595
3013
  entries = await readdir3(dirPath);
@@ -2600,7 +3018,7 @@ async function discoverClaudeSessions(opts) {
2600
3018
  if (!entry.endsWith(".jsonl")) continue;
2601
3019
  const sessionId = entry.replace(/\.jsonl$/, "");
2602
3020
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
2603
- const filePath = path7.join(dirPath, entry);
3021
+ const filePath = path6.join(dirPath, entry);
2604
3022
  let updatedAt;
2605
3023
  try {
2606
3024
  const fileStat = await stat3(filePath);
@@ -2621,7 +3039,7 @@ async function discoverClaudeSessions(opts) {
2621
3039
  }
2622
3040
 
2623
3041
  // src/import/types.ts
2624
- import { readFile as readFile3 } from "node:fs/promises";
3042
+ import { readFile as readFile2 } from "node:fs/promises";
2625
3043
  function toTranscriptJsonLines(entry, options) {
2626
3044
  const source = {
2627
3045
  provider: entry.source.provider,
@@ -2708,11 +3126,11 @@ function groupTranscriptJsonLines(lines) {
2708
3126
  }));
2709
3127
  }
2710
3128
  async function readTranscriptJsonl(filePath) {
2711
- const text = await readFile3(filePath, "utf8");
3129
+ const text = await readFile2(filePath, "utf8");
2712
3130
  return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
2713
3131
  }
2714
3132
  async function readTranscriptFile(filePath) {
2715
- return readFile3(filePath, "utf8");
3133
+ return readFile2(filePath, "utf8");
2716
3134
  }
2717
3135
 
2718
3136
  // src/import/transcript-provider.ts
@@ -2785,12 +3203,30 @@ export {
2785
3203
  GraderRegistry,
2786
3204
  LatencyGrader,
2787
3205
  LlmGrader,
3206
+ NORMALIZED_REDACTION_LEVELS,
3207
+ NORMALIZED_TOOL_STATUSES,
3208
+ NORMALIZED_TRACE_EVENT_TYPES,
3209
+ NORMALIZED_TRACE_SOURCE_KINDS,
3210
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
3211
+ NormalizedRawEvidenceWireSchema,
3212
+ NormalizedRedactionStateWireSchema,
3213
+ NormalizedTraceBranchWireSchema,
3214
+ NormalizedTraceErrorWireSchema,
3215
+ NormalizedTraceEventWireSchema,
3216
+ NormalizedTraceMessageWireSchema,
3217
+ NormalizedTraceModelWireSchema,
3218
+ NormalizedTraceSessionWireSchema,
3219
+ NormalizedTraceSourceRefWireSchema,
3220
+ NormalizedTraceSourceWireSchema,
3221
+ NormalizedTraceToolWireSchema,
3222
+ NormalizedTrajectoryWireSchema,
2788
3223
  OTEL_BACKEND_PRESETS,
2789
3224
  OtelStreamingObserver,
2790
3225
  OtelTraceExporter,
2791
3226
  OtlpJsonFileExporter,
2792
3227
  PASS_THRESHOLD,
2793
3228
  ProviderRegistry,
3229
+ RUBRIC_OPERATOR_VALUES,
2794
3230
  RepoManager,
2795
3231
  ResponseCache,
2796
3232
  RunBudgetTracker,
@@ -2820,6 +3256,7 @@ export {
2820
3256
  cleanupWorkspace,
2821
3257
  commitAndPushResultsBranch,
2822
3258
  computeTraceSummary,
3259
+ computeTraceSummaryFromTrajectory,
2823
3260
  computeWorkspaceFingerprint,
2824
3261
  consumeClaudeLogEntries,
2825
3262
  consumeCodexLogEntries,
@@ -2868,15 +3305,19 @@ export {
2868
3305
  findGitRoot,
2869
3306
  formatToolCalls,
2870
3307
  freeformEvaluationSchema,
3308
+ fromNormalizedTrajectoryWire,
2871
3309
  generateRubrics,
2872
3310
  getAgentvConfigDir,
2873
3311
  getAgentvDataDir,
2874
3312
  getAgentvHome,
2875
3313
  getOutputFilenames,
2876
3314
  getProject,
3315
+ getProjectForPath,
2877
3316
  getProjectsRegistryPath,
2878
3317
  getResultsRepoLocalPaths,
2879
3318
  getResultsRepoStatus,
3319
+ getResultsRepoSyncStatus,
3320
+ getSelectedTrajectoryEvents,
2880
3321
  getSubagentsRoot,
2881
3322
  getTextContent,
2882
3323
  getTraceStateRoot,
@@ -2932,6 +3373,7 @@ export {
2932
3373
  resolveAndCreateProvider,
2933
3374
  resolveDelegatedTargetDefinition,
2934
3375
  resolveFileReference,
3376
+ resolveResultsConfigForProject,
2935
3377
  resolveResultsRepoRunsDir,
2936
3378
  resolveResultsRepoUrl,
2937
3379
  resolveTargetDefinition,
@@ -2967,7 +3409,9 @@ export {
2967
3409
  syncProject,
2968
3410
  syncProjects,
2969
3411
  syncResultsRepo,
3412
+ syncResultsRepoForProject,
2970
3413
  toCamelCaseDeep,
3414
+ toNormalizedTrajectoryWire,
2971
3415
  toSnakeCaseDeep,
2972
3416
  toTranscriptJsonLines,
2973
3417
  tokensPerTool,