@agentv/core 4.31.4-next.1 → 4.33.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,9 +15,27 @@ import {
15
15
  GraderRegistry,
16
16
  LatencyGrader,
17
17
  LlmGrader,
18
+ NORMALIZED_REDACTION_LEVELS,
19
+ NORMALIZED_TOOL_STATUSES,
20
+ NORMALIZED_TRACE_EVENT_TYPES,
21
+ NORMALIZED_TRACE_SOURCE_KINDS,
22
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
23
+ NormalizedRawEvidenceWireSchema,
24
+ NormalizedRedactionStateWireSchema,
25
+ NormalizedTraceBranchWireSchema,
26
+ NormalizedTraceErrorWireSchema,
27
+ NormalizedTraceEventWireSchema,
28
+ NormalizedTraceMessageWireSchema,
29
+ NormalizedTraceModelWireSchema,
30
+ NormalizedTraceSessionWireSchema,
31
+ NormalizedTraceSourceRefWireSchema,
32
+ NormalizedTraceSourceWireSchema,
33
+ NormalizedTraceToolWireSchema,
34
+ NormalizedTrajectoryWireSchema,
18
35
  PASS_THRESHOLD,
19
36
  ProviderRegistry,
20
37
  RepoManager,
38
+ ResponseCache,
21
39
  SkillTriggerGrader,
22
40
  TemplateNotDirectoryError,
23
41
  TemplateNotFoundError,
@@ -37,6 +55,7 @@ import {
37
55
  cleanupEvalWorkspaces,
38
56
  cleanupWorkspace,
39
57
  computeTraceSummary,
58
+ computeTraceSummaryFromTrajectory,
40
59
  computeWorkspaceFingerprint,
41
60
  consumeClaudeLogEntries,
42
61
  consumeCodexLogEntries,
@@ -71,13 +90,9 @@ import {
71
90
  extractWorkersFromSuite,
72
91
  formatToolCalls,
73
92
  freeformEvaluationSchema,
74
- getAgentvConfigDir,
75
- getAgentvHome,
76
- getSubagentsRoot,
77
- getTraceStateRoot,
93
+ fromNormalizedTrajectoryWire,
94
+ getSelectedTrajectoryEvents,
78
95
  getWorkspacePath,
79
- getWorkspacePoolRoot,
80
- getWorkspacesRoot,
81
96
  initializeBaseline,
82
97
  isAgentSkillsFormat,
83
98
  isNonEmptyString,
@@ -104,6 +119,7 @@ import {
104
119
  readTargetDefinitions,
105
120
  readTestSuiteMetadata,
106
121
  resolveAndCreateProvider,
122
+ resolveResultsConfigForProject,
107
123
  resolveWorkspaceTemplate,
108
124
  rubricEvaluationSchema,
109
125
  runContainsAllAssertion,
@@ -121,6 +137,8 @@ import {
121
137
  runStartsWithAssertion,
122
138
  scoreRangeEvaluationSchema,
123
139
  scoreToVerdict,
140
+ shouldEnableCache,
141
+ shouldSkipCacheForTemperature,
124
142
  subscribeToClaudeLogEntries,
125
143
  subscribeToCodexLogEntries,
126
144
  subscribeToCopilotCliLogEntries,
@@ -128,20 +146,29 @@ import {
128
146
  subscribeToPiLogEntries,
129
147
  substituteVariables,
130
148
  toCamelCaseDeep,
149
+ toNormalizedTrajectoryWire,
131
150
  toSnakeCaseDeep,
132
151
  tokensPerTool,
133
152
  trackChild,
134
153
  trackedChildCount
135
- } from "./chunk-575K7WRM.js";
154
+ } from "./chunk-7QB53OPK.js";
136
155
  import {
137
156
  COMMON_TARGET_SETTINGS,
157
+ RUBRIC_OPERATOR_VALUES,
138
158
  TEST_MESSAGE_ROLES,
139
159
  buildDirectoryChain,
140
160
  buildSearchRoots,
141
161
  extractLastAssistantContent,
142
162
  fileExists,
143
163
  findGitRoot,
164
+ getAgentvConfigDir,
165
+ getAgentvDataDir,
166
+ getAgentvHome,
167
+ getSubagentsRoot,
144
168
  getTextContent,
169
+ getTraceStateRoot,
170
+ getWorkspacePoolRoot,
171
+ getWorkspacesRoot,
145
172
  interpolateEnv,
146
173
  isContent,
147
174
  isContentArray,
@@ -157,7 +184,7 @@ import {
157
184
  resolveDelegatedTargetDefinition,
158
185
  resolveFileReference,
159
186
  resolveTargetDefinition
160
- } from "./chunk-5RQMJZDJ.js";
187
+ } from "./chunk-EW5X2RGJ.js";
161
188
  import "./chunk-3WGHC7LC.js";
162
189
  import "./chunk-YDFZ7XN3.js";
163
190
  import {
@@ -443,16 +470,14 @@ var AgentVConfigSchema = z.object({
443
470
  }).optional(),
444
471
  /** Output settings */
445
472
  output: z.object({
446
- /** Output format */
447
- format: z.enum(["jsonl", "yaml", "json", "xml"]).optional(),
448
- /** Output directory */
473
+ /** Default eval run artifact directory */
449
474
  dir: z.string().optional()
450
- }).optional(),
475
+ }).strict().optional(),
451
476
  /** Response caching */
452
477
  cache: z.object({
453
478
  /** Enable response caching */
454
479
  enabled: z.boolean().optional(),
455
- /** Cache file path */
480
+ /** Response cache directory */
456
481
  path: z.string().optional()
457
482
  }).optional(),
458
483
  /** Cost and duration limits */
@@ -678,47 +703,6 @@ function extractReposFromObject(obj) {
678
703
  return result;
679
704
  }
680
705
 
681
- // src/evaluation/cache/response-cache.ts
682
- import { mkdir, readFile as readFile2, writeFile } from "node:fs/promises";
683
- import path3 from "node:path";
684
- var DEFAULT_CACHE_PATH = ".agentv/cache";
685
- var ResponseCache = class {
686
- cachePath;
687
- constructor(cachePath) {
688
- this.cachePath = cachePath ?? DEFAULT_CACHE_PATH;
689
- }
690
- async get(key) {
691
- const filePath = this.keyToPath(key);
692
- try {
693
- const data = await readFile2(filePath, "utf8");
694
- return JSON.parse(data);
695
- } catch {
696
- return void 0;
697
- }
698
- }
699
- async set(key, value) {
700
- const filePath = this.keyToPath(key);
701
- const dir = path3.dirname(filePath);
702
- await mkdir(dir, { recursive: true });
703
- await writeFile(filePath, JSON.stringify(value, null, 2), "utf8");
704
- }
705
- keyToPath(key) {
706
- const prefix = key.slice(0, 2);
707
- return path3.join(this.cachePath, prefix, `${key}.json`);
708
- }
709
- };
710
- function shouldEnableCache(params) {
711
- if (params.cliNoCache) return false;
712
- return params.cliCache || params.yamlCache === true;
713
- }
714
- function shouldSkipCacheForTemperature(targetConfig) {
715
- const temp = targetConfig.temperature;
716
- if (typeof temp === "number" && temp > 0) {
717
- return true;
718
- }
719
- return false;
720
- }
721
-
722
706
  // src/evaluation/results-repo.ts
723
707
  import { execFile, spawn } from "node:child_process";
724
708
  import {
@@ -732,11 +716,12 @@ import {
732
716
  } from "node:fs";
733
717
  import { cp, mkdtemp, readdir, rm, stat } from "node:fs/promises";
734
718
  import os from "node:os";
735
- import path4 from "node:path";
719
+ import path3 from "node:path";
736
720
  import { promisify } from "node:util";
737
721
  var execFileAsync = promisify(execFile);
738
722
  var RESULTS_REPO_RESULTS_DIR = ".agentv/results";
739
723
  var RESULTS_REPO_RUNS_DIR = `${RESULTS_REPO_RESULTS_DIR}/runs`;
724
+ var activeResultsRepoSyncs = /* @__PURE__ */ new Set();
740
725
  function sanitizeRepoSlug(repo) {
741
726
  return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
742
727
  }
@@ -750,13 +735,13 @@ function withFriendlyGitHubAuthError(error) {
750
735
  }
751
736
  function expandHome(p) {
752
737
  if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
753
- return path4.join(os.homedir(), p.slice(1));
738
+ return path3.join(os.homedir(), p.slice(1));
754
739
  }
755
740
  return p;
756
741
  }
757
742
  function normalizeResultsConfig(config) {
758
743
  const repo = config.repo.trim();
759
- const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvHome(), "results", sanitizeRepoSlug(repo));
744
+ const resolvedPath = config.path ? expandHome(config.path.trim()) : path3.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
760
745
  return {
761
746
  mode: "github",
762
747
  repo,
@@ -772,11 +757,11 @@ function resolveResultsRepoUrl(repo) {
772
757
  return `https://github.com/${repo}.git`;
773
758
  }
774
759
  function getResultsRepoLocalPaths(repo) {
775
- const rootDir = path4.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
760
+ const rootDir = path3.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
776
761
  return {
777
762
  rootDir,
778
- repoDir: path4.join(rootDir, "repo"),
779
- statusFile: path4.join(rootDir, "status.json")
763
+ repoDir: path3.join(rootDir, "repo"),
764
+ statusFile: path3.join(rootDir, "status.json")
780
765
  };
781
766
  }
782
767
  function readPersistedStatus(statusFile) {
@@ -790,7 +775,7 @@ function readPersistedStatus(statusFile) {
790
775
  }
791
776
  }
792
777
  function writePersistedStatus(statusFile, status) {
793
- mkdirSync(path4.dirname(statusFile), { recursive: true });
778
+ mkdirSync(path3.dirname(statusFile), { recursive: true });
794
779
  writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
795
780
  `, "utf8");
796
781
  }
@@ -849,6 +834,14 @@ async function resolveDefaultBranch(repoDir) {
849
834
  async function fetchResultsRepo(repoDir) {
850
835
  await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
851
836
  }
837
+ async function isGitRepository(repoDir) {
838
+ try {
839
+ const { stdout } = await runGit(["rev-parse", "--is-inside-work-tree"], { cwd: repoDir });
840
+ return stdout.trim() === "true";
841
+ } catch {
842
+ return false;
843
+ }
844
+ }
852
845
  function updateStatusFile(config, patch) {
853
846
  const cachePaths = getResultsRepoLocalPaths(config.repo);
854
847
  const current = readPersistedStatus(cachePaths.statusFile);
@@ -862,9 +855,9 @@ async function ensureResultsRepoClone(config) {
862
855
  const cachePaths = getResultsRepoLocalPaths(normalized.repo);
863
856
  const cloneDir = normalized.path;
864
857
  mkdirSync(cachePaths.rootDir, { recursive: true });
865
- mkdirSync(path4.dirname(cloneDir), { recursive: true });
858
+ mkdirSync(path3.dirname(cloneDir), { recursive: true });
866
859
  const cloneMissing = !existsSync(cloneDir);
867
- const gitDir = path4.join(cloneDir, ".git");
860
+ const gitDir = path3.join(cloneDir, ".git");
868
861
  const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
869
862
  if (cloneMissing || cloneEmpty) {
870
863
  try {
@@ -891,7 +884,8 @@ function getResultsRepoStatus(config) {
891
884
  configured: false,
892
885
  available: false,
893
886
  repo: "",
894
- local_dir: ""
887
+ local_dir: "",
888
+ sync_status: "unavailable"
895
889
  };
896
890
  }
897
891
  const normalized = normalizeResultsConfig(config);
@@ -906,9 +900,247 @@ function getResultsRepoStatus(config) {
906
900
  branch_prefix: normalized.branch_prefix,
907
901
  local_dir: normalized.path,
908
902
  last_synced_at: persisted.last_synced_at,
909
- last_error: persisted.last_error
903
+ last_error: persisted.last_error,
904
+ sync_status: existsSync(normalized.path) ? "clean" : "unavailable"
905
+ };
906
+ }
907
+ function parseGitPorcelainPaths(status) {
908
+ const dirtyPaths = /* @__PURE__ */ new Set();
909
+ const conflictedPaths = /* @__PURE__ */ new Set();
910
+ const conflictCodes = /* @__PURE__ */ new Set(["DD", "AU", "UD", "UA", "DU", "AA", "UU"]);
911
+ for (const line of status.split(/\r?\n/)) {
912
+ if (!line.trim()) continue;
913
+ const code = line.slice(0, 2);
914
+ const rawPath = line.slice(3).trim();
915
+ const paths = rawPath.includes(" -> ") ? rawPath.split(" -> ") : [rawPath];
916
+ for (const p of paths.map((entry) => entry.trim()).filter(Boolean)) {
917
+ dirtyPaths.add(p);
918
+ if (conflictCodes.has(code)) {
919
+ conflictedPaths.add(p);
920
+ }
921
+ }
922
+ }
923
+ return {
924
+ dirtyPaths: [...dirtyPaths].sort(),
925
+ conflictedPaths: [...conflictedPaths].sort()
926
+ };
927
+ }
928
+ async function getCurrentBranch(repoDir) {
929
+ const { stdout } = await runGit(["branch", "--show-current"], { cwd: repoDir, check: false });
930
+ const branch = stdout.trim();
931
+ if (branch) {
932
+ return branch;
933
+ }
934
+ const { stdout: sha } = await runGit(["rev-parse", "--short", "HEAD"], {
935
+ cwd: repoDir,
936
+ check: false
937
+ });
938
+ return sha.trim() ? `HEAD@${sha.trim()}` : void 0;
939
+ }
940
+ async function resolveComparisonRef(repoDir) {
941
+ const { stdout: upstream } = await runGit(
942
+ ["rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{upstream}"],
943
+ { cwd: repoDir, check: false }
944
+ );
945
+ const upstreamRef = upstream.trim();
946
+ if (upstreamRef && !upstreamRef.includes("fatal:")) {
947
+ return upstreamRef;
948
+ }
949
+ const baseBranch = await resolveDefaultBranch(repoDir);
950
+ const fallback = `origin/${baseBranch}`;
951
+ const { stdout: fallbackSha } = await runGit(["rev-parse", "--verify", fallback], {
952
+ cwd: repoDir,
953
+ check: false
954
+ });
955
+ return fallbackSha.trim() ? fallback : void 0;
956
+ }
957
+ async function getAheadBehind(repoDir, upstream) {
958
+ if (!upstream) {
959
+ return {};
960
+ }
961
+ const { stdout } = await runGit(["rev-list", "--left-right", "--count", `HEAD...${upstream}`], {
962
+ cwd: repoDir,
963
+ check: false
964
+ });
965
+ const [aheadText, behindText] = stdout.trim().split(/\s+/);
966
+ const ahead = Number.parseInt(aheadText ?? "", 10);
967
+ const behind = Number.parseInt(behindText ?? "", 10);
968
+ return {
969
+ ...Number.isFinite(ahead) && { ahead },
970
+ ...Number.isFinite(behind) && { behind }
971
+ };
972
+ }
973
+ async function hasInProgressGitConflict(repoDir) {
974
+ const markers = ["MERGE_HEAD", "CHERRY_PICK_HEAD", "REVERT_HEAD", "REBASE_HEAD"];
975
+ for (const marker of markers) {
976
+ const { stdout } = await runGit(["rev-parse", "--git-path", marker], {
977
+ cwd: repoDir,
978
+ check: false
979
+ });
980
+ const markerPath = stdout.trim();
981
+ const resolvedMarkerPath = path3.isAbsolute(markerPath) ? markerPath : path3.join(repoDir, markerPath);
982
+ if (markerPath && existsSync(resolvedMarkerPath)) {
983
+ return true;
984
+ }
985
+ }
986
+ return false;
987
+ }
988
+ async function buildGitDiffSummary(repoDir, upstream) {
989
+ const summaries = [];
990
+ for (const args of [
991
+ ["diff", "--stat"],
992
+ ["diff", "--cached", "--stat"],
993
+ ...upstream ? [["diff", "--stat", `${upstream}..HEAD`]] : []
994
+ ]) {
995
+ const { stdout } = await runGit(args, { cwd: repoDir, check: false });
996
+ const summary = stdout.trim();
997
+ if (summary) {
998
+ summaries.push(summary);
999
+ }
1000
+ }
1001
+ return summaries.length > 0 ? summaries.join("\n") : void 0;
1002
+ }
1003
+ async function inspectResultsRepoGit(repoDir) {
1004
+ const branch = await getCurrentBranch(repoDir);
1005
+ const upstream = await resolveComparisonRef(repoDir);
1006
+ const { stdout: porcelain } = await runGit(
1007
+ ["status", "--porcelain=v1", "--untracked-files=all"],
1008
+ {
1009
+ cwd: repoDir,
1010
+ check: false
1011
+ }
1012
+ );
1013
+ const { stdout: shortStatus } = await runGit(["status", "--short", "--branch"], {
1014
+ cwd: repoDir,
1015
+ check: false
1016
+ });
1017
+ const { dirtyPaths, conflictedPaths } = parseGitPorcelainPaths(porcelain);
1018
+ const { ahead = 0, behind = 0 } = await getAheadBehind(repoDir, upstream);
1019
+ const inProgressConflict = await hasInProgressGitConflict(repoDir);
1020
+ let syncStatus = "clean";
1021
+ if (conflictedPaths.length > 0 || inProgressConflict) {
1022
+ syncStatus = "conflicted";
1023
+ } else if (dirtyPaths.length > 0) {
1024
+ syncStatus = "dirty";
1025
+ } else if (ahead > 0 && behind > 0) {
1026
+ syncStatus = "diverged";
1027
+ } else if (behind > 0) {
1028
+ syncStatus = "behind";
1029
+ } else if (ahead > 0) {
1030
+ syncStatus = "ahead";
1031
+ }
1032
+ return {
1033
+ syncStatus,
1034
+ branch,
1035
+ upstream,
1036
+ ahead,
1037
+ behind,
1038
+ dirtyPaths,
1039
+ conflictedPaths,
1040
+ gitStatus: shortStatus.trim() || void 0,
1041
+ gitDiffSummary: await buildGitDiffSummary(repoDir, upstream)
1042
+ };
1043
+ }
1044
+ function withGitInspection(status, inspection) {
1045
+ return {
1046
+ ...status,
1047
+ sync_status: inspection.syncStatus,
1048
+ branch: inspection.branch,
1049
+ upstream: inspection.upstream,
1050
+ ahead: inspection.ahead,
1051
+ behind: inspection.behind,
1052
+ dirty_paths: inspection.dirtyPaths,
1053
+ conflicted_paths: inspection.conflictedPaths,
1054
+ git_status: inspection.gitStatus,
1055
+ git_diff_summary: inspection.gitDiffSummary,
1056
+ last_error: lastErrorForGitInspection(status, inspection)
1057
+ };
1058
+ }
1059
+ function lastErrorForGitInspection(status, inspection) {
1060
+ if (inspection.syncStatus === "conflicted") {
1061
+ return "Results repo has unresolved git conflicts";
1062
+ }
1063
+ if (inspection.syncStatus === "diverged") {
1064
+ return "Results repo local and remote histories have diverged";
1065
+ }
1066
+ if (inspection.syncStatus === "dirty") {
1067
+ if (status.auto_push === false) {
1068
+ return "Results repo has uncommitted changes and auto_push is disabled";
1069
+ }
1070
+ if (!areSafeResultsRepoPaths(inspection.dirtyPaths)) {
1071
+ return "Results repo has non-results working tree changes";
1072
+ }
1073
+ }
1074
+ return void 0;
1075
+ }
1076
+ function withBlockedStatus(status, blockReason, flags) {
1077
+ return {
1078
+ ...status,
1079
+ blocked: true,
1080
+ block_reason: blockReason,
1081
+ ...flags?.pullPerformed !== void 0 && { pull_performed: flags.pullPerformed },
1082
+ ...flags?.pushPerformed !== void 0 && { push_performed: flags.pushPerformed },
1083
+ ...flags?.commitCreated !== void 0 && { commit_created: flags.commitCreated }
1084
+ };
1085
+ }
1086
+ function withActionFlags(status, flags) {
1087
+ return {
1088
+ ...status,
1089
+ blocked: false,
1090
+ pull_performed: flags.pullPerformed,
1091
+ push_performed: flags.pushPerformed,
1092
+ commit_created: flags.commitCreated
910
1093
  };
911
1094
  }
1095
+ function areSafeResultsRepoPaths(paths) {
1096
+ return paths.length > 0 && paths.every(
1097
+ (p) => p === RESULTS_REPO_RESULTS_DIR || p.startsWith(`${RESULTS_REPO_RESULTS_DIR}/`)
1098
+ );
1099
+ }
1100
+ async function getAheadPaths(repoDir, upstream) {
1101
+ if (!upstream) {
1102
+ return [];
1103
+ }
1104
+ const { stdout } = await runGit(["diff", "--name-only", `${upstream}..HEAD`], {
1105
+ cwd: repoDir,
1106
+ check: false
1107
+ });
1108
+ return stdout.split(/\r?\n/).map((line) => line.trim()).filter(Boolean).sort();
1109
+ }
1110
+ function getPushTargetBranch(upstream, baseBranch) {
1111
+ return upstream?.startsWith("origin/") ? upstream.slice("origin/".length) : baseBranch;
1112
+ }
1113
+ async function statusFromInspection(normalized, repoDir) {
1114
+ return withGitInspection(getResultsRepoStatus(normalized), await inspectResultsRepoGit(repoDir));
1115
+ }
1116
+ async function getResultsRepoSyncStatus(config) {
1117
+ const baseStatus = getResultsRepoStatus(config);
1118
+ if (!config) {
1119
+ return baseStatus;
1120
+ }
1121
+ const normalized = normalizeResultsConfig(config);
1122
+ if (activeResultsRepoSyncs.has(normalized.path)) {
1123
+ return {
1124
+ ...baseStatus,
1125
+ sync_status: "syncing"
1126
+ };
1127
+ }
1128
+ if (!existsSync(normalized.path) || !await isGitRepository(normalized.path)) {
1129
+ return {
1130
+ ...baseStatus,
1131
+ sync_status: "unavailable"
1132
+ };
1133
+ }
1134
+ try {
1135
+ return withGitInspection(baseStatus, await inspectResultsRepoGit(normalized.path));
1136
+ } catch (error) {
1137
+ return {
1138
+ ...baseStatus,
1139
+ sync_status: "unavailable",
1140
+ last_error: getStatusMessage(error)
1141
+ };
1142
+ }
1143
+ }
912
1144
  async function syncResultsRepo(config) {
913
1145
  const normalized = normalizeResultsConfig(config);
914
1146
  try {
@@ -926,6 +1158,180 @@ async function syncResultsRepo(config) {
926
1158
  }
927
1159
  return getResultsRepoStatus(normalized);
928
1160
  }
1161
+ function getStatusMessage(error) {
1162
+ return error instanceof Error ? error.message : String(error);
1163
+ }
1164
+ async function syncResultsRepoForProject(config) {
1165
+ const normalized = normalizeResultsConfig(config);
1166
+ const syncKey = normalized.path;
1167
+ if (activeResultsRepoSyncs.has(syncKey)) {
1168
+ return {
1169
+ ...await getResultsRepoSyncStatus(normalized),
1170
+ sync_status: "syncing",
1171
+ blocked: true,
1172
+ block_reason: "Results repo sync is already in progress"
1173
+ };
1174
+ }
1175
+ activeResultsRepoSyncs.add(syncKey);
1176
+ let pullPerformed = false;
1177
+ let pushPerformed = false;
1178
+ let commitCreated = false;
1179
+ try {
1180
+ const repoDir = await ensureResultsRepoClone(normalized);
1181
+ await fetchResultsRepo(repoDir);
1182
+ let inspection = await inspectResultsRepoGit(repoDir);
1183
+ if (inspection.syncStatus === "conflicted") {
1184
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1185
+ updateStatusFile(normalized, {
1186
+ last_error: "Results repo has unresolved git conflicts"
1187
+ });
1188
+ return withBlockedStatus(status, "Results repo has unresolved git conflicts", {
1189
+ pullPerformed,
1190
+ pushPerformed,
1191
+ commitCreated
1192
+ });
1193
+ }
1194
+ if (inspection.syncStatus === "dirty") {
1195
+ if (!normalized.auto_push) {
1196
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1197
+ updateStatusFile(normalized, {
1198
+ last_error: "Results repo has uncommitted changes and auto_push is disabled"
1199
+ });
1200
+ return withBlockedStatus(
1201
+ status,
1202
+ "Results repo has uncommitted changes and auto_push is disabled",
1203
+ {
1204
+ pullPerformed,
1205
+ pushPerformed,
1206
+ commitCreated
1207
+ }
1208
+ );
1209
+ }
1210
+ if (!areSafeResultsRepoPaths(inspection.dirtyPaths)) {
1211
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1212
+ updateStatusFile(normalized, {
1213
+ last_error: "Results repo has non-results working tree changes"
1214
+ });
1215
+ return withBlockedStatus(status, "Results repo has non-results working tree changes", {
1216
+ pullPerformed,
1217
+ pushPerformed,
1218
+ commitCreated
1219
+ });
1220
+ }
1221
+ if ((inspection.behind ?? 0) > 0) {
1222
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1223
+ const reason = "Results repo has uncommitted result changes and remote changes";
1224
+ updateStatusFile(normalized, { last_error: reason });
1225
+ return withBlockedStatus(status, reason, {
1226
+ pullPerformed,
1227
+ pushPerformed,
1228
+ commitCreated
1229
+ });
1230
+ }
1231
+ await runGit(["add", "--all", "--", RESULTS_REPO_RESULTS_DIR], { cwd: repoDir });
1232
+ await runGit(["commit", "-m", "chore(results): sync local result metadata"], {
1233
+ cwd: repoDir
1234
+ });
1235
+ commitCreated = true;
1236
+ inspection = await inspectResultsRepoGit(repoDir);
1237
+ }
1238
+ if (inspection.syncStatus === "diverged") {
1239
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1240
+ updateStatusFile(normalized, {
1241
+ last_error: "Results repo local and remote histories have diverged"
1242
+ });
1243
+ return withBlockedStatus(status, "Results repo local and remote histories have diverged", {
1244
+ pullPerformed,
1245
+ pushPerformed,
1246
+ commitCreated
1247
+ });
1248
+ }
1249
+ if ((inspection.behind ?? 0) > 0 && (inspection.ahead ?? 0) === 0) {
1250
+ if (!inspection.upstream) {
1251
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1252
+ updateStatusFile(normalized, {
1253
+ last_error: "Results repo has no upstream branch to pull from"
1254
+ });
1255
+ return withBlockedStatus(status, "Results repo has no upstream branch to pull from", {
1256
+ pullPerformed,
1257
+ pushPerformed,
1258
+ commitCreated
1259
+ });
1260
+ }
1261
+ try {
1262
+ await runGit(["merge", "--ff-only", inspection.upstream], { cwd: repoDir });
1263
+ pullPerformed = true;
1264
+ inspection = await inspectResultsRepoGit(repoDir);
1265
+ } catch (error) {
1266
+ inspection = await inspectResultsRepoGit(repoDir);
1267
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1268
+ const reason = `Results repo could not be fast-forwarded: ${getStatusMessage(error)}`;
1269
+ updateStatusFile(normalized, { last_error: reason });
1270
+ return withBlockedStatus(status, reason, {
1271
+ pullPerformed,
1272
+ pushPerformed,
1273
+ commitCreated
1274
+ });
1275
+ }
1276
+ }
1277
+ if ((inspection.ahead ?? 0) > 0) {
1278
+ if (!normalized.auto_push) {
1279
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1280
+ return withActionFlags(status, {
1281
+ pullPerformed,
1282
+ pushPerformed,
1283
+ commitCreated
1284
+ });
1285
+ }
1286
+ const aheadPaths = await getAheadPaths(repoDir, inspection.upstream);
1287
+ if (!inspection.upstream || !areSafeResultsRepoPaths(aheadPaths)) {
1288
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1289
+ const reason = !inspection.upstream ? "Results repo has no upstream branch to push to" : "Results repo has non-results committed changes";
1290
+ updateStatusFile(normalized, { last_error: reason });
1291
+ return withBlockedStatus(status, reason, {
1292
+ pullPerformed,
1293
+ pushPerformed,
1294
+ commitCreated
1295
+ });
1296
+ }
1297
+ const baseBranch = await resolveDefaultBranch(repoDir);
1298
+ const targetBranch = getPushTargetBranch(inspection.upstream, baseBranch);
1299
+ try {
1300
+ await runGit(["push", "origin", `HEAD:${targetBranch}`], { cwd: repoDir });
1301
+ pushPerformed = true;
1302
+ await fetchResultsRepo(repoDir);
1303
+ inspection = await inspectResultsRepoGit(repoDir);
1304
+ } catch (error) {
1305
+ await fetchResultsRepo(repoDir).catch(() => void 0);
1306
+ inspection = await inspectResultsRepoGit(repoDir);
1307
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1308
+ const reason = `Results repo push was rejected: ${getStatusMessage(error)}`;
1309
+ updateStatusFile(normalized, { last_error: reason });
1310
+ return withBlockedStatus(status, reason, {
1311
+ pullPerformed,
1312
+ pushPerformed,
1313
+ commitCreated
1314
+ });
1315
+ }
1316
+ }
1317
+ updateStatusFile(normalized, {
1318
+ last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1319
+ last_error: void 0
1320
+ });
1321
+ return withActionFlags(await statusFromInspection(normalized, repoDir), {
1322
+ pullPerformed,
1323
+ pushPerformed,
1324
+ commitCreated
1325
+ });
1326
+ } catch (error) {
1327
+ updateStatusFile(normalized, {
1328
+ last_error: withFriendlyGitHubAuthError(error).message
1329
+ });
1330
+ throw withFriendlyGitHubAuthError(error);
1331
+ } finally {
1332
+ activeResultsRepoSyncs.delete(syncKey);
1333
+ }
1334
+ }
929
1335
  async function checkoutResultsRepoBranch(config, branchName) {
930
1336
  const normalized = normalizeResultsConfig(config);
931
1337
  const repoDir = await ensureResultsRepoClone(normalized);
@@ -944,8 +1350,8 @@ async function prepareResultsRepoBranch(config, branchName) {
944
1350
  const cloneDir = await ensureResultsRepoClone(normalized);
945
1351
  const baseBranch = await resolveDefaultBranch(cloneDir);
946
1352
  await fetchResultsRepo(cloneDir);
947
- const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
948
- const worktreeDir = path4.join(worktreeRoot, "repo");
1353
+ const worktreeRoot = await mkdtemp(path3.join(os.tmpdir(), "agentv-results-repo-"));
1354
+ const worktreeDir = path3.join(worktreeRoot, "repo");
949
1355
  await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
950
1356
  cwd: cloneDir
951
1357
  });
@@ -964,12 +1370,12 @@ async function prepareResultsRepoBranch(config, branchName) {
964
1370
  }
965
1371
  async function stageResultsArtifacts(params) {
966
1372
  rmSync(params.destinationDir, { recursive: true, force: true });
967
- mkdirSync(path4.dirname(params.destinationDir), { recursive: true });
1373
+ mkdirSync(path3.dirname(params.destinationDir), { recursive: true });
968
1374
  await cp(params.sourceDir, params.destinationDir, { recursive: true });
969
1375
  }
970
1376
  function resolveResultsRepoRunsDir(config) {
971
1377
  const normalized = normalizeResultsConfig(config);
972
- return path4.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
1378
+ return path3.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
973
1379
  }
974
1380
  async function directorySizeBytes(targetPath) {
975
1381
  const entry = await stat(targetPath);
@@ -978,7 +1384,7 @@ async function directorySizeBytes(targetPath) {
978
1384
  }
979
1385
  let total = 0;
980
1386
  for (const child of await readdir(targetPath, { withFileTypes: true })) {
981
- total += await directorySizeBytes(path4.join(targetPath, child.name));
1387
+ total += await directorySizeBytes(path3.join(targetPath, child.name));
982
1388
  }
983
1389
  return total;
984
1390
  }
@@ -1027,12 +1433,40 @@ async function createDraftResultsPr(params) {
1027
1433
  return stdout.trim();
1028
1434
  }
1029
1435
  var DIRECT_PUSH_MAX_RETRIES = 3;
1436
+ async function hasUnpushedCommits(repoDir, baseBranch) {
1437
+ const { stdout } = await runGit(["rev-list", "--count", `origin/${baseBranch}..HEAD`], {
1438
+ cwd: repoDir,
1439
+ check: false
1440
+ });
1441
+ return Number.parseInt(stdout.trim(), 10) > 0;
1442
+ }
1443
+ async function pushDirectResultsToBase(params) {
1444
+ for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
1445
+ try {
1446
+ await runGit(["push", "origin", `HEAD:${params.baseBranch}`], { cwd: params.repoDir });
1447
+ updateStatusFile(params.normalized, {
1448
+ last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1449
+ last_error: void 0
1450
+ });
1451
+ return;
1452
+ } catch (error) {
1453
+ const message = error instanceof Error ? error.message : String(error);
1454
+ if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
1455
+ await fetchResultsRepo(params.repoDir);
1456
+ await runGit(["rebase", `origin/${params.baseBranch}`], { cwd: params.repoDir });
1457
+ } else {
1458
+ throw error;
1459
+ }
1460
+ }
1461
+ }
1462
+ }
1030
1463
  async function directPushResults(params) {
1031
1464
  const normalized = normalizeResultsConfig(params.config);
1032
1465
  const repoDir = await ensureResultsRepoClone(normalized);
1033
1466
  const baseBranch = await resolveDefaultBranch(repoDir);
1034
1467
  await fetchResultsRepo(repoDir);
1035
- const destinationDir = path4.join(
1468
+ const targetRunId = buildGitRunId(params.destinationPath);
1469
+ const destinationDir = path3.join(
1036
1470
  repoDir,
1037
1471
  RESULTS_REPO_RESULTS_DIR,
1038
1472
  "runs",
@@ -1049,40 +1483,24 @@ async function directPushResults(params) {
1049
1483
  check: false
1050
1484
  });
1051
1485
  if (status.trim().length === 0) {
1052
- return false;
1053
- }
1054
- await runGit(
1055
- [
1056
- "commit",
1057
- "-m",
1058
- params.commitMessage,
1059
- "-m",
1060
- `Agentv-Run: ${buildGitRunId(params.destinationPath)}`
1061
- ],
1062
- { cwd: repoDir }
1063
- );
1064
- for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
1065
- try {
1066
- await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
1067
- updateStatusFile(normalized, {
1068
- last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1069
- last_error: void 0
1070
- });
1071
- return true;
1072
- } catch (error) {
1073
- const message = error instanceof Error ? error.message : String(error);
1074
- if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
1075
- await fetchResultsRepo(repoDir);
1076
- await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
1077
- } else {
1078
- throw error;
1486
+ if (await hasUnpushedCommits(repoDir, baseBranch)) {
1487
+ const aheadPaths = await getAheadPaths(repoDir, `origin/${baseBranch}`);
1488
+ if (!areSafeResultsRepoPaths(aheadPaths)) {
1489
+ throw new Error("Results repo has non-results committed changes");
1079
1490
  }
1491
+ await pushDirectResultsToBase({ normalized, repoDir, baseBranch });
1492
+ return true;
1080
1493
  }
1494
+ return false;
1081
1495
  }
1082
- return false;
1496
+ await runGit(["commit", "-m", params.commitMessage, "-m", `Agentv-Run: ${targetRunId}`], {
1497
+ cwd: repoDir
1498
+ });
1499
+ await pushDirectResultsToBase({ normalized, repoDir, baseBranch });
1500
+ return true;
1083
1501
  }
1084
1502
  function buildGitRunId(relativeRunPath) {
1085
- const normalized = relativeRunPath.split(path4.sep).join("/");
1503
+ const normalized = relativeRunPath.split(path3.sep).join("/");
1086
1504
  const segments = normalized.split("/").filter(Boolean);
1087
1505
  if (segments.length >= 2) {
1088
1506
  const experiment = segments.slice(0, -1).join("/");
@@ -1206,10 +1624,11 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1206
1624
  const runs = blobs.flatMap((blob, index) => {
1207
1625
  const benchmarkPath = benchmarkPaths[index];
1208
1626
  const benchmark = JSON.parse(blob.content.toString("utf8"));
1209
- const runDir = path4.posix.dirname(benchmarkPath);
1210
- const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1627
+ const runDir = path3.posix.dirname(benchmarkPath);
1628
+ const relativeRunPath = path3.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1211
1629
  const runId = buildGitRunId(relativeRunPath);
1212
- const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
1630
+ const timestamp = benchmark.metadata?.timestamp?.trim() || path3.posix.basename(runDir);
1631
+ const displayName = benchmark.metadata?.display_name?.trim() || path3.posix.basename(runDir);
1213
1632
  const targets = benchmark.metadata?.targets ?? [];
1214
1633
  const passRate = computeAveragePassRate(benchmark.run_summary);
1215
1634
  return [
@@ -1219,9 +1638,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1219
1638
  timestamp,
1220
1639
  ...passRate !== void 0 && { pass_rate: passRate },
1221
1640
  ...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
1222
- manifest_path: path4.posix.join(runDir, "index.jsonl"),
1641
+ manifest_path: path3.posix.join(runDir, "index.jsonl"),
1223
1642
  benchmark_path: benchmarkPath,
1224
- display_name: path4.posix.basename(runDir),
1643
+ display_name: displayName,
1225
1644
  test_count: benchmark.metadata?.tests_run?.length ?? 0,
1226
1645
  avg_score: 0,
1227
1646
  size_bytes: blob.size
@@ -1232,9 +1651,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1232
1651
  return runs;
1233
1652
  }
1234
1653
  async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
1235
- const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
1236
- const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
1237
- const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
1654
+ const normalizedRunPath = relativeRunPath.split(path3.sep).join("/");
1655
+ const runTreePath = path3.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
1656
+ const targetRunDir = path3.join(repoDir, ...runTreePath.split("/"));
1238
1657
  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
1239
1658
  cwd: repoDir
1240
1659
  });
@@ -1250,16 +1669,16 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
1250
1669
  `Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
1251
1670
  );
1252
1671
  }
1253
- const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
1254
- const tempRunDir = path4.join(tempRoot, "run");
1672
+ const tempRoot = mkdtempSync(path3.join(repoDir, ".agentv-run-"));
1673
+ const tempRunDir = path3.join(tempRoot, "run");
1255
1674
  try {
1256
1675
  for (const [index, filePath] of filePaths.entries()) {
1257
- const relativeFilePath = path4.posix.relative(runTreePath, filePath);
1258
- const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
1259
- mkdirSync(path4.dirname(absolutePath), { recursive: true });
1676
+ const relativeFilePath = path3.posix.relative(runTreePath, filePath);
1677
+ const absolutePath = path3.join(tempRunDir, ...relativeFilePath.split("/"));
1678
+ mkdirSync(path3.dirname(absolutePath), { recursive: true });
1260
1679
  writeFileSync(absolutePath, blobs[index].content);
1261
1680
  }
1262
- mkdirSync(path4.dirname(targetRunDir), { recursive: true });
1681
+ mkdirSync(path3.dirname(targetRunDir), { recursive: true });
1263
1682
  try {
1264
1683
  renameSync(tempRunDir, targetRunDir);
1265
1684
  } catch (error) {
@@ -1275,67 +1694,11 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
1275
1694
  }
1276
1695
 
1277
1696
  // src/projects.ts
1278
- import {
1279
- existsSync as existsSync2,
1280
- mkdirSync as mkdirSync2,
1281
- readFileSync as readFileSync3,
1282
- readdirSync,
1283
- renameSync as renameSync2,
1284
- statSync,
1285
- unlinkSync,
1286
- writeFileSync as writeFileSync2
1287
- } from "node:fs";
1288
- import path5 from "node:path";
1697
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync, statSync, writeFileSync as writeFileSync2 } from "node:fs";
1698
+ import path4 from "node:path";
1289
1699
  import { stringify as stringifyYaml } from "yaml";
1290
1700
  function getProjectsRegistryPath() {
1291
- return path5.join(getAgentvConfigDir(), "projects.yaml");
1292
- }
1293
- function getLegacyBenchmarksRegistryPath() {
1294
- return path5.join(getAgentvConfigDir(), "benchmarks.yaml");
1295
- }
1296
- function migrateLegacyBenchmarksFile() {
1297
- const newPath = getProjectsRegistryPath();
1298
- const oldPath = getLegacyBenchmarksRegistryPath();
1299
- const newExists = existsSync2(newPath);
1300
- const oldExists = existsSync2(oldPath);
1301
- if (!oldExists) return;
1302
- if (newExists) {
1303
- console.warn(
1304
- `[agentv] Both ${oldPath} and ${newPath} exist. Using ${path5.basename(newPath)}; delete ${path5.basename(oldPath)} when you've confirmed the new file is correct.`
1305
- );
1306
- return;
1307
- }
1308
- let parsed = null;
1309
- try {
1310
- const raw = readFileSync3(oldPath, "utf-8");
1311
- parsed = parseYamlValue(raw);
1312
- } catch (err) {
1313
- console.warn(
1314
- `[agentv] Failed to read legacy ${path5.basename(oldPath)} for migration: ${err.message}. Leaving the file in place; you may need to migrate it manually.`
1315
- );
1316
- return;
1317
- }
1318
- const entries = parsed && typeof parsed === "object" && Array.isArray(parsed.benchmarks) ? parsed.benchmarks : [];
1319
- const newContent = stringifyYaml({ projects: entries });
1320
- const tempPath = `${newPath}.migrating`;
1321
- try {
1322
- mkdirSync2(path5.dirname(newPath), { recursive: true });
1323
- writeFileSync2(tempPath, newContent, "utf-8");
1324
- renameSync2(tempPath, newPath);
1325
- unlinkSync(oldPath);
1326
- } catch (err) {
1327
- try {
1328
- if (existsSync2(tempPath)) unlinkSync(tempPath);
1329
- } catch {
1330
- }
1331
- console.warn(
1332
- `[agentv] Failed to migrate ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)}: ${err.message}. Legacy file left in place.`
1333
- );
1334
- return;
1335
- }
1336
- console.log(
1337
- `[agentv] Migrated registry: ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)} (${entries.length} entr${entries.length === 1 ? "y" : "ies"})`
1338
- );
1701
+ return path4.join(getAgentvConfigDir(), "config.yaml");
1339
1702
  }
1340
1703
  function fromYaml(raw) {
1341
1704
  if (!raw || typeof raw !== "object") return null;
@@ -1356,6 +1719,18 @@ function fromYaml(raw) {
1356
1719
  entry.source = { url: s.url, ref: s.ref };
1357
1720
  }
1358
1721
  }
1722
+ if (e.results && typeof e.results === "object") {
1723
+ const r = e.results;
1724
+ if (r.mode === "github" && typeof r.repo === "string" && r.repo.trim().length > 0) {
1725
+ entry.results = {
1726
+ mode: "github",
1727
+ repo: r.repo.trim(),
1728
+ ...typeof r.path === "string" && r.path.trim().length > 0 ? { path: r.path.trim() } : {},
1729
+ ...typeof r.auto_push === "boolean" ? { autoPush: r.auto_push } : {},
1730
+ ...typeof r.branch_prefix === "string" && r.branch_prefix.trim().length > 0 ? { branchPrefix: r.branch_prefix.trim() } : {}
1731
+ };
1732
+ }
1733
+ }
1359
1734
  return entry;
1360
1735
  }
1361
1736
  function toYaml(entry) {
@@ -1369,10 +1744,20 @@ function toYaml(entry) {
1369
1744
  if (entry.source) {
1370
1745
  yaml.source = { url: entry.source.url, ref: entry.source.ref };
1371
1746
  }
1747
+ if (entry.results) {
1748
+ yaml.results = {
1749
+ mode: entry.results.mode,
1750
+ repo: entry.results.repo,
1751
+ ...entry.results.path !== void 0 && { path: entry.results.path },
1752
+ ...entry.results.autoPush !== void 0 && { auto_push: entry.results.autoPush },
1753
+ ...entry.results.branchPrefix !== void 0 && {
1754
+ branch_prefix: entry.results.branchPrefix
1755
+ }
1756
+ };
1757
+ }
1372
1758
  return yaml;
1373
1759
  }
1374
1760
  function loadProjectRegistry() {
1375
- migrateLegacyBenchmarksFile();
1376
1761
  const registryPath = getProjectsRegistryPath();
1377
1762
  if (!existsSync2(registryPath)) {
1378
1763
  return { projects: [] };
@@ -1392,15 +1777,24 @@ function loadProjectRegistry() {
1392
1777
  }
1393
1778
  function saveProjectRegistry(registry) {
1394
1779
  const registryPath = getProjectsRegistryPath();
1395
- const dir = path5.dirname(registryPath);
1780
+ const dir = path4.dirname(registryPath);
1396
1781
  if (!existsSync2(dir)) {
1397
1782
  mkdirSync2(dir, { recursive: true });
1398
1783
  }
1399
- const payload = { projects: registry.projects.map(toYaml) };
1784
+ const payload = { ...readHomeConfig(registryPath), projects: registry.projects.map(toYaml) };
1400
1785
  writeFileSync2(registryPath, stringifyYaml(payload), "utf-8");
1401
1786
  }
1787
+ function readHomeConfig(configPath) {
1788
+ if (!existsSync2(configPath)) return {};
1789
+ try {
1790
+ const parsed = parseYamlValue(readFileSync3(configPath, "utf-8"));
1791
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
1792
+ } catch {
1793
+ return {};
1794
+ }
1795
+ }
1402
1796
  function deriveProjectId(dirPath, existingIds) {
1403
- const base = path5.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1797
+ const base = path4.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1404
1798
  let candidate = base || "project";
1405
1799
  let suffix = 2;
1406
1800
  while (existingIds.includes(candidate)) {
@@ -1410,11 +1804,11 @@ function deriveProjectId(dirPath, existingIds) {
1410
1804
  return candidate;
1411
1805
  }
1412
1806
  function addProject(projectPath) {
1413
- const absPath = path5.resolve(projectPath);
1807
+ const absPath = path4.resolve(projectPath);
1414
1808
  if (!existsSync2(absPath)) {
1415
1809
  throw new Error(`Directory not found: ${absPath}`);
1416
1810
  }
1417
- if (!existsSync2(path5.join(absPath, ".agentv"))) {
1811
+ if (!existsSync2(path4.join(absPath, ".agentv"))) {
1418
1812
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
1419
1813
  }
1420
1814
  const registry = loadProjectRegistry();
@@ -1428,7 +1822,7 @@ function addProject(projectPath) {
1428
1822
  absPath,
1429
1823
  registry.projects.map((p) => p.id)
1430
1824
  ),
1431
- name: path5.basename(absPath),
1825
+ name: path4.basename(absPath),
1432
1826
  path: absPath,
1433
1827
  addedAt: now,
1434
1828
  lastOpenedAt: now
@@ -1448,6 +1842,14 @@ function removeProject(projectId) {
1448
1842
  function getProject(projectId) {
1449
1843
  return loadProjectRegistry().projects.find((p) => p.id === projectId);
1450
1844
  }
1845
+ function getProjectForPath(fsPath) {
1846
+ const absPath = path4.resolve(fsPath);
1847
+ return loadProjectRegistry().projects.filter((p) => {
1848
+ const projectPath = path4.resolve(p.path);
1849
+ const relative = path4.relative(projectPath, absPath);
1850
+ return relative === "" || !relative.startsWith("..") && !path4.isAbsolute(relative);
1851
+ }).sort((a, b) => path4.resolve(b.path).length - path4.resolve(a.path).length)[0];
1852
+ }
1451
1853
  function touchProject(projectId) {
1452
1854
  const registry = loadProjectRegistry();
1453
1855
  const entry = registry.projects.find((p) => p.id === projectId);
@@ -1457,14 +1859,14 @@ function touchProject(projectId) {
1457
1859
  }
1458
1860
  }
1459
1861
  function discoverProjects(rootDir, maxDepth = 2) {
1460
- const absRoot = path5.resolve(rootDir);
1862
+ const absRoot = path4.resolve(rootDir);
1461
1863
  if (!existsSync2(absRoot) || !statSync(absRoot).isDirectory()) {
1462
1864
  return [];
1463
1865
  }
1464
1866
  const results = [];
1465
1867
  function scan(dir, depth) {
1466
1868
  if (depth > maxDepth) return;
1467
- if (existsSync2(path5.join(dir, ".agentv"))) {
1869
+ if (existsSync2(path4.join(dir, ".agentv"))) {
1468
1870
  results.push(dir);
1469
1871
  return;
1470
1872
  }
@@ -1474,7 +1876,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
1474
1876
  for (const entry of entries) {
1475
1877
  if (!entry.isDirectory()) continue;
1476
1878
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
1477
- scan(path5.join(dir, entry.name), depth + 1);
1879
+ scan(path4.join(dir, entry.name), depth + 1);
1478
1880
  }
1479
1881
  } catch {
1480
1882
  }
@@ -2503,8 +2905,8 @@ function extractResponseItemContent(content) {
2503
2905
  // src/import/codex-session-discovery.ts
2504
2906
  import { readdir as readdir2, stat as stat2 } from "node:fs/promises";
2505
2907
  import { homedir } from "node:os";
2506
- import path6 from "node:path";
2507
- var DEFAULT_SESSIONS_DIR = () => path6.join(homedir(), ".codex", "sessions");
2908
+ import path5 from "node:path";
2909
+ var DEFAULT_SESSIONS_DIR = () => path5.join(homedir(), ".codex", "sessions");
2508
2910
  async function discoverCodexSessions(opts) {
2509
2911
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
2510
2912
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
@@ -2516,7 +2918,7 @@ async function discoverCodexSessions(opts) {
2516
2918
  return [];
2517
2919
  }
2518
2920
  for (const year of yearDirs) {
2519
- const yearPath = path6.join(sessionsDir, year);
2921
+ const yearPath = path5.join(sessionsDir, year);
2520
2922
  let monthDirs;
2521
2923
  try {
2522
2924
  monthDirs = await readdir2(yearPath);
@@ -2524,7 +2926,7 @@ async function discoverCodexSessions(opts) {
2524
2926
  continue;
2525
2927
  }
2526
2928
  for (const month of monthDirs) {
2527
- const monthPath = path6.join(yearPath, month);
2929
+ const monthPath = path5.join(yearPath, month);
2528
2930
  let dayDirs;
2529
2931
  try {
2530
2932
  dayDirs = await readdir2(monthPath);
@@ -2536,7 +2938,7 @@ async function discoverCodexSessions(opts) {
2536
2938
  const dirDate = `${year}-${month}-${day}`;
2537
2939
  if (dirDate !== opts.date) continue;
2538
2940
  }
2539
- const dayPath = path6.join(monthPath, day);
2941
+ const dayPath = path5.join(monthPath, day);
2540
2942
  let files;
2541
2943
  try {
2542
2944
  files = await readdir2(dayPath);
@@ -2545,7 +2947,7 @@ async function discoverCodexSessions(opts) {
2545
2947
  }
2546
2948
  for (const file of files) {
2547
2949
  if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
2548
- const filePath = path6.join(dayPath, file);
2950
+ const filePath = path5.join(dayPath, file);
2549
2951
  const nameWithoutExt = file.replace(/\.jsonl$/, "");
2550
2952
  const parts = nameWithoutExt.split("-");
2551
2953
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
@@ -2568,8 +2970,8 @@ async function discoverCodexSessions(opts) {
2568
2970
  // src/import/session-discovery.ts
2569
2971
  import { readdir as readdir3, stat as stat3 } from "node:fs/promises";
2570
2972
  import { homedir as homedir2 } from "node:os";
2571
- import path7 from "node:path";
2572
- var DEFAULT_PROJECTS_DIR = () => path7.join(homedir2(), ".claude", "projects");
2973
+ import path6 from "node:path";
2974
+ var DEFAULT_PROJECTS_DIR = () => path6.join(homedir2(), ".claude", "projects");
2573
2975
  function encodeProjectPath(projectPath) {
2574
2976
  return projectPath.replace(/\//g, "-");
2575
2977
  }
@@ -2588,7 +2990,7 @@ async function discoverClaudeSessions(opts) {
2588
2990
  }
2589
2991
  const sessions = [];
2590
2992
  for (const projectDir of projectDirs) {
2591
- const dirPath = path7.join(projectsDir, projectDir);
2993
+ const dirPath = path6.join(projectsDir, projectDir);
2592
2994
  let entries;
2593
2995
  try {
2594
2996
  entries = await readdir3(dirPath);
@@ -2599,7 +3001,7 @@ async function discoverClaudeSessions(opts) {
2599
3001
  if (!entry.endsWith(".jsonl")) continue;
2600
3002
  const sessionId = entry.replace(/\.jsonl$/, "");
2601
3003
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
2602
- const filePath = path7.join(dirPath, entry);
3004
+ const filePath = path6.join(dirPath, entry);
2603
3005
  let updatedAt;
2604
3006
  try {
2605
3007
  const fileStat = await stat3(filePath);
@@ -2620,7 +3022,7 @@ async function discoverClaudeSessions(opts) {
2620
3022
  }
2621
3023
 
2622
3024
  // src/import/types.ts
2623
- import { readFile as readFile3 } from "node:fs/promises";
3025
+ import { readFile as readFile2 } from "node:fs/promises";
2624
3026
  function toTranscriptJsonLines(entry, options) {
2625
3027
  const source = {
2626
3028
  provider: entry.source.provider,
@@ -2707,11 +3109,11 @@ function groupTranscriptJsonLines(lines) {
2707
3109
  }));
2708
3110
  }
2709
3111
  async function readTranscriptJsonl(filePath) {
2710
- const text = await readFile3(filePath, "utf8");
3112
+ const text = await readFile2(filePath, "utf8");
2711
3113
  return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
2712
3114
  }
2713
3115
  async function readTranscriptFile(filePath) {
2714
- return readFile3(filePath, "utf8");
3116
+ return readFile2(filePath, "utf8");
2715
3117
  }
2716
3118
 
2717
3119
  // src/import/transcript-provider.ts
@@ -2784,12 +3186,30 @@ export {
2784
3186
  GraderRegistry,
2785
3187
  LatencyGrader,
2786
3188
  LlmGrader,
3189
+ NORMALIZED_REDACTION_LEVELS,
3190
+ NORMALIZED_TOOL_STATUSES,
3191
+ NORMALIZED_TRACE_EVENT_TYPES,
3192
+ NORMALIZED_TRACE_SOURCE_KINDS,
3193
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
3194
+ NormalizedRawEvidenceWireSchema,
3195
+ NormalizedRedactionStateWireSchema,
3196
+ NormalizedTraceBranchWireSchema,
3197
+ NormalizedTraceErrorWireSchema,
3198
+ NormalizedTraceEventWireSchema,
3199
+ NormalizedTraceMessageWireSchema,
3200
+ NormalizedTraceModelWireSchema,
3201
+ NormalizedTraceSessionWireSchema,
3202
+ NormalizedTraceSourceRefWireSchema,
3203
+ NormalizedTraceSourceWireSchema,
3204
+ NormalizedTraceToolWireSchema,
3205
+ NormalizedTrajectoryWireSchema,
2787
3206
  OTEL_BACKEND_PRESETS,
2788
3207
  OtelStreamingObserver,
2789
3208
  OtelTraceExporter,
2790
3209
  OtlpJsonFileExporter,
2791
3210
  PASS_THRESHOLD,
2792
3211
  ProviderRegistry,
3212
+ RUBRIC_OPERATOR_VALUES,
2793
3213
  RepoManager,
2794
3214
  ResponseCache,
2795
3215
  RunBudgetTracker,
@@ -2819,6 +3239,7 @@ export {
2819
3239
  cleanupWorkspace,
2820
3240
  commitAndPushResultsBranch,
2821
3241
  computeTraceSummary,
3242
+ computeTraceSummaryFromTrajectory,
2822
3243
  computeWorkspaceFingerprint,
2823
3244
  consumeClaudeLogEntries,
2824
3245
  consumeCodexLogEntries,
@@ -2867,14 +3288,19 @@ export {
2867
3288
  findGitRoot,
2868
3289
  formatToolCalls,
2869
3290
  freeformEvaluationSchema,
3291
+ fromNormalizedTrajectoryWire,
2870
3292
  generateRubrics,
2871
3293
  getAgentvConfigDir,
3294
+ getAgentvDataDir,
2872
3295
  getAgentvHome,
2873
3296
  getOutputFilenames,
2874
3297
  getProject,
3298
+ getProjectForPath,
2875
3299
  getProjectsRegistryPath,
2876
3300
  getResultsRepoLocalPaths,
2877
3301
  getResultsRepoStatus,
3302
+ getResultsRepoSyncStatus,
3303
+ getSelectedTrajectoryEvents,
2878
3304
  getSubagentsRoot,
2879
3305
  getTextContent,
2880
3306
  getTraceStateRoot,
@@ -2930,6 +3356,7 @@ export {
2930
3356
  resolveAndCreateProvider,
2931
3357
  resolveDelegatedTargetDefinition,
2932
3358
  resolveFileReference,
3359
+ resolveResultsConfigForProject,
2933
3360
  resolveResultsRepoRunsDir,
2934
3361
  resolveResultsRepoUrl,
2935
3362
  resolveTargetDefinition,
@@ -2965,7 +3392,9 @@ export {
2965
3392
  syncProject,
2966
3393
  syncProjects,
2967
3394
  syncResultsRepo,
3395
+ syncResultsRepoForProject,
2968
3396
  toCamelCaseDeep,
3397
+ toNormalizedTrajectoryWire,
2969
3398
  toSnakeCaseDeep,
2970
3399
  toTranscriptJsonLines,
2971
3400
  tokensPerTool,