@agentv/core 4.32.0-next.1 → 4.33.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -15,9 +15,27 @@ import {
15
15
  GraderRegistry,
16
16
  LatencyGrader,
17
17
  LlmGrader,
18
+ NORMALIZED_REDACTION_LEVELS,
19
+ NORMALIZED_TOOL_STATUSES,
20
+ NORMALIZED_TRACE_EVENT_TYPES,
21
+ NORMALIZED_TRACE_SOURCE_KINDS,
22
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
23
+ NormalizedRawEvidenceWireSchema,
24
+ NormalizedRedactionStateWireSchema,
25
+ NormalizedTraceBranchWireSchema,
26
+ NormalizedTraceErrorWireSchema,
27
+ NormalizedTraceEventWireSchema,
28
+ NormalizedTraceMessageWireSchema,
29
+ NormalizedTraceModelWireSchema,
30
+ NormalizedTraceSessionWireSchema,
31
+ NormalizedTraceSourceRefWireSchema,
32
+ NormalizedTraceSourceWireSchema,
33
+ NormalizedTraceToolWireSchema,
34
+ NormalizedTrajectoryWireSchema,
18
35
  PASS_THRESHOLD,
19
36
  ProviderRegistry,
20
37
  RepoManager,
38
+ ResponseCache,
21
39
  SkillTriggerGrader,
22
40
  TemplateNotDirectoryError,
23
41
  TemplateNotFoundError,
@@ -37,6 +55,7 @@ import {
37
55
  cleanupEvalWorkspaces,
38
56
  cleanupWorkspace,
39
57
  computeTraceSummary,
58
+ computeTraceSummaryFromTrajectory,
40
59
  computeWorkspaceFingerprint,
41
60
  consumeClaudeLogEntries,
42
61
  consumeCodexLogEntries,
@@ -71,14 +90,9 @@ import {
71
90
  extractWorkersFromSuite,
72
91
  formatToolCalls,
73
92
  freeformEvaluationSchema,
74
- getAgentvConfigDir,
75
- getAgentvDataDir,
76
- getAgentvHome,
77
- getSubagentsRoot,
78
- getTraceStateRoot,
93
+ fromNormalizedTrajectoryWire,
94
+ getSelectedTrajectoryEvents,
79
95
  getWorkspacePath,
80
- getWorkspacePoolRoot,
81
- getWorkspacesRoot,
82
96
  initializeBaseline,
83
97
  isAgentSkillsFormat,
84
98
  isNonEmptyString,
@@ -105,6 +119,7 @@ import {
105
119
  readTargetDefinitions,
106
120
  readTestSuiteMetadata,
107
121
  resolveAndCreateProvider,
122
+ resolveResultsConfigForProject,
108
123
  resolveWorkspaceTemplate,
109
124
  rubricEvaluationSchema,
110
125
  runContainsAllAssertion,
@@ -122,6 +137,8 @@ import {
122
137
  runStartsWithAssertion,
123
138
  scoreRangeEvaluationSchema,
124
139
  scoreToVerdict,
140
+ shouldEnableCache,
141
+ shouldSkipCacheForTemperature,
125
142
  subscribeToClaudeLogEntries,
126
143
  subscribeToCodexLogEntries,
127
144
  subscribeToCopilotCliLogEntries,
@@ -129,20 +146,29 @@ import {
129
146
  subscribeToPiLogEntries,
130
147
  substituteVariables,
131
148
  toCamelCaseDeep,
149
+ toNormalizedTrajectoryWire,
132
150
  toSnakeCaseDeep,
133
151
  tokensPerTool,
134
152
  trackChild,
135
153
  trackedChildCount
136
- } from "./chunk-N5EU446L.js";
154
+ } from "./chunk-7QB53OPK.js";
137
155
  import {
138
156
  COMMON_TARGET_SETTINGS,
157
+ RUBRIC_OPERATOR_VALUES,
139
158
  TEST_MESSAGE_ROLES,
140
159
  buildDirectoryChain,
141
160
  buildSearchRoots,
142
161
  extractLastAssistantContent,
143
162
  fileExists,
144
163
  findGitRoot,
164
+ getAgentvConfigDir,
165
+ getAgentvDataDir,
166
+ getAgentvHome,
167
+ getSubagentsRoot,
145
168
  getTextContent,
169
+ getTraceStateRoot,
170
+ getWorkspacePoolRoot,
171
+ getWorkspacesRoot,
146
172
  interpolateEnv,
147
173
  isContent,
148
174
  isContentArray,
@@ -158,7 +184,7 @@ import {
158
184
  resolveDelegatedTargetDefinition,
159
185
  resolveFileReference,
160
186
  resolveTargetDefinition
161
- } from "./chunk-5RQMJZDJ.js";
187
+ } from "./chunk-EW5X2RGJ.js";
162
188
  import "./chunk-3WGHC7LC.js";
163
189
  import "./chunk-YDFZ7XN3.js";
164
190
  import {
@@ -444,16 +470,14 @@ var AgentVConfigSchema = z.object({
444
470
  }).optional(),
445
471
  /** Output settings */
446
472
  output: z.object({
447
- /** Output format */
448
- format: z.enum(["jsonl", "yaml", "json", "xml"]).optional(),
449
- /** Output directory */
473
+ /** Default eval run artifact directory */
450
474
  dir: z.string().optional()
451
- }).optional(),
475
+ }).strict().optional(),
452
476
  /** Response caching */
453
477
  cache: z.object({
454
478
  /** Enable response caching */
455
479
  enabled: z.boolean().optional(),
456
- /** Cache file path */
480
+ /** Response cache directory */
457
481
  path: z.string().optional()
458
482
  }).optional(),
459
483
  /** Cost and duration limits */
@@ -679,47 +703,6 @@ function extractReposFromObject(obj) {
679
703
  return result;
680
704
  }
681
705
 
682
- // src/evaluation/cache/response-cache.ts
683
- import { mkdir, readFile as readFile2, writeFile } from "node:fs/promises";
684
- import path3 from "node:path";
685
- var DEFAULT_CACHE_PATH = ".agentv/cache";
686
- var ResponseCache = class {
687
- cachePath;
688
- constructor(cachePath) {
689
- this.cachePath = cachePath ?? DEFAULT_CACHE_PATH;
690
- }
691
- async get(key) {
692
- const filePath = this.keyToPath(key);
693
- try {
694
- const data = await readFile2(filePath, "utf8");
695
- return JSON.parse(data);
696
- } catch {
697
- return void 0;
698
- }
699
- }
700
- async set(key, value) {
701
- const filePath = this.keyToPath(key);
702
- const dir = path3.dirname(filePath);
703
- await mkdir(dir, { recursive: true });
704
- await writeFile(filePath, JSON.stringify(value, null, 2), "utf8");
705
- }
706
- keyToPath(key) {
707
- const prefix = key.slice(0, 2);
708
- return path3.join(this.cachePath, prefix, `${key}.json`);
709
- }
710
- };
711
- function shouldEnableCache(params) {
712
- if (params.cliNoCache) return false;
713
- return params.cliCache || params.yamlCache === true;
714
- }
715
- function shouldSkipCacheForTemperature(targetConfig) {
716
- const temp = targetConfig.temperature;
717
- if (typeof temp === "number" && temp > 0) {
718
- return true;
719
- }
720
- return false;
721
- }
722
-
723
706
  // src/evaluation/results-repo.ts
724
707
  import { execFile, spawn } from "node:child_process";
725
708
  import {
@@ -733,11 +716,12 @@ import {
733
716
  } from "node:fs";
734
717
  import { cp, mkdtemp, readdir, rm, stat } from "node:fs/promises";
735
718
  import os from "node:os";
736
- import path4 from "node:path";
719
+ import path3 from "node:path";
737
720
  import { promisify } from "node:util";
738
721
  var execFileAsync = promisify(execFile);
739
722
  var RESULTS_REPO_RESULTS_DIR = ".agentv/results";
740
723
  var RESULTS_REPO_RUNS_DIR = `${RESULTS_REPO_RESULTS_DIR}/runs`;
724
+ var activeResultsRepoSyncs = /* @__PURE__ */ new Set();
741
725
  function sanitizeRepoSlug(repo) {
742
726
  return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
743
727
  }
@@ -751,13 +735,13 @@ function withFriendlyGitHubAuthError(error) {
751
735
  }
752
736
  function expandHome(p) {
753
737
  if (p === "~" || p.startsWith("~/") || p.startsWith("~\\")) {
754
- return path4.join(os.homedir(), p.slice(1));
738
+ return path3.join(os.homedir(), p.slice(1));
755
739
  }
756
740
  return p;
757
741
  }
758
742
  function normalizeResultsConfig(config) {
759
743
  const repo = config.repo.trim();
760
- const resolvedPath = config.path ? expandHome(config.path.trim()) : path4.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
744
+ const resolvedPath = config.path ? expandHome(config.path.trim()) : path3.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
761
745
  return {
762
746
  mode: "github",
763
747
  repo,
@@ -773,11 +757,11 @@ function resolveResultsRepoUrl(repo) {
773
757
  return `https://github.com/${repo}.git`;
774
758
  }
775
759
  function getResultsRepoLocalPaths(repo) {
776
- const rootDir = path4.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
760
+ const rootDir = path3.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
777
761
  return {
778
762
  rootDir,
779
- repoDir: path4.join(rootDir, "repo"),
780
- statusFile: path4.join(rootDir, "status.json")
763
+ repoDir: path3.join(rootDir, "repo"),
764
+ statusFile: path3.join(rootDir, "status.json")
781
765
  };
782
766
  }
783
767
  function readPersistedStatus(statusFile) {
@@ -791,7 +775,7 @@ function readPersistedStatus(statusFile) {
791
775
  }
792
776
  }
793
777
  function writePersistedStatus(statusFile, status) {
794
- mkdirSync(path4.dirname(statusFile), { recursive: true });
778
+ mkdirSync(path3.dirname(statusFile), { recursive: true });
795
779
  writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
796
780
  `, "utf8");
797
781
  }
@@ -850,6 +834,14 @@ async function resolveDefaultBranch(repoDir) {
850
834
  async function fetchResultsRepo(repoDir) {
851
835
  await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
852
836
  }
837
+ async function isGitRepository(repoDir) {
838
+ try {
839
+ const { stdout } = await runGit(["rev-parse", "--is-inside-work-tree"], { cwd: repoDir });
840
+ return stdout.trim() === "true";
841
+ } catch {
842
+ return false;
843
+ }
844
+ }
853
845
  function updateStatusFile(config, patch) {
854
846
  const cachePaths = getResultsRepoLocalPaths(config.repo);
855
847
  const current = readPersistedStatus(cachePaths.statusFile);
@@ -863,9 +855,9 @@ async function ensureResultsRepoClone(config) {
863
855
  const cachePaths = getResultsRepoLocalPaths(normalized.repo);
864
856
  const cloneDir = normalized.path;
865
857
  mkdirSync(cachePaths.rootDir, { recursive: true });
866
- mkdirSync(path4.dirname(cloneDir), { recursive: true });
858
+ mkdirSync(path3.dirname(cloneDir), { recursive: true });
867
859
  const cloneMissing = !existsSync(cloneDir);
868
- const gitDir = path4.join(cloneDir, ".git");
860
+ const gitDir = path3.join(cloneDir, ".git");
869
861
  const cloneEmpty = !cloneMissing && !existsSync(gitDir) && (await readdir(cloneDir)).length === 0;
870
862
  if (cloneMissing || cloneEmpty) {
871
863
  try {
@@ -892,7 +884,8 @@ function getResultsRepoStatus(config) {
892
884
  configured: false,
893
885
  available: false,
894
886
  repo: "",
895
- local_dir: ""
887
+ local_dir: "",
888
+ sync_status: "unavailable"
896
889
  };
897
890
  }
898
891
  const normalized = normalizeResultsConfig(config);
@@ -907,9 +900,247 @@ function getResultsRepoStatus(config) {
907
900
  branch_prefix: normalized.branch_prefix,
908
901
  local_dir: normalized.path,
909
902
  last_synced_at: persisted.last_synced_at,
910
- last_error: persisted.last_error
903
+ last_error: persisted.last_error,
904
+ sync_status: existsSync(normalized.path) ? "clean" : "unavailable"
905
+ };
906
+ }
907
+ function parseGitPorcelainPaths(status) {
908
+ const dirtyPaths = /* @__PURE__ */ new Set();
909
+ const conflictedPaths = /* @__PURE__ */ new Set();
910
+ const conflictCodes = /* @__PURE__ */ new Set(["DD", "AU", "UD", "UA", "DU", "AA", "UU"]);
911
+ for (const line of status.split(/\r?\n/)) {
912
+ if (!line.trim()) continue;
913
+ const code = line.slice(0, 2);
914
+ const rawPath = line.slice(3).trim();
915
+ const paths = rawPath.includes(" -> ") ? rawPath.split(" -> ") : [rawPath];
916
+ for (const p of paths.map((entry) => entry.trim()).filter(Boolean)) {
917
+ dirtyPaths.add(p);
918
+ if (conflictCodes.has(code)) {
919
+ conflictedPaths.add(p);
920
+ }
921
+ }
922
+ }
923
+ return {
924
+ dirtyPaths: [...dirtyPaths].sort(),
925
+ conflictedPaths: [...conflictedPaths].sort()
926
+ };
927
+ }
928
+ async function getCurrentBranch(repoDir) {
929
+ const { stdout } = await runGit(["branch", "--show-current"], { cwd: repoDir, check: false });
930
+ const branch = stdout.trim();
931
+ if (branch) {
932
+ return branch;
933
+ }
934
+ const { stdout: sha } = await runGit(["rev-parse", "--short", "HEAD"], {
935
+ cwd: repoDir,
936
+ check: false
937
+ });
938
+ return sha.trim() ? `HEAD@${sha.trim()}` : void 0;
939
+ }
940
+ async function resolveComparisonRef(repoDir) {
941
+ const { stdout: upstream } = await runGit(
942
+ ["rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{upstream}"],
943
+ { cwd: repoDir, check: false }
944
+ );
945
+ const upstreamRef = upstream.trim();
946
+ if (upstreamRef && !upstreamRef.includes("fatal:")) {
947
+ return upstreamRef;
948
+ }
949
+ const baseBranch = await resolveDefaultBranch(repoDir);
950
+ const fallback = `origin/${baseBranch}`;
951
+ const { stdout: fallbackSha } = await runGit(["rev-parse", "--verify", fallback], {
952
+ cwd: repoDir,
953
+ check: false
954
+ });
955
+ return fallbackSha.trim() ? fallback : void 0;
956
+ }
957
+ async function getAheadBehind(repoDir, upstream) {
958
+ if (!upstream) {
959
+ return {};
960
+ }
961
+ const { stdout } = await runGit(["rev-list", "--left-right", "--count", `HEAD...${upstream}`], {
962
+ cwd: repoDir,
963
+ check: false
964
+ });
965
+ const [aheadText, behindText] = stdout.trim().split(/\s+/);
966
+ const ahead = Number.parseInt(aheadText ?? "", 10);
967
+ const behind = Number.parseInt(behindText ?? "", 10);
968
+ return {
969
+ ...Number.isFinite(ahead) && { ahead },
970
+ ...Number.isFinite(behind) && { behind }
911
971
  };
912
972
  }
973
+ async function hasInProgressGitConflict(repoDir) {
974
+ const markers = ["MERGE_HEAD", "CHERRY_PICK_HEAD", "REVERT_HEAD", "REBASE_HEAD"];
975
+ for (const marker of markers) {
976
+ const { stdout } = await runGit(["rev-parse", "--git-path", marker], {
977
+ cwd: repoDir,
978
+ check: false
979
+ });
980
+ const markerPath = stdout.trim();
981
+ const resolvedMarkerPath = path3.isAbsolute(markerPath) ? markerPath : path3.join(repoDir, markerPath);
982
+ if (markerPath && existsSync(resolvedMarkerPath)) {
983
+ return true;
984
+ }
985
+ }
986
+ return false;
987
+ }
988
+ async function buildGitDiffSummary(repoDir, upstream) {
989
+ const summaries = [];
990
+ for (const args of [
991
+ ["diff", "--stat"],
992
+ ["diff", "--cached", "--stat"],
993
+ ...upstream ? [["diff", "--stat", `${upstream}..HEAD`]] : []
994
+ ]) {
995
+ const { stdout } = await runGit(args, { cwd: repoDir, check: false });
996
+ const summary = stdout.trim();
997
+ if (summary) {
998
+ summaries.push(summary);
999
+ }
1000
+ }
1001
+ return summaries.length > 0 ? summaries.join("\n") : void 0;
1002
+ }
1003
+ async function inspectResultsRepoGit(repoDir) {
1004
+ const branch = await getCurrentBranch(repoDir);
1005
+ const upstream = await resolveComparisonRef(repoDir);
1006
+ const { stdout: porcelain } = await runGit(
1007
+ ["status", "--porcelain=v1", "--untracked-files=all"],
1008
+ {
1009
+ cwd: repoDir,
1010
+ check: false
1011
+ }
1012
+ );
1013
+ const { stdout: shortStatus } = await runGit(["status", "--short", "--branch"], {
1014
+ cwd: repoDir,
1015
+ check: false
1016
+ });
1017
+ const { dirtyPaths, conflictedPaths } = parseGitPorcelainPaths(porcelain);
1018
+ const { ahead = 0, behind = 0 } = await getAheadBehind(repoDir, upstream);
1019
+ const inProgressConflict = await hasInProgressGitConflict(repoDir);
1020
+ let syncStatus = "clean";
1021
+ if (conflictedPaths.length > 0 || inProgressConflict) {
1022
+ syncStatus = "conflicted";
1023
+ } else if (dirtyPaths.length > 0) {
1024
+ syncStatus = "dirty";
1025
+ } else if (ahead > 0 && behind > 0) {
1026
+ syncStatus = "diverged";
1027
+ } else if (behind > 0) {
1028
+ syncStatus = "behind";
1029
+ } else if (ahead > 0) {
1030
+ syncStatus = "ahead";
1031
+ }
1032
+ return {
1033
+ syncStatus,
1034
+ branch,
1035
+ upstream,
1036
+ ahead,
1037
+ behind,
1038
+ dirtyPaths,
1039
+ conflictedPaths,
1040
+ gitStatus: shortStatus.trim() || void 0,
1041
+ gitDiffSummary: await buildGitDiffSummary(repoDir, upstream)
1042
+ };
1043
+ }
1044
+ function withGitInspection(status, inspection) {
1045
+ return {
1046
+ ...status,
1047
+ sync_status: inspection.syncStatus,
1048
+ branch: inspection.branch,
1049
+ upstream: inspection.upstream,
1050
+ ahead: inspection.ahead,
1051
+ behind: inspection.behind,
1052
+ dirty_paths: inspection.dirtyPaths,
1053
+ conflicted_paths: inspection.conflictedPaths,
1054
+ git_status: inspection.gitStatus,
1055
+ git_diff_summary: inspection.gitDiffSummary,
1056
+ last_error: lastErrorForGitInspection(status, inspection)
1057
+ };
1058
+ }
1059
+ function lastErrorForGitInspection(status, inspection) {
1060
+ if (inspection.syncStatus === "conflicted") {
1061
+ return "Results repo has unresolved git conflicts";
1062
+ }
1063
+ if (inspection.syncStatus === "diverged") {
1064
+ return "Results repo local and remote histories have diverged";
1065
+ }
1066
+ if (inspection.syncStatus === "dirty") {
1067
+ if (status.auto_push === false) {
1068
+ return "Results repo has uncommitted changes and auto_push is disabled";
1069
+ }
1070
+ if (!areSafeResultsRepoPaths(inspection.dirtyPaths)) {
1071
+ return "Results repo has non-results working tree changes";
1072
+ }
1073
+ }
1074
+ return void 0;
1075
+ }
1076
+ function withBlockedStatus(status, blockReason, flags) {
1077
+ return {
1078
+ ...status,
1079
+ blocked: true,
1080
+ block_reason: blockReason,
1081
+ ...flags?.pullPerformed !== void 0 && { pull_performed: flags.pullPerformed },
1082
+ ...flags?.pushPerformed !== void 0 && { push_performed: flags.pushPerformed },
1083
+ ...flags?.commitCreated !== void 0 && { commit_created: flags.commitCreated }
1084
+ };
1085
+ }
1086
+ function withActionFlags(status, flags) {
1087
+ return {
1088
+ ...status,
1089
+ blocked: false,
1090
+ pull_performed: flags.pullPerformed,
1091
+ push_performed: flags.pushPerformed,
1092
+ commit_created: flags.commitCreated
1093
+ };
1094
+ }
1095
+ function areSafeResultsRepoPaths(paths) {
1096
+ return paths.length > 0 && paths.every(
1097
+ (p) => p === RESULTS_REPO_RESULTS_DIR || p.startsWith(`${RESULTS_REPO_RESULTS_DIR}/`)
1098
+ );
1099
+ }
1100
+ async function getAheadPaths(repoDir, upstream) {
1101
+ if (!upstream) {
1102
+ return [];
1103
+ }
1104
+ const { stdout } = await runGit(["diff", "--name-only", `${upstream}..HEAD`], {
1105
+ cwd: repoDir,
1106
+ check: false
1107
+ });
1108
+ return stdout.split(/\r?\n/).map((line) => line.trim()).filter(Boolean).sort();
1109
+ }
1110
+ function getPushTargetBranch(upstream, baseBranch) {
1111
+ return upstream?.startsWith("origin/") ? upstream.slice("origin/".length) : baseBranch;
1112
+ }
1113
+ async function statusFromInspection(normalized, repoDir) {
1114
+ return withGitInspection(getResultsRepoStatus(normalized), await inspectResultsRepoGit(repoDir));
1115
+ }
1116
+ async function getResultsRepoSyncStatus(config) {
1117
+ const baseStatus = getResultsRepoStatus(config);
1118
+ if (!config) {
1119
+ return baseStatus;
1120
+ }
1121
+ const normalized = normalizeResultsConfig(config);
1122
+ if (activeResultsRepoSyncs.has(normalized.path)) {
1123
+ return {
1124
+ ...baseStatus,
1125
+ sync_status: "syncing"
1126
+ };
1127
+ }
1128
+ if (!existsSync(normalized.path) || !await isGitRepository(normalized.path)) {
1129
+ return {
1130
+ ...baseStatus,
1131
+ sync_status: "unavailable"
1132
+ };
1133
+ }
1134
+ try {
1135
+ return withGitInspection(baseStatus, await inspectResultsRepoGit(normalized.path));
1136
+ } catch (error) {
1137
+ return {
1138
+ ...baseStatus,
1139
+ sync_status: "unavailable",
1140
+ last_error: getStatusMessage(error)
1141
+ };
1142
+ }
1143
+ }
913
1144
  async function syncResultsRepo(config) {
914
1145
  const normalized = normalizeResultsConfig(config);
915
1146
  try {
@@ -927,6 +1158,180 @@ async function syncResultsRepo(config) {
927
1158
  }
928
1159
  return getResultsRepoStatus(normalized);
929
1160
  }
1161
+ function getStatusMessage(error) {
1162
+ return error instanceof Error ? error.message : String(error);
1163
+ }
1164
+ async function syncResultsRepoForProject(config) {
1165
+ const normalized = normalizeResultsConfig(config);
1166
+ const syncKey = normalized.path;
1167
+ if (activeResultsRepoSyncs.has(syncKey)) {
1168
+ return {
1169
+ ...await getResultsRepoSyncStatus(normalized),
1170
+ sync_status: "syncing",
1171
+ blocked: true,
1172
+ block_reason: "Results repo sync is already in progress"
1173
+ };
1174
+ }
1175
+ activeResultsRepoSyncs.add(syncKey);
1176
+ let pullPerformed = false;
1177
+ let pushPerformed = false;
1178
+ let commitCreated = false;
1179
+ try {
1180
+ const repoDir = await ensureResultsRepoClone(normalized);
1181
+ await fetchResultsRepo(repoDir);
1182
+ let inspection = await inspectResultsRepoGit(repoDir);
1183
+ if (inspection.syncStatus === "conflicted") {
1184
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1185
+ updateStatusFile(normalized, {
1186
+ last_error: "Results repo has unresolved git conflicts"
1187
+ });
1188
+ return withBlockedStatus(status, "Results repo has unresolved git conflicts", {
1189
+ pullPerformed,
1190
+ pushPerformed,
1191
+ commitCreated
1192
+ });
1193
+ }
1194
+ if (inspection.syncStatus === "dirty") {
1195
+ if (!normalized.auto_push) {
1196
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1197
+ updateStatusFile(normalized, {
1198
+ last_error: "Results repo has uncommitted changes and auto_push is disabled"
1199
+ });
1200
+ return withBlockedStatus(
1201
+ status,
1202
+ "Results repo has uncommitted changes and auto_push is disabled",
1203
+ {
1204
+ pullPerformed,
1205
+ pushPerformed,
1206
+ commitCreated
1207
+ }
1208
+ );
1209
+ }
1210
+ if (!areSafeResultsRepoPaths(inspection.dirtyPaths)) {
1211
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1212
+ updateStatusFile(normalized, {
1213
+ last_error: "Results repo has non-results working tree changes"
1214
+ });
1215
+ return withBlockedStatus(status, "Results repo has non-results working tree changes", {
1216
+ pullPerformed,
1217
+ pushPerformed,
1218
+ commitCreated
1219
+ });
1220
+ }
1221
+ if ((inspection.behind ?? 0) > 0) {
1222
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1223
+ const reason = "Results repo has uncommitted result changes and remote changes";
1224
+ updateStatusFile(normalized, { last_error: reason });
1225
+ return withBlockedStatus(status, reason, {
1226
+ pullPerformed,
1227
+ pushPerformed,
1228
+ commitCreated
1229
+ });
1230
+ }
1231
+ await runGit(["add", "--all", "--", RESULTS_REPO_RESULTS_DIR], { cwd: repoDir });
1232
+ await runGit(["commit", "-m", "chore(results): sync local result metadata"], {
1233
+ cwd: repoDir
1234
+ });
1235
+ commitCreated = true;
1236
+ inspection = await inspectResultsRepoGit(repoDir);
1237
+ }
1238
+ if (inspection.syncStatus === "diverged") {
1239
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1240
+ updateStatusFile(normalized, {
1241
+ last_error: "Results repo local and remote histories have diverged"
1242
+ });
1243
+ return withBlockedStatus(status, "Results repo local and remote histories have diverged", {
1244
+ pullPerformed,
1245
+ pushPerformed,
1246
+ commitCreated
1247
+ });
1248
+ }
1249
+ if ((inspection.behind ?? 0) > 0 && (inspection.ahead ?? 0) === 0) {
1250
+ if (!inspection.upstream) {
1251
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1252
+ updateStatusFile(normalized, {
1253
+ last_error: "Results repo has no upstream branch to pull from"
1254
+ });
1255
+ return withBlockedStatus(status, "Results repo has no upstream branch to pull from", {
1256
+ pullPerformed,
1257
+ pushPerformed,
1258
+ commitCreated
1259
+ });
1260
+ }
1261
+ try {
1262
+ await runGit(["merge", "--ff-only", inspection.upstream], { cwd: repoDir });
1263
+ pullPerformed = true;
1264
+ inspection = await inspectResultsRepoGit(repoDir);
1265
+ } catch (error) {
1266
+ inspection = await inspectResultsRepoGit(repoDir);
1267
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1268
+ const reason = `Results repo could not be fast-forwarded: ${getStatusMessage(error)}`;
1269
+ updateStatusFile(normalized, { last_error: reason });
1270
+ return withBlockedStatus(status, reason, {
1271
+ pullPerformed,
1272
+ pushPerformed,
1273
+ commitCreated
1274
+ });
1275
+ }
1276
+ }
1277
+ if ((inspection.ahead ?? 0) > 0) {
1278
+ if (!normalized.auto_push) {
1279
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1280
+ return withActionFlags(status, {
1281
+ pullPerformed,
1282
+ pushPerformed,
1283
+ commitCreated
1284
+ });
1285
+ }
1286
+ const aheadPaths = await getAheadPaths(repoDir, inspection.upstream);
1287
+ if (!inspection.upstream || !areSafeResultsRepoPaths(aheadPaths)) {
1288
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1289
+ const reason = !inspection.upstream ? "Results repo has no upstream branch to push to" : "Results repo has non-results committed changes";
1290
+ updateStatusFile(normalized, { last_error: reason });
1291
+ return withBlockedStatus(status, reason, {
1292
+ pullPerformed,
1293
+ pushPerformed,
1294
+ commitCreated
1295
+ });
1296
+ }
1297
+ const baseBranch = await resolveDefaultBranch(repoDir);
1298
+ const targetBranch = getPushTargetBranch(inspection.upstream, baseBranch);
1299
+ try {
1300
+ await runGit(["push", "origin", `HEAD:${targetBranch}`], { cwd: repoDir });
1301
+ pushPerformed = true;
1302
+ await fetchResultsRepo(repoDir);
1303
+ inspection = await inspectResultsRepoGit(repoDir);
1304
+ } catch (error) {
1305
+ await fetchResultsRepo(repoDir).catch(() => void 0);
1306
+ inspection = await inspectResultsRepoGit(repoDir);
1307
+ const status = withGitInspection(getResultsRepoStatus(normalized), inspection);
1308
+ const reason = `Results repo push was rejected: ${getStatusMessage(error)}`;
1309
+ updateStatusFile(normalized, { last_error: reason });
1310
+ return withBlockedStatus(status, reason, {
1311
+ pullPerformed,
1312
+ pushPerformed,
1313
+ commitCreated
1314
+ });
1315
+ }
1316
+ }
1317
+ updateStatusFile(normalized, {
1318
+ last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1319
+ last_error: void 0
1320
+ });
1321
+ return withActionFlags(await statusFromInspection(normalized, repoDir), {
1322
+ pullPerformed,
1323
+ pushPerformed,
1324
+ commitCreated
1325
+ });
1326
+ } catch (error) {
1327
+ updateStatusFile(normalized, {
1328
+ last_error: withFriendlyGitHubAuthError(error).message
1329
+ });
1330
+ throw withFriendlyGitHubAuthError(error);
1331
+ } finally {
1332
+ activeResultsRepoSyncs.delete(syncKey);
1333
+ }
1334
+ }
930
1335
  async function checkoutResultsRepoBranch(config, branchName) {
931
1336
  const normalized = normalizeResultsConfig(config);
932
1337
  const repoDir = await ensureResultsRepoClone(normalized);
@@ -945,8 +1350,8 @@ async function prepareResultsRepoBranch(config, branchName) {
945
1350
  const cloneDir = await ensureResultsRepoClone(normalized);
946
1351
  const baseBranch = await resolveDefaultBranch(cloneDir);
947
1352
  await fetchResultsRepo(cloneDir);
948
- const worktreeRoot = await mkdtemp(path4.join(os.tmpdir(), "agentv-results-repo-"));
949
- const worktreeDir = path4.join(worktreeRoot, "repo");
1353
+ const worktreeRoot = await mkdtemp(path3.join(os.tmpdir(), "agentv-results-repo-"));
1354
+ const worktreeDir = path3.join(worktreeRoot, "repo");
950
1355
  await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
951
1356
  cwd: cloneDir
952
1357
  });
@@ -965,12 +1370,12 @@ async function prepareResultsRepoBranch(config, branchName) {
965
1370
  }
966
1371
  async function stageResultsArtifacts(params) {
967
1372
  rmSync(params.destinationDir, { recursive: true, force: true });
968
- mkdirSync(path4.dirname(params.destinationDir), { recursive: true });
1373
+ mkdirSync(path3.dirname(params.destinationDir), { recursive: true });
969
1374
  await cp(params.sourceDir, params.destinationDir, { recursive: true });
970
1375
  }
971
1376
  function resolveResultsRepoRunsDir(config) {
972
1377
  const normalized = normalizeResultsConfig(config);
973
- return path4.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
1378
+ return path3.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
974
1379
  }
975
1380
  async function directorySizeBytes(targetPath) {
976
1381
  const entry = await stat(targetPath);
@@ -979,7 +1384,7 @@ async function directorySizeBytes(targetPath) {
979
1384
  }
980
1385
  let total = 0;
981
1386
  for (const child of await readdir(targetPath, { withFileTypes: true })) {
982
- total += await directorySizeBytes(path4.join(targetPath, child.name));
1387
+ total += await directorySizeBytes(path3.join(targetPath, child.name));
983
1388
  }
984
1389
  return total;
985
1390
  }
@@ -1028,12 +1433,40 @@ async function createDraftResultsPr(params) {
1028
1433
  return stdout.trim();
1029
1434
  }
1030
1435
  var DIRECT_PUSH_MAX_RETRIES = 3;
1436
+ async function hasUnpushedCommits(repoDir, baseBranch) {
1437
+ const { stdout } = await runGit(["rev-list", "--count", `origin/${baseBranch}..HEAD`], {
1438
+ cwd: repoDir,
1439
+ check: false
1440
+ });
1441
+ return Number.parseInt(stdout.trim(), 10) > 0;
1442
+ }
1443
+ async function pushDirectResultsToBase(params) {
1444
+ for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
1445
+ try {
1446
+ await runGit(["push", "origin", `HEAD:${params.baseBranch}`], { cwd: params.repoDir });
1447
+ updateStatusFile(params.normalized, {
1448
+ last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1449
+ last_error: void 0
1450
+ });
1451
+ return;
1452
+ } catch (error) {
1453
+ const message = error instanceof Error ? error.message : String(error);
1454
+ if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
1455
+ await fetchResultsRepo(params.repoDir);
1456
+ await runGit(["rebase", `origin/${params.baseBranch}`], { cwd: params.repoDir });
1457
+ } else {
1458
+ throw error;
1459
+ }
1460
+ }
1461
+ }
1462
+ }
1031
1463
  async function directPushResults(params) {
1032
1464
  const normalized = normalizeResultsConfig(params.config);
1033
1465
  const repoDir = await ensureResultsRepoClone(normalized);
1034
1466
  const baseBranch = await resolveDefaultBranch(repoDir);
1035
1467
  await fetchResultsRepo(repoDir);
1036
- const destinationDir = path4.join(
1468
+ const targetRunId = buildGitRunId(params.destinationPath);
1469
+ const destinationDir = path3.join(
1037
1470
  repoDir,
1038
1471
  RESULTS_REPO_RESULTS_DIR,
1039
1472
  "runs",
@@ -1050,40 +1483,24 @@ async function directPushResults(params) {
1050
1483
  check: false
1051
1484
  });
1052
1485
  if (status.trim().length === 0) {
1053
- return false;
1054
- }
1055
- await runGit(
1056
- [
1057
- "commit",
1058
- "-m",
1059
- params.commitMessage,
1060
- "-m",
1061
- `Agentv-Run: ${buildGitRunId(params.destinationPath)}`
1062
- ],
1063
- { cwd: repoDir }
1064
- );
1065
- for (let attempt = 1; attempt <= DIRECT_PUSH_MAX_RETRIES; attempt++) {
1066
- try {
1067
- await runGit(["push", "origin", `HEAD:${baseBranch}`], { cwd: repoDir });
1068
- updateStatusFile(normalized, {
1069
- last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
1070
- last_error: void 0
1071
- });
1072
- return true;
1073
- } catch (error) {
1074
- const message = error instanceof Error ? error.message : String(error);
1075
- if (attempt < DIRECT_PUSH_MAX_RETRIES && message.includes("non-fast-forward")) {
1076
- await fetchResultsRepo(repoDir);
1077
- await runGit(["rebase", `origin/${baseBranch}`], { cwd: repoDir });
1078
- } else {
1079
- throw error;
1486
+ if (await hasUnpushedCommits(repoDir, baseBranch)) {
1487
+ const aheadPaths = await getAheadPaths(repoDir, `origin/${baseBranch}`);
1488
+ if (!areSafeResultsRepoPaths(aheadPaths)) {
1489
+ throw new Error("Results repo has non-results committed changes");
1080
1490
  }
1491
+ await pushDirectResultsToBase({ normalized, repoDir, baseBranch });
1492
+ return true;
1081
1493
  }
1494
+ return false;
1082
1495
  }
1083
- return false;
1496
+ await runGit(["commit", "-m", params.commitMessage, "-m", `Agentv-Run: ${targetRunId}`], {
1497
+ cwd: repoDir
1498
+ });
1499
+ await pushDirectResultsToBase({ normalized, repoDir, baseBranch });
1500
+ return true;
1084
1501
  }
1085
1502
  function buildGitRunId(relativeRunPath) {
1086
- const normalized = relativeRunPath.split(path4.sep).join("/");
1503
+ const normalized = relativeRunPath.split(path3.sep).join("/");
1087
1504
  const segments = normalized.split("/").filter(Boolean);
1088
1505
  if (segments.length >= 2) {
1089
1506
  const experiment = segments.slice(0, -1).join("/");
@@ -1207,10 +1624,11 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1207
1624
  const runs = blobs.flatMap((blob, index) => {
1208
1625
  const benchmarkPath = benchmarkPaths[index];
1209
1626
  const benchmark = JSON.parse(blob.content.toString("utf8"));
1210
- const runDir = path4.posix.dirname(benchmarkPath);
1211
- const relativeRunPath = path4.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1627
+ const runDir = path3.posix.dirname(benchmarkPath);
1628
+ const relativeRunPath = path3.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
1212
1629
  const runId = buildGitRunId(relativeRunPath);
1213
- const timestamp = benchmark.metadata?.timestamp?.trim() || path4.posix.basename(runDir);
1630
+ const timestamp = benchmark.metadata?.timestamp?.trim() || path3.posix.basename(runDir);
1631
+ const displayName = benchmark.metadata?.display_name?.trim() || path3.posix.basename(runDir);
1214
1632
  const targets = benchmark.metadata?.targets ?? [];
1215
1633
  const passRate = computeAveragePassRate(benchmark.run_summary);
1216
1634
  return [
@@ -1220,9 +1638,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1220
1638
  timestamp,
1221
1639
  ...passRate !== void 0 && { pass_rate: passRate },
1222
1640
  ...targets.length === 1 && targets[0] ? { target: targets[0] } : {},
1223
- manifest_path: path4.posix.join(runDir, "index.jsonl"),
1641
+ manifest_path: path3.posix.join(runDir, "index.jsonl"),
1224
1642
  benchmark_path: benchmarkPath,
1225
- display_name: path4.posix.basename(runDir),
1643
+ display_name: displayName,
1226
1644
  test_count: benchmark.metadata?.tests_run?.length ?? 0,
1227
1645
  avg_score: 0,
1228
1646
  size_bytes: blob.size
@@ -1233,9 +1651,9 @@ async function listGitRuns(repoDir, ref = "origin/main") {
1233
1651
  return runs;
1234
1652
  }
1235
1653
  async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
1236
- const normalizedRunPath = relativeRunPath.split(path4.sep).join("/");
1237
- const runTreePath = path4.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
1238
- const targetRunDir = path4.join(repoDir, ...runTreePath.split("/"));
1654
+ const normalizedRunPath = relativeRunPath.split(path3.sep).join("/");
1655
+ const runTreePath = path3.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
1656
+ const targetRunDir = path3.join(repoDir, ...runTreePath.split("/"));
1239
1657
  const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
1240
1658
  cwd: repoDir
1241
1659
  });
@@ -1251,16 +1669,16 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
1251
1669
  `Expected ${filePaths.length} git blobs but received ${blobs.length} while materializing results run`
1252
1670
  );
1253
1671
  }
1254
- const tempRoot = mkdtempSync(path4.join(repoDir, ".agentv-run-"));
1255
- const tempRunDir = path4.join(tempRoot, "run");
1672
+ const tempRoot = mkdtempSync(path3.join(repoDir, ".agentv-run-"));
1673
+ const tempRunDir = path3.join(tempRoot, "run");
1256
1674
  try {
1257
1675
  for (const [index, filePath] of filePaths.entries()) {
1258
- const relativeFilePath = path4.posix.relative(runTreePath, filePath);
1259
- const absolutePath = path4.join(tempRunDir, ...relativeFilePath.split("/"));
1260
- mkdirSync(path4.dirname(absolutePath), { recursive: true });
1676
+ const relativeFilePath = path3.posix.relative(runTreePath, filePath);
1677
+ const absolutePath = path3.join(tempRunDir, ...relativeFilePath.split("/"));
1678
+ mkdirSync(path3.dirname(absolutePath), { recursive: true });
1261
1679
  writeFileSync(absolutePath, blobs[index].content);
1262
1680
  }
1263
- mkdirSync(path4.dirname(targetRunDir), { recursive: true });
1681
+ mkdirSync(path3.dirname(targetRunDir), { recursive: true });
1264
1682
  try {
1265
1683
  renameSync(tempRunDir, targetRunDir);
1266
1684
  } catch (error) {
@@ -1276,67 +1694,11 @@ async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main")
1276
1694
  }
1277
1695
 
1278
1696
  // src/projects.ts
1279
- import {
1280
- existsSync as existsSync2,
1281
- mkdirSync as mkdirSync2,
1282
- readFileSync as readFileSync3,
1283
- readdirSync,
1284
- renameSync as renameSync2,
1285
- statSync,
1286
- unlinkSync,
1287
- writeFileSync as writeFileSync2
1288
- } from "node:fs";
1289
- import path5 from "node:path";
1697
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync, statSync, writeFileSync as writeFileSync2 } from "node:fs";
1698
+ import path4 from "node:path";
1290
1699
  import { stringify as stringifyYaml } from "yaml";
1291
1700
  function getProjectsRegistryPath() {
1292
- return path5.join(getAgentvConfigDir(), "projects.yaml");
1293
- }
1294
- function getLegacyBenchmarksRegistryPath() {
1295
- return path5.join(getAgentvConfigDir(), "benchmarks.yaml");
1296
- }
1297
- function migrateLegacyBenchmarksFile() {
1298
- const newPath = getProjectsRegistryPath();
1299
- const oldPath = getLegacyBenchmarksRegistryPath();
1300
- const newExists = existsSync2(newPath);
1301
- const oldExists = existsSync2(oldPath);
1302
- if (!oldExists) return;
1303
- if (newExists) {
1304
- console.warn(
1305
- `[agentv] Both ${oldPath} and ${newPath} exist. Using ${path5.basename(newPath)}; delete ${path5.basename(oldPath)} when you've confirmed the new file is correct.`
1306
- );
1307
- return;
1308
- }
1309
- let parsed = null;
1310
- try {
1311
- const raw = readFileSync3(oldPath, "utf-8");
1312
- parsed = parseYamlValue(raw);
1313
- } catch (err) {
1314
- console.warn(
1315
- `[agentv] Failed to read legacy ${path5.basename(oldPath)} for migration: ${err.message}. Leaving the file in place; you may need to migrate it manually.`
1316
- );
1317
- return;
1318
- }
1319
- const entries = parsed && typeof parsed === "object" && Array.isArray(parsed.benchmarks) ? parsed.benchmarks : [];
1320
- const newContent = stringifyYaml({ projects: entries });
1321
- const tempPath = `${newPath}.migrating`;
1322
- try {
1323
- mkdirSync2(path5.dirname(newPath), { recursive: true });
1324
- writeFileSync2(tempPath, newContent, "utf-8");
1325
- renameSync2(tempPath, newPath);
1326
- unlinkSync(oldPath);
1327
- } catch (err) {
1328
- try {
1329
- if (existsSync2(tempPath)) unlinkSync(tempPath);
1330
- } catch {
1331
- }
1332
- console.warn(
1333
- `[agentv] Failed to migrate ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)}: ${err.message}. Legacy file left in place.`
1334
- );
1335
- return;
1336
- }
1337
- console.log(
1338
- `[agentv] Migrated registry: ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)} (${entries.length} entr${entries.length === 1 ? "y" : "ies"})`
1339
- );
1701
+ return path4.join(getAgentvConfigDir(), "config.yaml");
1340
1702
  }
1341
1703
  function fromYaml(raw) {
1342
1704
  if (!raw || typeof raw !== "object") return null;
@@ -1357,6 +1719,18 @@ function fromYaml(raw) {
1357
1719
  entry.source = { url: s.url, ref: s.ref };
1358
1720
  }
1359
1721
  }
1722
+ if (e.results && typeof e.results === "object") {
1723
+ const r = e.results;
1724
+ if (r.mode === "github" && typeof r.repo === "string" && r.repo.trim().length > 0) {
1725
+ entry.results = {
1726
+ mode: "github",
1727
+ repo: r.repo.trim(),
1728
+ ...typeof r.path === "string" && r.path.trim().length > 0 ? { path: r.path.trim() } : {},
1729
+ ...typeof r.auto_push === "boolean" ? { autoPush: r.auto_push } : {},
1730
+ ...typeof r.branch_prefix === "string" && r.branch_prefix.trim().length > 0 ? { branchPrefix: r.branch_prefix.trim() } : {}
1731
+ };
1732
+ }
1733
+ }
1360
1734
  return entry;
1361
1735
  }
1362
1736
  function toYaml(entry) {
@@ -1370,10 +1744,20 @@ function toYaml(entry) {
1370
1744
  if (entry.source) {
1371
1745
  yaml.source = { url: entry.source.url, ref: entry.source.ref };
1372
1746
  }
1747
+ if (entry.results) {
1748
+ yaml.results = {
1749
+ mode: entry.results.mode,
1750
+ repo: entry.results.repo,
1751
+ ...entry.results.path !== void 0 && { path: entry.results.path },
1752
+ ...entry.results.autoPush !== void 0 && { auto_push: entry.results.autoPush },
1753
+ ...entry.results.branchPrefix !== void 0 && {
1754
+ branch_prefix: entry.results.branchPrefix
1755
+ }
1756
+ };
1757
+ }
1373
1758
  return yaml;
1374
1759
  }
1375
1760
  function loadProjectRegistry() {
1376
- migrateLegacyBenchmarksFile();
1377
1761
  const registryPath = getProjectsRegistryPath();
1378
1762
  if (!existsSync2(registryPath)) {
1379
1763
  return { projects: [] };
@@ -1393,15 +1777,24 @@ function loadProjectRegistry() {
1393
1777
  }
1394
1778
  function saveProjectRegistry(registry) {
1395
1779
  const registryPath = getProjectsRegistryPath();
1396
- const dir = path5.dirname(registryPath);
1780
+ const dir = path4.dirname(registryPath);
1397
1781
  if (!existsSync2(dir)) {
1398
1782
  mkdirSync2(dir, { recursive: true });
1399
1783
  }
1400
- const payload = { projects: registry.projects.map(toYaml) };
1784
+ const payload = { ...readHomeConfig(registryPath), projects: registry.projects.map(toYaml) };
1401
1785
  writeFileSync2(registryPath, stringifyYaml(payload), "utf-8");
1402
1786
  }
1787
+ function readHomeConfig(configPath) {
1788
+ if (!existsSync2(configPath)) return {};
1789
+ try {
1790
+ const parsed = parseYamlValue(readFileSync3(configPath, "utf-8"));
1791
+ return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
1792
+ } catch {
1793
+ return {};
1794
+ }
1795
+ }
1403
1796
  function deriveProjectId(dirPath, existingIds) {
1404
- const base = path5.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1797
+ const base = path4.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1405
1798
  let candidate = base || "project";
1406
1799
  let suffix = 2;
1407
1800
  while (existingIds.includes(candidate)) {
@@ -1411,11 +1804,11 @@ function deriveProjectId(dirPath, existingIds) {
1411
1804
  return candidate;
1412
1805
  }
1413
1806
  function addProject(projectPath) {
1414
- const absPath = path5.resolve(projectPath);
1807
+ const absPath = path4.resolve(projectPath);
1415
1808
  if (!existsSync2(absPath)) {
1416
1809
  throw new Error(`Directory not found: ${absPath}`);
1417
1810
  }
1418
- if (!existsSync2(path5.join(absPath, ".agentv"))) {
1811
+ if (!existsSync2(path4.join(absPath, ".agentv"))) {
1419
1812
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
1420
1813
  }
1421
1814
  const registry = loadProjectRegistry();
@@ -1429,7 +1822,7 @@ function addProject(projectPath) {
1429
1822
  absPath,
1430
1823
  registry.projects.map((p) => p.id)
1431
1824
  ),
1432
- name: path5.basename(absPath),
1825
+ name: path4.basename(absPath),
1433
1826
  path: absPath,
1434
1827
  addedAt: now,
1435
1828
  lastOpenedAt: now
@@ -1449,6 +1842,14 @@ function removeProject(projectId) {
1449
1842
  function getProject(projectId) {
1450
1843
  return loadProjectRegistry().projects.find((p) => p.id === projectId);
1451
1844
  }
1845
+ function getProjectForPath(fsPath) {
1846
+ const absPath = path4.resolve(fsPath);
1847
+ return loadProjectRegistry().projects.filter((p) => {
1848
+ const projectPath = path4.resolve(p.path);
1849
+ const relative = path4.relative(projectPath, absPath);
1850
+ return relative === "" || !relative.startsWith("..") && !path4.isAbsolute(relative);
1851
+ }).sort((a, b) => path4.resolve(b.path).length - path4.resolve(a.path).length)[0];
1852
+ }
1452
1853
  function touchProject(projectId) {
1453
1854
  const registry = loadProjectRegistry();
1454
1855
  const entry = registry.projects.find((p) => p.id === projectId);
@@ -1458,14 +1859,14 @@ function touchProject(projectId) {
1458
1859
  }
1459
1860
  }
1460
1861
  function discoverProjects(rootDir, maxDepth = 2) {
1461
- const absRoot = path5.resolve(rootDir);
1862
+ const absRoot = path4.resolve(rootDir);
1462
1863
  if (!existsSync2(absRoot) || !statSync(absRoot).isDirectory()) {
1463
1864
  return [];
1464
1865
  }
1465
1866
  const results = [];
1466
1867
  function scan(dir, depth) {
1467
1868
  if (depth > maxDepth) return;
1468
- if (existsSync2(path5.join(dir, ".agentv"))) {
1869
+ if (existsSync2(path4.join(dir, ".agentv"))) {
1469
1870
  results.push(dir);
1470
1871
  return;
1471
1872
  }
@@ -1475,7 +1876,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
1475
1876
  for (const entry of entries) {
1476
1877
  if (!entry.isDirectory()) continue;
1477
1878
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
1478
- scan(path5.join(dir, entry.name), depth + 1);
1879
+ scan(path4.join(dir, entry.name), depth + 1);
1479
1880
  }
1480
1881
  } catch {
1481
1882
  }
@@ -2504,8 +2905,8 @@ function extractResponseItemContent(content) {
2504
2905
  // src/import/codex-session-discovery.ts
2505
2906
  import { readdir as readdir2, stat as stat2 } from "node:fs/promises";
2506
2907
  import { homedir } from "node:os";
2507
- import path6 from "node:path";
2508
- var DEFAULT_SESSIONS_DIR = () => path6.join(homedir(), ".codex", "sessions");
2908
+ import path5 from "node:path";
2909
+ var DEFAULT_SESSIONS_DIR = () => path5.join(homedir(), ".codex", "sessions");
2509
2910
  async function discoverCodexSessions(opts) {
2510
2911
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
2511
2912
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
@@ -2517,7 +2918,7 @@ async function discoverCodexSessions(opts) {
2517
2918
  return [];
2518
2919
  }
2519
2920
  for (const year of yearDirs) {
2520
- const yearPath = path6.join(sessionsDir, year);
2921
+ const yearPath = path5.join(sessionsDir, year);
2521
2922
  let monthDirs;
2522
2923
  try {
2523
2924
  monthDirs = await readdir2(yearPath);
@@ -2525,7 +2926,7 @@ async function discoverCodexSessions(opts) {
2525
2926
  continue;
2526
2927
  }
2527
2928
  for (const month of monthDirs) {
2528
- const monthPath = path6.join(yearPath, month);
2929
+ const monthPath = path5.join(yearPath, month);
2529
2930
  let dayDirs;
2530
2931
  try {
2531
2932
  dayDirs = await readdir2(monthPath);
@@ -2537,7 +2938,7 @@ async function discoverCodexSessions(opts) {
2537
2938
  const dirDate = `${year}-${month}-${day}`;
2538
2939
  if (dirDate !== opts.date) continue;
2539
2940
  }
2540
- const dayPath = path6.join(monthPath, day);
2941
+ const dayPath = path5.join(monthPath, day);
2541
2942
  let files;
2542
2943
  try {
2543
2944
  files = await readdir2(dayPath);
@@ -2546,7 +2947,7 @@ async function discoverCodexSessions(opts) {
2546
2947
  }
2547
2948
  for (const file of files) {
2548
2949
  if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
2549
- const filePath = path6.join(dayPath, file);
2950
+ const filePath = path5.join(dayPath, file);
2550
2951
  const nameWithoutExt = file.replace(/\.jsonl$/, "");
2551
2952
  const parts = nameWithoutExt.split("-");
2552
2953
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
@@ -2569,8 +2970,8 @@ async function discoverCodexSessions(opts) {
2569
2970
  // src/import/session-discovery.ts
2570
2971
  import { readdir as readdir3, stat as stat3 } from "node:fs/promises";
2571
2972
  import { homedir as homedir2 } from "node:os";
2572
- import path7 from "node:path";
2573
- var DEFAULT_PROJECTS_DIR = () => path7.join(homedir2(), ".claude", "projects");
2973
+ import path6 from "node:path";
2974
+ var DEFAULT_PROJECTS_DIR = () => path6.join(homedir2(), ".claude", "projects");
2574
2975
  function encodeProjectPath(projectPath) {
2575
2976
  return projectPath.replace(/\//g, "-");
2576
2977
  }
@@ -2589,7 +2990,7 @@ async function discoverClaudeSessions(opts) {
2589
2990
  }
2590
2991
  const sessions = [];
2591
2992
  for (const projectDir of projectDirs) {
2592
- const dirPath = path7.join(projectsDir, projectDir);
2993
+ const dirPath = path6.join(projectsDir, projectDir);
2593
2994
  let entries;
2594
2995
  try {
2595
2996
  entries = await readdir3(dirPath);
@@ -2600,7 +3001,7 @@ async function discoverClaudeSessions(opts) {
2600
3001
  if (!entry.endsWith(".jsonl")) continue;
2601
3002
  const sessionId = entry.replace(/\.jsonl$/, "");
2602
3003
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
2603
- const filePath = path7.join(dirPath, entry);
3004
+ const filePath = path6.join(dirPath, entry);
2604
3005
  let updatedAt;
2605
3006
  try {
2606
3007
  const fileStat = await stat3(filePath);
@@ -2621,7 +3022,7 @@ async function discoverClaudeSessions(opts) {
2621
3022
  }
2622
3023
 
2623
3024
  // src/import/types.ts
2624
- import { readFile as readFile3 } from "node:fs/promises";
3025
+ import { readFile as readFile2 } from "node:fs/promises";
2625
3026
  function toTranscriptJsonLines(entry, options) {
2626
3027
  const source = {
2627
3028
  provider: entry.source.provider,
@@ -2708,11 +3109,11 @@ function groupTranscriptJsonLines(lines) {
2708
3109
  }));
2709
3110
  }
2710
3111
  async function readTranscriptJsonl(filePath) {
2711
- const text = await readFile3(filePath, "utf8");
3112
+ const text = await readFile2(filePath, "utf8");
2712
3113
  return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
2713
3114
  }
2714
3115
  async function readTranscriptFile(filePath) {
2715
- return readFile3(filePath, "utf8");
3116
+ return readFile2(filePath, "utf8");
2716
3117
  }
2717
3118
 
2718
3119
  // src/import/transcript-provider.ts
@@ -2785,12 +3186,30 @@ export {
2785
3186
  GraderRegistry,
2786
3187
  LatencyGrader,
2787
3188
  LlmGrader,
3189
+ NORMALIZED_REDACTION_LEVELS,
3190
+ NORMALIZED_TOOL_STATUSES,
3191
+ NORMALIZED_TRACE_EVENT_TYPES,
3192
+ NORMALIZED_TRACE_SOURCE_KINDS,
3193
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
3194
+ NormalizedRawEvidenceWireSchema,
3195
+ NormalizedRedactionStateWireSchema,
3196
+ NormalizedTraceBranchWireSchema,
3197
+ NormalizedTraceErrorWireSchema,
3198
+ NormalizedTraceEventWireSchema,
3199
+ NormalizedTraceMessageWireSchema,
3200
+ NormalizedTraceModelWireSchema,
3201
+ NormalizedTraceSessionWireSchema,
3202
+ NormalizedTraceSourceRefWireSchema,
3203
+ NormalizedTraceSourceWireSchema,
3204
+ NormalizedTraceToolWireSchema,
3205
+ NormalizedTrajectoryWireSchema,
2788
3206
  OTEL_BACKEND_PRESETS,
2789
3207
  OtelStreamingObserver,
2790
3208
  OtelTraceExporter,
2791
3209
  OtlpJsonFileExporter,
2792
3210
  PASS_THRESHOLD,
2793
3211
  ProviderRegistry,
3212
+ RUBRIC_OPERATOR_VALUES,
2794
3213
  RepoManager,
2795
3214
  ResponseCache,
2796
3215
  RunBudgetTracker,
@@ -2820,6 +3239,7 @@ export {
2820
3239
  cleanupWorkspace,
2821
3240
  commitAndPushResultsBranch,
2822
3241
  computeTraceSummary,
3242
+ computeTraceSummaryFromTrajectory,
2823
3243
  computeWorkspaceFingerprint,
2824
3244
  consumeClaudeLogEntries,
2825
3245
  consumeCodexLogEntries,
@@ -2868,15 +3288,19 @@ export {
2868
3288
  findGitRoot,
2869
3289
  formatToolCalls,
2870
3290
  freeformEvaluationSchema,
3291
+ fromNormalizedTrajectoryWire,
2871
3292
  generateRubrics,
2872
3293
  getAgentvConfigDir,
2873
3294
  getAgentvDataDir,
2874
3295
  getAgentvHome,
2875
3296
  getOutputFilenames,
2876
3297
  getProject,
3298
+ getProjectForPath,
2877
3299
  getProjectsRegistryPath,
2878
3300
  getResultsRepoLocalPaths,
2879
3301
  getResultsRepoStatus,
3302
+ getResultsRepoSyncStatus,
3303
+ getSelectedTrajectoryEvents,
2880
3304
  getSubagentsRoot,
2881
3305
  getTextContent,
2882
3306
  getTraceStateRoot,
@@ -2932,6 +3356,7 @@ export {
2932
3356
  resolveAndCreateProvider,
2933
3357
  resolveDelegatedTargetDefinition,
2934
3358
  resolveFileReference,
3359
+ resolveResultsConfigForProject,
2935
3360
  resolveResultsRepoRunsDir,
2936
3361
  resolveResultsRepoUrl,
2937
3362
  resolveTargetDefinition,
@@ -2967,7 +3392,9 @@ export {
2967
3392
  syncProject,
2968
3393
  syncProjects,
2969
3394
  syncResultsRepo,
3395
+ syncResultsRepoForProject,
2970
3396
  toCamelCaseDeep,
3397
+ toNormalizedTrajectoryWire,
2971
3398
  toSnakeCaseDeep,
2972
3399
  toTranscriptJsonLines,
2973
3400
  tokensPerTool,