@agentv/core 4.28.0 → 4.29.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1040,13 +1040,69 @@ async function directPushResults(params) {
1040
1040
  return false;
1041
1041
  }
1042
1042
 
1043
- // src/benchmarks.ts
1044
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync, statSync, writeFileSync as writeFileSync2 } from "node:fs";
1043
+ // src/projects.ts
1044
+ import {
1045
+ existsSync as existsSync2,
1046
+ mkdirSync as mkdirSync2,
1047
+ readFileSync as readFileSync3,
1048
+ readdirSync,
1049
+ renameSync,
1050
+ statSync,
1051
+ unlinkSync,
1052
+ writeFileSync as writeFileSync2
1053
+ } from "node:fs";
1045
1054
  import path5 from "node:path";
1046
1055
  import { stringify as stringifyYaml } from "yaml";
1047
- function getBenchmarksRegistryPath() {
1056
+ function getProjectsRegistryPath() {
1057
+ return path5.join(getAgentvConfigDir(), "projects.yaml");
1058
+ }
1059
+ function getLegacyBenchmarksRegistryPath() {
1048
1060
  return path5.join(getAgentvConfigDir(), "benchmarks.yaml");
1049
1061
  }
1062
+ function migrateLegacyBenchmarksFile() {
1063
+ const newPath = getProjectsRegistryPath();
1064
+ const oldPath = getLegacyBenchmarksRegistryPath();
1065
+ const newExists = existsSync2(newPath);
1066
+ const oldExists = existsSync2(oldPath);
1067
+ if (!oldExists) return;
1068
+ if (newExists) {
1069
+ console.warn(
1070
+ `[agentv] Both ${oldPath} and ${newPath} exist. Using ${path5.basename(newPath)}; delete ${path5.basename(oldPath)} when you've confirmed the new file is correct.`
1071
+ );
1072
+ return;
1073
+ }
1074
+ let parsed = null;
1075
+ try {
1076
+ const raw = readFileSync3(oldPath, "utf-8");
1077
+ parsed = parseYamlValue(raw);
1078
+ } catch (err) {
1079
+ console.warn(
1080
+ `[agentv] Failed to read legacy ${path5.basename(oldPath)} for migration: ${err.message}. Leaving the file in place; you may need to migrate it manually.`
1081
+ );
1082
+ return;
1083
+ }
1084
+ const entries = parsed && typeof parsed === "object" && Array.isArray(parsed.benchmarks) ? parsed.benchmarks : [];
1085
+ const newContent = stringifyYaml({ projects: entries });
1086
+ const tempPath = `${newPath}.migrating`;
1087
+ try {
1088
+ mkdirSync2(path5.dirname(newPath), { recursive: true });
1089
+ writeFileSync2(tempPath, newContent, "utf-8");
1090
+ renameSync(tempPath, newPath);
1091
+ unlinkSync(oldPath);
1092
+ } catch (err) {
1093
+ try {
1094
+ if (existsSync2(tempPath)) unlinkSync(tempPath);
1095
+ } catch {
1096
+ }
1097
+ console.warn(
1098
+ `[agentv] Failed to migrate ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)}: ${err.message}. Legacy file left in place.`
1099
+ );
1100
+ return;
1101
+ }
1102
+ console.log(
1103
+ `[agentv] Migrated registry: ${path5.basename(oldPath)} \u2192 ${path5.basename(newPath)} (${entries.length} entr${entries.length === 1 ? "y" : "ies"})`
1104
+ );
1105
+ }
1050
1106
  function fromYaml(raw) {
1051
1107
  if (!raw || typeof raw !== "object") return null;
1052
1108
  const e = raw;
@@ -1081,36 +1137,37 @@ function toYaml(entry) {
1081
1137
  }
1082
1138
  return yaml;
1083
1139
  }
1084
- function loadBenchmarkRegistry() {
1085
- const registryPath = getBenchmarksRegistryPath();
1140
+ function loadProjectRegistry() {
1141
+ migrateLegacyBenchmarksFile();
1142
+ const registryPath = getProjectsRegistryPath();
1086
1143
  if (!existsSync2(registryPath)) {
1087
- return { benchmarks: [] };
1144
+ return { projects: [] };
1088
1145
  }
1089
1146
  try {
1090
1147
  const raw = readFileSync3(registryPath, "utf-8");
1091
1148
  const parsed = parseYamlValue(raw);
1092
1149
  if (!parsed || typeof parsed !== "object") {
1093
- return { benchmarks: [] };
1150
+ return { projects: [] };
1094
1151
  }
1095
1152
  const env = process.env;
1096
- const benchmarks = Array.isArray(parsed.benchmarks) ? parsed.benchmarks.map((e) => fromYaml(interpolateEnv(e, env))).filter((e) => e !== null) : [];
1097
- return { benchmarks };
1153
+ const projects = Array.isArray(parsed.projects) ? parsed.projects.map((e) => fromYaml(interpolateEnv(e, env))).filter((e) => e !== null) : [];
1154
+ return { projects };
1098
1155
  } catch {
1099
- return { benchmarks: [] };
1156
+ return { projects: [] };
1100
1157
  }
1101
1158
  }
1102
- function saveBenchmarkRegistry(registry) {
1103
- const registryPath = getBenchmarksRegistryPath();
1159
+ function saveProjectRegistry(registry) {
1160
+ const registryPath = getProjectsRegistryPath();
1104
1161
  const dir = path5.dirname(registryPath);
1105
1162
  if (!existsSync2(dir)) {
1106
1163
  mkdirSync2(dir, { recursive: true });
1107
1164
  }
1108
- const payload = { benchmarks: registry.benchmarks.map(toYaml) };
1165
+ const payload = { projects: registry.projects.map(toYaml) };
1109
1166
  writeFileSync2(registryPath, stringifyYaml(payload), "utf-8");
1110
1167
  }
1111
- function deriveBenchmarkId(dirPath, existingIds) {
1168
+ function deriveProjectId(dirPath, existingIds) {
1112
1169
  const base = path5.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
1113
- let candidate = base || "benchmark";
1170
+ let candidate = base || "project";
1114
1171
  let suffix = 2;
1115
1172
  while (existingIds.includes(candidate)) {
1116
1173
  candidate = `${base}-${suffix}`;
@@ -1118,54 +1175,54 @@ function deriveBenchmarkId(dirPath, existingIds) {
1118
1175
  }
1119
1176
  return candidate;
1120
1177
  }
1121
- function addBenchmark(benchmarkPath) {
1122
- const absPath = path5.resolve(benchmarkPath);
1178
+ function addProject(projectPath) {
1179
+ const absPath = path5.resolve(projectPath);
1123
1180
  if (!existsSync2(absPath)) {
1124
1181
  throw new Error(`Directory not found: ${absPath}`);
1125
1182
  }
1126
1183
  if (!existsSync2(path5.join(absPath, ".agentv"))) {
1127
1184
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
1128
1185
  }
1129
- const registry = loadBenchmarkRegistry();
1130
- const existing = registry.benchmarks.find((p) => p.path === absPath);
1186
+ const registry = loadProjectRegistry();
1187
+ const existing = registry.projects.find((p) => p.path === absPath);
1131
1188
  if (existing) {
1132
1189
  return existing;
1133
1190
  }
1134
1191
  const now = (/* @__PURE__ */ new Date()).toISOString();
1135
1192
  const entry = {
1136
- id: deriveBenchmarkId(
1193
+ id: deriveProjectId(
1137
1194
  absPath,
1138
- registry.benchmarks.map((p) => p.id)
1195
+ registry.projects.map((p) => p.id)
1139
1196
  ),
1140
1197
  name: path5.basename(absPath),
1141
1198
  path: absPath,
1142
1199
  addedAt: now,
1143
1200
  lastOpenedAt: now
1144
1201
  };
1145
- registry.benchmarks.push(entry);
1146
- saveBenchmarkRegistry(registry);
1202
+ registry.projects.push(entry);
1203
+ saveProjectRegistry(registry);
1147
1204
  return entry;
1148
1205
  }
1149
- function removeBenchmark(benchmarkId) {
1150
- const registry = loadBenchmarkRegistry();
1151
- const idx = registry.benchmarks.findIndex((p) => p.id === benchmarkId);
1206
+ function removeProject(projectId) {
1207
+ const registry = loadProjectRegistry();
1208
+ const idx = registry.projects.findIndex((p) => p.id === projectId);
1152
1209
  if (idx < 0) return false;
1153
- registry.benchmarks.splice(idx, 1);
1154
- saveBenchmarkRegistry(registry);
1210
+ registry.projects.splice(idx, 1);
1211
+ saveProjectRegistry(registry);
1155
1212
  return true;
1156
1213
  }
1157
- function getBenchmark(benchmarkId) {
1158
- return loadBenchmarkRegistry().benchmarks.find((p) => p.id === benchmarkId);
1214
+ function getProject(projectId) {
1215
+ return loadProjectRegistry().projects.find((p) => p.id === projectId);
1159
1216
  }
1160
- function touchBenchmark(benchmarkId) {
1161
- const registry = loadBenchmarkRegistry();
1162
- const entry = registry.benchmarks.find((p) => p.id === benchmarkId);
1217
+ function touchProject(projectId) {
1218
+ const registry = loadProjectRegistry();
1219
+ const entry = registry.projects.find((p) => p.id === projectId);
1163
1220
  if (entry) {
1164
1221
  entry.lastOpenedAt = (/* @__PURE__ */ new Date()).toISOString();
1165
- saveBenchmarkRegistry(registry);
1222
+ saveProjectRegistry(registry);
1166
1223
  }
1167
1224
  }
1168
- function discoverBenchmarks(rootDir, maxDepth = 2) {
1225
+ function discoverProjects(rootDir, maxDepth = 2) {
1169
1226
  const absRoot = path5.resolve(rootDir);
1170
1227
  if (!existsSync2(absRoot) || !statSync(absRoot).isDirectory()) {
1171
1228
  return [];
@@ -1192,12 +1249,12 @@ function discoverBenchmarks(rootDir, maxDepth = 2) {
1192
1249
  return results.sort();
1193
1250
  }
1194
1251
 
1195
- // src/benchmark-sync.ts
1252
+ // src/project-sync.ts
1196
1253
  import * as childProcess from "node:child_process";
1197
1254
  import { existsSync as existsSync3 } from "node:fs";
1198
- async function syncBenchmark(entry) {
1255
+ async function syncProject(entry) {
1199
1256
  if (!entry.source) {
1200
- throw new Error(`Benchmark '${entry.id}' has no source defined`);
1257
+ throw new Error(`Project '${entry.id}' has no source defined`);
1201
1258
  }
1202
1259
  const { url, ref } = entry.source;
1203
1260
  const dest = entry.path;
@@ -1211,12 +1268,12 @@ async function syncBenchmark(entry) {
1211
1268
  );
1212
1269
  }
1213
1270
  }
1214
- async function syncBenchmarks(entries) {
1271
+ async function syncProjects(entries) {
1215
1272
  for (const entry of entries) {
1216
1273
  if (!entry.source) continue;
1217
- console.log(`Syncing benchmark '${entry.id}' from ${entry.source.url}...`);
1218
- await syncBenchmark(entry);
1219
- console.log(`Benchmark '${entry.id}' synced.`);
1274
+ console.log(`Syncing project '${entry.id}' from ${entry.source.url}...`);
1275
+ await syncProject(entry);
1276
+ console.log(`Project '${entry.id}' synced.`);
1220
1277
  }
1221
1278
  }
1222
1279
 
@@ -2511,7 +2568,7 @@ export {
2511
2568
  TranscriptProvider,
2512
2569
  WorkspaceCreationError,
2513
2570
  WorkspacePoolManager,
2514
- addBenchmark,
2571
+ addProject,
2515
2572
  assembleLlmGraderPrompt,
2516
2573
  avgToolDurationMs,
2517
2574
  buildDirectoryChain,
@@ -2542,17 +2599,17 @@ export {
2542
2599
  createTempWorkspace,
2543
2600
  deepEqual,
2544
2601
  defineConfig,
2545
- deriveBenchmarkId,
2546
2602
  deriveCategory,
2603
+ deriveProjectId,
2547
2604
  detectFormat,
2548
2605
  directPushResults,
2549
2606
  directorySizeBytes,
2550
2607
  discoverAssertions,
2551
- discoverBenchmarks,
2552
2608
  discoverClaudeSessions,
2553
2609
  discoverCodexSessions,
2554
2610
  discoverCopilotSessions,
2555
2611
  discoverGraders,
2612
+ discoverProjects,
2556
2613
  discoverProviders,
2557
2614
  ensureResultsRepoClone,
2558
2615
  ensureVSCodeSubagents,
@@ -2579,9 +2636,9 @@ export {
2579
2636
  generateRubrics,
2580
2637
  getAgentvConfigDir,
2581
2638
  getAgentvHome,
2582
- getBenchmark,
2583
- getBenchmarksRegistryPath,
2584
2639
  getOutputFilenames,
2640
+ getProject,
2641
+ getProjectsRegistryPath,
2585
2642
  getResultsRepoCachePaths,
2586
2643
  getResultsRepoStatus,
2587
2644
  getSubagentsRoot,
@@ -2603,11 +2660,11 @@ export {
2603
2660
  isTestMessageRole,
2604
2661
  killAllTrackedChildren,
2605
2662
  listTargetNames,
2606
- loadBenchmarkRegistry,
2607
2663
  loadConfig,
2608
2664
  loadEvalCaseById,
2609
2665
  loadEvalCases,
2610
2666
  loadEvalSuite,
2667
+ loadProjectRegistry,
2611
2668
  loadTestById,
2612
2669
  loadTestSuite,
2613
2670
  loadTests,
@@ -2633,7 +2690,7 @@ export {
2633
2690
  readTextFile,
2634
2691
  readTranscriptFile,
2635
2692
  readTranscriptJsonl,
2636
- removeBenchmark,
2693
+ removeProject,
2637
2694
  resolveAndCreateProvider,
2638
2695
  resolveDelegatedTargetDefinition,
2639
2696
  resolveFileReference,
@@ -2656,7 +2713,7 @@ export {
2656
2713
  runIsJsonAssertion,
2657
2714
  runRegexAssertion,
2658
2715
  runStartsWithAssertion,
2659
- saveBenchmarkRegistry,
2716
+ saveProjectRegistry,
2660
2717
  scanRepoDeps,
2661
2718
  scoreRangeEvaluationSchema,
2662
2719
  scoreToVerdict,
@@ -2669,14 +2726,14 @@ export {
2669
2726
  subscribeToCopilotSdkLogEntries,
2670
2727
  subscribeToPiLogEntries,
2671
2728
  substituteVariables,
2672
- syncBenchmark,
2673
- syncBenchmarks,
2729
+ syncProject,
2730
+ syncProjects,
2674
2731
  syncResultsRepo,
2675
2732
  toCamelCaseDeep,
2676
2733
  toSnakeCaseDeep,
2677
2734
  toTranscriptJsonLines,
2678
2735
  tokensPerTool,
2679
- touchBenchmark,
2736
+ touchProject,
2680
2737
  trackChild,
2681
2738
  trackedChildCount,
2682
2739
  transpileEvalYaml,