@agentv/core 4.28.0 → 4.29.1-next.1

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -24667,7 +24667,7 @@ __export(index_exports, {
24667
24667
  TranscriptProvider: () => TranscriptProvider,
24668
24668
  WorkspaceCreationError: () => WorkspaceCreationError,
24669
24669
  WorkspacePoolManager: () => WorkspacePoolManager,
24670
- addBenchmark: () => addBenchmark,
24670
+ addProject: () => addProject,
24671
24671
  assembleLlmGraderPrompt: () => assembleLlmGraderPrompt,
24672
24672
  avgToolDurationMs: () => avgToolDurationMs,
24673
24673
  buildDirectoryChain: () => buildDirectoryChain2,
@@ -24698,17 +24698,17 @@ __export(index_exports, {
24698
24698
  createTempWorkspace: () => createTempWorkspace,
24699
24699
  deepEqual: () => deepEqual,
24700
24700
  defineConfig: () => defineConfig,
24701
- deriveBenchmarkId: () => deriveBenchmarkId,
24702
24701
  deriveCategory: () => deriveCategory,
24702
+ deriveProjectId: () => deriveProjectId,
24703
24703
  detectFormat: () => detectFormat,
24704
24704
  directPushResults: () => directPushResults,
24705
24705
  directorySizeBytes: () => directorySizeBytes,
24706
24706
  discoverAssertions: () => discoverAssertions,
24707
- discoverBenchmarks: () => discoverBenchmarks,
24708
24707
  discoverClaudeSessions: () => discoverClaudeSessions,
24709
24708
  discoverCodexSessions: () => discoverCodexSessions,
24710
24709
  discoverCopilotSessions: () => discoverCopilotSessions,
24711
24710
  discoverGraders: () => discoverGraders,
24711
+ discoverProjects: () => discoverProjects,
24712
24712
  discoverProviders: () => discoverProviders,
24713
24713
  ensureResultsRepoClone: () => ensureResultsRepoClone,
24714
24714
  ensureVSCodeSubagents: () => ensureVSCodeSubagents,
@@ -24735,9 +24735,9 @@ __export(index_exports, {
24735
24735
  generateRubrics: () => generateRubrics,
24736
24736
  getAgentvConfigDir: () => getAgentvConfigDir,
24737
24737
  getAgentvHome: () => getAgentvHome,
24738
- getBenchmark: () => getBenchmark,
24739
- getBenchmarksRegistryPath: () => getBenchmarksRegistryPath,
24740
24738
  getOutputFilenames: () => getOutputFilenames,
24739
+ getProject: () => getProject,
24740
+ getProjectsRegistryPath: () => getProjectsRegistryPath,
24741
24741
  getResultsRepoCachePaths: () => getResultsRepoCachePaths,
24742
24742
  getResultsRepoStatus: () => getResultsRepoStatus,
24743
24743
  getSubagentsRoot: () => getSubagentsRoot,
@@ -24759,11 +24759,11 @@ __export(index_exports, {
24759
24759
  isTestMessageRole: () => isTestMessageRole,
24760
24760
  killAllTrackedChildren: () => killAllTrackedChildren,
24761
24761
  listTargetNames: () => listTargetNames,
24762
- loadBenchmarkRegistry: () => loadBenchmarkRegistry,
24763
24762
  loadConfig: () => loadConfig,
24764
24763
  loadEvalCaseById: () => loadEvalCaseById,
24765
24764
  loadEvalCases: () => loadEvalCases,
24766
24765
  loadEvalSuite: () => loadEvalSuite,
24766
+ loadProjectRegistry: () => loadProjectRegistry,
24767
24767
  loadTestById: () => loadTestById,
24768
24768
  loadTestSuite: () => loadTestSuite,
24769
24769
  loadTests: () => loadTests,
@@ -24789,7 +24789,7 @@ __export(index_exports, {
24789
24789
  readTextFile: () => readTextFile,
24790
24790
  readTranscriptFile: () => readTranscriptFile,
24791
24791
  readTranscriptJsonl: () => readTranscriptJsonl,
24792
- removeBenchmark: () => removeBenchmark,
24792
+ removeProject: () => removeProject,
24793
24793
  resolveAndCreateProvider: () => resolveAndCreateProvider,
24794
24794
  resolveDelegatedTargetDefinition: () => resolveDelegatedTargetDefinition,
24795
24795
  resolveFileReference: () => resolveFileReference3,
@@ -24812,7 +24812,7 @@ __export(index_exports, {
24812
24812
  runIsJsonAssertion: () => runIsJsonAssertion,
24813
24813
  runRegexAssertion: () => runRegexAssertion,
24814
24814
  runStartsWithAssertion: () => runStartsWithAssertion,
24815
- saveBenchmarkRegistry: () => saveBenchmarkRegistry,
24815
+ saveProjectRegistry: () => saveProjectRegistry,
24816
24816
  scanRepoDeps: () => scanRepoDeps,
24817
24817
  scoreRangeEvaluationSchema: () => scoreRangeEvaluationSchema,
24818
24818
  scoreToVerdict: () => scoreToVerdict,
@@ -24825,14 +24825,14 @@ __export(index_exports, {
24825
24825
  subscribeToCopilotSdkLogEntries: () => subscribeToCopilotSdkLogEntries,
24826
24826
  subscribeToPiLogEntries: () => subscribeToPiLogEntries,
24827
24827
  substituteVariables: () => substituteVariables,
24828
- syncBenchmark: () => syncBenchmark,
24829
- syncBenchmarks: () => syncBenchmarks,
24828
+ syncProject: () => syncProject,
24829
+ syncProjects: () => syncProjects,
24830
24830
  syncResultsRepo: () => syncResultsRepo,
24831
24831
  toCamelCaseDeep: () => toCamelCaseDeep,
24832
24832
  toSnakeCaseDeep: () => toSnakeCaseDeep,
24833
24833
  toTranscriptJsonLines: () => toTranscriptJsonLines,
24834
24834
  tokensPerTool: () => tokensPerTool,
24835
- touchBenchmark: () => touchBenchmark,
24835
+ touchProject: () => touchProject,
24836
24836
  trackChild: () => trackChild,
24837
24837
  trackedChildCount: () => trackedChildCount,
24838
24838
  transpileEvalYaml: () => transpileEvalYaml,
@@ -25766,7 +25766,7 @@ async function directPushResults(params) {
25766
25766
  // src/index.ts
25767
25767
  init_paths();
25768
25768
 
25769
- // src/benchmarks.ts
25769
+ // src/projects.ts
25770
25770
  init_cjs_shims();
25771
25771
  var import_node_fs20 = require("fs");
25772
25772
  var import_node_path55 = __toESM(require("path"), 1);
@@ -25774,9 +25774,56 @@ var import_yaml2 = require("yaml");
25774
25774
  init_interpolation();
25775
25775
  init_yaml_loader();
25776
25776
  init_paths();
25777
- function getBenchmarksRegistryPath() {
25777
+ function getProjectsRegistryPath() {
25778
+ return import_node_path55.default.join(getAgentvConfigDir(), "projects.yaml");
25779
+ }
25780
+ function getLegacyBenchmarksRegistryPath() {
25778
25781
  return import_node_path55.default.join(getAgentvConfigDir(), "benchmarks.yaml");
25779
25782
  }
25783
+ function migrateLegacyBenchmarksFile() {
25784
+ const newPath = getProjectsRegistryPath();
25785
+ const oldPath = getLegacyBenchmarksRegistryPath();
25786
+ const newExists = (0, import_node_fs20.existsSync)(newPath);
25787
+ const oldExists = (0, import_node_fs20.existsSync)(oldPath);
25788
+ if (!oldExists) return;
25789
+ if (newExists) {
25790
+ console.warn(
25791
+ `[agentv] Both ${oldPath} and ${newPath} exist. Using ${import_node_path55.default.basename(newPath)}; delete ${import_node_path55.default.basename(oldPath)} when you've confirmed the new file is correct.`
25792
+ );
25793
+ return;
25794
+ }
25795
+ let parsed = null;
25796
+ try {
25797
+ const raw = (0, import_node_fs20.readFileSync)(oldPath, "utf-8");
25798
+ parsed = parseYamlValue(raw);
25799
+ } catch (err) {
25800
+ console.warn(
25801
+ `[agentv] Failed to read legacy ${import_node_path55.default.basename(oldPath)} for migration: ${err.message}. Leaving the file in place; you may need to migrate it manually.`
25802
+ );
25803
+ return;
25804
+ }
25805
+ const entries = parsed && typeof parsed === "object" && Array.isArray(parsed.benchmarks) ? parsed.benchmarks : [];
25806
+ const newContent = (0, import_yaml2.stringify)({ projects: entries });
25807
+ const tempPath = `${newPath}.migrating`;
25808
+ try {
25809
+ (0, import_node_fs20.mkdirSync)(import_node_path55.default.dirname(newPath), { recursive: true });
25810
+ (0, import_node_fs20.writeFileSync)(tempPath, newContent, "utf-8");
25811
+ (0, import_node_fs20.renameSync)(tempPath, newPath);
25812
+ (0, import_node_fs20.unlinkSync)(oldPath);
25813
+ } catch (err) {
25814
+ try {
25815
+ if ((0, import_node_fs20.existsSync)(tempPath)) (0, import_node_fs20.unlinkSync)(tempPath);
25816
+ } catch {
25817
+ }
25818
+ console.warn(
25819
+ `[agentv] Failed to migrate ${import_node_path55.default.basename(oldPath)} \u2192 ${import_node_path55.default.basename(newPath)}: ${err.message}. Legacy file left in place.`
25820
+ );
25821
+ return;
25822
+ }
25823
+ console.log(
25824
+ `[agentv] Migrated registry: ${import_node_path55.default.basename(oldPath)} \u2192 ${import_node_path55.default.basename(newPath)} (${entries.length} entr${entries.length === 1 ? "y" : "ies"})`
25825
+ );
25826
+ }
25780
25827
  function fromYaml(raw) {
25781
25828
  if (!raw || typeof raw !== "object") return null;
25782
25829
  const e = raw;
@@ -25811,36 +25858,37 @@ function toYaml(entry) {
25811
25858
  }
25812
25859
  return yaml;
25813
25860
  }
25814
- function loadBenchmarkRegistry() {
25815
- const registryPath = getBenchmarksRegistryPath();
25861
+ function loadProjectRegistry() {
25862
+ migrateLegacyBenchmarksFile();
25863
+ const registryPath = getProjectsRegistryPath();
25816
25864
  if (!(0, import_node_fs20.existsSync)(registryPath)) {
25817
- return { benchmarks: [] };
25865
+ return { projects: [] };
25818
25866
  }
25819
25867
  try {
25820
25868
  const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
25821
25869
  const parsed = parseYamlValue(raw);
25822
25870
  if (!parsed || typeof parsed !== "object") {
25823
- return { benchmarks: [] };
25871
+ return { projects: [] };
25824
25872
  }
25825
25873
  const env = process.env;
25826
- const benchmarks = Array.isArray(parsed.benchmarks) ? parsed.benchmarks.map((e) => fromYaml(interpolateEnv(e, env))).filter((e) => e !== null) : [];
25827
- return { benchmarks };
25874
+ const projects = Array.isArray(parsed.projects) ? parsed.projects.map((e) => fromYaml(interpolateEnv(e, env))).filter((e) => e !== null) : [];
25875
+ return { projects };
25828
25876
  } catch {
25829
- return { benchmarks: [] };
25877
+ return { projects: [] };
25830
25878
  }
25831
25879
  }
25832
- function saveBenchmarkRegistry(registry) {
25833
- const registryPath = getBenchmarksRegistryPath();
25880
+ function saveProjectRegistry(registry) {
25881
+ const registryPath = getProjectsRegistryPath();
25834
25882
  const dir = import_node_path55.default.dirname(registryPath);
25835
25883
  if (!(0, import_node_fs20.existsSync)(dir)) {
25836
25884
  (0, import_node_fs20.mkdirSync)(dir, { recursive: true });
25837
25885
  }
25838
- const payload = { benchmarks: registry.benchmarks.map(toYaml) };
25886
+ const payload = { projects: registry.projects.map(toYaml) };
25839
25887
  (0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml2.stringify)(payload), "utf-8");
25840
25888
  }
25841
- function deriveBenchmarkId(dirPath, existingIds) {
25889
+ function deriveProjectId(dirPath, existingIds) {
25842
25890
  const base = import_node_path55.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
25843
- let candidate = base || "benchmark";
25891
+ let candidate = base || "project";
25844
25892
  let suffix = 2;
25845
25893
  while (existingIds.includes(candidate)) {
25846
25894
  candidate = `${base}-${suffix}`;
@@ -25848,54 +25896,54 @@ function deriveBenchmarkId(dirPath, existingIds) {
25848
25896
  }
25849
25897
  return candidate;
25850
25898
  }
25851
- function addBenchmark(benchmarkPath) {
25852
- const absPath = import_node_path55.default.resolve(benchmarkPath);
25899
+ function addProject(projectPath) {
25900
+ const absPath = import_node_path55.default.resolve(projectPath);
25853
25901
  if (!(0, import_node_fs20.existsSync)(absPath)) {
25854
25902
  throw new Error(`Directory not found: ${absPath}`);
25855
25903
  }
25856
25904
  if (!(0, import_node_fs20.existsSync)(import_node_path55.default.join(absPath, ".agentv"))) {
25857
25905
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
25858
25906
  }
25859
- const registry = loadBenchmarkRegistry();
25860
- const existing = registry.benchmarks.find((p) => p.path === absPath);
25907
+ const registry = loadProjectRegistry();
25908
+ const existing = registry.projects.find((p) => p.path === absPath);
25861
25909
  if (existing) {
25862
25910
  return existing;
25863
25911
  }
25864
25912
  const now = (/* @__PURE__ */ new Date()).toISOString();
25865
25913
  const entry = {
25866
- id: deriveBenchmarkId(
25914
+ id: deriveProjectId(
25867
25915
  absPath,
25868
- registry.benchmarks.map((p) => p.id)
25916
+ registry.projects.map((p) => p.id)
25869
25917
  ),
25870
25918
  name: import_node_path55.default.basename(absPath),
25871
25919
  path: absPath,
25872
25920
  addedAt: now,
25873
25921
  lastOpenedAt: now
25874
25922
  };
25875
- registry.benchmarks.push(entry);
25876
- saveBenchmarkRegistry(registry);
25923
+ registry.projects.push(entry);
25924
+ saveProjectRegistry(registry);
25877
25925
  return entry;
25878
25926
  }
25879
- function removeBenchmark(benchmarkId) {
25880
- const registry = loadBenchmarkRegistry();
25881
- const idx = registry.benchmarks.findIndex((p) => p.id === benchmarkId);
25927
+ function removeProject(projectId) {
25928
+ const registry = loadProjectRegistry();
25929
+ const idx = registry.projects.findIndex((p) => p.id === projectId);
25882
25930
  if (idx < 0) return false;
25883
- registry.benchmarks.splice(idx, 1);
25884
- saveBenchmarkRegistry(registry);
25931
+ registry.projects.splice(idx, 1);
25932
+ saveProjectRegistry(registry);
25885
25933
  return true;
25886
25934
  }
25887
- function getBenchmark(benchmarkId) {
25888
- return loadBenchmarkRegistry().benchmarks.find((p) => p.id === benchmarkId);
25935
+ function getProject(projectId) {
25936
+ return loadProjectRegistry().projects.find((p) => p.id === projectId);
25889
25937
  }
25890
- function touchBenchmark(benchmarkId) {
25891
- const registry = loadBenchmarkRegistry();
25892
- const entry = registry.benchmarks.find((p) => p.id === benchmarkId);
25938
+ function touchProject(projectId) {
25939
+ const registry = loadProjectRegistry();
25940
+ const entry = registry.projects.find((p) => p.id === projectId);
25893
25941
  if (entry) {
25894
25942
  entry.lastOpenedAt = (/* @__PURE__ */ new Date()).toISOString();
25895
- saveBenchmarkRegistry(registry);
25943
+ saveProjectRegistry(registry);
25896
25944
  }
25897
25945
  }
25898
- function discoverBenchmarks(rootDir, maxDepth = 2) {
25946
+ function discoverProjects(rootDir, maxDepth = 2) {
25899
25947
  const absRoot = import_node_path55.default.resolve(rootDir);
25900
25948
  if (!(0, import_node_fs20.existsSync)(absRoot) || !(0, import_node_fs20.statSync)(absRoot).isDirectory()) {
25901
25949
  return [];
@@ -25922,13 +25970,13 @@ function discoverBenchmarks(rootDir, maxDepth = 2) {
25922
25970
  return results.sort();
25923
25971
  }
25924
25972
 
25925
- // src/benchmark-sync.ts
25973
+ // src/project-sync.ts
25926
25974
  init_cjs_shims();
25927
25975
  var childProcess = __toESM(require("child_process"), 1);
25928
25976
  var import_node_fs21 = require("fs");
25929
- async function syncBenchmark(entry) {
25977
+ async function syncProject(entry) {
25930
25978
  if (!entry.source) {
25931
- throw new Error(`Benchmark '${entry.id}' has no source defined`);
25979
+ throw new Error(`Project '${entry.id}' has no source defined`);
25932
25980
  }
25933
25981
  const { url, ref } = entry.source;
25934
25982
  const dest = entry.path;
@@ -25942,12 +25990,12 @@ async function syncBenchmark(entry) {
25942
25990
  );
25943
25991
  }
25944
25992
  }
25945
- async function syncBenchmarks(entries) {
25993
+ async function syncProjects(entries) {
25946
25994
  for (const entry of entries) {
25947
25995
  if (!entry.source) continue;
25948
- console.log(`Syncing benchmark '${entry.id}' from ${entry.source.url}...`);
25949
- await syncBenchmark(entry);
25950
- console.log(`Benchmark '${entry.id}' synced.`);
25996
+ console.log(`Syncing project '${entry.id}' from ${entry.source.url}...`);
25997
+ await syncProject(entry);
25998
+ console.log(`Project '${entry.id}' synced.`);
25951
25999
  }
25952
26000
  }
25953
26001
 
@@ -27282,7 +27330,7 @@ function createAgentKernel() {
27282
27330
  TranscriptProvider,
27283
27331
  WorkspaceCreationError,
27284
27332
  WorkspacePoolManager,
27285
- addBenchmark,
27333
+ addProject,
27286
27334
  assembleLlmGraderPrompt,
27287
27335
  avgToolDurationMs,
27288
27336
  buildDirectoryChain,
@@ -27313,17 +27361,17 @@ function createAgentKernel() {
27313
27361
  createTempWorkspace,
27314
27362
  deepEqual,
27315
27363
  defineConfig,
27316
- deriveBenchmarkId,
27317
27364
  deriveCategory,
27365
+ deriveProjectId,
27318
27366
  detectFormat,
27319
27367
  directPushResults,
27320
27368
  directorySizeBytes,
27321
27369
  discoverAssertions,
27322
- discoverBenchmarks,
27323
27370
  discoverClaudeSessions,
27324
27371
  discoverCodexSessions,
27325
27372
  discoverCopilotSessions,
27326
27373
  discoverGraders,
27374
+ discoverProjects,
27327
27375
  discoverProviders,
27328
27376
  ensureResultsRepoClone,
27329
27377
  ensureVSCodeSubagents,
@@ -27350,9 +27398,9 @@ function createAgentKernel() {
27350
27398
  generateRubrics,
27351
27399
  getAgentvConfigDir,
27352
27400
  getAgentvHome,
27353
- getBenchmark,
27354
- getBenchmarksRegistryPath,
27355
27401
  getOutputFilenames,
27402
+ getProject,
27403
+ getProjectsRegistryPath,
27356
27404
  getResultsRepoCachePaths,
27357
27405
  getResultsRepoStatus,
27358
27406
  getSubagentsRoot,
@@ -27374,11 +27422,11 @@ function createAgentKernel() {
27374
27422
  isTestMessageRole,
27375
27423
  killAllTrackedChildren,
27376
27424
  listTargetNames,
27377
- loadBenchmarkRegistry,
27378
27425
  loadConfig,
27379
27426
  loadEvalCaseById,
27380
27427
  loadEvalCases,
27381
27428
  loadEvalSuite,
27429
+ loadProjectRegistry,
27382
27430
  loadTestById,
27383
27431
  loadTestSuite,
27384
27432
  loadTests,
@@ -27404,7 +27452,7 @@ function createAgentKernel() {
27404
27452
  readTextFile,
27405
27453
  readTranscriptFile,
27406
27454
  readTranscriptJsonl,
27407
- removeBenchmark,
27455
+ removeProject,
27408
27456
  resolveAndCreateProvider,
27409
27457
  resolveDelegatedTargetDefinition,
27410
27458
  resolveFileReference,
@@ -27427,7 +27475,7 @@ function createAgentKernel() {
27427
27475
  runIsJsonAssertion,
27428
27476
  runRegexAssertion,
27429
27477
  runStartsWithAssertion,
27430
- saveBenchmarkRegistry,
27478
+ saveProjectRegistry,
27431
27479
  scanRepoDeps,
27432
27480
  scoreRangeEvaluationSchema,
27433
27481
  scoreToVerdict,
@@ -27440,14 +27488,14 @@ function createAgentKernel() {
27440
27488
  subscribeToCopilotSdkLogEntries,
27441
27489
  subscribeToPiLogEntries,
27442
27490
  substituteVariables,
27443
- syncBenchmark,
27444
- syncBenchmarks,
27491
+ syncProject,
27492
+ syncProjects,
27445
27493
  syncResultsRepo,
27446
27494
  toCamelCaseDeep,
27447
27495
  toSnakeCaseDeep,
27448
27496
  toTranscriptJsonLines,
27449
27497
  tokensPerTool,
27450
- touchBenchmark,
27498
+ touchProject,
27451
27499
  trackChild,
27452
27500
  trackedChildCount,
27453
27501
  transpileEvalYaml,