npm - @agentv/core - Versions diffs - 4.11.2-next.1 → 4.12.0-next.1 - Mend

@agentv/core 4.11.2-next.1 → 4.12.0-next.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/dist/index.cjs CHANGED Viewed

@@ -1801,7 +1801,7 @@ __export(index_exports, {
   TranscriptProvider: () => TranscriptProvider,
   WorkspaceCreationError: () => WorkspaceCreationError,
   WorkspacePoolManager: () => WorkspacePoolManager,
-  addProject: () => addProject,
+  addBenchmark: () => addBenchmark,
   assembleLlmGraderPrompt: () => assembleLlmGraderPrompt,
   assembleLlmJudgePrompt: () => assembleLlmGraderPrompt,
   avgToolDurationMs: () => avgToolDurationMs,
@@ -1833,17 +1833,17 @@ __export(index_exports, {
   createTempWorkspace: () => createTempWorkspace,
   deepEqual: () => deepEqual,
   defineConfig: () => defineConfig,
+  deriveBenchmarkId: () => deriveBenchmarkId,
   deriveCategory: () => deriveCategory,
-  deriveProjectId: () => deriveProjectId,
   detectFormat: () => detectFormat,
   directorySizeBytes: () => directorySizeBytes,
   discoverAssertions: () => discoverAssertions,
+  discoverBenchmarks: () => discoverBenchmarks,
   discoverClaudeSessions: () => discoverClaudeSessions,
   discoverCodexSessions: () => discoverCodexSessions,
   discoverCopilotSessions: () => discoverCopilotSessions,
   discoverGraders: () => discoverGraders,
   discoverJudges: () => discoverGraders,
-  discoverProjects: () => discoverProjects,
   discoverProviders: () => discoverProviders,
   ensureResultsRepoClone: () => ensureResultsRepoClone,
   ensureVSCodeSubagents: () => ensureVSCodeSubagents,
@@ -1867,9 +1867,9 @@ __export(index_exports, {
   freeformEvaluationSchema: () => freeformEvaluationSchema,
   generateRubrics: () => generateRubrics,
   getAgentvHome: () => getAgentvHome,
+  getBenchmark: () => getBenchmark,
+  getBenchmarksRegistryPath: () => getBenchmarksRegistryPath,
   getOutputFilenames: () => getOutputFilenames,
-  getProject: () => getProject,
-  getProjectsRegistryPath: () => getProjectsRegistryPath,
   getResultsRepoCachePaths: () => getResultsRepoCachePaths,
   getResultsRepoStatus: () => getResultsRepoStatus,
   getSubagentsRoot: () => getSubagentsRoot,
@@ -1889,11 +1889,11 @@ __export(index_exports, {
   isTestMessage: () => isTestMessage,
   isTestMessageRole: () => isTestMessageRole,
   listTargetNames: () => listTargetNames,
+  loadBenchmarkRegistry: () => loadBenchmarkRegistry,
   loadConfig: () => loadConfig,
   loadEvalCaseById: () => loadEvalCaseById,
   loadEvalCases: () => loadEvalCases,
   loadEvalSuite: () => loadEvalSuite,
-  loadProjectRegistry: () => loadProjectRegistry,
   loadTestById: () => loadTestById,
   loadTestSuite: () => loadTestSuite,
   loadTests: () => loadTests,
@@ -1916,7 +1916,7 @@ __export(index_exports, {
   readTextFile: () => readTextFile,
   readTranscriptFile: () => readTranscriptFile,
   readTranscriptJsonl: () => readTranscriptJsonl,
-  removeProject: () => removeProject,
+  removeBenchmark: () => removeBenchmark,
   resolveAndCreateProvider: () => resolveAndCreateProvider,
   resolveDelegatedTargetDefinition: () => resolveDelegatedTargetDefinition,
   resolveFileReference: () => resolveFileReference3,
@@ -1938,7 +1938,7 @@ __export(index_exports, {
   runIsJsonAssertion: () => runIsJsonAssertion,
   runRegexAssertion: () => runRegexAssertion,
   runStartsWithAssertion: () => runStartsWithAssertion,
-  saveProjectRegistry: () => saveProjectRegistry,
+  saveBenchmarkRegistry: () => saveBenchmarkRegistry,
   scanRepoDeps: () => scanRepoDeps,
   scoreToVerdict: () => scoreToVerdict,
   shouldEnableCache: () => shouldEnableCache,
@@ -1955,7 +1955,7 @@ __export(index_exports, {
   toSnakeCaseDeep: () => toSnakeCaseDeep,
   toTranscriptJsonLine: () => toTranscriptJsonLine,
   tokensPerTool: () => tokensPerTool,
-  touchProject: () => touchProject,
+  touchBenchmark: () => touchBenchmark,
   transpileEvalYaml: () => transpileEvalYaml,
   transpileEvalYamlFile: () => transpileEvalYamlFile,
   trimBaselineResult: () => trimBaselineResult
@@ -23102,41 +23102,41 @@ async function createDraftResultsPr(params) {
   return stdout.trim();
 }
-// src/projects.ts
+// src/benchmarks.ts
 init_cjs_shims();
 var import_node_fs19 = require("fs");
 var import_node_path54 = __toESM(require("path"), 1);
 var import_yaml10 = require("yaml");
-function getProjectsRegistryPath() {
+function getBenchmarksRegistryPath() {
   return import_node_path54.default.join(getAgentvHome(), "projects.yaml");
 }
-function loadProjectRegistry() {
-  const registryPath = getProjectsRegistryPath();
+function loadBenchmarkRegistry() {
+  const registryPath = getBenchmarksRegistryPath();
   if (!(0, import_node_fs19.existsSync)(registryPath)) {
-    return { projects: [] };
+    return { benchmarks: [] };
   }
   try {
     const raw = (0, import_node_fs19.readFileSync)(registryPath, "utf-8");
     const parsed = (0, import_yaml10.parse)(raw);
-    if (!parsed || !Array.isArray(parsed.projects)) {
-      return { projects: [] };
+    if (!parsed || !Array.isArray(parsed.benchmarks)) {
+      return { benchmarks: [] };
     }
-    return { projects: parsed.projects };
+    return { benchmarks: parsed.benchmarks };
   } catch {
-    return { projects: [] };
+    return { benchmarks: [] };
   }
 }
-function saveProjectRegistry(registry) {
-  const registryPath = getProjectsRegistryPath();
+function saveBenchmarkRegistry(registry) {
+  const registryPath = getBenchmarksRegistryPath();
   const dir = import_node_path54.default.dirname(registryPath);
   if (!(0, import_node_fs19.existsSync)(dir)) {
     (0, import_node_fs19.mkdirSync)(dir, { recursive: true });
   }
-  (0, import_node_fs19.writeFileSync)(registryPath, (0, import_yaml10.stringify)(registry), "utf-8");
+  (0, import_node_fs19.writeFileSync)(registryPath, (0, import_yaml10.stringify)({ benchmarks: registry.benchmarks }), "utf-8");
 }
-function deriveProjectId(dirPath, existingIds) {
+function deriveBenchmarkId(dirPath, existingIds) {
   const base = import_node_path54.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
-  let candidate = base || "project";
+  let candidate = base || "benchmark";
   let suffix = 2;
   while (existingIds.includes(candidate)) {
     candidate = `${base}-${suffix}`;
@@ -23144,54 +23144,54 @@ function deriveProjectId(dirPath, existingIds) {
   }
   return candidate;
 }
-function addProject(projectPath) {
-  const absPath = import_node_path54.default.resolve(projectPath);
+function addBenchmark(benchmarkPath) {
+  const absPath = import_node_path54.default.resolve(benchmarkPath);
   if (!(0, import_node_fs19.existsSync)(absPath)) {
     throw new Error(`Directory not found: ${absPath}`);
   }
   if (!(0, import_node_fs19.existsSync)(import_node_path54.default.join(absPath, ".agentv"))) {
     throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
   }
-  const registry = loadProjectRegistry();
-  const existing = registry.projects.find((p) => p.path === absPath);
+  const registry = loadBenchmarkRegistry();
+  const existing = registry.benchmarks.find((p) => p.path === absPath);
   if (existing) {
     return existing;
   }
   const now = (/* @__PURE__ */ new Date()).toISOString();
   const entry = {
-    id: deriveProjectId(
+    id: deriveBenchmarkId(
       absPath,
-      registry.projects.map((p) => p.id)
+      registry.benchmarks.map((p) => p.id)
     ),
     name: import_node_path54.default.basename(absPath),
     path: absPath,
     addedAt: now,
     lastOpenedAt: now
   };
-  registry.projects.push(entry);
-  saveProjectRegistry(registry);
+  registry.benchmarks.push(entry);
+  saveBenchmarkRegistry(registry);
   return entry;
 }
-function removeProject(projectId) {
-  const registry = loadProjectRegistry();
-  const idx = registry.projects.findIndex((p) => p.id === projectId);
+function removeBenchmark(benchmarkId) {
+  const registry = loadBenchmarkRegistry();
+  const idx = registry.benchmarks.findIndex((p) => p.id === benchmarkId);
   if (idx < 0) return false;
-  registry.projects.splice(idx, 1);
-  saveProjectRegistry(registry);
+  registry.benchmarks.splice(idx, 1);
+  saveBenchmarkRegistry(registry);
   return true;
 }
-function getProject(projectId) {
-  return loadProjectRegistry().projects.find((p) => p.id === projectId);
+function getBenchmark(benchmarkId) {
+  return loadBenchmarkRegistry().benchmarks.find((p) => p.id === benchmarkId);
 }
-function touchProject(projectId) {
-  const registry = loadProjectRegistry();
-  const entry = registry.projects.find((p) => p.id === projectId);
+function touchBenchmark(benchmarkId) {
+  const registry = loadBenchmarkRegistry();
+  const entry = registry.benchmarks.find((p) => p.id === benchmarkId);
   if (entry) {
     entry.lastOpenedAt = (/* @__PURE__ */ new Date()).toISOString();
-    saveProjectRegistry(registry);
+    saveBenchmarkRegistry(registry);
   }
 }
-function discoverProjects(rootDir, maxDepth = 2) {
+function discoverBenchmarks(rootDir, maxDepth = 2) {
   const absRoot = import_node_path54.default.resolve(rootDir);
   if (!(0, import_node_fs19.existsSync)(absRoot) || !(0, import_node_fs19.statSync)(absRoot).isDirectory()) {
     return [];
@@ -24379,7 +24379,7 @@ function createAgentKernel() {
   TranscriptProvider,
   WorkspaceCreationError,
   WorkspacePoolManager,
-  addProject,
+  addBenchmark,
   assembleLlmGraderPrompt,
   assembleLlmJudgePrompt,
   avgToolDurationMs,
@@ -24411,17 +24411,17 @@ function createAgentKernel() {
   createTempWorkspace,
   deepEqual,
   defineConfig,
+  deriveBenchmarkId,
   deriveCategory,
-  deriveProjectId,
   detectFormat,
   directorySizeBytes,
   discoverAssertions,
+  discoverBenchmarks,
   discoverClaudeSessions,
   discoverCodexSessions,
   discoverCopilotSessions,
   discoverGraders,
   discoverJudges,
-  discoverProjects,
   discoverProviders,
   ensureResultsRepoClone,
   ensureVSCodeSubagents,
@@ -24445,9 +24445,9 @@ function createAgentKernel() {
   freeformEvaluationSchema,
   generateRubrics,
   getAgentvHome,
+  getBenchmark,
+  getBenchmarksRegistryPath,
   getOutputFilenames,
-  getProject,
-  getProjectsRegistryPath,
   getResultsRepoCachePaths,
   getResultsRepoStatus,
   getSubagentsRoot,
@@ -24467,11 +24467,11 @@ function createAgentKernel() {
   isTestMessage,
   isTestMessageRole,
   listTargetNames,
+  loadBenchmarkRegistry,
   loadConfig,
   loadEvalCaseById,
   loadEvalCases,
   loadEvalSuite,
-  loadProjectRegistry,
   loadTestById,
   loadTestSuite,
   loadTests,
@@ -24494,7 +24494,7 @@ function createAgentKernel() {
   readTextFile,
   readTranscriptFile,
   readTranscriptJsonl,
-  removeProject,
+  removeBenchmark,
   resolveAndCreateProvider,
   resolveDelegatedTargetDefinition,
   resolveFileReference,
@@ -24516,7 +24516,7 @@ function createAgentKernel() {
   runIsJsonAssertion,
   runRegexAssertion,
   runStartsWithAssertion,
-  saveProjectRegistry,
+  saveBenchmarkRegistry,
   scanRepoDeps,
   scoreToVerdict,
   shouldEnableCache,
@@ -24533,7 +24533,7 @@ function createAgentKernel() {
   toSnakeCaseDeep,
   toTranscriptJsonLine,
   tokensPerTool,
-  touchProject,
+  touchBenchmark,
   transpileEvalYaml,
   transpileEvalYamlFile,
   trimBaselineResult