npm - agentv - Versions diffs - 4.11.2 → 4.12.0 - Mend

agentv 4.11.2 → 4.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/dist/{chunk-MIP46NEN.js → chunk-4MEGL2E3.js} +5 -5
package/dist/{chunk-MIP46NEN.js.map → chunk-4MEGL2E3.js.map} +1 -1
package/dist/{chunk-FQGY6QXQ.js → chunk-CXAO4VPP.js} +43 -43
package/dist/chunk-CXAO4VPP.js.map +1 -0
package/dist/{chunk-7TJ2PON3.js → chunk-VVWPD4CN.js} +104 -89
package/dist/chunk-VVWPD4CN.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-HNSXNRVK.js → dist-D6EJ3O7Q.js} +20 -20
package/dist/index.js +3 -3
package/dist/{interactive-LRW3X5OF.js → interactive-SP2LWOQX.js} +3 -3
package/dist/studio/assets/index-BdR2qr8G.js +65 -0
package/dist/studio/assets/{index-VyDFrnoK.js → index-CkXzhDmw.js} +1 -1
package/dist/studio/assets/index-XVVBVabi.css +1 -0
package/dist/studio/index.html +2 -2
package/package.json +1 -1
package/dist/chunk-7TJ2PON3.js.map +0 -1
package/dist/chunk-FQGY6QXQ.js.map +0 -1
package/dist/studio/assets/index-Bi-KHfNm.js +0 -65
package/dist/studio/assets/index-D_j-w4UO.css +0 -1
package/dist/{dist-HNSXNRVK.js.map → dist-D6EJ3O7Q.js.map} +0 -0
package/dist/{interactive-LRW3X5OF.js.map → interactive-SP2LWOQX.js.map} +0 -0

package/dist/{chunk-7TJ2PON3.js → chunk-VVWPD4CN.js} RENAMED Viewed

@@ -42,25 +42,25 @@ import {
   validateFileReferences,
   validateTargetsFile,
   writeArtifactsFromResults
-} from "./chunk-MIP46NEN.js";
+} from "./chunk-4MEGL2E3.js";
 import {
   DEFAULT_CATEGORY,
   DEFAULT_THRESHOLD,
-  addProject,
+  addBenchmark,
   createBuiltinRegistry,
   deriveCategory,
+  discoverBenchmarks,
   discoverClaudeSessions,
   discoverCodexSessions,
   discoverCopilotSessions,
-  discoverProjects,
   executeScript,
   getAgentvHome,
+  getBenchmark,
   getOutputFilenames,
-  getProject,
   getWorkspacePoolRoot,
   isAgentSkillsFormat,
   listTargetNames,
-  loadProjectRegistry,
+  loadBenchmarkRegistry,
   loadTestSuite,
   normalizeLineEndings,
   parseAgentSkillsEvals,
@@ -69,14 +69,14 @@ import {
   parseCopilotEvents,
   readTargetDefinitions,
   readTranscriptFile,
-  removeProject,
+  removeBenchmark,
   scanRepoDeps,
   toCamelCaseDeep,
   toSnakeCaseDeep as toSnakeCaseDeep2,
   toTranscriptJsonLine,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-FQGY6QXQ.js";
+} from "./chunk-CXAO4VPP.js";
 import {
   __commonJS,
   __require,
@@ -3912,7 +3912,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-LRW3X5OF.js");
+      const { launchInteractiveWizard } = await import("./interactive-SP2LWOQX.js");
       await launchInteractiveWizard();
       return;
     }
@@ -7252,13 +7252,14 @@ var resultsCommand = subcommands({
 // src/commands/results/serve.ts
 import { existsSync as existsSync12, readFileSync as readFileSync10, readdirSync as readdirSync4, statSync as statSync5, writeFileSync as writeFileSync4 } from "node:fs";
 import path16 from "node:path";
-import { fileURLToPath as fileURLToPath2 } from "node:url";
+import { fileURLToPath as fileURLToPath3 } from "node:url";
 import { Hono } from "hono";
 // src/commands/results/eval-runner.ts
-import { spawn } from "node:child_process";
+import { execFileSync, spawn } from "node:child_process";
 import { existsSync as existsSync10 } from "node:fs";
 import path14 from "node:path";
+import { fileURLToPath as fileURLToPath2 } from "node:url";
 var activeRuns = /* @__PURE__ */ new Map();
 function generateRunId() {
   const now = /* @__PURE__ */ new Date();
@@ -7342,16 +7343,27 @@ function resolveCliPath(cwd) {
   ];
   for (const c4 of candidates) {
     if (existsSync10(c4)) {
-      return { bunPath: "bun", cliPath: c4 };
+      return { binPath: "bun", args: [c4] };
     }
   }
-  const currentDir = typeof __dirname !== "undefined" ? __dirname : path14.dirname(new URL(import.meta.url).pathname);
+  const currentDir = typeof __dirname !== "undefined" ? __dirname : path14.dirname(fileURLToPath2(import.meta.url));
   const fromSrc = path14.resolve(currentDir, "../../../cli.ts");
   const fromDist = path14.resolve(currentDir, "../../cli.js");
-  if (existsSync10(fromSrc)) return { bunPath: "bun", cliPath: fromSrc };
-  if (existsSync10(fromDist)) return { bunPath: "bun", cliPath: fromDist };
+  if (existsSync10(fromSrc)) return { binPath: "bun", args: [fromSrc] };
+  if (existsSync10(fromDist)) return { binPath: "bun", args: [fromDist] };
+  if (isCommandAvailable("agentv")) {
+    return { binPath: "agentv", args: [] };
+  }
   return void 0;
 }
+function isCommandAvailable(cmd) {
+  try {
+    execFileSync(process.platform === "win32" ? "where" : "which", [cmd], { stdio: "ignore" });
+    return true;
+  } catch {
+    return false;
+  }
+}
 function registerEvalRoutes(app2, getCwd, options) {
   const readOnly = options?.readOnly === true;
   app2.get("/api/eval/discover", async (c4) => {
@@ -7409,7 +7421,7 @@ function registerEvalRoutes(app2, getCwd, options) {
     };
     activeRuns.set(runId, run2);
     try {
-      const child = spawn(cliPaths.bunPath, [cliPaths.cliPath, ...args], {
+      const child = spawn(cliPaths.binPath, [...cliPaths.args, ...args], {
         cwd,
         stdio: ["ignore", "pipe", "pipe"],
         env: { ...process.env }
@@ -7494,7 +7506,7 @@ Process error: ${err2.message}`;
     const args = buildCliArgs(body);
     return c4.json({ command: buildCliPreview(args) });
   });
-  app2.get("/api/projects/:projectId/eval/discover", async (c4) => {
+  app2.get("/api/benchmarks/:benchmarkId/eval/discover", async (c4) => {
     const cwd = getCwd(c4);
     try {
       const files = await discoverEvalFiles(cwd);
@@ -7509,7 +7521,7 @@ Process error: ${err2.message}`;
       return c4.json({ error: err2.message, eval_files: [] }, 500);
     }
   });
-  app2.get("/api/projects/:projectId/eval/targets", async (c4) => {
+  app2.get("/api/benchmarks/:benchmarkId/eval/targets", async (c4) => {
     const cwd = getCwd(c4);
     try {
       const names = await discoverTargetsInProject(cwd);
@@ -7518,7 +7530,7 @@ Process error: ${err2.message}`;
       return c4.json({ error: err2.message, targets: [] }, 500);
     }
   });
-  app2.post("/api/projects/:projectId/eval/run", async (c4) => {
+  app2.post("/api/benchmarks/:benchmarkId/eval/run", async (c4) => {
     const cwd = getCwd(c4);
     let body;
     try {
@@ -7546,7 +7558,7 @@ Process error: ${err2.message}`;
     };
     activeRuns.set(runId, run2);
     try {
-      const child = spawn(cliPaths.bunPath, [cliPaths.cliPath, ...args], {
+      const child = spawn(cliPaths.binPath, [...cliPaths.args, ...args], {
         cwd,
         stdio: ["ignore", "pipe", "pipe"],
         env: { ...process.env }
@@ -7583,7 +7595,7 @@ Process error: ${err2.message}`;
       return c4.json({ error: err2.message }, 500);
     }
   });
-  app2.get("/api/projects/:projectId/eval/status/:id", (c4) => {
+  app2.get("/api/benchmarks/:benchmarkId/eval/status/:id", (c4) => {
     const id = c4.req.param("id");
     const run2 = activeRuns.get(id ?? "");
     if (!run2) return c4.json({ error: "Run not found" }, 404);
@@ -7598,7 +7610,7 @@ Process error: ${err2.message}`;
       stderr: run2.stderr.slice(-5e3)
     });
   });
-  app2.get("/api/projects/:projectId/eval/runs", (c4) => {
+  app2.get("/api/benchmarks/:benchmarkId/eval/runs", (c4) => {
     const runs = [...activeRuns.values()].map((r) => ({
       id: r.id,
       status: r.status,
@@ -7610,7 +7622,7 @@ Process error: ${err2.message}`;
     runs.sort((a, b) => b.started_at.localeCompare(a.started_at));
     return c4.json({ runs });
   });
-  app2.post("/api/projects/:projectId/eval/preview", async (c4) => {
+  app2.post("/api/benchmarks/:benchmarkId/eval/preview", async (c4) => {
     let body;
     try {
       body = await c4.req.json();
@@ -8164,14 +8176,14 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
   const defaultCtx = { searchDir, agentvDir };
   const readOnly = options?.readOnly === true;
   const app2 = new Hono();
-  function withProject(c4, handler) {
-    const project = getProject(c4.req.param("projectId") ?? "");
-    if (!project || !existsSync12(project.path)) {
+  function withBenchmark(c4, handler) {
+    const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
+    if (!benchmark || !existsSync12(benchmark.path)) {
       return c4.json({ error: "Project not found" }, 404);
     }
     return handler(c4, {
-      searchDir: project.path,
-      agentvDir: path16.join(project.path, ".agentv")
+      searchDir: benchmark.path,
+      agentvDir: path16.join(benchmark.path, ".agentv")
     });
   }
   app2.post("/api/config", async (c4) => {
@@ -8191,7 +8203,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
       return c4.json({ error: "Failed to save config" }, 500);
     }
   });
-  function projectEntryToWire(entry) {
+  function benchmarkEntryToWire(entry) {
     return {
       id: entry.id,
       name: entry.name,
@@ -8200,10 +8212,10 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
       last_opened_at: entry.lastOpenedAt
     };
   }
-  app2.get("/api/projects", async (c4) => {
-    const registry = loadProjectRegistry();
-    const projects = await Promise.all(
-      registry.projects.map(async (p) => {
+  app2.get("/api/benchmarks", async (c4) => {
+    const registry = loadBenchmarkRegistry();
+    const benchmarks = await Promise.all(
+      registry.benchmarks.map(async (p) => {
         let runCount = 0;
         let passRate = 0;
         let lastRun = null;
@@ -8218,48 +8230,48 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
         } catch {
         }
         return {
-          ...projectEntryToWire(p),
+          ...benchmarkEntryToWire(p),
           run_count: runCount,
           pass_rate: passRate,
           last_run: lastRun
         };
       })
     );
-    return c4.json({ projects });
+    return c4.json({ projects: benchmarks });
   });
-  app2.post("/api/projects", async (c4) => {
+  app2.post("/api/benchmarks", async (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
     try {
       const body = await c4.req.json();
       if (!body.path) return c4.json({ error: "Missing path" }, 400);
-      const entry = addProject(body.path);
-      return c4.json(projectEntryToWire(entry), 201);
+      const entry = addBenchmark(body.path);
+      return c4.json(benchmarkEntryToWire(entry), 201);
     } catch (err2) {
       return c4.json({ error: err2.message }, 400);
     }
   });
-  app2.delete("/api/projects/:projectId", (c4) => {
+  app2.delete("/api/benchmarks/:benchmarkId", (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
-    const removed = removeProject(c4.req.param("projectId") ?? "");
+    const removed = removeBenchmark(c4.req.param("benchmarkId") ?? "");
     if (!removed) return c4.json({ error: "Project not found" }, 404);
     return c4.json({ ok: true });
   });
-  app2.get("/api/projects/:projectId/summary", async (c4) => {
-    const project = getProject(c4.req.param("projectId") ?? "");
-    if (!project) return c4.json({ error: "Project not found" }, 404);
+  app2.get("/api/benchmarks/:benchmarkId/summary", async (c4) => {
+    const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
+    if (!benchmark) return c4.json({ error: "Project not found" }, 404);
     try {
-      const { runs: metas } = await listMergedResultFiles(project.path);
+      const { runs: metas } = await listMergedResultFiles(benchmark.path);
       const runCount = metas.length;
       const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
       const lastRun = metas.length > 0 ? metas[0].timestamp : null;
       return c4.json({
-        id: project.id,
-        name: project.name,
-        path: project.path,
+        id: benchmark.id,
+        name: benchmark.name,
+        path: benchmark.path,
         run_count: runCount,
         pass_rate: passRate,
         last_run: lastRun
@@ -8268,24 +8280,24 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
       return c4.json({ error: "Failed to read project" }, 500);
     }
   });
-  app2.post("/api/projects/discover", async (c4) => {
+  app2.post("/api/benchmarks/discover", async (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
     try {
       const body = await c4.req.json();
       if (!body.path) return c4.json({ error: "Missing path" }, 400);
-      const discovered = discoverProjects(body.path);
-      const registered = discovered.map((p) => projectEntryToWire(addProject(p)));
+      const discovered = discoverBenchmarks(body.path);
+      const registered = discovered.map((p) => benchmarkEntryToWire(addBenchmark(p)));
       return c4.json({ discovered: registered });
     } catch (err2) {
       return c4.json({ error: err2.message }, 400);
     }
   });
-  app2.get("/api/projects/all-runs", async (c4) => {
-    const registry = loadProjectRegistry();
+  app2.get("/api/benchmarks/all-runs", async (c4) => {
+    const registry = loadBenchmarkRegistry();
     const allRuns = [];
-    for (const p of registry.projects) {
+    for (const p of registry.benchmarks) {
       try {
         const { runs: metas } = await listMergedResultFiles(p.path);
         for (const m of metas) {
@@ -8411,8 +8423,8 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
     return c4.json({ entries: entries2 });
   });
   app2.get(
-    "/api/projects/:projectId/config",
-    (c4) => withProject(
+    "/api/benchmarks/:benchmarkId/config",
+    (c4) => withBenchmark(
       c4,
       (ctx, dataCtx) => handleConfig(ctx, dataCtx, {
         readOnly,
@@ -8421,50 +8433,53 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
     )
   );
   app2.get(
-    "/api/projects/:projectId/remote/status",
-    (c4) => withProject(
+    "/api/benchmarks/:benchmarkId/remote/status",
+    (c4) => withBenchmark(
       c4,
       async (ctx, dataCtx) => ctx.json(await getRemoteResultsStatus(dataCtx.searchDir))
     )
   );
   app2.post(
-    "/api/projects/:projectId/remote/sync",
-    (c4) => withProject(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
+    "/api/benchmarks/:benchmarkId/remote/sync",
+    (c4) => withBenchmark(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
+  );
+  app2.get("/api/benchmarks/:benchmarkId/runs", (c4) => withBenchmark(c4, handleRuns));
+  app2.get("/api/benchmarks/:benchmarkId/runs/:filename", (c4) => withBenchmark(c4, handleRunDetail));
+  app2.get(
+    "/api/benchmarks/:benchmarkId/runs/:filename/suites",
+    (c4) => withBenchmark(c4, handleRunSuites)
   );
-  app2.get("/api/projects/:projectId/runs", (c4) => withProject(c4, handleRuns));
-  app2.get("/api/projects/:projectId/runs/:filename", (c4) => withProject(c4, handleRunDetail));
-  app2.get("/api/projects/:projectId/runs/:filename/suites", (c4) => withProject(c4, handleRunSuites));
   app2.get(
-    "/api/projects/:projectId/runs/:filename/categories",
-    (c4) => withProject(c4, handleRunCategories)
+    "/api/benchmarks/:benchmarkId/runs/:filename/categories",
+    (c4) => withBenchmark(c4, handleRunCategories)
   );
   app2.get(
-    "/api/projects/:projectId/runs/:filename/categories/:category/suites",
-    (c4) => withProject(c4, handleCategorySuites)
+    "/api/benchmarks/:benchmarkId/runs/:filename/categories/:category/suites",
+    (c4) => withBenchmark(c4, handleCategorySuites)
   );
   app2.get(
-    "/api/projects/:projectId/runs/:filename/evals/:evalId",
-    (c4) => withProject(c4, handleEvalDetail)
+    "/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId",
+    (c4) => withBenchmark(c4, handleEvalDetail)
   );
   app2.get(
-    "/api/projects/:projectId/runs/:filename/evals/:evalId/files",
-    (c4) => withProject(c4, handleEvalFiles)
+    "/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files",
+    (c4) => withBenchmark(c4, handleEvalFiles)
   );
   app2.get(
-    "/api/projects/:projectId/runs/:filename/evals/:evalId/files/*",
-    (c4) => withProject(c4, handleEvalFileContent)
+    "/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files/*",
+    (c4) => withBenchmark(c4, handleEvalFileContent)
   );
-  app2.get("/api/projects/:projectId/experiments", (c4) => withProject(c4, handleExperiments));
-  app2.get("/api/projects/:projectId/compare", (c4) => withProject(c4, handleCompare));
-  app2.get("/api/projects/:projectId/targets", (c4) => withProject(c4, handleTargets));
-  app2.get("/api/projects/:projectId/feedback", (c4) => withProject(c4, handleFeedbackRead));
+  app2.get("/api/benchmarks/:benchmarkId/experiments", (c4) => withBenchmark(c4, handleExperiments));
+  app2.get("/api/benchmarks/:benchmarkId/compare", (c4) => withBenchmark(c4, handleCompare));
+  app2.get("/api/benchmarks/:benchmarkId/targets", (c4) => withBenchmark(c4, handleTargets));
+  app2.get("/api/benchmarks/:benchmarkId/feedback", (c4) => withBenchmark(c4, handleFeedbackRead));
   registerEvalRoutes(
     app2,
     (c4) => {
-      const projectId = c4.req.param("projectId");
-      if (projectId) {
-        const project = getProject(projectId);
-        if (project) return project.path;
+      const benchmarkId = c4.req.param("benchmarkId");
+      if (benchmarkId) {
+        const benchmark = getBenchmark(benchmarkId);
+        if (benchmark) return benchmark.path;
       }
       return searchDir;
     },
@@ -8512,7 +8527,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
   return app2;
 }
 function resolveStudioDistDir() {
-  const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath2(import.meta.url));
+  const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath3(import.meta.url));
   const candidates = [
     // From src/commands/results/ → sibling apps/studio/dist
     path16.resolve(currentDir, "../../../../studio/dist"),
@@ -8584,7 +8599,7 @@ var resultsServeCommand = command({
     const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
     if (add) {
       try {
-        const entry = addProject(add);
+        const entry = addBenchmark(add);
         console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
       } catch (err2) {
         console.error(`Error: ${err2.message}`);
@@ -8593,7 +8608,7 @@ var resultsServeCommand = command({
       return;
     }
     if (remove) {
-      const removed = removeProject(remove);
+      const removed = removeBenchmark(remove);
       if (removed) {
         console.log(`Unregistered project: ${remove}`);
       } else {
@@ -8603,21 +8618,21 @@ var resultsServeCommand = command({
       return;
     }
     if (discover) {
-      const discovered = discoverProjects(discover);
+      const discovered = discoverBenchmarks(discover);
       if (discovered.length === 0) {
         console.log(`No projects with .agentv/ found under ${discover}`);
         return;
       }
       for (const p of discovered) {
-        const entry = addProject(p);
+        const entry = addBenchmark(p);
         console.log(`Registered: ${entry.name} (${entry.id}) at ${entry.path}`);
       }
       console.log(`
 Discovered ${discovered.length} project(s).`);
       return;
     }
-    const registry = loadProjectRegistry();
-    const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.projects.length, {
+    const registry = loadBenchmarkRegistry();
+    const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.benchmarks.length, {
       multi,
       single
     });
@@ -8652,7 +8667,7 @@ Discovered ${discovered.length} project(s).`);
         );
       }
       if (isMultiProject) {
-        console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
+        console.log(`Multi-project mode: ${registry.benchmarks.length} project(s) registered`);
       } else if (results.length > 0 && sourceFile) {
         console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
       } else {
@@ -8660,7 +8675,7 @@ Discovered ${discovered.length} project(s).`);
         console.log("Run an evaluation to see results: agentv eval <eval-file>");
       }
       console.log(`Dashboard: http://localhost:${listenPort}`);
-      console.log(`Projects API: http://localhost:${listenPort}/api/projects`);
+      console.log(`Benchmarks API: http://localhost:${listenPort}/api/benchmarks`);
       console.log("Press Ctrl+C to stop");
       const { serve: startServer } = await import("@hono/node-server");
       startServer({
@@ -9835,4 +9850,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-7TJ2PON3.js.map
+//# sourceMappingURL=chunk-VVWPD4CN.js.map