npm - agentv - Versions diffs - 4.28.0 → 4.29.0-next.1 - Mend

agentv 4.28.0 → 4.29.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/{chunk-2WS3BEPV.js → chunk-GLJVO5PK.js} RENAMED Viewed

@@ -45,7 +45,7 @@ import {
   validateFileReferences,
   validateTargetsFile,
   validateWorkspacePaths
-} from "./chunk-NL5H3TIC.js";
+} from "./chunk-FEDIWLKK.js";
 import {
   RESULT_INDEX_FILENAME,
   aggregateRunDir,
@@ -53,27 +53,27 @@ import {
   resolveRunManifestPath,
   toSnakeCaseDeep as toSnakeCaseDeep2,
   writeArtifactsFromResults
-} from "./chunk-7T6AF75O.js";
+} from "./chunk-OS67VZUO.js";
 import {
   DEFAULT_CATEGORY,
-  addBenchmark,
+  addProject,
   deriveCategory,
   discoverClaudeSessions,
   discoverCodexSessions,
-  getBenchmark,
   getOutputFilenames,
-  loadBenchmarkRegistry,
+  getProject,
+  loadProjectRegistry,
   parseClaudeSession,
   parseCodexSession,
   readTranscriptFile,
-  removeBenchmark,
+  removeProject,
   runBeforeSessionHook,
   scanRepoDeps,
-  syncBenchmarks,
+  syncProjects,
   toTranscriptJsonLines,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-WZVOY2W2.js";
+} from "./chunk-VZMGBDJD.js";
 import {
   DEFAULT_THRESHOLD,
   createBuiltinRegistry,
@@ -4053,7 +4053,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-7R2K3CBY.js");
+      const { launchInteractiveWizard } = await import("./interactive-5LEM6ITD.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4121,7 +4121,7 @@ var evalRunCommand = command({
 // src/commands/eval/index.ts
 var evalCommand = subcommands({
   name: "eval",
-  description: "Evaluation commands",
+  description: "Evaluation commands. Shorthand: `agentv eval <eval-paths...>` aliases `agentv eval run <eval-paths...>`.",
   cmds: {
     run: evalRunCommand,
     assert: evalAssertCommand,
@@ -9338,7 +9338,7 @@ import { Hono } from "hono";
 // src/commands/results/eval-runner.ts
 import { execFileSync as execFileSync2, spawn } from "node:child_process";
-import { existsSync as existsSync12 } from "node:fs";
+import { createWriteStream, existsSync as existsSync12, mkdirSync as mkdirSync3 } from "node:fs";
 import path17 from "node:path";
 import { fileURLToPath as fileURLToPath2 } from "node:url";
 var activeRuns = /* @__PURE__ */ new Map();
@@ -9365,6 +9365,14 @@ function getActiveRunTarget(indexJsonlPath) {
   }
   return void 0;
 }
+function getActiveRunStatus(indexJsonlPath) {
+  for (const run2 of activeRuns.values()) {
+    if (run2.outputDir && path17.join(run2.outputDir, "index.jsonl") === indexJsonlPath) {
+      return run2.status;
+    }
+  }
+  return void 0;
+}
 async function discoverTargetsInProject(cwd) {
   const repoRoot = await findRepoRoot(cwd) ?? cwd;
   let targetsFilePath;
@@ -9477,6 +9485,17 @@ function isCommandAvailable(cmd) {
     return false;
   }
 }
+function openConsoleLogStream(outputDir) {
+  try {
+    mkdirSync3(outputDir, { recursive: true });
+    const stream = createWriteStream(path17.join(outputDir, "console.log"), { flags: "w" });
+    stream.on("error", () => {
+    });
+    return stream;
+  } catch {
+    return void 0;
+  }
+}
 function registerEvalRoutes(app2, getCwd, options) {
   const readOnly = options?.readOnly === true;
   app2.get("/api/eval/discover", async (c4) => {
@@ -9553,13 +9572,16 @@ function registerEvalRoutes(app2, getCwd, options) {
       });
       run2.process = child;
       run2.status = "running";
+      const logStream = openConsoleLogStream(outputDir);
       child.stdout?.on("data", (chunk) => {
+        logStream?.write(chunk);
         run2.stdout += chunk.toString();
         if (run2.stdout.length > 1e5) {
           run2.stdout = run2.stdout.slice(-8e4);
         }
       });
       child.stderr?.on("data", (chunk) => {
+        logStream?.write(chunk);
         run2.stderr += chunk.toString();
         if (run2.stderr.length > 1e5) {
           run2.stderr = run2.stderr.slice(-8e4);
@@ -9570,6 +9592,7 @@ function registerEvalRoutes(app2, getCwd, options) {
         run2.status = code === 0 ? "finished" : "failed";
         run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
         run2.process = void 0;
+        logStream?.end();
         pruneFinishedRuns();
       });
       child.on("error", (err2) => {
@@ -9578,6 +9601,10 @@ function registerEvalRoutes(app2, getCwd, options) {
 Process error: ${err2.message}`;
         run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
         run2.process = void 0;
+        logStream?.write(`
+Process error: ${err2.message}
+`);
+        logStream?.end();
       });
       return c4.json(
         {
@@ -9649,7 +9676,7 @@ Process error: ${err2.message}`;
     const args = buildCliArgs(body);
     return c4.json({ command: buildCliPreview(args) });
   });
-  app2.get("/api/benchmarks/:benchmarkId/eval/discover", async (c4) => {
+  app2.get("/api/projects/:projectId/eval/discover", async (c4) => {
     const cwd = getCwd(c4);
     try {
       const files = await discoverEvalFiles(cwd);
@@ -9664,7 +9691,7 @@ Process error: ${err2.message}`;
       return c4.json({ error: err2.message, eval_files: [] }, 500);
     }
   });
-  app2.get("/api/benchmarks/:benchmarkId/eval/targets", async (c4) => {
+  app2.get("/api/projects/:projectId/eval/targets", async (c4) => {
     const cwd = getCwd(c4);
     try {
       const names = await discoverTargetsInProject(cwd);
@@ -9673,7 +9700,7 @@ Process error: ${err2.message}`;
       return c4.json({ error: err2.message, targets: [] }, 500);
     }
   });
-  app2.post("/api/benchmarks/:benchmarkId/eval/run", async (c4) => {
+  app2.post("/api/projects/:projectId/eval/run", async (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
@@ -9722,11 +9749,14 @@ Process error: ${err2.message}`;
       });
       run2.process = child;
       run2.status = "running";
+      const logStream = openConsoleLogStream(outputDir);
       child.stdout?.on("data", (chunk) => {
+        logStream?.write(chunk);
         run2.stdout += chunk.toString();
         if (run2.stdout.length > 1e5) run2.stdout = run2.stdout.slice(-8e4);
       });
       child.stderr?.on("data", (chunk) => {
+        logStream?.write(chunk);
         run2.stderr += chunk.toString();
         if (run2.stderr.length > 1e5) run2.stderr = run2.stderr.slice(-8e4);
       });
@@ -9735,6 +9765,7 @@ Process error: ${err2.message}`;
         run2.status = code === 0 ? "finished" : "failed";
         run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
         run2.process = void 0;
+        logStream?.end();
         pruneFinishedRuns();
       });
       child.on("error", (err2) => {
@@ -9743,6 +9774,10 @@ Process error: ${err2.message}`;
 Process error: ${err2.message}`;
         run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
         run2.process = void 0;
+        logStream?.write(`
+Process error: ${err2.message}
+`);
+        logStream?.end();
       });
       return c4.json({ id: runId, status: run2.status, command: command2 }, 202);
     } catch (err2) {
@@ -9752,7 +9787,7 @@ Process error: ${err2.message}`;
       return c4.json({ error: err2.message }, 500);
     }
   });
-  app2.post("/api/benchmarks/:benchmarkId/eval/run/:id/stop", (c4) => {
+  app2.post("/api/projects/:projectId/eval/run/:id/stop", (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
@@ -9769,7 +9804,7 @@ Process error: ${err2.message}`;
     }
     return c4.json({ stopped: true, status: run2.status });
   });
-  app2.get("/api/benchmarks/:benchmarkId/eval/status/:id", (c4) => {
+  app2.get("/api/projects/:projectId/eval/status/:id", (c4) => {
     const id = c4.req.param("id");
     const run2 = activeRuns.get(id ?? "");
     if (!run2) return c4.json({ error: "Run not found" }, 404);
@@ -9784,7 +9819,7 @@ Process error: ${err2.message}`;
       stderr: run2.stderr.slice(-5e3)
     });
   });
-  app2.get("/api/benchmarks/:benchmarkId/eval/runs", (c4) => {
+  app2.get("/api/projects/:projectId/eval/runs", (c4) => {
     const runs = [...activeRuns.values()].map((r) => ({
       id: r.id,
       status: r.status,
@@ -9797,7 +9832,7 @@ Process error: ${err2.message}`;
     runs.sort((a, b) => b.started_at.localeCompare(a.started_at));
     return c4.json({ runs });
   });
-  app2.post("/api/benchmarks/:benchmarkId/eval/preview", async (c4) => {
+  app2.post("/api/projects/:projectId/eval/preview", async (c4) => {
     let body;
     try {
       body = await c4.req.json();
@@ -9887,7 +9922,7 @@ function normalizeTags(tags) {
 }
 // src/commands/results/studio-config.ts
-import { existsSync as existsSync14, mkdirSync as mkdirSync3, readFileSync as readFileSync11, writeFileSync as writeFileSync5 } from "node:fs";
+import { existsSync as existsSync14, mkdirSync as mkdirSync4, readFileSync as readFileSync11, writeFileSync as writeFileSync5 } from "node:fs";
 import path19 from "node:path";
 import { stringify as stringifyYaml2 } from "yaml";
 var DEFAULTS = {
@@ -9921,7 +9956,7 @@ function loadStudioConfig(agentvDir) {
 }
 function saveStudioConfig(agentvDir, config) {
   if (!existsSync14(agentvDir)) {
-    mkdirSync3(agentvDir, { recursive: true });
+    mkdirSync4(agentvDir, { recursive: true });
   }
   const configPath = path19.join(agentvDir, "config.yaml");
   let existing = {};
@@ -9977,14 +10012,11 @@ Serving most recent: ${metas[0].path}
   }
   return metas[0].path;
 }
-function resolveDashboardMode(benchmarkCount, options) {
+function resolveDashboardMode(projectCount, options) {
   if (options.single === true) {
-    return { isMultiBenchmark: false, showMultiWarning: options.multi === true };
+    return { projectDashboard: false };
   }
-  if (options.multi === true) {
-    return { isMultiBenchmark: true, showMultiWarning: true };
-  }
-  return { isMultiBenchmark: benchmarkCount > 1, showMultiWarning: false };
+  return { projectDashboard: projectCount > 1 };
 }
 function feedbackPath(resultDir) {
   return path20.join(resultDir, "feedback.json");
@@ -10096,6 +10128,7 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
         }
       } catch {
       }
+      const liveStatus = getActiveRunStatus(m.path);
       const tagsEntry = readRunTags(m.path);
       return {
         filename: m.filename,
@@ -10109,11 +10142,30 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
         source: m.source,
         ...target && { target },
         ...experiment && { experiment },
-        ...tagsEntry && { tags: tagsEntry.tags }
+        ...tagsEntry && { tags: tagsEntry.tags },
+        ...liveStatus && { status: liveStatus }
       };
     })
   });
 }
+async function handleRunLog(c4, { searchDir }) {
+  const filename = c4.req.param("filename") ?? "";
+  const meta = await findRunById(searchDir, filename);
+  if (!meta) return c4.json({ error: "Run not found" }, 404);
+  if (meta.source === "remote") {
+    return c4.json({ error: "Console log is not available for remote runs" }, 404);
+  }
+  const logPath = path20.join(path20.dirname(meta.path), "console.log");
+  if (!existsSync15(logPath)) {
+    return c4.json({ error: "Console log not found for this run" }, 404);
+  }
+  try {
+    const content = readFileSync12(logPath, "utf8");
+    return c4.text(content);
+  } catch {
+    return c4.json({ error: "Failed to read console log" }, 500);
+  }
+}
 async function handleRunDetail(c4, { searchDir }) {
   const filename = c4.req.param("filename") ?? "";
   const meta = await findRunById(searchDir, filename);
@@ -10523,8 +10575,8 @@ function handleConfig(c4, { agentvDir, searchDir }, options) {
   return c4.json({
     ...loadStudioConfig(agentvDir),
     read_only: options?.readOnly === true,
-    benchmark_name: path20.basename(searchDir),
-    multi_benchmark_dashboard: options?.multiBenchmarkDashboard === true
+    project_name: path20.basename(searchDir),
+    project_dashboard: options?.projectDashboard === true
   });
 }
 function handleFeedbackRead(c4, { searchDir }) {
@@ -10581,14 +10633,14 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
   const defaultCtx = { searchDir, agentvDir };
   const readOnly = options?.readOnly === true;
   const app2 = new Hono();
-  function withBenchmark(c4, handler) {
-    const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
-    if (!benchmark || !existsSync15(benchmark.path)) {
-      return c4.json({ error: "Benchmark not found" }, 404);
+  function withProject(c4, handler) {
+    const project = getProject(c4.req.param("projectId") ?? "");
+    if (!project || !existsSync15(project.path)) {
+      return c4.json({ error: "Project not found" }, 404);
     }
     return handler(c4, {
-      searchDir: benchmark.path,
-      agentvDir: path20.join(benchmark.path, ".agentv")
+      searchDir: project.path,
+      agentvDir: path20.join(project.path, ".agentv")
     });
   }
   app2.post("/api/config", async (c4) => {
@@ -10608,7 +10660,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
       return c4.json({ error: "Failed to save config" }, 500);
     }
   });
-  function benchmarkEntryToWire(entry) {
+  function projectEntryToWire(entry) {
     return {
       id: entry.id,
       name: entry.name,
@@ -10617,10 +10669,10 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
       last_opened_at: entry.lastOpenedAt
     };
   }
-  app2.get("/api/benchmarks", async (c4) => {
-    const registry = loadBenchmarkRegistry();
-    const benchmarks = await Promise.all(
-      registry.benchmarks.map(async (p) => {
+  app2.get("/api/projects", async (c4) => {
+    const registry = loadProjectRegistry();
+    const projects = await Promise.all(
+      registry.projects.map(async (p) => {
         let runCount = 0;
         let passRate = 0;
         let lastRun = null;
@@ -10635,52 +10687,52 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
         } catch {
         }
         return {
-          ...benchmarkEntryToWire(p),
+          ...projectEntryToWire(p),
           run_count: runCount,
           pass_rate: passRate,
           last_run: lastRun
         };
       })
     );
-    return c4.json({ benchmarks });
+    return c4.json({ projects });
   });
-  app2.post("/api/benchmarks", async (c4) => {
+  app2.post("/api/projects", async (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
     try {
       const body = await c4.req.json();
       if (!body.path) return c4.json({ error: "Missing path" }, 400);
-      const entry = addBenchmark(body.path);
-      return c4.json(benchmarkEntryToWire(entry), 201);
+      const entry = addProject(body.path);
+      return c4.json(projectEntryToWire(entry), 201);
     } catch (err2) {
       return c4.json({ error: err2.message }, 400);
     }
   });
-  app2.get("/api/benchmarks/:benchmarkId/summary", async (c4) => {
-    const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
-    if (!benchmark) return c4.json({ error: "Benchmark not found" }, 404);
+  app2.get("/api/projects/:projectId/summary", async (c4) => {
+    const project = getProject(c4.req.param("projectId") ?? "");
+    if (!project) return c4.json({ error: "Project not found" }, 404);
     try {
-      const { runs: metas } = await listMergedResultFiles(benchmark.path);
+      const { runs: metas } = await listMergedResultFiles(project.path);
       const runCount = metas.length;
       const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
       const lastRun = metas.length > 0 ? metas[0].timestamp : null;
       return c4.json({
-        id: benchmark.id,
-        name: benchmark.name,
-        path: benchmark.path,
+        id: project.id,
+        name: project.name,
+        path: project.path,
         run_count: runCount,
         pass_rate: passRate,
         last_run: lastRun
       });
     } catch {
-      return c4.json({ error: "Failed to read benchmark" }, 500);
+      return c4.json({ error: "Failed to read project" }, 500);
     }
   });
-  app2.get("/api/benchmarks/all-runs", async (c4) => {
-    const registry = loadBenchmarkRegistry();
+  app2.get("/api/projects/all-runs", async (c4) => {
+    const registry = loadProjectRegistry();
     const allRuns = [];
-    for (const p of registry.benchmarks) {
+    for (const p of registry.projects) {
       try {
         const { runs: metas } = await listMergedResultFiles(p.path);
         for (const m of metas) {
@@ -10706,8 +10758,8 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
             source: m.source,
             ...target && { target },
             ...experiment && { experiment },
-            benchmark_id: p.id,
-            benchmark_name: p.name
+            project_id: p.id,
+            project_name: p.name
           });
         }
       } catch {
@@ -10716,19 +10768,19 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
     allRuns.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
     return c4.json({ runs: allRuns });
   });
-  app2.delete("/api/benchmarks/:benchmarkId", (c4) => {
+  app2.delete("/api/projects/:projectId", (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
-    const removed = removeBenchmark(c4.req.param("benchmarkId") ?? "");
-    if (!removed) return c4.json({ error: "Benchmark not found" }, 404);
+    const removed = removeProject(c4.req.param("projectId") ?? "");
+    if (!removed) return c4.json({ error: "Project not found" }, 404);
     return c4.json({ ok: true });
   });
   app2.get(
     "/api/config",
     (c4) => handleConfig(c4, defaultCtx, {
       readOnly,
-      multiBenchmarkDashboard: options?.multiBenchmarkDashboard
+      projectDashboard: options?.projectDashboard
     })
   );
   app2.get("/api/remote/status", async (c4) => c4.json(await getRemoteResultsStatus(searchDir)));
@@ -10747,6 +10799,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
     return handleRunTagsDelete(c4, defaultCtx);
   });
   app2.get("/api/runs/:filename", (c4) => handleRunDetail(c4, defaultCtx));
+  app2.get("/api/runs/:filename/log", (c4) => handleRunLog(c4, defaultCtx));
   app2.get("/api/runs/:filename/suites", (c4) => handleRunSuites(c4, defaultCtx));
   app2.get("/api/runs/:filename/categories", (c4) => handleRunCategories(c4, defaultCtx));
   app2.get(
@@ -10826,75 +10879,73 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
     return c4.json({ entries: entries2 });
   });
   app2.get(
-    "/api/benchmarks/:benchmarkId/config",
-    (c4) => withBenchmark(
+    "/api/projects/:projectId/config",
+    (c4) => withProject(
       c4,
       (ctx, dataCtx) => handleConfig(ctx, dataCtx, {
         readOnly,
-        multiBenchmarkDashboard: options?.multiBenchmarkDashboard
+        projectDashboard: options?.projectDashboard
       })
     )
   );
   app2.get(
-    "/api/benchmarks/:benchmarkId/remote/status",
-    (c4) => withBenchmark(
+    "/api/projects/:projectId/remote/status",
+    (c4) => withProject(
       c4,
       async (ctx, dataCtx) => ctx.json(await getRemoteResultsStatus(dataCtx.searchDir))
     )
   );
   app2.post(
-    "/api/benchmarks/:benchmarkId/remote/sync",
-    (c4) => withBenchmark(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
+    "/api/projects/:projectId/remote/sync",
+    (c4) => withProject(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
   );
-  app2.get("/api/benchmarks/:benchmarkId/runs", (c4) => withBenchmark(c4, handleRuns));
-  app2.put("/api/benchmarks/:benchmarkId/runs/:filename/tags", (c4) => {
+  app2.get("/api/projects/:projectId/runs", (c4) => withProject(c4, handleRuns));
+  app2.put("/api/projects/:projectId/runs/:filename/tags", (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
-    return withBenchmark(c4, handleRunTagsPut);
+    return withProject(c4, handleRunTagsPut);
   });
-  app2.delete("/api/benchmarks/:benchmarkId/runs/:filename/tags", (c4) => {
+  app2.delete("/api/projects/:projectId/runs/:filename/tags", (c4) => {
     if (readOnly) {
       return c4.json({ error: "Studio is running in read-only mode" }, 403);
     }
-    return withBenchmark(c4, handleRunTagsDelete);
+    return withProject(c4, handleRunTagsDelete);
   });
-  app2.get("/api/benchmarks/:benchmarkId/runs/:filename", (c4) => withBenchmark(c4, handleRunDetail));
-  app2.get(
-    "/api/benchmarks/:benchmarkId/runs/:filename/suites",
-    (c4) => withBenchmark(c4, handleRunSuites)
-  );
+  app2.get("/api/projects/:projectId/runs/:filename", (c4) => withProject(c4, handleRunDetail));
+  app2.get("/api/projects/:projectId/runs/:filename/log", (c4) => withProject(c4, handleRunLog));
+  app2.get("/api/projects/:projectId/runs/:filename/suites", (c4) => withProject(c4, handleRunSuites));
   app2.get(
-    "/api/benchmarks/:benchmarkId/runs/:filename/categories",
-    (c4) => withBenchmark(c4, handleRunCategories)
+    "/api/projects/:projectId/runs/:filename/categories",
+    (c4) => withProject(c4, handleRunCategories)
   );
   app2.get(
-    "/api/benchmarks/:benchmarkId/runs/:filename/categories/:category/suites",
-    (c4) => withBenchmark(c4, handleCategorySuites)
+    "/api/projects/:projectId/runs/:filename/categories/:category/suites",
+    (c4) => withProject(c4, handleCategorySuites)
   );
   app2.get(
-    "/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId",
-    (c4) => withBenchmark(c4, handleEvalDetail)
+    "/api/projects/:projectId/runs/:filename/evals/:evalId",
+    (c4) => withProject(c4, handleEvalDetail)
   );
   app2.get(
-    "/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files",
-    (c4) => withBenchmark(c4, handleEvalFiles)
+    "/api/projects/:projectId/runs/:filename/evals/:evalId/files",
+    (c4) => withProject(c4, handleEvalFiles)
   );
   app2.get(
-    "/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files/*",
-    (c4) => withBenchmark(c4, handleEvalFileContent)
+    "/api/projects/:projectId/runs/:filename/evals/:evalId/files/*",
+    (c4) => withProject(c4, handleEvalFileContent)
   );
-  app2.get("/api/benchmarks/:benchmarkId/experiments", (c4) => withBenchmark(c4, handleExperiments));
-  app2.get("/api/benchmarks/:benchmarkId/compare", (c4) => withBenchmark(c4, handleCompare));
-  app2.get("/api/benchmarks/:benchmarkId/targets", (c4) => withBenchmark(c4, handleTargets));
-  app2.get("/api/benchmarks/:benchmarkId/feedback", (c4) => withBenchmark(c4, handleFeedbackRead));
+  app2.get("/api/projects/:projectId/experiments", (c4) => withProject(c4, handleExperiments));
+  app2.get("/api/projects/:projectId/compare", (c4) => withProject(c4, handleCompare));
+  app2.get("/api/projects/:projectId/targets", (c4) => withProject(c4, handleTargets));
+  app2.get("/api/projects/:projectId/feedback", (c4) => withProject(c4, handleFeedbackRead));
   registerEvalRoutes(
     app2,
     (c4) => {
-      const benchmarkId = c4.req.param("benchmarkId");
-      if (benchmarkId) {
-        const benchmark = getBenchmark(benchmarkId);
-        if (benchmark) return benchmark.path;
+      const projectId = c4.req.param("projectId");
+      if (projectId) {
+        const project = getProject(projectId);
+        if (project) return project.path;
       }
       return searchDir;
     },
@@ -10981,36 +11032,32 @@ var resultsServeCommand = command({
       short: "d",
       description: "Working directory (default: current directory)"
     }),
-    multi: flag({
-      long: "multi",
-      description: "Launch in multi-benchmark dashboard mode (deprecated; use auto-detect or --single)"
-    }),
     single: flag({
       long: "single",
-      description: "Force single-benchmark dashboard mode"
+      description: "Force single-project dashboard mode"
     }),
     add: option({
       type: optional(string),
       long: "add",
-      description: "Register a benchmark by path"
+      description: "Register a project by path"
     }),
     remove: option({
       type: optional(string),
       long: "remove",
-      description: "Unregister a benchmark by ID"
+      description: "Unregister a project by ID"
     }),
     readOnly: flag({
       long: "read-only",
       description: "Disable write operations and launch Studio in read-only leaderboard mode"
     })
   },
-  handler: async ({ source, port, dir, multi, single, add, remove, readOnly }) => {
+  handler: async ({ source, port, dir, single, add, remove, readOnly }) => {
     const cwd = dir ?? process.cwd();
     const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
     if (add) {
       try {
-        const entry = addBenchmark(add);
-        console.log(`Registered benchmark: ${entry.name} (${entry.id}) at ${entry.path}`);
+        const entry = addProject(add);
+        console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
       } catch (err2) {
         console.error(`Error: ${err2.message}`);
         process.exit(1);
@@ -11018,11 +11065,11 @@ var resultsServeCommand = command({
       return;
     }
     if (remove) {
-      const removed = removeBenchmark(remove);
+      const removed = removeProject(remove);
       if (removed) {
-        console.log(`Unregistered benchmark: ${remove}`);
+        console.log(`Unregistered project: ${remove}`);
       } else {
-        console.error(`Benchmark not found: ${remove}`);
+        console.error(`Project not found: ${remove}`);
         process.exit(1);
       }
       return;
@@ -11032,12 +11079,9 @@ var resultsServeCommand = command({
     if (yamlConfig?.required_version) {
       await enforceRequiredVersion(yamlConfig.required_version);
     }
-    const registry = loadBenchmarkRegistry();
-    const { isMultiBenchmark, showMultiWarning } = resolveDashboardMode(
-      registry.benchmarks.length,
-      { multi, single }
-    );
-    await syncBenchmarks(registry.benchmarks);
+    const registry = loadProjectRegistry();
+    const { projectDashboard } = resolveDashboardMode(registry.projects.length, { single });
+    await syncProjects(registry.projects);
     try {
       let results = [];
       let sourceFile;
@@ -11061,15 +11105,10 @@ var resultsServeCommand = command({
       const resultDir = sourceFile ? path20.dirname(path20.resolve(sourceFile)) : cwd;
       const app2 = createApp(results, resultDir, cwd, sourceFile, {
         readOnly,
-        multiBenchmarkDashboard: isMultiBenchmark
+        projectDashboard
       });
-      if (showMultiWarning) {
-        console.warn(
-          "Warning: --multi is deprecated. Studio now auto-detects multi-benchmark mode when multiple benchmarks are registered. Use --single to force the single-benchmark view."
-        );
-      }
-      if (isMultiBenchmark) {
-        console.log(`Multi-benchmark mode: ${registry.benchmarks.length} benchmark(s) registered`);
+      if (projectDashboard) {
+        console.log(`Project dashboard: ${registry.projects.length} project(s) registered`);
       } else if (results.length > 0 && sourceFile) {
         console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
       } else {
@@ -11077,7 +11116,7 @@ var resultsServeCommand = command({
         console.log("Run an evaluation to see results: agentv eval <eval-file>");
       }
       console.log(`Dashboard: http://localhost:${listenPort}`);
-      console.log(`Benchmarks API: http://localhost:${listenPort}/api/benchmarks`);
+      console.log(`Projects API: http://localhost:${listenPort}/api/projects`);
       console.log("Press Ctrl+C to stop");
       const { serve: startServer } = await import("@hono/node-server");
       startServer({
@@ -12549,4 +12588,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-2WS3BEPV.js.map
+//# sourceMappingURL=chunk-GLJVO5PK.js.map