agentv 4.3.3 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,27 +24,32 @@ import {
24
24
  validateFileReferences,
25
25
  validateTargetsFile,
26
26
  writeArtifactsFromResults
27
- } from "./chunk-EW4COQU2.js";
27
+ } from "./chunk-BAYNXTX6.js";
28
28
  import {
29
29
  DEFAULT_CATEGORY,
30
30
  PASS_THRESHOLD,
31
+ addProject,
31
32
  createBuiltinRegistry,
32
33
  deriveCategory,
34
+ discoverProjects,
33
35
  executeScript,
34
36
  getAgentvHome,
35
37
  getOutputFilenames,
38
+ getProject,
36
39
  getWorkspacePoolRoot,
37
40
  isAgentSkillsFormat,
41
+ loadProjectRegistry,
38
42
  loadTestById,
39
43
  loadTestSuite,
40
44
  loadTests,
41
45
  normalizeLineEndings,
42
46
  parseAgentSkillsEvals,
47
+ removeProject,
43
48
  toCamelCaseDeep,
44
49
  toSnakeCaseDeep as toSnakeCaseDeep2,
45
50
  transpileEvalYamlFile,
46
51
  trimBaselineResult
47
- } from "./chunk-HMOXP7T5.js";
52
+ } from "./chunk-63NDZ6UC.js";
48
53
  import {
49
54
  __commonJS,
50
55
  __esm,
@@ -4218,7 +4223,7 @@ var evalRunCommand = command({
4218
4223
  },
4219
4224
  handler: async (args) => {
4220
4225
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4221
- const { launchInteractiveWizard } = await import("./interactive-6BO4RY6U.js");
4226
+ const { launchInteractiveWizard } = await import("./interactive-YNSOO2BS.js");
4222
4227
  await launchInteractiveWizard();
4223
4228
  return;
4224
4229
  }
@@ -6255,7 +6260,16 @@ function loadStudioConfig(agentvDir) {
6255
6260
  if (!parsed || typeof parsed !== "object") {
6256
6261
  return { ...DEFAULTS };
6257
6262
  }
6258
- const threshold = typeof parsed.pass_threshold === "number" ? parsed.pass_threshold : DEFAULTS.pass_threshold;
6263
+ const studio = parsed.studio;
6264
+ let threshold = DEFAULTS.pass_threshold;
6265
+ if (studio && typeof studio === "object" && !Array.isArray(studio)) {
6266
+ const studioThreshold = studio.pass_threshold;
6267
+ if (typeof studioThreshold === "number") {
6268
+ threshold = studioThreshold;
6269
+ }
6270
+ } else if (typeof parsed.pass_threshold === "number") {
6271
+ threshold = parsed.pass_threshold;
6272
+ }
6259
6273
  return {
6260
6274
  pass_threshold: Math.min(1, Math.max(0, threshold))
6261
6275
  };
@@ -6265,7 +6279,18 @@ function saveStudioConfig(agentvDir, config) {
6265
6279
  mkdirSync2(agentvDir, { recursive: true });
6266
6280
  }
6267
6281
  const configPath = path9.join(agentvDir, "config.yaml");
6268
- const yamlStr = stringifyYaml2(config);
6282
+ let existing = {};
6283
+ if (existsSync7(configPath)) {
6284
+ const raw = readFileSync8(configPath, "utf-8");
6285
+ const parsed = parseYaml(raw);
6286
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
6287
+ existing = parsed;
6288
+ }
6289
+ }
6290
+ const { pass_threshold: _, ...rest } = existing;
6291
+ existing = rest;
6292
+ existing.studio = { ...config };
6293
+ const yamlStr = stringifyYaml2(existing);
6269
6294
  writeFileSync3(configPath, yamlStr, "utf-8");
6270
6295
  }
6271
6296
 
@@ -6289,11 +6314,366 @@ function writeFeedback(cwd, data) {
6289
6314
  writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
6290
6315
  `, "utf8");
6291
6316
  }
6317
+ function buildFileTree(dirPath, relativeTo) {
6318
+ if (!existsSync8(dirPath) || !statSync4(dirPath).isDirectory()) {
6319
+ return [];
6320
+ }
6321
+ const entries2 = readdirSync3(dirPath, { withFileTypes: true });
6322
+ return entries2.sort((a, b) => {
6323
+ if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
6324
+ return a.name.localeCompare(b.name);
6325
+ }).map((entry) => {
6326
+ const fullPath = path10.join(dirPath, entry.name);
6327
+ const relPath = path10.relative(relativeTo, fullPath);
6328
+ if (entry.isDirectory()) {
6329
+ return {
6330
+ name: entry.name,
6331
+ path: relPath,
6332
+ type: "dir",
6333
+ children: buildFileTree(fullPath, relativeTo)
6334
+ };
6335
+ }
6336
+ return { name: entry.name, path: relPath, type: "file" };
6337
+ });
6338
+ }
6339
+ function inferLanguage(filePath) {
6340
+ const ext = path10.extname(filePath).toLowerCase();
6341
+ const langMap = {
6342
+ ".json": "json",
6343
+ ".jsonl": "json",
6344
+ ".ts": "typescript",
6345
+ ".tsx": "typescript",
6346
+ ".js": "javascript",
6347
+ ".jsx": "javascript",
6348
+ ".md": "markdown",
6349
+ ".yaml": "yaml",
6350
+ ".yml": "yaml",
6351
+ ".log": "plaintext",
6352
+ ".txt": "plaintext",
6353
+ ".py": "python",
6354
+ ".sh": "shell",
6355
+ ".bash": "shell",
6356
+ ".css": "css",
6357
+ ".html": "html",
6358
+ ".xml": "xml",
6359
+ ".svg": "xml",
6360
+ ".toml": "toml",
6361
+ ".diff": "diff",
6362
+ ".patch": "diff"
6363
+ };
6364
+ return langMap[ext] ?? "plaintext";
6365
+ }
6366
+ function stripHeavyFields(results) {
6367
+ return results.map((r) => {
6368
+ const { requests, trace, ...rest } = r;
6369
+ const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
6370
+ const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
6371
+ return {
6372
+ ...rest,
6373
+ ...toolCalls && { _toolCalls: toolCalls },
6374
+ ...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
6375
+ };
6376
+ });
6377
+ }
6378
+ function handleRuns(c3, { searchDir }) {
6379
+ const metas = listResultFiles(searchDir);
6380
+ return c3.json({
6381
+ runs: metas.map((m) => {
6382
+ let target;
6383
+ let experiment;
6384
+ try {
6385
+ const records = loadLightweightResults(m.path);
6386
+ if (records.length > 0) {
6387
+ target = records[0].target;
6388
+ experiment = records[0].experiment;
6389
+ }
6390
+ } catch {
6391
+ }
6392
+ return {
6393
+ filename: m.filename,
6394
+ path: m.path,
6395
+ timestamp: m.timestamp,
6396
+ test_count: m.testCount,
6397
+ pass_rate: m.passRate,
6398
+ avg_score: m.avgScore,
6399
+ size_bytes: m.sizeBytes,
6400
+ ...target && { target },
6401
+ ...experiment && { experiment }
6402
+ };
6403
+ })
6404
+ });
6405
+ }
6406
+ function handleRunDetail(c3, { searchDir }) {
6407
+ const filename = c3.req.param("filename");
6408
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6409
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6410
+ try {
6411
+ const loaded = patchTestIds(loadManifestResults(meta.path));
6412
+ return c3.json({ results: stripHeavyFields(loaded), source: meta.filename });
6413
+ } catch {
6414
+ return c3.json({ error: "Failed to load run" }, 500);
6415
+ }
6416
+ }
6417
+ function handleRunDatasets(c3, { searchDir, agentvDir }) {
6418
+ const filename = c3.req.param("filename");
6419
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6420
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6421
+ try {
6422
+ const loaded = patchTestIds(loadManifestResults(meta.path));
6423
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6424
+ const datasetMap = /* @__PURE__ */ new Map();
6425
+ for (const r of loaded) {
6426
+ const ds = r.dataset ?? r.target ?? "default";
6427
+ const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
6428
+ entry.total++;
6429
+ if (r.score >= pass_threshold) entry.passed++;
6430
+ entry.scoreSum += r.score;
6431
+ datasetMap.set(ds, entry);
6432
+ }
6433
+ const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
6434
+ name,
6435
+ total: entry.total,
6436
+ passed: entry.passed,
6437
+ failed: entry.total - entry.passed,
6438
+ avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
6439
+ }));
6440
+ return c3.json({ datasets });
6441
+ } catch {
6442
+ return c3.json({ error: "Failed to load datasets" }, 500);
6443
+ }
6444
+ }
6445
+ function handleRunCategories(c3, { searchDir, agentvDir }) {
6446
+ const filename = c3.req.param("filename");
6447
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6448
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6449
+ try {
6450
+ const loaded = patchTestIds(loadManifestResults(meta.path));
6451
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6452
+ const categoryMap = /* @__PURE__ */ new Map();
6453
+ for (const r of loaded) {
6454
+ const cat = r.category ?? DEFAULT_CATEGORY;
6455
+ const entry = categoryMap.get(cat) ?? {
6456
+ total: 0,
6457
+ passed: 0,
6458
+ scoreSum: 0,
6459
+ datasets: /* @__PURE__ */ new Set()
6460
+ };
6461
+ entry.total++;
6462
+ if (r.score >= pass_threshold) entry.passed++;
6463
+ entry.scoreSum += r.score;
6464
+ entry.datasets.add(r.dataset ?? r.target ?? "default");
6465
+ categoryMap.set(cat, entry);
6466
+ }
6467
+ const categories = [...categoryMap.entries()].map(([name, entry]) => ({
6468
+ name,
6469
+ total: entry.total,
6470
+ passed: entry.passed,
6471
+ failed: entry.total - entry.passed,
6472
+ avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0,
6473
+ dataset_count: entry.datasets.size
6474
+ }));
6475
+ return c3.json({ categories });
6476
+ } catch {
6477
+ return c3.json({ error: "Failed to load categories" }, 500);
6478
+ }
6479
+ }
6480
+ function handleCategoryDatasets(c3, { searchDir, agentvDir }) {
6481
+ const filename = c3.req.param("filename");
6482
+ const category = decodeURIComponent(c3.req.param("category") ?? "");
6483
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6484
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6485
+ try {
6486
+ const loaded = patchTestIds(loadManifestResults(meta.path));
6487
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6488
+ const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
6489
+ const datasetMap = /* @__PURE__ */ new Map();
6490
+ for (const r of filtered) {
6491
+ const ds = r.dataset ?? r.target ?? "default";
6492
+ const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
6493
+ entry.total++;
6494
+ if (r.score >= pass_threshold) entry.passed++;
6495
+ entry.scoreSum += r.score;
6496
+ datasetMap.set(ds, entry);
6497
+ }
6498
+ const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
6499
+ name,
6500
+ total: entry.total,
6501
+ passed: entry.passed,
6502
+ failed: entry.total - entry.passed,
6503
+ avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
6504
+ }));
6505
+ return c3.json({ datasets });
6506
+ } catch {
6507
+ return c3.json({ error: "Failed to load datasets" }, 500);
6508
+ }
6509
+ }
6510
+ function handleEvalDetail(c3, { searchDir }) {
6511
+ const filename = c3.req.param("filename");
6512
+ const evalId = c3.req.param("evalId");
6513
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6514
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6515
+ try {
6516
+ const loaded = patchTestIds(loadManifestResults(meta.path));
6517
+ const result = loaded.find((r) => r.testId === evalId);
6518
+ if (!result) return c3.json({ error: "Eval not found" }, 404);
6519
+ return c3.json({ eval: result });
6520
+ } catch {
6521
+ return c3.json({ error: "Failed to load eval" }, 500);
6522
+ }
6523
+ }
6524
+ function handleEvalFiles(c3, { searchDir }) {
6525
+ const filename = c3.req.param("filename");
6526
+ const evalId = c3.req.param("evalId");
6527
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6528
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6529
+ try {
6530
+ const content = readFileSync9(meta.path, "utf8");
6531
+ const records = parseResultManifest(content);
6532
+ const record = records.find((r) => (r.test_id ?? r.eval_id) === evalId);
6533
+ if (!record) return c3.json({ error: "Eval not found" }, 404);
6534
+ const baseDir = path10.dirname(meta.path);
6535
+ const knownPaths = [
6536
+ record.grading_path,
6537
+ record.timing_path,
6538
+ record.input_path,
6539
+ record.output_path,
6540
+ record.response_path
6541
+ ].filter((p) => !!p);
6542
+ if (knownPaths.length === 0) return c3.json({ files: [] });
6543
+ const artifactDirs = knownPaths.map((p) => path10.dirname(p));
6544
+ let commonDir = artifactDirs[0];
6545
+ for (const dir of artifactDirs) {
6546
+ while (!dir.startsWith(commonDir)) {
6547
+ commonDir = path10.dirname(commonDir);
6548
+ }
6549
+ }
6550
+ const artifactAbsDir = path10.join(baseDir, commonDir);
6551
+ const files = buildFileTree(artifactAbsDir, baseDir);
6552
+ return c3.json({ files });
6553
+ } catch {
6554
+ return c3.json({ error: "Failed to load file tree" }, 500);
6555
+ }
6556
+ }
6557
+ function handleEvalFileContent(c3, { searchDir }) {
6558
+ const filename = c3.req.param("filename");
6559
+ const evalId = c3.req.param("evalId");
6560
+ const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
6561
+ if (!meta) return c3.json({ error: "Run not found" }, 404);
6562
+ const marker = `/runs/${filename}/evals/${evalId}/files/`;
6563
+ const markerIdx = c3.req.path.indexOf(marker);
6564
+ const filePath = markerIdx >= 0 ? c3.req.path.slice(markerIdx + marker.length) : "";
6565
+ if (!filePath) return c3.json({ error: "No file path specified" }, 400);
6566
+ const baseDir = path10.dirname(meta.path);
6567
+ const absolutePath = path10.resolve(baseDir, filePath);
6568
+ if (!absolutePath.startsWith(path10.resolve(baseDir) + path10.sep) && absolutePath !== path10.resolve(baseDir)) {
6569
+ return c3.json({ error: "Path traversal not allowed" }, 403);
6570
+ }
6571
+ if (!existsSync8(absolutePath) || !statSync4(absolutePath).isFile()) {
6572
+ return c3.json({ error: "File not found" }, 404);
6573
+ }
6574
+ try {
6575
+ const fileContent = readFileSync9(absolutePath, "utf8");
6576
+ const language = inferLanguage(absolutePath);
6577
+ return c3.json({ content: fileContent, language });
6578
+ } catch {
6579
+ return c3.json({ error: "Failed to read file" }, 500);
6580
+ }
6581
+ }
6582
+ function handleExperiments(c3, { searchDir, agentvDir }) {
6583
+ const metas = listResultFiles(searchDir);
6584
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6585
+ const experimentMap = /* @__PURE__ */ new Map();
6586
+ for (const m of metas) {
6587
+ try {
6588
+ const records = loadLightweightResults(m.path);
6589
+ for (const r of records) {
6590
+ const experiment = r.experiment ?? "default";
6591
+ const entry = experimentMap.get(experiment) ?? {
6592
+ targets: /* @__PURE__ */ new Set(),
6593
+ runFilenames: /* @__PURE__ */ new Set(),
6594
+ evalCount: 0,
6595
+ passedCount: 0,
6596
+ lastTimestamp: ""
6597
+ };
6598
+ entry.runFilenames.add(m.filename);
6599
+ if (r.target) entry.targets.add(r.target);
6600
+ entry.evalCount++;
6601
+ if (r.score >= pass_threshold) entry.passedCount++;
6602
+ if (r.timestamp && r.timestamp > entry.lastTimestamp) {
6603
+ entry.lastTimestamp = r.timestamp;
6604
+ }
6605
+ experimentMap.set(experiment, entry);
6606
+ }
6607
+ } catch {
6608
+ }
6609
+ }
6610
+ const experiments = [...experimentMap.entries()].map(([name, entry]) => ({
6611
+ name,
6612
+ run_count: entry.runFilenames.size,
6613
+ target_count: entry.targets.size,
6614
+ eval_count: entry.evalCount,
6615
+ passed_count: entry.passedCount,
6616
+ pass_rate: entry.evalCount > 0 ? entry.passedCount / entry.evalCount : 0,
6617
+ last_run: entry.lastTimestamp || null
6618
+ }));
6619
+ return c3.json({ experiments });
6620
+ }
6621
+ function handleTargets(c3, { searchDir, agentvDir }) {
6622
+ const metas = listResultFiles(searchDir);
6623
+ const { pass_threshold } = loadStudioConfig(agentvDir);
6624
+ const targetMap = /* @__PURE__ */ new Map();
6625
+ for (const m of metas) {
6626
+ try {
6627
+ const records = loadLightweightResults(m.path);
6628
+ for (const r of records) {
6629
+ const target = r.target ?? "default";
6630
+ const entry = targetMap.get(target) ?? {
6631
+ experiments: /* @__PURE__ */ new Set(),
6632
+ runFilenames: /* @__PURE__ */ new Set(),
6633
+ evalCount: 0,
6634
+ passedCount: 0
6635
+ };
6636
+ entry.runFilenames.add(m.filename);
6637
+ if (r.experiment) entry.experiments.add(r.experiment);
6638
+ entry.evalCount++;
6639
+ if (r.score >= pass_threshold) entry.passedCount++;
6640
+ targetMap.set(target, entry);
6641
+ }
6642
+ } catch {
6643
+ }
6644
+ }
6645
+ const targets = [...targetMap.entries()].map(([name, entry]) => ({
6646
+ name,
6647
+ run_count: entry.runFilenames.size,
6648
+ experiment_count: entry.experiments.size,
6649
+ eval_count: entry.evalCount,
6650
+ passed_count: entry.passedCount,
6651
+ pass_rate: entry.evalCount > 0 ? entry.passedCount / entry.evalCount : 0
6652
+ }));
6653
+ return c3.json({ targets });
6654
+ }
6655
+ function handleConfig(c3, { agentvDir }) {
6656
+ return c3.json(loadStudioConfig(agentvDir));
6657
+ }
6658
+ function handleFeedbackRead(c3, { searchDir }) {
6659
+ const resultsDir = path10.join(searchDir, ".agentv", "results");
6660
+ return c3.json(readFeedback(existsSync8(resultsDir) ? resultsDir : searchDir));
6661
+ }
6292
6662
  function createApp(results, resultDir, cwd, sourceFile, options) {
6293
6663
  const searchDir = cwd ?? resultDir;
6294
6664
  const agentvDir = path10.join(searchDir, ".agentv");
6665
+ const defaultCtx = { searchDir, agentvDir };
6295
6666
  const app2 = new Hono();
6296
- app2.get("/api/config", (c3) => c3.json(loadStudioConfig(agentvDir)));
6667
+ function withProject(c3, handler) {
6668
+ const project = getProject(c3.req.param("projectId") ?? "");
6669
+ if (!project || !existsSync8(project.path)) {
6670
+ return c3.json({ error: "Project not found" }, 404);
6671
+ }
6672
+ return handler(c3, {
6673
+ searchDir: project.path,
6674
+ agentvDir: path10.join(project.path, ".agentv")
6675
+ });
6676
+ }
6297
6677
  app2.post("/api/config", async (c3) => {
6298
6678
  try {
6299
6679
  const body = await c3.req.json();
@@ -6308,60 +6688,100 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6308
6688
  return c3.json({ error: "Failed to save config" }, 500);
6309
6689
  }
6310
6690
  });
6311
- const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
6312
- if (!studioDistPath || !existsSync8(path10.join(studioDistPath, "index.html"))) {
6313
- throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
6691
+ function projectEntryToWire(entry) {
6692
+ return {
6693
+ id: entry.id,
6694
+ name: entry.name,
6695
+ path: entry.path,
6696
+ added_at: entry.addedAt,
6697
+ last_opened_at: entry.lastOpenedAt
6698
+ };
6314
6699
  }
6315
- app2.get("/", (c3) => {
6316
- const indexPath = path10.join(studioDistPath, "index.html");
6317
- if (existsSync8(indexPath)) {
6318
- return c3.html(readFileSync9(indexPath, "utf8"));
6319
- }
6320
- return c3.notFound();
6321
- });
6322
- app2.get("/api/runs", (c3) => {
6323
- const metas = listResultFiles(searchDir);
6324
- return c3.json({
6325
- runs: metas.map((m) => {
6326
- let target;
6327
- let experiment;
6328
- try {
6329
- const records = loadLightweightResults(m.path);
6330
- if (records.length > 0) {
6331
- target = records[0].target;
6332
- experiment = records[0].experiment;
6333
- }
6334
- } catch {
6700
+ app2.get("/api/projects", (c3) => {
6701
+ const registry = loadProjectRegistry();
6702
+ const projects = registry.projects.map((p) => {
6703
+ let runCount = 0;
6704
+ let passRate = 0;
6705
+ let lastRun = null;
6706
+ try {
6707
+ const metas = listResultFiles(p.path);
6708
+ runCount = metas.length;
6709
+ if (metas.length > 0) {
6710
+ const totalPassRate = metas.reduce((sum, m) => sum + m.passRate, 0);
6711
+ passRate = totalPassRate / metas.length;
6712
+ lastRun = metas[0].timestamp;
6335
6713
  }
6336
- return {
6337
- filename: m.filename,
6338
- path: m.path,
6339
- timestamp: m.timestamp,
6340
- test_count: m.testCount,
6341
- pass_rate: m.passRate,
6342
- avg_score: m.avgScore,
6343
- size_bytes: m.sizeBytes,
6344
- ...target && { target },
6345
- ...experiment && { experiment }
6346
- };
6347
- })
6714
+ } catch {
6715
+ }
6716
+ return {
6717
+ ...projectEntryToWire(p),
6718
+ run_count: runCount,
6719
+ pass_rate: passRate,
6720
+ last_run: lastRun
6721
+ };
6348
6722
  });
6723
+ return c3.json({ projects });
6349
6724
  });
6350
- app2.get("/api/runs/:filename", (c3) => {
6351
- const filename = c3.req.param("filename");
6352
- const metas = listResultFiles(searchDir);
6353
- const meta = metas.find((m) => m.filename === filename);
6354
- if (!meta) {
6355
- return c3.json({ error: "Run not found" }, 404);
6725
+ app2.post("/api/projects", async (c3) => {
6726
+ try {
6727
+ const body = await c3.req.json();
6728
+ if (!body.path) return c3.json({ error: "Missing path" }, 400);
6729
+ const entry = addProject(body.path);
6730
+ return c3.json(projectEntryToWire(entry), 201);
6731
+ } catch (err2) {
6732
+ return c3.json({ error: err2.message }, 400);
6733
+ }
6734
+ });
6735
+ app2.delete("/api/projects/:projectId", (c3) => {
6736
+ const removed = removeProject(c3.req.param("projectId") ?? "");
6737
+ if (!removed) return c3.json({ error: "Project not found" }, 404);
6738
+ return c3.json({ ok: true });
6739
+ });
6740
+ app2.get("/api/projects/:projectId/summary", (c3) => {
6741
+ const project = getProject(c3.req.param("projectId") ?? "");
6742
+ if (!project) return c3.json({ error: "Project not found" }, 404);
6743
+ try {
6744
+ const metas = listResultFiles(project.path);
6745
+ const runCount = metas.length;
6746
+ const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
6747
+ const lastRun = metas.length > 0 ? metas[0].timestamp : null;
6748
+ return c3.json({
6749
+ id: project.id,
6750
+ name: project.name,
6751
+ path: project.path,
6752
+ run_count: runCount,
6753
+ pass_rate: passRate,
6754
+ last_run: lastRun
6755
+ });
6756
+ } catch {
6757
+ return c3.json({ error: "Failed to read project" }, 500);
6356
6758
  }
6759
+ });
6760
+ app2.post("/api/projects/discover", async (c3) => {
6357
6761
  try {
6358
- const loaded = patchTestIds(loadManifestResults(meta.path));
6359
- const lightResults = stripHeavyFields(loaded);
6360
- return c3.json({ results: lightResults, source: meta.filename });
6762
+ const body = await c3.req.json();
6763
+ if (!body.path) return c3.json({ error: "Missing path" }, 400);
6764
+ const discovered = discoverProjects(body.path);
6765
+ const registered = discovered.map((p) => projectEntryToWire(addProject(p)));
6766
+ return c3.json({ discovered: registered });
6361
6767
  } catch (err2) {
6362
- return c3.json({ error: "Failed to load run" }, 500);
6768
+ return c3.json({ error: err2.message }, 400);
6363
6769
  }
6364
6770
  });
6771
+ app2.get("/api/config", (c3) => handleConfig(c3, defaultCtx));
6772
+ app2.get("/api/runs", (c3) => handleRuns(c3, defaultCtx));
6773
+ app2.get("/api/runs/:filename", (c3) => handleRunDetail(c3, defaultCtx));
6774
+ app2.get("/api/runs/:filename/datasets", (c3) => handleRunDatasets(c3, defaultCtx));
6775
+ app2.get("/api/runs/:filename/categories", (c3) => handleRunCategories(c3, defaultCtx));
6776
+ app2.get(
6777
+ "/api/runs/:filename/categories/:category/datasets",
6778
+ (c3) => handleCategoryDatasets(c3, defaultCtx)
6779
+ );
6780
+ app2.get("/api/runs/:filename/evals/:evalId", (c3) => handleEvalDetail(c3, defaultCtx));
6781
+ app2.get("/api/runs/:filename/evals/:evalId/files", (c3) => handleEvalFiles(c3, defaultCtx));
6782
+ app2.get("/api/runs/:filename/evals/:evalId/files/*", (c3) => handleEvalFileContent(c3, defaultCtx));
6783
+ app2.get("/api/experiments", (c3) => handleExperiments(c3, defaultCtx));
6784
+ app2.get("/api/targets", (c3) => handleTargets(c3, defaultCtx));
6365
6785
  app2.get("/api/feedback", (c3) => {
6366
6786
  const data = readFeedback(resultDir);
6367
6787
  return c3.json(data);
@@ -6404,127 +6824,6 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6404
6824
  writeFeedback(resultDir, existing);
6405
6825
  return c3.json(existing);
6406
6826
  });
6407
- app2.get("/api/runs/:filename/datasets", (c3) => {
6408
- const filename = c3.req.param("filename");
6409
- const metas = listResultFiles(searchDir);
6410
- const meta = metas.find((m) => m.filename === filename);
6411
- if (!meta) {
6412
- return c3.json({ error: "Run not found" }, 404);
6413
- }
6414
- try {
6415
- const loaded = patchTestIds(loadManifestResults(meta.path));
6416
- const { pass_threshold } = loadStudioConfig(agentvDir);
6417
- const datasetMap = /* @__PURE__ */ new Map();
6418
- for (const r of loaded) {
6419
- const ds = r.dataset ?? r.target ?? "default";
6420
- const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
6421
- entry.total++;
6422
- if (r.score >= pass_threshold) entry.passed++;
6423
- entry.scoreSum += r.score;
6424
- datasetMap.set(ds, entry);
6425
- }
6426
- const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
6427
- name,
6428
- total: entry.total,
6429
- passed: entry.passed,
6430
- failed: entry.total - entry.passed,
6431
- avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
6432
- }));
6433
- return c3.json({ datasets });
6434
- } catch {
6435
- return c3.json({ error: "Failed to load datasets" }, 500);
6436
- }
6437
- });
6438
- app2.get("/api/runs/:filename/categories", (c3) => {
6439
- const filename = c3.req.param("filename");
6440
- const metas = listResultFiles(searchDir);
6441
- const meta = metas.find((m) => m.filename === filename);
6442
- if (!meta) {
6443
- return c3.json({ error: "Run not found" }, 404);
6444
- }
6445
- try {
6446
- const loaded = patchTestIds(loadManifestResults(meta.path));
6447
- const { pass_threshold } = loadStudioConfig(agentvDir);
6448
- const categoryMap = /* @__PURE__ */ new Map();
6449
- for (const r of loaded) {
6450
- const cat = r.category ?? DEFAULT_CATEGORY;
6451
- const entry = categoryMap.get(cat) ?? {
6452
- total: 0,
6453
- passed: 0,
6454
- scoreSum: 0,
6455
- datasets: /* @__PURE__ */ new Set()
6456
- };
6457
- entry.total++;
6458
- if (r.score >= pass_threshold) entry.passed++;
6459
- entry.scoreSum += r.score;
6460
- entry.datasets.add(r.dataset ?? r.target ?? "default");
6461
- categoryMap.set(cat, entry);
6462
- }
6463
- const categories = [...categoryMap.entries()].map(([name, entry]) => ({
6464
- name,
6465
- total: entry.total,
6466
- passed: entry.passed,
6467
- failed: entry.total - entry.passed,
6468
- avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0,
6469
- dataset_count: entry.datasets.size
6470
- }));
6471
- return c3.json({ categories });
6472
- } catch {
6473
- return c3.json({ error: "Failed to load categories" }, 500);
6474
- }
6475
- });
6476
- app2.get("/api/runs/:filename/categories/:category/datasets", (c3) => {
6477
- const filename = c3.req.param("filename");
6478
- const category = decodeURIComponent(c3.req.param("category"));
6479
- const metas = listResultFiles(searchDir);
6480
- const meta = metas.find((m) => m.filename === filename);
6481
- if (!meta) {
6482
- return c3.json({ error: "Run not found" }, 404);
6483
- }
6484
- try {
6485
- const loaded = patchTestIds(loadManifestResults(meta.path));
6486
- const { pass_threshold } = loadStudioConfig(agentvDir);
6487
- const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
6488
- const datasetMap = /* @__PURE__ */ new Map();
6489
- for (const r of filtered) {
6490
- const ds = r.dataset ?? r.target ?? "default";
6491
- const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
6492
- entry.total++;
6493
- if (r.score >= pass_threshold) entry.passed++;
6494
- entry.scoreSum += r.score;
6495
- datasetMap.set(ds, entry);
6496
- }
6497
- const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
6498
- name,
6499
- total: entry.total,
6500
- passed: entry.passed,
6501
- failed: entry.total - entry.passed,
6502
- avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
6503
- }));
6504
- return c3.json({ datasets });
6505
- } catch {
6506
- return c3.json({ error: "Failed to load datasets" }, 500);
6507
- }
6508
- });
6509
- app2.get("/api/runs/:filename/evals/:evalId", (c3) => {
6510
- const filename = c3.req.param("filename");
6511
- const evalId = c3.req.param("evalId");
6512
- const metas = listResultFiles(searchDir);
6513
- const meta = metas.find((m) => m.filename === filename);
6514
- if (!meta) {
6515
- return c3.json({ error: "Run not found" }, 404);
6516
- }
6517
- try {
6518
- const loaded = patchTestIds(loadManifestResults(meta.path));
6519
- const result = loaded.find((r) => r.testId === evalId);
6520
- if (!result) {
6521
- return c3.json({ error: "Eval not found" }, 404);
6522
- }
6523
- return c3.json({ eval: result });
6524
- } catch {
6525
- return c3.json({ error: "Failed to load eval" }, 500);
6526
- }
6527
- });
6528
6827
  app2.get("/api/index", (c3) => {
6529
6828
  const metas = listResultFiles(searchDir);
6530
6829
  const entries2 = metas.map((m) => {
@@ -6545,204 +6844,49 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6545
6844
  });
6546
6845
  return c3.json({ entries: entries2 });
6547
6846
  });
6548
- function buildFileTree(dirPath, relativeTo) {
6549
- if (!existsSync8(dirPath) || !statSync4(dirPath).isDirectory()) {
6550
- return [];
6551
- }
6552
- const entries2 = readdirSync3(dirPath, { withFileTypes: true });
6553
- return entries2.sort((a, b) => {
6554
- if (a.isDirectory() !== b.isDirectory()) return a.isDirectory() ? -1 : 1;
6555
- return a.name.localeCompare(b.name);
6556
- }).map((entry) => {
6557
- const fullPath = path10.join(dirPath, entry.name);
6558
- const relPath = path10.relative(relativeTo, fullPath);
6559
- if (entry.isDirectory()) {
6560
- return {
6561
- name: entry.name,
6562
- path: relPath,
6563
- type: "dir",
6564
- children: buildFileTree(fullPath, relativeTo)
6565
- };
6566
- }
6567
- return { name: entry.name, path: relPath, type: "file" };
6568
- });
6569
- }
6570
- function inferLanguage(filePath) {
6571
- const ext = path10.extname(filePath).toLowerCase();
6572
- const langMap = {
6573
- ".json": "json",
6574
- ".jsonl": "json",
6575
- ".ts": "typescript",
6576
- ".tsx": "typescript",
6577
- ".js": "javascript",
6578
- ".jsx": "javascript",
6579
- ".md": "markdown",
6580
- ".yaml": "yaml",
6581
- ".yml": "yaml",
6582
- ".log": "plaintext",
6583
- ".txt": "plaintext",
6584
- ".py": "python",
6585
- ".sh": "shell",
6586
- ".bash": "shell",
6587
- ".css": "css",
6588
- ".html": "html",
6589
- ".xml": "xml",
6590
- ".svg": "xml",
6591
- ".toml": "toml",
6592
- ".diff": "diff",
6593
- ".patch": "diff"
6594
- };
6595
- return langMap[ext] ?? "plaintext";
6847
+ app2.get("/api/projects/:projectId/config", (c3) => withProject(c3, handleConfig));
6848
+ app2.get("/api/projects/:projectId/runs", (c3) => withProject(c3, handleRuns));
6849
+ app2.get("/api/projects/:projectId/runs/:filename", (c3) => withProject(c3, handleRunDetail));
6850
+ app2.get(
6851
+ "/api/projects/:projectId/runs/:filename/datasets",
6852
+ (c3) => withProject(c3, handleRunDatasets)
6853
+ );
6854
+ app2.get(
6855
+ "/api/projects/:projectId/runs/:filename/categories",
6856
+ (c3) => withProject(c3, handleRunCategories)
6857
+ );
6858
+ app2.get(
6859
+ "/api/projects/:projectId/runs/:filename/categories/:category/datasets",
6860
+ (c3) => withProject(c3, handleCategoryDatasets)
6861
+ );
6862
+ app2.get(
6863
+ "/api/projects/:projectId/runs/:filename/evals/:evalId",
6864
+ (c3) => withProject(c3, handleEvalDetail)
6865
+ );
6866
+ app2.get(
6867
+ "/api/projects/:projectId/runs/:filename/evals/:evalId/files",
6868
+ (c3) => withProject(c3, handleEvalFiles)
6869
+ );
6870
+ app2.get(
6871
+ "/api/projects/:projectId/runs/:filename/evals/:evalId/files/*",
6872
+ (c3) => withProject(c3, handleEvalFileContent)
6873
+ );
6874
+ app2.get("/api/projects/:projectId/experiments", (c3) => withProject(c3, handleExperiments));
6875
+ app2.get("/api/projects/:projectId/targets", (c3) => withProject(c3, handleTargets));
6876
+ app2.get("/api/projects/:projectId/feedback", (c3) => withProject(c3, handleFeedbackRead));
6877
+ const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
6878
+ if (!studioDistPath || !existsSync8(path10.join(studioDistPath, "index.html"))) {
6879
+ throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
6596
6880
  }
6597
- app2.get("/api/runs/:filename/evals/:evalId/files", (c3) => {
6598
- const filename = c3.req.param("filename");
6599
- const evalId = c3.req.param("evalId");
6600
- const metas = listResultFiles(searchDir);
6601
- const meta = metas.find((m) => m.filename === filename);
6602
- if (!meta) {
6603
- return c3.json({ error: "Run not found" }, 404);
6604
- }
6605
- try {
6606
- const content = readFileSync9(meta.path, "utf8");
6607
- const records = parseResultManifest(content);
6608
- const record = records.find((r) => (r.test_id ?? r.eval_id) === evalId);
6609
- if (!record) {
6610
- return c3.json({ error: "Eval not found" }, 404);
6611
- }
6612
- const baseDir = path10.dirname(meta.path);
6613
- const knownPaths = [
6614
- record.grading_path,
6615
- record.timing_path,
6616
- record.input_path,
6617
- record.output_path,
6618
- record.response_path
6619
- ].filter((p) => !!p);
6620
- if (knownPaths.length === 0) {
6621
- return c3.json({ files: [] });
6622
- }
6623
- const artifactDirs = knownPaths.map((p) => path10.dirname(p));
6624
- let commonDir = artifactDirs[0];
6625
- for (const dir of artifactDirs) {
6626
- while (!dir.startsWith(commonDir)) {
6627
- commonDir = path10.dirname(commonDir);
6628
- }
6629
- }
6630
- const artifactAbsDir = path10.join(baseDir, commonDir);
6631
- const files = buildFileTree(artifactAbsDir, baseDir);
6632
- return c3.json({ files });
6633
- } catch {
6634
- return c3.json({ error: "Failed to load file tree" }, 500);
6635
- }
6636
- });
6637
- app2.get("/api/runs/:filename/evals/:evalId/files/*", (c3) => {
6638
- const filename = c3.req.param("filename");
6639
- const evalId = c3.req.param("evalId");
6640
- const metas = listResultFiles(searchDir);
6641
- const meta = metas.find((m) => m.filename === filename);
6642
- if (!meta) {
6643
- return c3.json({ error: "Run not found" }, 404);
6644
- }
6645
- const requestPath = c3.req.path;
6646
- const prefix = `/api/runs/${filename}/evals/${evalId}/files/`;
6647
- const filePath = requestPath.slice(prefix.length);
6648
- if (!filePath) {
6649
- return c3.json({ error: "No file path specified" }, 400);
6650
- }
6651
- const baseDir = path10.dirname(meta.path);
6652
- const absolutePath = path10.resolve(baseDir, filePath);
6653
- if (!absolutePath.startsWith(path10.resolve(baseDir) + path10.sep) && absolutePath !== path10.resolve(baseDir)) {
6654
- return c3.json({ error: "Path traversal not allowed" }, 403);
6655
- }
6656
- if (!existsSync8(absolutePath) || !statSync4(absolutePath).isFile()) {
6657
- return c3.json({ error: "File not found" }, 404);
6658
- }
6659
- try {
6660
- const fileContent = readFileSync9(absolutePath, "utf8");
6661
- const language = inferLanguage(absolutePath);
6662
- return c3.json({ content: fileContent, language });
6663
- } catch {
6664
- return c3.json({ error: "Failed to read file" }, 500);
6665
- }
6666
- });
6667
- app2.get("/api/experiments", (c3) => {
6668
- const metas = listResultFiles(searchDir);
6669
- const { pass_threshold } = loadStudioConfig(agentvDir);
6670
- const experimentMap = /* @__PURE__ */ new Map();
6671
- for (const m of metas) {
6672
- try {
6673
- const records = loadLightweightResults(m.path);
6674
- for (const r of records) {
6675
- const experiment = r.experiment ?? "default";
6676
- const entry = experimentMap.get(experiment) ?? {
6677
- targets: /* @__PURE__ */ new Set(),
6678
- runFilenames: /* @__PURE__ */ new Set(),
6679
- evalCount: 0,
6680
- passedCount: 0,
6681
- lastTimestamp: ""
6682
- };
6683
- entry.runFilenames.add(m.filename);
6684
- if (r.target) entry.targets.add(r.target);
6685
- entry.evalCount++;
6686
- if (r.score >= pass_threshold) entry.passedCount++;
6687
- if (r.timestamp && r.timestamp > entry.lastTimestamp) {
6688
- entry.lastTimestamp = r.timestamp;
6689
- }
6690
- experimentMap.set(experiment, entry);
6691
- }
6692
- } catch {
6693
- }
6694
- }
6695
- const experiments = [...experimentMap.entries()].map(([name, entry]) => ({
6696
- name,
6697
- run_count: entry.runFilenames.size,
6698
- target_count: entry.targets.size,
6699
- eval_count: entry.evalCount,
6700
- passed_count: entry.passedCount,
6701
- pass_rate: entry.evalCount > 0 ? entry.passedCount / entry.evalCount : 0,
6702
- last_run: entry.lastTimestamp || null
6703
- }));
6704
- return c3.json({ experiments });
6705
- });
6706
- app2.get("/api/targets", (c3) => {
6707
- const metas = listResultFiles(searchDir);
6708
- const { pass_threshold } = loadStudioConfig(agentvDir);
6709
- const targetMap = /* @__PURE__ */ new Map();
6710
- for (const m of metas) {
6711
- try {
6712
- const records = loadLightweightResults(m.path);
6713
- for (const r of records) {
6714
- const target = r.target ?? "default";
6715
- const entry = targetMap.get(target) ?? {
6716
- experiments: /* @__PURE__ */ new Set(),
6717
- runFilenames: /* @__PURE__ */ new Set(),
6718
- evalCount: 0,
6719
- passedCount: 0
6720
- };
6721
- entry.runFilenames.add(m.filename);
6722
- if (r.experiment) entry.experiments.add(r.experiment);
6723
- entry.evalCount++;
6724
- if (r.score >= pass_threshold) entry.passedCount++;
6725
- targetMap.set(target, entry);
6726
- }
6727
- } catch {
6728
- }
6729
- }
6730
- const targets = [...targetMap.entries()].map(([name, entry]) => ({
6731
- name,
6732
- run_count: entry.runFilenames.size,
6733
- experiment_count: entry.experiments.size,
6734
- eval_count: entry.evalCount,
6735
- passed_count: entry.passedCount,
6736
- pass_rate: entry.evalCount > 0 ? entry.passedCount / entry.evalCount : 0
6737
- }));
6738
- return c3.json({ targets });
6881
+ app2.get("/", (c3) => {
6882
+ const indexPath = path10.join(studioDistPath, "index.html");
6883
+ if (existsSync8(indexPath)) return c3.html(readFileSync9(indexPath, "utf8"));
6884
+ return c3.notFound();
6739
6885
  });
6740
6886
  app2.get("/assets/*", (c3) => {
6741
6887
  const assetPath = c3.req.path;
6742
6888
  const filePath = path10.join(studioDistPath, assetPath);
6743
- if (!existsSync8(filePath)) {
6744
- return c3.notFound();
6745
- }
6889
+ if (!existsSync8(filePath)) return c3.notFound();
6746
6890
  const content = readFileSync9(filePath);
6747
6891
  const ext = path10.extname(filePath);
6748
6892
  const mimeTypes = {
@@ -6764,13 +6908,9 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
6764
6908
  });
6765
6909
  });
6766
6910
  app2.get("*", (c3) => {
6767
- if (c3.req.path.startsWith("/api/")) {
6768
- return c3.json({ error: "Not found" }, 404);
6769
- }
6911
+ if (c3.req.path.startsWith("/api/")) return c3.json({ error: "Not found" }, 404);
6770
6912
  const indexPath = path10.join(studioDistPath, "index.html");
6771
- if (existsSync8(indexPath)) {
6772
- return c3.html(readFileSync9(indexPath, "utf8"));
6773
- }
6913
+ if (existsSync8(indexPath)) return c3.html(readFileSync9(indexPath, "utf8"));
6774
6914
  return c3.notFound();
6775
6915
  });
6776
6916
  return app2;
@@ -6794,18 +6934,6 @@ function resolveStudioDistDir() {
6794
6934
  }
6795
6935
  return void 0;
6796
6936
  }
6797
- function stripHeavyFields(results) {
6798
- return results.map((r) => {
6799
- const { requests, trace, ...rest } = r;
6800
- const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
6801
- const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
6802
- return {
6803
- ...rest,
6804
- ...toolCalls && { _toolCalls: toolCalls },
6805
- ...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
6806
- };
6807
- });
6808
- }
6809
6937
  var resultsServeCommand = command({
6810
6938
  name: "studio",
6811
6939
  description: "Start AgentV Studio \u2014 a local dashboard for reviewing evaluation results",
@@ -6826,11 +6954,66 @@ var resultsServeCommand = command({
6826
6954
  long: "dir",
6827
6955
  short: "d",
6828
6956
  description: "Working directory (default: current directory)"
6957
+ }),
6958
+ multi: flag({
6959
+ long: "multi",
6960
+ description: "Launch in multi-project dashboard mode"
6961
+ }),
6962
+ add: option({
6963
+ type: optional(string),
6964
+ long: "add",
6965
+ description: "Register a project by path"
6966
+ }),
6967
+ remove: option({
6968
+ type: optional(string),
6969
+ long: "remove",
6970
+ description: "Unregister a project by ID"
6971
+ }),
6972
+ discover: option({
6973
+ type: optional(string),
6974
+ long: "discover",
6975
+ description: "Scan a directory tree for repos with .agentv/"
6829
6976
  })
6830
6977
  },
6831
- handler: async ({ source, port, dir }) => {
6978
+ handler: async ({ source, port, dir, multi, add, remove, discover }) => {
6832
6979
  const cwd = dir ?? process.cwd();
6833
6980
  const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
6981
+ if (add) {
6982
+ try {
6983
+ const entry = addProject(add);
6984
+ console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
6985
+ } catch (err2) {
6986
+ console.error(`Error: ${err2.message}`);
6987
+ process.exit(1);
6988
+ }
6989
+ return;
6990
+ }
6991
+ if (remove) {
6992
+ const removed = removeProject(remove);
6993
+ if (removed) {
6994
+ console.log(`Unregistered project: ${remove}`);
6995
+ } else {
6996
+ console.error(`Project not found: ${remove}`);
6997
+ process.exit(1);
6998
+ }
6999
+ return;
7000
+ }
7001
+ if (discover) {
7002
+ const discovered = discoverProjects(discover);
7003
+ if (discovered.length === 0) {
7004
+ console.log(`No projects with .agentv/ found under ${discover}`);
7005
+ return;
7006
+ }
7007
+ for (const p of discovered) {
7008
+ const entry = addProject(p);
7009
+ console.log(`Registered: ${entry.name} (${entry.id}) at ${entry.path}`);
7010
+ }
7011
+ console.log(`
7012
+ Discovered ${discovered.length} project(s).`);
7013
+ return;
7014
+ }
7015
+ const registry = loadProjectRegistry();
7016
+ const isMultiProject = multi || registry.projects.length > 0;
6834
7017
  try {
6835
7018
  let results = [];
6836
7019
  let sourceFile;
@@ -6858,16 +7041,16 @@ var resultsServeCommand = command({
6858
7041
  }
6859
7042
  const resultDir = sourceFile ? path10.dirname(path10.resolve(sourceFile)) : cwd;
6860
7043
  const app2 = createApp(results, resultDir, cwd, sourceFile);
6861
- if (results.length > 0 && sourceFile) {
7044
+ if (isMultiProject) {
7045
+ console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
7046
+ } else if (results.length > 0 && sourceFile) {
6862
7047
  console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
6863
7048
  } else {
6864
7049
  console.log("No results found. Dashboard will show an empty state.");
6865
7050
  console.log("Run an evaluation to see results: agentv eval <eval-file>");
6866
7051
  }
6867
7052
  console.log(`Dashboard: http://localhost:${listenPort}`);
6868
- console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
6869
- console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
6870
- console.log(`Feedback file: ${feedbackPath(resultDir)}`);
7053
+ console.log(`Projects API: http://localhost:${listenPort}/api/projects`);
6871
7054
  console.log("Press Ctrl+C to stop");
6872
7055
  const { serve: startServer } = await import("@hono/node-server");
6873
7056
  startServer({
@@ -8042,7 +8225,7 @@ function isYamlFile(filePath) {
8042
8225
  }
8043
8226
 
8044
8227
  // src/commands/validate/index.ts
8045
- async function runValidateCommand(paths) {
8228
+ async function runValidateCommand(paths, maxWarnings) {
8046
8229
  if (paths.length === 0) {
8047
8230
  console.error("Error: No paths specified. Usage: agentv validate <paths...>");
8048
8231
  process.exit(1);
@@ -8053,6 +8236,18 @@ async function runValidateCommand(paths) {
8053
8236
  if (summary.invalidFiles > 0) {
8054
8237
  process.exit(1);
8055
8238
  }
8239
+ if (maxWarnings !== void 0) {
8240
+ const warningCount = summary.results.reduce(
8241
+ (count, r) => count + r.errors.filter((e) => e.severity === "warning").length,
8242
+ 0
8243
+ );
8244
+ if (warningCount > maxWarnings) {
8245
+ console.error(
8246
+ `Found ${warningCount} warning${warningCount === 1 ? "" : "s"} (max allowed: ${maxWarnings})`
8247
+ );
8248
+ process.exit(1);
8249
+ }
8250
+ }
8056
8251
  }
8057
8252
  var validateCommand = command({
8058
8253
  name: "validate",
@@ -8062,11 +8257,16 @@ var validateCommand = command({
8062
8257
  type: string,
8063
8258
  displayName: "paths",
8064
8259
  description: "Files or directories to validate"
8260
+ }),
8261
+ maxWarnings: option({
8262
+ type: optional(number),
8263
+ long: "max-warnings",
8264
+ description: "Maximum number of warnings allowed before failing (e.g., --max-warnings 0)"
8065
8265
  })
8066
8266
  },
8067
- handler: async ({ paths }) => {
8267
+ handler: async ({ paths, maxWarnings }) => {
8068
8268
  try {
8069
- await runValidateCommand(paths);
8269
+ await runValidateCommand(paths, maxWarnings);
8070
8270
  } catch (error) {
8071
8271
  console.error(`Error: ${error.message}`);
8072
8272
  process.exit(1);
@@ -8414,4 +8614,4 @@ export {
8414
8614
  preprocessArgv,
8415
8615
  runCli
8416
8616
  };
8417
- //# sourceMappingURL=chunk-5DDVNHOS.js.map
8617
+ //# sourceMappingURL=chunk-YORCRL4G.js.map