agentv 3.13.2 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@ import {
22
22
  validateFileReferences,
23
23
  validateTargetsFile,
24
24
  writeArtifactsFromResults
25
- } from "./chunk-4Z5E5CYT.js";
25
+ } from "./chunk-YYECEMUV.js";
26
26
  import {
27
27
  createBuiltinRegistry,
28
28
  executeScript,
@@ -39,7 +39,7 @@ import {
39
39
  toSnakeCaseDeep as toSnakeCaseDeep2,
40
40
  transpileEvalYamlFile,
41
41
  trimBaselineResult
42
- } from "./chunk-D3LNJUUB.js";
42
+ } from "./chunk-3TBDSUYD.js";
43
43
  import {
44
44
  __commonJS,
45
45
  __esm,
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
4185
4185
  },
4186
4186
  handler: async (args) => {
4187
4187
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4188
- const { launchInteractiveWizard } = await import("./interactive-HVKLYGRX.js");
4188
+ const { launchInteractiveWizard } = await import("./interactive-AI75XY3X.js");
4189
4189
  await launchInteractiveWizard();
4190
4190
  return;
4191
4191
  }
@@ -5543,8 +5543,8 @@ var resultsCommand = subcommands({
5543
5543
  import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
5544
5544
  import path8 from "node:path";
5545
5545
  import { Hono } from "hono";
5546
- function feedbackPath(cwd) {
5547
- return path8.join(cwd, "feedback.json");
5546
+ function feedbackPath(resultDir) {
5547
+ return path8.join(resultDir, "feedback.json");
5548
5548
  }
5549
5549
  function readFeedback(cwd) {
5550
5550
  const fp = feedbackPath(cwd);
@@ -5562,13 +5562,43 @@ function writeFeedback(cwd, data) {
5562
5562
  writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5563
5563
  `, "utf8");
5564
5564
  }
5565
- function createApp(results, cwd) {
5565
+ function createApp(results, resultDir, cwd, sourceFile) {
5566
+ const searchDir = cwd ?? resultDir;
5566
5567
  const app2 = new Hono();
5567
5568
  app2.get("/", (c3) => {
5568
- return c3.html(generateServeHtml(results));
5569
+ return c3.html(generateServeHtml(results, sourceFile));
5570
+ });
5571
+ app2.get("/api/runs", (c3) => {
5572
+ const metas = listResultFiles(searchDir);
5573
+ return c3.json({
5574
+ runs: metas.map((m) => ({
5575
+ filename: m.filename,
5576
+ path: m.path,
5577
+ timestamp: m.timestamp,
5578
+ test_count: m.testCount,
5579
+ pass_rate: m.passRate,
5580
+ avg_score: m.avgScore,
5581
+ size_bytes: m.sizeBytes
5582
+ }))
5583
+ });
5584
+ });
5585
+ app2.get("/api/runs/:filename", (c3) => {
5586
+ const filename = c3.req.param("filename");
5587
+ const metas = listResultFiles(searchDir);
5588
+ const meta = metas.find((m) => m.filename === filename);
5589
+ if (!meta) {
5590
+ return c3.json({ error: "Run not found" }, 404);
5591
+ }
5592
+ try {
5593
+ const loaded = patchTestIds(loadManifestResults(meta.path));
5594
+ const lightResults = stripHeavyFields(loaded);
5595
+ return c3.json({ results: lightResults, source: meta.filename });
5596
+ } catch (err2) {
5597
+ return c3.json({ error: "Failed to load run" }, 500);
5598
+ }
5569
5599
  });
5570
5600
  app2.get("/api/feedback", (c3) => {
5571
- const data = readFeedback(cwd);
5601
+ const data = readFeedback(resultDir);
5572
5602
  return c3.json(data);
5573
5603
  });
5574
5604
  app2.post("/api/feedback", async (c3) => {
@@ -5591,7 +5621,7 @@ function createApp(results, cwd) {
5591
5621
  return c3.json({ error: "Each review must have test_id and comment strings" }, 400);
5592
5622
  }
5593
5623
  }
5594
- const existing = readFeedback(cwd);
5624
+ const existing = readFeedback(resultDir);
5595
5625
  const now = (/* @__PURE__ */ new Date()).toISOString();
5596
5626
  for (const review of incoming) {
5597
5627
  const newReview = {
@@ -5606,16 +5636,13 @@ function createApp(results, cwd) {
5606
5636
  existing.reviews.push(newReview);
5607
5637
  }
5608
5638
  }
5609
- writeFeedback(cwd, existing);
5639
+ writeFeedback(resultDir, existing);
5610
5640
  return c3.json(existing);
5611
5641
  });
5612
5642
  return app2;
5613
5643
  }
5614
- function escapeHtml(s) {
5615
- return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
5616
- }
5617
- function generateServeHtml(results) {
5618
- const lightResults = results.map((r) => {
5644
+ function stripHeavyFields(results) {
5645
+ return results.map((r) => {
5619
5646
  const { requests, trace, ...rest } = r;
5620
5647
  const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
5621
5648
  const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
@@ -5625,6 +5652,12 @@ function generateServeHtml(results) {
5625
5652
  ...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
5626
5653
  };
5627
5654
  });
5655
+ }
5656
/**
 * Escape a value for safe interpolation into HTML text or quoted attributes.
 *
 * Replaces the five HTML-significant characters (& < > " ') in a single pass,
 * so already-produced entities are never re-scanned.
 *
 * @param {unknown} s - Value to escape; non-strings are coerced with String(),
 *   and null/undefined become the empty string instead of throwing.
 * @returns {string} The escaped text.
 */
function escapeHtml(s) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    // Single quotes matter when the value lands inside a '...'-quoted attribute.
    "'": "&#39;"
  };
  return String(s ?? "").replace(/[&<>"']/g, (ch) => entities[ch]);
}
5659
+ function generateServeHtml(results, sourceFile) {
5660
+ const lightResults = stripHeavyFields(results);
5628
5661
  const dataJson = JSON.stringify(lightResults).replace(/</g, "\\u003c").replace(/>/g, "\\u003e").replace(/\u2028/g, "\\u2028").replace(/\u2029/g, "\\u2029");
5629
5662
  return `<!DOCTYPE html>
5630
5663
  <html lang="en">
@@ -5642,6 +5675,11 @@ ${SERVE_STYLES}
5642
5675
  <h1 class="header-title">AgentV</h1>
5643
5676
  <span class="header-subtitle">Results Review</span>
5644
5677
  </div>
5678
+ <div class="header-center">
5679
+ <select id="run-picker" class="run-picker" title="Switch result file">
5680
+ <option value="">Loading runs...</option>
5681
+ </select>
5682
+ </div>
5645
5683
  <div class="header-right">
5646
5684
  <span class="timestamp">${escapeHtml((/* @__PURE__ */ new Date()).toISOString())}</span>
5647
5685
  </div>
@@ -5653,6 +5691,7 @@ ${SERVE_STYLES}
5653
5691
  <main id="app"></main>
5654
5692
  <script>
5655
5693
  var DATA = ${dataJson};
5694
+ var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
5656
5695
  ${SERVE_SCRIPT}
5657
5696
  </script>
5658
5697
  </body>
@@ -5679,6 +5718,10 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
5679
5718
  .header-left{display:flex;align-items:baseline;gap:12px}
5680
5719
  .header-title{font-size:18px;font-weight:600}
5681
5720
  .header-subtitle{font-size:14px;color:var(--text-muted)}
5721
+ .header-center{flex:1;display:flex;justify-content:center;padding:0 16px}
5722
+ .run-picker{padding:6px 10px;border:1px solid var(--border);border-radius:var(--radius);font-size:13px;background:var(--surface);color:var(--text);font-family:var(--font);max-width:400px;width:100%;cursor:pointer}
5723
+ .run-picker:hover{border-color:var(--primary)}
5724
+ .run-picker:focus{outline:none;border-color:var(--primary);box-shadow:0 0 0 3px var(--primary-bg)}
5682
5725
  .timestamp{font-size:12px;color:var(--text-muted);font-family:var(--mono)}
5683
5726
 
5684
5727
  /* Tabs */
@@ -5778,6 +5821,11 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
5778
5821
  .tool-tag{display:inline-block;padding:2px 10px;font-size:12px;font-family:var(--mono);background:var(--primary-bg);color:var(--primary);border:1px solid var(--border);border-radius:12px}
5779
5822
  .empty-state{text-align:center;padding:48px 24px;color:var(--text-muted)}
5780
5823
  .empty-state h3{font-size:16px;margin-bottom:8px;color:var(--text)}
5824
+ .welcome-state{text-align:center;padding:80px 24px;color:var(--text-muted)}
5825
+ .welcome-state h2{font-size:24px;margin-bottom:12px;color:var(--text);font-weight:600}
5826
+ .welcome-state p{font-size:15px;margin-bottom:8px;max-width:500px;margin-left:auto;margin-right:auto}
5827
+ .welcome-state code{font-family:var(--mono);background:var(--surface);border:1px solid var(--border);border-radius:3px;padding:2px 6px;font-size:13px}
5828
+ .welcome-state .hint{margin-top:24px;font-size:13px;color:var(--text-muted)}
5781
5829
 
5782
5830
  /* Feedback */
5783
5831
  .feedback-section{margin-top:16px;padding-top:16px;border-top:1px solid var(--border-light)}
@@ -5935,7 +5983,15 @@ var SERVE_SCRIPT = `
5935
5983
 
5936
5984
  /* ---- render ---- */
5937
5985
  function render(){
5938
- if(DATA.length===0){app.innerHTML='<div class="empty-state"><h3>No results</h3><p>No evaluation results to display.</p></div>';return;}
5986
+ if(DATA.length===0){
5987
+ app.innerHTML='<div class="welcome-state">'
5988
+ +'<h2>No results yet</h2>'
5989
+ +'<p>Run an evaluation or mount a results directory to see results here.</p>'
5990
+ +'<p><code>agentv eval &lt;eval-file&gt;</code></p>'
5991
+ +'<p class="hint">The dashboard will automatically detect new result files.</p>'
5992
+ +'</div>';
5993
+ return;
5994
+ }
5939
5995
  if(state.tab==="overview")renderOverview();else renderTests();
5940
5996
  }
5941
5997
 
@@ -6198,6 +6254,69 @@ var SERVE_SCRIPT = `
6198
6254
  return h;
6199
6255
  }
6200
6256
 
6257
+ /* ---- run picker ---- */
6258
+ var runPicker=document.getElementById("run-picker");
6259
+ var knownRunFilenames=[];
6260
+
6261
/*
 * Poll /api/runs and rebuild the run picker.
 *
 * - Reads d.runs (objects with filename, test_count, pass_rate) from the response.
 * - When the dashboard is showing no data and a previously unseen file appears on
 *   any poll after the first, loads runs[0] via loadRun().
 * - Keeps whatever the picker currently has selected across rebuilds; falls back to
 *   INITIAL_SOURCE only when nothing has been selected yet.
 * - Fetch/parse failures are logged with console.warn and otherwise ignored so the
 *   next poll can retry.
 */
function refreshRunList(){
  fetch("/api/runs").then(function(r){return r.json();}).then(function(d){
    if(!d||!d.runs)return;
    var runs=d.runs;
    var newFilenames=runs.map(function(run){return run.filename;});

    /* Track "has polled before" separately from the known list: the known list is
       legitimately empty when the server starts against an empty directory, and that
       is exactly the case where the first run to appear must be auto-loaded. */
    var isFirstPoll=!refreshRunList.hasPolled;
    refreshRunList.hasPolled=true;
    if(!isFirstPoll&&DATA.length===0){
      var hasNew=newFilenames.some(function(f){return knownRunFilenames.indexOf(f)===-1;});
      if(hasNew){
        /* Auto-load the first listed run when starting from empty state */
        loadRun(runs[0].filename);
      }
    }
    knownRunFilenames=newFilenames;

    /* Capture the selection BEFORE replacing the options; assigning innerHTML
       discards it. */
    var selected=runPicker.value||INITIAL_SOURCE||"";

    /* Rebuild picker options */
    var h='<option value="">Select a result file...</option>';
    if(runs.length===0){
      h='<option value="">No result files</option>';
    }
    for(var i=0;i<runs.length;i++){
      var run=runs[i];
      var label=run.filename+" ("+run.test_count+" tests, "+(run.pass_rate*100).toFixed(0)+"% pass)";
      h+='<option value="'+esc(run.filename)+'">'+esc(label)+"</option>";
    }
    runPicker.innerHTML=h;

    /* Restore the selection only if that file is still listed */
    if(selected&&newFilenames.indexOf(selected)!==-1){
      runPicker.value=selected;
    }
  }).catch(function(err){console.warn("Failed to refresh run list:",err);});
}
6294
+
6295
/*
 * Fetch one run from /api/runs/<filename> and swap it into the dashboard.
 *
 * On success: replaces DATA, recomputes stats / tgtStats / tgtNames, clears
 * state.expanded and feedbackCache, calls loadFeedback() and render(), and points
 * the picker at the loaded file. Server-reported errors, malformed payloads and
 * network failures are logged with console.error and leave the current view as is.
 */
function loadRun(filename){
  /* Tag each request so a slow, older response cannot overwrite a newer selection. */
  var requestId=(loadRun.latestRequest||0)+1;
  loadRun.latestRequest=requestId;
  fetch("/api/runs/"+encodeURIComponent(filename)).then(function(r){return r.json();}).then(function(d){
    if(requestId!==loadRun.latestRequest)return;
    if(d&&d.error){console.error(d.error);return;}
    /* render() reads DATA.length, so never install a non-array payload. */
    if(!d||!Array.isArray(d.results)){console.error("Failed to load run:","malformed response");return;}
    DATA=d.results;
    stats=computeStats(DATA);
    tgtStats=computeTargets(DATA);
    tgtNames=tgtStats.map(function(t){return t.target;});
    state.expanded={};
    feedbackCache={};
    loadFeedback();
    render();
    /* Update picker selection */
    runPicker.value=filename;
  }).catch(function(err){console.error("Failed to load run:",err);});
}
6310
+
6311
+ runPicker.addEventListener("change",function(){
6312
+ var val=runPicker.value;
6313
+ if(val)loadRun(val);
6314
+ });
6315
+
6316
+ /* Poll for new result files every 5 seconds */
6317
+ refreshRunList();
6318
+ setInterval(refreshRunList,5000);
6319
+
6201
6320
  /* ---- init ---- */
6202
6321
  loadFeedback();
6203
6322
  render();
@@ -6216,7 +6335,7 @@ var resultsServeCommand = command({
6216
6335
  type: optional(number),
6217
6336
  long: "port",
6218
6337
  short: "p",
6219
- description: "Port to listen on (default: 3117)"
6338
+ description: "Port to listen on (flag \u2192 PORT env var \u2192 3117)"
6220
6339
  }),
6221
6340
  dir: option({
6222
6341
  type: optional(string),
@@ -6227,14 +6346,44 @@ var resultsServeCommand = command({
6227
6346
  },
6228
6347
  handler: async ({ source, port, dir }) => {
6229
6348
  const cwd = dir ?? process.cwd();
6230
- const listenPort = port ?? 3117;
6349
+ const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
6231
6350
  try {
6232
- const { results, sourceFile } = await loadResults(source, cwd);
6233
- const app2 = createApp(results, cwd);
6234
- console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
6351
+ let results = [];
6352
+ let sourceFile;
6353
+ if (source) {
6354
+ const resolved = resolveResultSourcePath(source, cwd);
6355
+ if (!existsSync4(resolved)) {
6356
+ console.error(`Error: Source file not found: ${resolved}`);
6357
+ process.exit(1);
6358
+ }
6359
+ sourceFile = resolved;
6360
+ results = patchTestIds(loadManifestResults(resolved));
6361
+ } else {
6362
+ const cache = await loadRunCache(cwd);
6363
+ const cachedFile = cache ? resolveRunCacheFile(cache) : "";
6364
+ if (cachedFile && existsSync4(cachedFile)) {
6365
+ sourceFile = cachedFile;
6366
+ results = patchTestIds(loadManifestResults(cachedFile));
6367
+ } else {
6368
+ const metas = listResultFiles(cwd, 1);
6369
+ if (metas.length > 0) {
6370
+ sourceFile = metas[0].path;
6371
+ results = patchTestIds(loadManifestResults(metas[0].path));
6372
+ }
6373
+ }
6374
+ }
6375
+ const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
6376
+ const app2 = createApp(results, resultDir, cwd, sourceFile);
6377
+ if (results.length > 0 && sourceFile) {
6378
+ console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
6379
+ } else {
6380
+ console.log("No results found. Dashboard will show an empty state.");
6381
+ console.log("Run an evaluation to see results: agentv eval <eval-file>");
6382
+ }
6235
6383
  console.log(`Dashboard: http://localhost:${listenPort}`);
6236
6384
  console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
6237
- console.log(`Feedback file: ${feedbackPath(cwd)}`);
6385
+ console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
6386
+ console.log(`Feedback file: ${feedbackPath(resultDir)}`);
6238
6387
  console.log("Press Ctrl+C to stop");
6239
6388
  const { serve: startServer } = await import("@hono/node-server");
6240
6389
  startServer({
@@ -7765,4 +7914,4 @@ export {
7765
7914
  preprocessArgv,
7766
7915
  runCli
7767
7916
  };
7768
- //# sourceMappingURL=chunk-X2343WOK.js.map
7917
+ //# sourceMappingURL=chunk-W6CGDNQR.js.map