agentv 3.13.3 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,7 +22,7 @@ import {
22
22
  validateFileReferences,
23
23
  validateTargetsFile,
24
24
  writeArtifactsFromResults
25
- } from "./chunk-PACTPWEN.js";
25
+ } from "./chunk-YYECEMUV.js";
26
26
  import {
27
27
  createBuiltinRegistry,
28
28
  executeScript,
@@ -39,7 +39,7 @@ import {
39
39
  toSnakeCaseDeep as toSnakeCaseDeep2,
40
40
  transpileEvalYamlFile,
41
41
  trimBaselineResult
42
- } from "./chunk-D3LNJUUB.js";
42
+ } from "./chunk-3TBDSUYD.js";
43
43
  import {
44
44
  __commonJS,
45
45
  __esm,
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
4185
4185
  },
4186
4186
  handler: async (args) => {
4187
4187
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4188
- const { launchInteractiveWizard } = await import("./interactive-OMJAMCQP.js");
4188
+ const { launchInteractiveWizard } = await import("./interactive-AI75XY3X.js");
4189
4189
  await launchInteractiveWizard();
4190
4190
  return;
4191
4191
  }
@@ -5562,10 +5562,40 @@ function writeFeedback(cwd, data) {
5562
5562
  writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
5563
5563
  `, "utf8");
5564
5564
  }
5565
- function createApp(results, resultDir) {
5565
+ function createApp(results, resultDir, cwd, sourceFile) {
5566
+ const searchDir = cwd ?? resultDir;
5566
5567
  const app2 = new Hono();
5567
5568
  app2.get("/", (c3) => {
5568
- return c3.html(generateServeHtml(results));
5569
+ return c3.html(generateServeHtml(results, sourceFile));
5570
+ });
5571
+ app2.get("/api/runs", (c3) => {
5572
+ const metas = listResultFiles(searchDir);
5573
+ return c3.json({
5574
+ runs: metas.map((m) => ({
5575
+ filename: m.filename,
5576
+ path: m.path,
5577
+ timestamp: m.timestamp,
5578
+ test_count: m.testCount,
5579
+ pass_rate: m.passRate,
5580
+ avg_score: m.avgScore,
5581
+ size_bytes: m.sizeBytes
5582
+ }))
5583
+ });
5584
+ });
5585
+ app2.get("/api/runs/:filename", (c3) => {
5586
+ const filename = c3.req.param("filename");
5587
+ const metas = listResultFiles(searchDir);
5588
+ const meta = metas.find((m) => m.filename === filename);
5589
+ if (!meta) {
5590
+ return c3.json({ error: "Run not found" }, 404);
5591
+ }
5592
+ try {
5593
+ const loaded = patchTestIds(loadManifestResults(meta.path));
5594
+ const lightResults = stripHeavyFields(loaded);
5595
+ return c3.json({ results: lightResults, source: meta.filename });
5596
+ } catch (err2) {
5597
+ return c3.json({ error: "Failed to load run" }, 500);
5598
+ }
5569
5599
  });
5570
5600
  app2.get("/api/feedback", (c3) => {
5571
5601
  const data = readFeedback(resultDir);
@@ -5611,11 +5641,8 @@ function createApp(results, resultDir) {
5611
5641
  });
5612
5642
  return app2;
5613
5643
  }
5614
- function escapeHtml(s) {
5615
- return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
5616
- }
5617
- function generateServeHtml(results) {
5618
- const lightResults = results.map((r) => {
5644
+ function stripHeavyFields(results) {
5645
+ return results.map((r) => {
5619
5646
  const { requests, trace, ...rest } = r;
5620
5647
  const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
5621
5648
  const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
@@ -5625,6 +5652,12 @@ function generateServeHtml(results) {
5625
5652
  ...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
5626
5653
  };
5627
5654
  });
5655
+ }
5656
/**
 * Escapes the characters that are significant in HTML so the text can be
 * interpolated into element content or a quoted attribute value.
 *
 * Single quotes are escaped as well, so the result is also safe inside
 * single-quoted attributes.
 *
 * @param {string} s - Raw text to escape.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(s) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  // Single pass: each source character is mapped at most once, so the "&"
  // introduced by an entity is never re-escaped.
  return s.replace(/[&<>"']/g, (ch) => entities[ch]);
}
5659
+ function generateServeHtml(results, sourceFile) {
5660
+ const lightResults = stripHeavyFields(results);
5628
5661
  const dataJson = JSON.stringify(lightResults).replace(/</g, "\\u003c").replace(/>/g, "\\u003e").replace(/\u2028/g, "\\u2028").replace(/\u2029/g, "\\u2029");
5629
5662
  return `<!DOCTYPE html>
5630
5663
  <html lang="en">
@@ -5642,6 +5675,11 @@ ${SERVE_STYLES}
5642
5675
  <h1 class="header-title">AgentV</h1>
5643
5676
  <span class="header-subtitle">Results Review</span>
5644
5677
  </div>
5678
+ <div class="header-center">
5679
+ <select id="run-picker" class="run-picker" title="Switch result file">
5680
+ <option value="">Loading runs...</option>
5681
+ </select>
5682
+ </div>
5645
5683
  <div class="header-right">
5646
5684
  <span class="timestamp">${escapeHtml((/* @__PURE__ */ new Date()).toISOString())}</span>
5647
5685
  </div>
@@ -5653,6 +5691,7 @@ ${SERVE_STYLES}
5653
5691
  <main id="app"></main>
5654
5692
  <script>
5655
5693
  var DATA = ${dataJson};
5694
+ var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
5656
5695
  ${SERVE_SCRIPT}
5657
5696
  </script>
5658
5697
  </body>
@@ -5679,6 +5718,10 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
5679
5718
  .header-left{display:flex;align-items:baseline;gap:12px}
5680
5719
  .header-title{font-size:18px;font-weight:600}
5681
5720
  .header-subtitle{font-size:14px;color:var(--text-muted)}
5721
+ .header-center{flex:1;display:flex;justify-content:center;padding:0 16px}
5722
+ .run-picker{padding:6px 10px;border:1px solid var(--border);border-radius:var(--radius);font-size:13px;background:var(--surface);color:var(--text);font-family:var(--font);max-width:400px;width:100%;cursor:pointer}
5723
+ .run-picker:hover{border-color:var(--primary)}
5724
+ .run-picker:focus{outline:none;border-color:var(--primary);box-shadow:0 0 0 3px var(--primary-bg)}
5682
5725
  .timestamp{font-size:12px;color:var(--text-muted);font-family:var(--mono)}
5683
5726
 
5684
5727
  /* Tabs */
@@ -5778,6 +5821,11 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
5778
5821
  .tool-tag{display:inline-block;padding:2px 10px;font-size:12px;font-family:var(--mono);background:var(--primary-bg);color:var(--primary);border:1px solid var(--border);border-radius:12px}
5779
5822
  .empty-state{text-align:center;padding:48px 24px;color:var(--text-muted)}
5780
5823
  .empty-state h3{font-size:16px;margin-bottom:8px;color:var(--text)}
5824
+ .welcome-state{text-align:center;padding:80px 24px;color:var(--text-muted)}
5825
+ .welcome-state h2{font-size:24px;margin-bottom:12px;color:var(--text);font-weight:600}
5826
+ .welcome-state p{font-size:15px;margin-bottom:8px;max-width:500px;margin-left:auto;margin-right:auto}
5827
+ .welcome-state code{font-family:var(--mono);background:var(--surface);border:1px solid var(--border);border-radius:3px;padding:2px 6px;font-size:13px}
5828
+ .welcome-state .hint{margin-top:24px;font-size:13px;color:var(--text-muted)}
5781
5829
 
5782
5830
  /* Feedback */
5783
5831
  .feedback-section{margin-top:16px;padding-top:16px;border-top:1px solid var(--border-light)}
@@ -5935,7 +5983,15 @@ var SERVE_SCRIPT = `
5935
5983
 
5936
5984
  /* ---- render ---- */
5937
5985
  function render(){
5938
- if(DATA.length===0){app.innerHTML='<div class="empty-state"><h3>No results</h3><p>No evaluation results to display.</p></div>';return;}
5986
+ if(DATA.length===0){
5987
+ app.innerHTML='<div class="welcome-state">'
5988
+ +'<h2>No results yet</h2>'
5989
+ +'<p>Run an evaluation or mount a results directory to see results here.</p>'
5990
+ +'<p><code>agentv eval &lt;eval-file&gt;</code></p>'
5991
+ +'<p class="hint">The dashboard will automatically detect new result files.</p>'
5992
+ +'</div>';
5993
+ return;
5994
+ }
5939
5995
  if(state.tab==="overview")renderOverview();else renderTests();
5940
5996
  }
5941
5997
 
@@ -6198,6 +6254,69 @@ var SERVE_SCRIPT = `
6198
6254
  return h;
6199
6255
  }
6200
6256
 
6257
+ /* ---- run picker ---- */
/* The element with id "run-picker": the header select used to switch result files. */
6258
+ var runPicker=document.getElementById("run-picker");
/* Filenames returned by the previous /api/runs poll; refreshRunList compares against it to spot new runs. */
6259
+ var knownRunFilenames=[];
6260
+
6261
/* Poll /api/runs, auto-load a newly appeared run while the dashboard is
   empty, and rebuild the picker options.
   Returns the fetch promise chain; failures are logged and never reject. */
function refreshRunList(){
  return fetch("/api/runs").then(function(r){return r.json();}).then(function(d){
    if(!d||!d.runs)return;
    var runs=d.runs;
    var newFilenames=runs.map(function(r){return r.filename;});

    /* Detect new runs that appeared since the last poll. The first poll only
       records a baseline. An empty baseline still counts: a dashboard started
       with no result files must pick up the first file that shows up. */
    if(refreshRunList.hasBaseline){
      var hasNew=newFilenames.some(function(f){return knownRunFilenames.indexOf(f)===-1;});
      if(hasNew&&DATA.length===0){
        /* Auto-load the first (most recent) run when starting from empty state */
        loadRun(runs[0].filename);
      }
    }
    refreshRunList.hasBaseline=true;
    knownRunFilenames=newFilenames;

    /* Replacing the options resets the select, so remember what was chosen. */
    var previousSelection=runPicker.value;

    /* Rebuild picker options */
    var h='<option value="">Select a result file...</option>';
    if(runs.length===0){
      h='<option value="">No result files</option>';
    }
    for(var i=0;i<runs.length;i++){
      var r=runs[i];
      var label=r.filename+" ("+r.test_count+" tests, "+(r.pass_rate*100).toFixed(0)+"% pass)";
      h+='<option value="'+esc(r.filename)+'">'+esc(label)+"</option>";
    }
    runPicker.innerHTML=h;

    /* Keep the current selection; fall back to the initially served file only
       when nothing was selected, and only if the file is still listed. */
    var preferred=previousSelection||INITIAL_SOURCE;
    if(preferred&&newFilenames.indexOf(preferred)!==-1){
      runPicker.value=preferred;
    }
  }).catch(function(err){console.warn("Failed to refresh run list:",err);});
}
6294
+
6295
/* Fetch one run by filename from /api/runs/<filename> and make it the
   displayed data set: recompute the derived stats, reset expanded rows and
   the feedback cache, re-render, and sync the picker.
   Returns the fetch promise chain; failures are logged and never reject. */
function loadRun(filename){
  /* Tag each request so a slow response for an earlier selection cannot
     overwrite the data of a later one. */
  var requestId=(loadRun.latestRequestId||0)+1;
  loadRun.latestRequestId=requestId;
  return fetch("/api/runs/"+encodeURIComponent(filename)).then(function(r){return r.json();}).then(function(d){
    if(requestId!==loadRun.latestRequestId)return;
    /* The server reports a missing or unreadable run as a JSON body with an error field. */
    if(d&&d.error){console.error(d.error);return;}
    /* Never put a non-array into DATA: render and the stats helpers read DATA.length. */
    if(!d||!Array.isArray(d.results)){console.error("Failed to load run:","unexpected response shape");return;}
    DATA=d.results;
    stats=computeStats(DATA);
    tgtStats=computeTargets(DATA);
    tgtNames=tgtStats.map(function(t){return t.target;});
    state.expanded={};
    feedbackCache={};
    loadFeedback();
    render();
    /* Update picker selection */
    runPicker.value=filename;
  }).catch(function(err){console.error("Failed to load run:",err);});
}
6310
+
6311
/* Load the run chosen in the picker; options with an empty value (the placeholders) are ignored. */
+ runPicker.addEventListener("change",function(){
6312
+ var val=runPicker.value;
6313
+ if(val)loadRun(val);
6314
+ });
6315
+
6316
+ /* Poll for new result files every 5 seconds */
/* The immediate call fills the picker on page load; the interval then repeats the refresh. */
6317
+ refreshRunList();
6318
+ setInterval(refreshRunList,5000);
6319
+
6201
6320
  /* ---- init ---- */
6202
6321
  loadFeedback();
6203
6322
  render();
@@ -6216,7 +6335,7 @@ var resultsServeCommand = command({
6216
6335
  type: optional(number),
6217
6336
  long: "port",
6218
6337
  short: "p",
6219
- description: "Port to listen on (default: 3117)"
6338
+ description: "Port to listen on (flag \u2192 PORT env var \u2192 3117)"
6220
6339
  }),
6221
6340
  dir: option({
6222
6341
  type: optional(string),
@@ -6227,14 +6346,43 @@ var resultsServeCommand = command({
6227
6346
  },
6228
6347
  handler: async ({ source, port, dir }) => {
6229
6348
  const cwd = dir ?? process.cwd();
6230
- const listenPort = port ?? 3117;
6349
+ const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
6231
6350
  try {
6232
- const { results, sourceFile } = await loadResults(source, cwd);
6233
- const resultDir = path8.dirname(path8.resolve(sourceFile));
6234
- const app2 = createApp(results, resultDir);
6235
- console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
6351
+ let results = [];
6352
+ let sourceFile;
6353
+ if (source) {
6354
+ const resolved = resolveResultSourcePath(source, cwd);
6355
+ if (!existsSync4(resolved)) {
6356
+ console.error(`Error: Source file not found: ${resolved}`);
6357
+ process.exit(1);
6358
+ }
6359
+ sourceFile = resolved;
6360
+ results = patchTestIds(loadManifestResults(resolved));
6361
+ } else {
6362
+ const cache = await loadRunCache(cwd);
6363
+ const cachedFile = cache ? resolveRunCacheFile(cache) : "";
6364
+ if (cachedFile && existsSync4(cachedFile)) {
6365
+ sourceFile = cachedFile;
6366
+ results = patchTestIds(loadManifestResults(cachedFile));
6367
+ } else {
6368
+ const metas = listResultFiles(cwd, 1);
6369
+ if (metas.length > 0) {
6370
+ sourceFile = metas[0].path;
6371
+ results = patchTestIds(loadManifestResults(metas[0].path));
6372
+ }
6373
+ }
6374
+ }
6375
+ const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
6376
+ const app2 = createApp(results, resultDir, cwd, sourceFile);
6377
+ if (results.length > 0 && sourceFile) {
6378
+ console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
6379
+ } else {
6380
+ console.log("No results found. Dashboard will show an empty state.");
6381
+ console.log("Run an evaluation to see results: agentv eval <eval-file>");
6382
+ }
6236
6383
  console.log(`Dashboard: http://localhost:${listenPort}`);
6237
6384
  console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
6385
+ console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
6238
6386
  console.log(`Feedback file: ${feedbackPath(resultDir)}`);
6239
6387
  console.log("Press Ctrl+C to stop");
6240
6388
  const { serve: startServer } = await import("@hono/node-server");
@@ -7766,4 +7914,4 @@ export {
7766
7914
  preprocessArgv,
7767
7915
  runCli
7768
7916
  };
7769
- //# sourceMappingURL=chunk-TGCWIHBH.js.map
7917
+ //# sourceMappingURL=chunk-W6CGDNQR.js.map