agentv 3.13.2 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-D3LNJUUB.js → chunk-3TBDSUYD.js} +659 -320
- package/dist/chunk-3TBDSUYD.js.map +1 -0
- package/dist/{chunk-X2343WOK.js → chunk-W6CGDNQR.js} +172 -23
- package/dist/chunk-W6CGDNQR.js.map +1 -0
- package/dist/{chunk-4Z5E5CYT.js → chunk-YYECEMUV.js} +8 -5
- package/dist/chunk-YYECEMUV.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-KPMR7RBT.js → dist-ZGLENPVH.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-HVKLYGRX.js → interactive-AI75XY3X.js} +3 -3
- package/dist/templates/.agentv/config.yaml +4 -13
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +5 -2
- package/dist/chunk-4Z5E5CYT.js.map +0 -1
- package/dist/chunk-D3LNJUUB.js.map +0 -1
- package/dist/chunk-X2343WOK.js.map +0 -1
- package/dist/templates/.agentv/.env.example +0 -23
- /package/dist/{dist-KPMR7RBT.js.map → dist-ZGLENPVH.js.map} +0 -0
- /package/dist/{interactive-HVKLYGRX.js.map → interactive-AI75XY3X.js.map} +0 -0
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
validateFileReferences,
|
|
23
23
|
validateTargetsFile,
|
|
24
24
|
writeArtifactsFromResults
|
|
25
|
-
} from "./chunk-4Z5E5CYT.js";
|
|
25
|
+
} from "./chunk-YYECEMUV.js";
|
|
26
26
|
import {
|
|
27
27
|
createBuiltinRegistry,
|
|
28
28
|
executeScript,
|
|
@@ -39,7 +39,7 @@ import {
|
|
|
39
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
40
40
|
transpileEvalYamlFile,
|
|
41
41
|
trimBaselineResult
|
|
42
|
-
} from "./chunk-D3LNJUUB.js";
|
|
42
|
+
} from "./chunk-3TBDSUYD.js";
|
|
43
43
|
import {
|
|
44
44
|
__commonJS,
|
|
45
45
|
__esm,
|
|
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
|
|
|
4185
4185
|
},
|
|
4186
4186
|
handler: async (args) => {
|
|
4187
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4188
|
-
const { launchInteractiveWizard } = await import("./interactive-HVKLYGRX.js");
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-AI75XY3X.js");
|
|
4189
4189
|
await launchInteractiveWizard();
|
|
4190
4190
|
return;
|
|
4191
4191
|
}
|
|
@@ -5543,8 +5543,8 @@ var resultsCommand = subcommands({
|
|
|
5543
5543
|
import { existsSync as existsSync4, readFileSync as readFileSync6, writeFileSync as writeFileSync3 } from "node:fs";
|
|
5544
5544
|
import path8 from "node:path";
|
|
5545
5545
|
import { Hono } from "hono";
|
|
5546
|
-
function feedbackPath(
|
|
5547
|
-
return path8.join(
|
|
5546
|
+
function feedbackPath(resultDir) {
|
|
5547
|
+
return path8.join(resultDir, "feedback.json");
|
|
5548
5548
|
}
|
|
5549
5549
|
function readFeedback(cwd) {
|
|
5550
5550
|
const fp = feedbackPath(cwd);
|
|
@@ -5562,13 +5562,43 @@ function writeFeedback(cwd, data) {
|
|
|
5562
5562
|
writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
|
|
5563
5563
|
`, "utf8");
|
|
5564
5564
|
}
|
|
5565
|
-
function createApp(results, cwd) {
|
|
5565
|
+
function createApp(results, resultDir, cwd, sourceFile) {
|
|
5566
|
+
const searchDir = cwd ?? resultDir;
|
|
5566
5567
|
const app2 = new Hono();
|
|
5567
5568
|
app2.get("/", (c3) => {
|
|
5568
|
-
return c3.html(generateServeHtml(results));
|
|
5569
|
+
return c3.html(generateServeHtml(results, sourceFile));
|
|
5570
|
+
});
|
|
5571
|
+
app2.get("/api/runs", (c3) => {
|
|
5572
|
+
const metas = listResultFiles(searchDir);
|
|
5573
|
+
return c3.json({
|
|
5574
|
+
runs: metas.map((m) => ({
|
|
5575
|
+
filename: m.filename,
|
|
5576
|
+
path: m.path,
|
|
5577
|
+
timestamp: m.timestamp,
|
|
5578
|
+
test_count: m.testCount,
|
|
5579
|
+
pass_rate: m.passRate,
|
|
5580
|
+
avg_score: m.avgScore,
|
|
5581
|
+
size_bytes: m.sizeBytes
|
|
5582
|
+
}))
|
|
5583
|
+
});
|
|
5584
|
+
});
|
|
5585
|
+
app2.get("/api/runs/:filename", (c3) => {
|
|
5586
|
+
const filename = c3.req.param("filename");
|
|
5587
|
+
const metas = listResultFiles(searchDir);
|
|
5588
|
+
const meta = metas.find((m) => m.filename === filename);
|
|
5589
|
+
if (!meta) {
|
|
5590
|
+
return c3.json({ error: "Run not found" }, 404);
|
|
5591
|
+
}
|
|
5592
|
+
try {
|
|
5593
|
+
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
5594
|
+
const lightResults = stripHeavyFields(loaded);
|
|
5595
|
+
return c3.json({ results: lightResults, source: meta.filename });
|
|
5596
|
+
} catch (err2) {
|
|
5597
|
+
return c3.json({ error: "Failed to load run" }, 500);
|
|
5598
|
+
}
|
|
5569
5599
|
});
|
|
5570
5600
|
app2.get("/api/feedback", (c3) => {
|
|
5571
|
-
const data = readFeedback(cwd);
|
|
5601
|
+
const data = readFeedback(resultDir);
|
|
5572
5602
|
return c3.json(data);
|
|
5573
5603
|
});
|
|
5574
5604
|
app2.post("/api/feedback", async (c3) => {
|
|
@@ -5591,7 +5621,7 @@ function createApp(results, cwd) {
|
|
|
5591
5621
|
return c3.json({ error: "Each review must have test_id and comment strings" }, 400);
|
|
5592
5622
|
}
|
|
5593
5623
|
}
|
|
5594
|
-
const existing = readFeedback(cwd);
|
|
5624
|
+
const existing = readFeedback(resultDir);
|
|
5595
5625
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
5596
5626
|
for (const review of incoming) {
|
|
5597
5627
|
const newReview = {
|
|
@@ -5606,16 +5636,13 @@ function createApp(results, cwd) {
|
|
|
5606
5636
|
existing.reviews.push(newReview);
|
|
5607
5637
|
}
|
|
5608
5638
|
}
|
|
5609
|
-
writeFeedback(cwd, existing);
|
|
5639
|
+
writeFeedback(resultDir, existing);
|
|
5610
5640
|
return c3.json(existing);
|
|
5611
5641
|
});
|
|
5612
5642
|
return app2;
|
|
5613
5643
|
}
|
|
5614
|
-
function
|
|
5615
|
-
return
|
|
5616
|
-
}
|
|
5617
|
-
function generateServeHtml(results) {
|
|
5618
|
-
const lightResults = results.map((r) => {
|
|
5644
|
+
function stripHeavyFields(results) {
|
|
5645
|
+
return results.map((r) => {
|
|
5619
5646
|
const { requests, trace, ...rest } = r;
|
|
5620
5647
|
const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
|
|
5621
5648
|
const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
|
|
@@ -5625,6 +5652,12 @@ function generateServeHtml(results) {
|
|
|
5625
5652
|
...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
|
|
5626
5653
|
};
|
|
5627
5654
|
});
|
|
5655
|
+
}
|
|
5656
|
+
function escapeHtml(s) {
|
|
5657
|
+
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
5658
|
+
}
|
|
5659
|
+
function generateServeHtml(results, sourceFile) {
|
|
5660
|
+
const lightResults = stripHeavyFields(results);
|
|
5628
5661
|
const dataJson = JSON.stringify(lightResults).replace(/</g, "\\u003c").replace(/>/g, "\\u003e").replace(/\u2028/g, "\\u2028").replace(/\u2029/g, "\\u2029");
|
|
5629
5662
|
return `<!DOCTYPE html>
|
|
5630
5663
|
<html lang="en">
|
|
@@ -5642,6 +5675,11 @@ ${SERVE_STYLES}
|
|
|
5642
5675
|
<h1 class="header-title">AgentV</h1>
|
|
5643
5676
|
<span class="header-subtitle">Results Review</span>
|
|
5644
5677
|
</div>
|
|
5678
|
+
<div class="header-center">
|
|
5679
|
+
<select id="run-picker" class="run-picker" title="Switch result file">
|
|
5680
|
+
<option value="">Loading runs...</option>
|
|
5681
|
+
</select>
|
|
5682
|
+
</div>
|
|
5645
5683
|
<div class="header-right">
|
|
5646
5684
|
<span class="timestamp">${escapeHtml((/* @__PURE__ */ new Date()).toISOString())}</span>
|
|
5647
5685
|
</div>
|
|
@@ -5653,6 +5691,7 @@ ${SERVE_STYLES}
|
|
|
5653
5691
|
<main id="app"></main>
|
|
5654
5692
|
<script>
|
|
5655
5693
|
var DATA = ${dataJson};
|
|
5694
|
+
var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
|
|
5656
5695
|
${SERVE_SCRIPT}
|
|
5657
5696
|
</script>
|
|
5658
5697
|
</body>
|
|
@@ -5679,6 +5718,10 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
|
|
|
5679
5718
|
.header-left{display:flex;align-items:baseline;gap:12px}
|
|
5680
5719
|
.header-title{font-size:18px;font-weight:600}
|
|
5681
5720
|
.header-subtitle{font-size:14px;color:var(--text-muted)}
|
|
5721
|
+
.header-center{flex:1;display:flex;justify-content:center;padding:0 16px}
|
|
5722
|
+
.run-picker{padding:6px 10px;border:1px solid var(--border);border-radius:var(--radius);font-size:13px;background:var(--surface);color:var(--text);font-family:var(--font);max-width:400px;width:100%;cursor:pointer}
|
|
5723
|
+
.run-picker:hover{border-color:var(--primary)}
|
|
5724
|
+
.run-picker:focus{outline:none;border-color:var(--primary);box-shadow:0 0 0 3px var(--primary-bg)}
|
|
5682
5725
|
.timestamp{font-size:12px;color:var(--text-muted);font-family:var(--mono)}
|
|
5683
5726
|
|
|
5684
5727
|
/* Tabs */
|
|
@@ -5778,6 +5821,11 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
|
|
|
5778
5821
|
.tool-tag{display:inline-block;padding:2px 10px;font-size:12px;font-family:var(--mono);background:var(--primary-bg);color:var(--primary);border:1px solid var(--border);border-radius:12px}
|
|
5779
5822
|
.empty-state{text-align:center;padding:48px 24px;color:var(--text-muted)}
|
|
5780
5823
|
.empty-state h3{font-size:16px;margin-bottom:8px;color:var(--text)}
|
|
5824
|
+
.welcome-state{text-align:center;padding:80px 24px;color:var(--text-muted)}
|
|
5825
|
+
.welcome-state h2{font-size:24px;margin-bottom:12px;color:var(--text);font-weight:600}
|
|
5826
|
+
.welcome-state p{font-size:15px;margin-bottom:8px;max-width:500px;margin-left:auto;margin-right:auto}
|
|
5827
|
+
.welcome-state code{font-family:var(--mono);background:var(--surface);border:1px solid var(--border);border-radius:3px;padding:2px 6px;font-size:13px}
|
|
5828
|
+
.welcome-state .hint{margin-top:24px;font-size:13px;color:var(--text-muted)}
|
|
5781
5829
|
|
|
5782
5830
|
/* Feedback */
|
|
5783
5831
|
.feedback-section{margin-top:16px;padding-top:16px;border-top:1px solid var(--border-light)}
|
|
@@ -5935,7 +5983,15 @@ var SERVE_SCRIPT = `
|
|
|
5935
5983
|
|
|
5936
5984
|
/* ---- render ---- */
|
|
5937
5985
|
function render(){
|
|
5938
|
-
if(DATA.length===0){
|
|
5986
|
+
if(DATA.length===0){
|
|
5987
|
+
app.innerHTML='<div class="welcome-state">'
|
|
5988
|
+
+'<h2>No results yet</h2>'
|
|
5989
|
+
+'<p>Run an evaluation or mount a results directory to see results here.</p>'
|
|
5990
|
+
+'<p><code>agentv eval &lt;eval-file&gt;</code></p>'
|
|
5991
|
+
+'<p class="hint">The dashboard will automatically detect new result files.</p>'
|
|
5992
|
+
+'</div>';
|
|
5993
|
+
return;
|
|
5994
|
+
}
|
|
5939
5995
|
if(state.tab==="overview")renderOverview();else renderTests();
|
|
5940
5996
|
}
|
|
5941
5997
|
|
|
@@ -6198,6 +6254,69 @@ var SERVE_SCRIPT = `
|
|
|
6198
6254
|
return h;
|
|
6199
6255
|
}
|
|
6200
6256
|
|
|
6257
|
+
/* ---- run picker ---- */
|
|
6258
|
+
var runPicker=document.getElementById("run-picker");
|
|
6259
|
+
var knownRunFilenames=[];
|
|
6260
|
+
|
|
6261
|
+
function refreshRunList(){
|
|
6262
|
+
fetch("/api/runs").then(function(r){return r.json();}).then(function(d){
|
|
6263
|
+
if(!d||!d.runs)return;
|
|
6264
|
+
var runs=d.runs;
|
|
6265
|
+
var newFilenames=runs.map(function(r){return r.filename;});
|
|
6266
|
+
|
|
6267
|
+
/* Detect new runs that appeared since last poll */
|
|
6268
|
+
if(knownRunFilenames.length>0){
|
|
6269
|
+
var hasNew=newFilenames.some(function(f){return knownRunFilenames.indexOf(f)===-1;});
|
|
6270
|
+
if(hasNew&&DATA.length===0){
|
|
6271
|
+
/* Auto-load the first (most recent) run when starting from empty state */
|
|
6272
|
+
loadRun(runs[0].filename);
|
|
6273
|
+
}
|
|
6274
|
+
}
|
|
6275
|
+
knownRunFilenames=newFilenames;
|
|
6276
|
+
|
|
6277
|
+
/* Rebuild picker options */
|
|
6278
|
+
var h='<option value="">Select a result file...</option>';
|
|
6279
|
+
if(runs.length===0){
|
|
6280
|
+
h='<option value="">No result files</option>';
|
|
6281
|
+
}
|
|
6282
|
+
for(var i=0;i<runs.length;i++){
|
|
6283
|
+
var r=runs[i];
|
|
6284
|
+
var label=r.filename+" ("+r.test_count+" tests, "+(r.pass_rate*100).toFixed(0)+"% pass)";
|
|
6285
|
+
h+='<option value="'+esc(r.filename)+'">'+esc(label)+"</option>";
|
|
6286
|
+
}
|
|
6287
|
+
runPicker.innerHTML=h;
|
|
6288
|
+
/* Pre-select the initially loaded run */
|
|
6289
|
+
if(INITIAL_SOURCE&&runs.length>0){
|
|
6290
|
+
runPicker.value=INITIAL_SOURCE;
|
|
6291
|
+
}
|
|
6292
|
+
}).catch(function(err){console.warn("Failed to refresh run list:",err);});
|
|
6293
|
+
}
|
|
6294
|
+
|
|
6295
|
+
function loadRun(filename){
|
|
6296
|
+
fetch("/api/runs/"+encodeURIComponent(filename)).then(function(r){return r.json();}).then(function(d){
|
|
6297
|
+
if(d.error){console.error(d.error);return;}
|
|
6298
|
+
DATA=d.results;
|
|
6299
|
+
stats=computeStats(DATA);
|
|
6300
|
+
tgtStats=computeTargets(DATA);
|
|
6301
|
+
tgtNames=tgtStats.map(function(t){return t.target;});
|
|
6302
|
+
state.expanded={};
|
|
6303
|
+
feedbackCache={};
|
|
6304
|
+
loadFeedback();
|
|
6305
|
+
render();
|
|
6306
|
+
/* Update picker selection */
|
|
6307
|
+
runPicker.value=filename;
|
|
6308
|
+
}).catch(function(err){console.error("Failed to load run:",err);});
|
|
6309
|
+
}
|
|
6310
|
+
|
|
6311
|
+
runPicker.addEventListener("change",function(){
|
|
6312
|
+
var val=runPicker.value;
|
|
6313
|
+
if(val)loadRun(val);
|
|
6314
|
+
});
|
|
6315
|
+
|
|
6316
|
+
/* Poll for new result files every 5 seconds */
|
|
6317
|
+
refreshRunList();
|
|
6318
|
+
setInterval(refreshRunList,5000);
|
|
6319
|
+
|
|
6201
6320
|
/* ---- init ---- */
|
|
6202
6321
|
loadFeedback();
|
|
6203
6322
|
render();
|
|
@@ -6216,7 +6335,7 @@ var resultsServeCommand = command({
|
|
|
6216
6335
|
type: optional(number),
|
|
6217
6336
|
long: "port",
|
|
6218
6337
|
short: "p",
|
|
6219
|
-
description: "Port to listen on (
|
|
6338
|
+
description: "Port to listen on (flag \u2192 PORT env var \u2192 3117)"
|
|
6220
6339
|
}),
|
|
6221
6340
|
dir: option({
|
|
6222
6341
|
type: optional(string),
|
|
@@ -6227,14 +6346,44 @@ var resultsServeCommand = command({
|
|
|
6227
6346
|
},
|
|
6228
6347
|
handler: async ({ source, port, dir }) => {
|
|
6229
6348
|
const cwd = dir ?? process.cwd();
|
|
6230
|
-
const listenPort = port ?? 3117;
|
|
6349
|
+
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
|
|
6231
6350
|
try {
|
|
6232
|
-
|
|
6233
|
-
|
|
6234
|
-
|
|
6351
|
+
let results = [];
|
|
6352
|
+
let sourceFile;
|
|
6353
|
+
if (source) {
|
|
6354
|
+
const resolved = resolveResultSourcePath(source, cwd);
|
|
6355
|
+
if (!existsSync4(resolved)) {
|
|
6356
|
+
console.error(`Error: Source file not found: ${resolved}`);
|
|
6357
|
+
process.exit(1);
|
|
6358
|
+
}
|
|
6359
|
+
sourceFile = resolved;
|
|
6360
|
+
results = patchTestIds(loadManifestResults(resolved));
|
|
6361
|
+
} else {
|
|
6362
|
+
const cache = await loadRunCache(cwd);
|
|
6363
|
+
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
6364
|
+
if (cachedFile && existsSync4(cachedFile)) {
|
|
6365
|
+
sourceFile = cachedFile;
|
|
6366
|
+
results = patchTestIds(loadManifestResults(cachedFile));
|
|
6367
|
+
} else {
|
|
6368
|
+
const metas = listResultFiles(cwd, 1);
|
|
6369
|
+
if (metas.length > 0) {
|
|
6370
|
+
sourceFile = metas[0].path;
|
|
6371
|
+
results = patchTestIds(loadManifestResults(metas[0].path));
|
|
6372
|
+
}
|
|
6373
|
+
}
|
|
6374
|
+
}
|
|
6375
|
+
const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
|
|
6376
|
+
const app2 = createApp(results, resultDir, cwd, sourceFile);
|
|
6377
|
+
if (results.length > 0 && sourceFile) {
|
|
6378
|
+
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
6379
|
+
} else {
|
|
6380
|
+
console.log("No results found. Dashboard will show an empty state.");
|
|
6381
|
+
console.log("Run an evaluation to see results: agentv eval <eval-file>");
|
|
6382
|
+
}
|
|
6235
6383
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
6236
6384
|
console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
|
|
6237
|
-
console.log(`
|
|
6385
|
+
console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
|
|
6386
|
+
console.log(`Feedback file: ${feedbackPath(resultDir)}`);
|
|
6238
6387
|
console.log("Press Ctrl+C to stop");
|
|
6239
6388
|
const { serve: startServer } = await import("@hono/node-server");
|
|
6240
6389
|
startServer({
|
|
@@ -7765,4 +7914,4 @@ export {
|
|
|
7765
7914
|
preprocessArgv,
|
|
7766
7915
|
runCli
|
|
7767
7916
|
};
|
|
7768
|
-
//# sourceMappingURL=chunk-X2343WOK.js.map
|
|
7917
|
+
//# sourceMappingURL=chunk-W6CGDNQR.js.map
|