agentv 3.13.3 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-D3LNJUUB.js → chunk-3TBDSUYD.js} +659 -320
- package/dist/chunk-3TBDSUYD.js.map +1 -0
- package/dist/{chunk-TGCWIHBH.js → chunk-W6CGDNQR.js} +166 -18
- package/dist/chunk-W6CGDNQR.js.map +1 -0
- package/dist/{chunk-PACTPWEN.js → chunk-YYECEMUV.js} +8 -5
- package/dist/chunk-YYECEMUV.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-KPMR7RBT.js → dist-ZGLENPVH.js} +4 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-OMJAMCQP.js → interactive-AI75XY3X.js} +3 -3
- package/dist/templates/.agentv/config.yaml +4 -13
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +5 -2
- package/dist/chunk-D3LNJUUB.js.map +0 -1
- package/dist/chunk-PACTPWEN.js.map +0 -1
- package/dist/chunk-TGCWIHBH.js.map +0 -1
- package/dist/templates/.agentv/.env.example +0 -23
- /package/dist/{dist-KPMR7RBT.js.map → dist-ZGLENPVH.js.map} +0 -0
- /package/dist/{interactive-OMJAMCQP.js.map → interactive-AI75XY3X.js.map} +0 -0
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
validateFileReferences,
|
|
23
23
|
validateTargetsFile,
|
|
24
24
|
writeArtifactsFromResults
|
|
25
|
-
} from "./chunk-PACTPWEN.js";
|
|
25
|
+
} from "./chunk-YYECEMUV.js";
|
|
26
26
|
import {
|
|
27
27
|
createBuiltinRegistry,
|
|
28
28
|
executeScript,
|
|
@@ -39,7 +39,7 @@ import {
|
|
|
39
39
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
40
40
|
transpileEvalYamlFile,
|
|
41
41
|
trimBaselineResult
|
|
42
|
-
} from "./chunk-D3LNJUUB.js";
|
|
42
|
+
} from "./chunk-3TBDSUYD.js";
|
|
43
43
|
import {
|
|
44
44
|
__commonJS,
|
|
45
45
|
__esm,
|
|
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
|
|
|
4185
4185
|
},
|
|
4186
4186
|
handler: async (args) => {
|
|
4187
4187
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4188
|
-
const { launchInteractiveWizard } = await import("./interactive-OMJAMCQP.js");
|
|
4188
|
+
const { launchInteractiveWizard } = await import("./interactive-AI75XY3X.js");
|
|
4189
4189
|
await launchInteractiveWizard();
|
|
4190
4190
|
return;
|
|
4191
4191
|
}
|
|
@@ -5562,10 +5562,40 @@ function writeFeedback(cwd, data) {
|
|
|
5562
5562
|
writeFileSync3(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
|
|
5563
5563
|
`, "utf8");
|
|
5564
5564
|
}
|
|
5565
|
-
function createApp(results, resultDir) {
|
|
5565
|
+
function createApp(results, resultDir, cwd, sourceFile) {
|
|
5566
|
+
const searchDir = cwd ?? resultDir;
|
|
5566
5567
|
const app2 = new Hono();
|
|
5567
5568
|
app2.get("/", (c3) => {
|
|
5568
|
-
return c3.html(generateServeHtml(results));
|
|
5569
|
+
return c3.html(generateServeHtml(results, sourceFile));
|
|
5570
|
+
});
|
|
5571
|
+
app2.get("/api/runs", (c3) => {
|
|
5572
|
+
const metas = listResultFiles(searchDir);
|
|
5573
|
+
return c3.json({
|
|
5574
|
+
runs: metas.map((m) => ({
|
|
5575
|
+
filename: m.filename,
|
|
5576
|
+
path: m.path,
|
|
5577
|
+
timestamp: m.timestamp,
|
|
5578
|
+
test_count: m.testCount,
|
|
5579
|
+
pass_rate: m.passRate,
|
|
5580
|
+
avg_score: m.avgScore,
|
|
5581
|
+
size_bytes: m.sizeBytes
|
|
5582
|
+
}))
|
|
5583
|
+
});
|
|
5584
|
+
});
|
|
5585
|
+
app2.get("/api/runs/:filename", (c3) => {
|
|
5586
|
+
const filename = c3.req.param("filename");
|
|
5587
|
+
const metas = listResultFiles(searchDir);
|
|
5588
|
+
const meta = metas.find((m) => m.filename === filename);
|
|
5589
|
+
if (!meta) {
|
|
5590
|
+
return c3.json({ error: "Run not found" }, 404);
|
|
5591
|
+
}
|
|
5592
|
+
try {
|
|
5593
|
+
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
5594
|
+
const lightResults = stripHeavyFields(loaded);
|
|
5595
|
+
return c3.json({ results: lightResults, source: meta.filename });
|
|
5596
|
+
} catch (err2) {
|
|
5597
|
+
return c3.json({ error: "Failed to load run" }, 500);
|
|
5598
|
+
}
|
|
5569
5599
|
});
|
|
5570
5600
|
app2.get("/api/feedback", (c3) => {
|
|
5571
5601
|
const data = readFeedback(resultDir);
|
|
@@ -5611,11 +5641,8 @@ function createApp(results, resultDir) {
|
|
|
5611
5641
|
});
|
|
5612
5642
|
return app2;
|
|
5613
5643
|
}
|
|
5614
|
-
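function escapeHtml(s) {
|
|
5615
|
-
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");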
|
|
5616
|
-
}
|
|
5617
|
-
function generateServeHtml(results) {
|
|
5618
|
-
const lightResults = results.map((r) => {
|
|
5644
|
+
function stripHeavyFields(results) {
|
|
5645
|
+
return results.map((r) => {
|
|
5619
5646
|
const { requests, trace, ...rest } = r;
|
|
5620
5647
|
const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
|
|
5621
5648
|
const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
|
|
@@ -5625,6 +5652,12 @@ function generateServeHtml(results) {
|
|
|
5625
5652
|
...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
|
|
5626
5653
|
};
|
|
5627
5654
|
});
|
|
5655
|
+
}
|
|
5656
|
+
function escapeHtml(s) {
|
|
5657
|
+
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
5658
|
+
}
|
|
5659
|
+
function generateServeHtml(results, sourceFile) {
|
|
5660
|
+
const lightResults = stripHeavyFields(results);
|
|
5628
5661
|
const dataJson = JSON.stringify(lightResults).replace(/</g, "\\u003c").replace(/>/g, "\\u003e").replace(/\u2028/g, "\\u2028").replace(/\u2029/g, "\\u2029");
|
|
5629
5662
|
return `<!DOCTYPE html>
|
|
5630
5663
|
<html lang="en">
|
|
@@ -5642,6 +5675,11 @@ ${SERVE_STYLES}
|
|
|
5642
5675
|
<h1 class="header-title">AgentV</h1>
|
|
5643
5676
|
<span class="header-subtitle">Results Review</span>
|
|
5644
5677
|
</div>
|
|
5678
|
+
<div class="header-center">
|
|
5679
|
+
<select id="run-picker" class="run-picker" title="Switch result file">
|
|
5680
|
+
<option value="">Loading runs...</option>
|
|
5681
|
+
</select>
|
|
5682
|
+
</div>
|
|
5645
5683
|
<div class="header-right">
|
|
5646
5684
|
<span class="timestamp">${escapeHtml((/* @__PURE__ */ new Date()).toISOString())}</span>
|
|
5647
5685
|
</div>
|
|
@@ -5653,6 +5691,7 @@ ${SERVE_STYLES}
|
|
|
5653
5691
|
<main id="app"></main>
|
|
5654
5692
|
<script>
|
|
5655
5693
|
var DATA = ${dataJson};
|
|
5694
|
+
var INITIAL_SOURCE = ${sourceFile ? JSON.stringify(path8.basename(sourceFile)).replace(/</g, "\\u003c").replace(/>/g, "\\u003e") : "null"};
|
|
5656
5695
|
${SERVE_SCRIPT}
|
|
5657
5696
|
</script>
|
|
5658
5697
|
</body>
|
|
@@ -5679,6 +5718,10 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
|
|
|
5679
5718
|
.header-left{display:flex;align-items:baseline;gap:12px}
|
|
5680
5719
|
.header-title{font-size:18px;font-weight:600}
|
|
5681
5720
|
.header-subtitle{font-size:14px;color:var(--text-muted)}
|
|
5721
|
+
.header-center{flex:1;display:flex;justify-content:center;padding:0 16px}
|
|
5722
|
+
.run-picker{padding:6px 10px;border:1px solid var(--border);border-radius:var(--radius);font-size:13px;background:var(--surface);color:var(--text);font-family:var(--font);max-width:400px;width:100%;cursor:pointer}
|
|
5723
|
+
.run-picker:hover{border-color:var(--primary)}
|
|
5724
|
+
.run-picker:focus{outline:none;border-color:var(--primary);box-shadow:0 0 0 3px var(--primary-bg)}
|
|
5682
5725
|
.timestamp{font-size:12px;color:var(--text-muted);font-family:var(--mono)}
|
|
5683
5726
|
|
|
5684
5727
|
/* Tabs */
|
|
@@ -5778,6 +5821,11 @@ body{font-family:var(--font);background:var(--bg);color:var(--text);line-height:
|
|
|
5778
5821
|
.tool-tag{display:inline-block;padding:2px 10px;font-size:12px;font-family:var(--mono);background:var(--primary-bg);color:var(--primary);border:1px solid var(--border);border-radius:12px}
|
|
5779
5822
|
.empty-state{text-align:center;padding:48px 24px;color:var(--text-muted)}
|
|
5780
5823
|
.empty-state h3{font-size:16px;margin-bottom:8px;color:var(--text)}
|
|
5824
|
+
.welcome-state{text-align:center;padding:80px 24px;color:var(--text-muted)}
|
|
5825
|
+
.welcome-state h2{font-size:24px;margin-bottom:12px;color:var(--text);font-weight:600}
|
|
5826
|
+
.welcome-state p{font-size:15px;margin-bottom:8px;max-width:500px;margin-left:auto;margin-right:auto}
|
|
5827
|
+
.welcome-state code{font-family:var(--mono);background:var(--surface);border:1px solid var(--border);border-radius:3px;padding:2px 6px;font-size:13px}
|
|
5828
|
+
.welcome-state .hint{margin-top:24px;font-size:13px;color:var(--text-muted)}
|
|
5781
5829
|
|
|
5782
5830
|
/* Feedback */
|
|
5783
5831
|
.feedback-section{margin-top:16px;padding-top:16px;border-top:1px solid var(--border-light)}
|
|
@@ -5935,7 +5983,15 @@ var SERVE_SCRIPT = `
|
|
|
5935
5983
|
|
|
5936
5984
|
/* ---- render ---- */
|
|
5937
5985
|
function render(){
|
|
5938
|
-
if(DATA.length===0){
|
|
5986
|
+
if(DATA.length===0){
|
|
5987
|
+
app.innerHTML='<div class="welcome-state">'
|
|
5988
|
+
+'<h2>No results yet</h2>'
|
|
5989
|
+
+'<p>Run an evaluation or mount a results directory to see results here.</p>'
|
|
5990
|
+
+'<p><code>agentv eval &lt;eval-file&gt;</code></p>'
|
|
5991
|
+
+'<p class="hint">The dashboard will automatically detect new result files.</p>'
|
|
5992
|
+
+'</div>';
|
|
5993
|
+
return;
|
|
5994
|
+
}
|
|
5939
5995
|
if(state.tab==="overview")renderOverview();else renderTests();
|
|
5940
5996
|
}
|
|
5941
5997
|
|
|
@@ -6198,6 +6254,69 @@ var SERVE_SCRIPT = `
|
|
|
6198
6254
|
return h;
|
|
6199
6255
|
}
|
|
6200
6256
|
|
|
6257
|
+
/* ---- run picker ---- */
|
|
6258
|
+
var runPicker=document.getElementById("run-picker");
|
|
6259
|
+
var knownRunFilenames=[];
|
|
6260
|
+
|
|
6261
|
+
function refreshRunList(){
|
|
6262
|
+
fetch("/api/runs").then(function(r){return r.json();}).then(function(d){
|
|
6263
|
+
if(!d||!d.runs)return;
|
|
6264
|
+
var runs=d.runs;
|
|
6265
|
+
var newFilenames=runs.map(function(r){return r.filename;});
|
|
6266
|
+
|
|
6267
|
+
/* Detect new runs that appeared since last poll */
|
|
6268
|
+
if(knownRunFilenames.length>0){
|
|
6269
|
+
var hasNew=newFilenames.some(function(f){return knownRunFilenames.indexOf(f)===-1;});
|
|
6270
|
+
if(hasNew&&DATA.length===0){
|
|
6271
|
+
/* Auto-load the first (most recent) run when starting from empty state */
|
|
6272
|
+
loadRun(runs[0].filename);
|
|
6273
|
+
}
|
|
6274
|
+
}
|
|
6275
|
+
knownRunFilenames=newFilenames;
|
|
6276
|
+
|
|
6277
|
+
/* Rebuild picker options */
|
|
6278
|
+
var h='<option value="">Select a result file...</option>';
|
|
6279
|
+
if(runs.length===0){
|
|
6280
|
+
h='<option value="">No result files</option>';
|
|
6281
|
+
}
|
|
6282
|
+
for(var i=0;i<runs.length;i++){
|
|
6283
|
+
var r=runs[i];
|
|
6284
|
+
var label=r.filename+" ("+r.test_count+" tests, "+(r.pass_rate*100).toFixed(0)+"% pass)";
|
|
6285
|
+
h+='<option value="'+esc(r.filename)+'">'+esc(label)+"</option>";
|
|
6286
|
+
}
|
|
6287
|
+
runPicker.innerHTML=h;
|
|
6288
|
+
/* Pre-select the initially loaded run */
|
|
6289
|
+
if(INITIAL_SOURCE&&runs.length>0){
|
|
6290
|
+
runPicker.value=INITIAL_SOURCE;
|
|
6291
|
+
}
|
|
6292
|
+
}).catch(function(err){console.warn("Failed to refresh run list:",err);});
|
|
6293
|
+
}
|
|
6294
|
+
|
|
6295
|
+
function loadRun(filename){
|
|
6296
|
+
fetch("/api/runs/"+encodeURIComponent(filename)).then(function(r){return r.json();}).then(function(d){
|
|
6297
|
+
if(d.error){console.error(d.error);return;}
|
|
6298
|
+
DATA=d.results;
|
|
6299
|
+
stats=computeStats(DATA);
|
|
6300
|
+
tgtStats=computeTargets(DATA);
|
|
6301
|
+
tgtNames=tgtStats.map(function(t){return t.target;});
|
|
6302
|
+
state.expanded={};
|
|
6303
|
+
feedbackCache={};
|
|
6304
|
+
loadFeedback();
|
|
6305
|
+
render();
|
|
6306
|
+
/* Update picker selection */
|
|
6307
|
+
runPicker.value=filename;
|
|
6308
|
+
}).catch(function(err){console.error("Failed to load run:",err);});
|
|
6309
|
+
}
|
|
6310
|
+
|
|
6311
|
+
runPicker.addEventListener("change",function(){
|
|
6312
|
+
var val=runPicker.value;
|
|
6313
|
+
if(val)loadRun(val);
|
|
6314
|
+
});
|
|
6315
|
+
|
|
6316
|
+
/* Poll for new result files every 5 seconds */
|
|
6317
|
+
refreshRunList();
|
|
6318
|
+
setInterval(refreshRunList,5000);
|
|
6319
|
+
|
|
6201
6320
|
/* ---- init ---- */
|
|
6202
6321
|
loadFeedback();
|
|
6203
6322
|
render();
|
|
@@ -6216,7 +6335,7 @@ var resultsServeCommand = command({
|
|
|
6216
6335
|
type: optional(number),
|
|
6217
6336
|
long: "port",
|
|
6218
6337
|
short: "p",
|
|
6219
|
-
description: "Port to listen on (
|
|
6338
|
+
description: "Port to listen on (flag \u2192 PORT env var \u2192 3117)"
|
|
6220
6339
|
}),
|
|
6221
6340
|
dir: option({
|
|
6222
6341
|
type: optional(string),
|
|
@@ -6227,14 +6346,43 @@ var resultsServeCommand = command({
|
|
|
6227
6346
|
},
|
|
6228
6347
|
handler: async ({ source, port, dir }) => {
|
|
6229
6348
|
const cwd = dir ?? process.cwd();
|
|
6230
|
-
const listenPort = port ?? 3117;
|
|
6349
|
+
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
|
|
6231
6350
|
try {
|
|
6232
|
-
|
|
6233
|
-
|
|
6234
|
-
|
|
6235
|
-
|
|
6351
|
+
let results = [];
|
|
6352
|
+
let sourceFile;
|
|
6353
|
+
if (source) {
|
|
6354
|
+
const resolved = resolveResultSourcePath(source, cwd);
|
|
6355
|
+
if (!existsSync4(resolved)) {
|
|
6356
|
+
console.error(`Error: Source file not found: ${resolved}`);
|
|
6357
|
+
process.exit(1);
|
|
6358
|
+
}
|
|
6359
|
+
sourceFile = resolved;
|
|
6360
|
+
results = patchTestIds(loadManifestResults(resolved));
|
|
6361
|
+
} else {
|
|
6362
|
+
const cache = await loadRunCache(cwd);
|
|
6363
|
+
const cachedFile = cache ? resolveRunCacheFile(cache) : "";
|
|
6364
|
+
if (cachedFile && existsSync4(cachedFile)) {
|
|
6365
|
+
sourceFile = cachedFile;
|
|
6366
|
+
results = patchTestIds(loadManifestResults(cachedFile));
|
|
6367
|
+
} else {
|
|
6368
|
+
const metas = listResultFiles(cwd, 1);
|
|
6369
|
+
if (metas.length > 0) {
|
|
6370
|
+
sourceFile = metas[0].path;
|
|
6371
|
+
results = patchTestIds(loadManifestResults(metas[0].path));
|
|
6372
|
+
}
|
|
6373
|
+
}
|
|
6374
|
+
}
|
|
6375
|
+
const resultDir = sourceFile ? path8.dirname(path8.resolve(sourceFile)) : cwd;
|
|
6376
|
+
const app2 = createApp(results, resultDir, cwd, sourceFile);
|
|
6377
|
+
if (results.length > 0 && sourceFile) {
|
|
6378
|
+
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
6379
|
+
} else {
|
|
6380
|
+
console.log("No results found. Dashboard will show an empty state.");
|
|
6381
|
+
console.log("Run an evaluation to see results: agentv eval <eval-file>");
|
|
6382
|
+
}
|
|
6236
6383
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
6237
6384
|
console.log(`Feedback API: http://localhost:${listenPort}/api/feedback`);
|
|
6385
|
+
console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
|
|
6238
6386
|
console.log(`Feedback file: ${feedbackPath(resultDir)}`);
|
|
6239
6387
|
console.log("Press Ctrl+C to stop");
|
|
6240
6388
|
const { serve: startServer } = await import("@hono/node-server");
|
|
@@ -7766,4 +7914,4 @@ export {
|
|
|
7766
7914
|
preprocessArgv,
|
|
7767
7915
|
runCli
|
|
7768
7916
|
};
|
|
7769
|
-
//# sourceMappingURL=chunk-TGCWIHBH.js.map
|
|
7917
|
+
//# sourceMappingURL=chunk-W6CGDNQR.js.map
|