agentv 4.3.4 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HMOXP7T5.js → chunk-63NDZ6UC.js} +182 -60
- package/dist/chunk-63NDZ6UC.js.map +1 -0
- package/dist/{chunk-WICUFOIA.js → chunk-BAYNXTX6.js} +4 -4
- package/dist/{chunk-WICUFOIA.js.map → chunk-BAYNXTX6.js.map} +1 -1
- package/dist/{chunk-TCJKPOU7.js → chunk-YORCRL4G.js} +550 -387
- package/dist/chunk-YORCRL4G.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-M7R6II6Y.js → dist-P74O2P2I.js} +20 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-GVBU4GSC.js → interactive-YNSOO2BS.js} +3 -3
- package/dist/studio/assets/index-4pi03cUm.js +65 -0
- package/dist/studio/assets/{index-tOa8ADje.js → index-CnW7PJA8.js} +1 -1
- package/dist/studio/assets/index-jJVIJh8b.css +1 -0
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-HMOXP7T5.js.map +0 -1
- package/dist/chunk-TCJKPOU7.js.map +0 -1
- package/dist/studio/assets/index-CcrZuqEa.js +0 -65
- package/dist/studio/assets/index-xvMmIJ7Q.css +0 -1
- /package/dist/{dist-M7R6II6Y.js.map → dist-P74O2P2I.js.map} +0 -0
- /package/dist/{interactive-GVBU4GSC.js.map → interactive-YNSOO2BS.js.map} +0 -0
|
@@ -24,27 +24,32 @@ import {
|
|
|
24
24
|
validateFileReferences,
|
|
25
25
|
validateTargetsFile,
|
|
26
26
|
writeArtifactsFromResults
|
|
27
|
-
} from "./chunk-
|
|
27
|
+
} from "./chunk-BAYNXTX6.js";
|
|
28
28
|
import {
|
|
29
29
|
DEFAULT_CATEGORY,
|
|
30
30
|
PASS_THRESHOLD,
|
|
31
|
+
addProject,
|
|
31
32
|
createBuiltinRegistry,
|
|
32
33
|
deriveCategory,
|
|
34
|
+
discoverProjects,
|
|
33
35
|
executeScript,
|
|
34
36
|
getAgentvHome,
|
|
35
37
|
getOutputFilenames,
|
|
38
|
+
getProject,
|
|
36
39
|
getWorkspacePoolRoot,
|
|
37
40
|
isAgentSkillsFormat,
|
|
41
|
+
loadProjectRegistry,
|
|
38
42
|
loadTestById,
|
|
39
43
|
loadTestSuite,
|
|
40
44
|
loadTests,
|
|
41
45
|
normalizeLineEndings,
|
|
42
46
|
parseAgentSkillsEvals,
|
|
47
|
+
removeProject,
|
|
43
48
|
toCamelCaseDeep,
|
|
44
49
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
45
50
|
transpileEvalYamlFile,
|
|
46
51
|
trimBaselineResult
|
|
47
|
-
} from "./chunk-
|
|
52
|
+
} from "./chunk-63NDZ6UC.js";
|
|
48
53
|
import {
|
|
49
54
|
__commonJS,
|
|
50
55
|
__esm,
|
|
@@ -4218,7 +4223,7 @@ var evalRunCommand = command({
|
|
|
4218
4223
|
},
|
|
4219
4224
|
handler: async (args) => {
|
|
4220
4225
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4221
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4226
|
+
const { launchInteractiveWizard } = await import("./interactive-YNSOO2BS.js");
|
|
4222
4227
|
await launchInteractiveWizard();
|
|
4223
4228
|
return;
|
|
4224
4229
|
}
|
|
@@ -6309,11 +6314,366 @@ function writeFeedback(cwd, data) {
|
|
|
6309
6314
|
writeFileSync4(feedbackPath(cwd), `${JSON.stringify(data, null, 2)}
|
|
6310
6315
|
`, "utf8");
|
|
6311
6316
|
}
|
|
6317
|
+
/**
 * Build a nested listing of everything below `dirPath`.
 *
 * Directories are listed before other entries; within each group entries are
 * ordered by `localeCompare` on their names. Every node carries its `name`,
 * its `path` relative to `relativeTo`, and a `type` of "dir" or "file";
 * directory nodes additionally carry their `children`.
 *
 * @param {string} dirPath - Directory to list.
 * @param {string} relativeTo - Base used to compute each node's relative path.
 * @returns {Array<object>} Tree nodes, or `[]` when `dirPath` is missing or not a directory.
 */
function buildFileTree(dirPath, relativeTo) {
  const isListable = existsSync8(dirPath) && statSync4(dirPath).isDirectory();
  if (!isListable) {
    return [];
  }
  const byName = (left, right) => left.name.localeCompare(right.name);
  const dirents = readdirSync3(dirPath, { withFileTypes: true });
  // Partition first, then sort each group: same result as one combined comparator.
  const folders = dirents.filter((dirent) => dirent.isDirectory()).sort(byName);
  const others = dirents.filter((dirent) => !dirent.isDirectory()).sort(byName);
  const tree = [];
  for (const dirent of [...folders, ...others]) {
    const absolute = path10.join(dirPath, dirent.name);
    const node = {
      name: dirent.name,
      path: path10.relative(relativeTo, absolute),
      type: "file"
    };
    if (dirent.isDirectory()) {
      node.type = "dir";
      node.children = buildFileTree(absolute, relativeTo);
    }
    tree.push(node);
  }
  return tree;
}
|
|
6339
|
+
/**
 * Map a file path to a language identifier based on its extension.
 *
 * The extension is compared case-insensitively; anything not listed below
 * (including paths without an extension) is reported as "plaintext".
 *
 * @param {string} filePath - Path or file name to classify.
 * @returns {string} Language identifier.
 */
function inferLanguage(filePath) {
  switch (path10.extname(filePath).toLowerCase()) {
    case ".json":
    case ".jsonl":
      return "json";
    case ".ts":
    case ".tsx":
      return "typescript";
    case ".js":
    case ".jsx":
      return "javascript";
    case ".md":
      return "markdown";
    case ".yaml":
    case ".yml":
      return "yaml";
    case ".py":
      return "python";
    case ".sh":
    case ".bash":
      return "shell";
    case ".css":
      return "css";
    case ".html":
      return "html";
    case ".xml":
    case ".svg":
      return "xml";
    case ".toml":
      return "toml";
    case ".diff":
    case ".patch":
      return "diff";
    case ".log":
    case ".txt":
    default:
      return "plaintext";
  }
}
|
|
6366
|
+
/**
 * Produce lighter copies of result records.
 *
 * `requests` and `trace` are dropped from every record. When the trace had a
 * non-empty `toolCalls` value it is kept as `_toolCalls`, and when the summed
 * `durationMs` of the record's `scores` is positive it is kept as
 * `_graderDurationMs`. The input records are not modified.
 *
 * @param {Array<object>} results - Result records.
 * @returns {Array<object>} Slimmed copies, in the same order.
 */
function stripHeavyFields(results) {
  return results.map((record) => {
    // `requests` is pulled out only so it is excluded from `slim`.
    const { requests, trace, ...slim } = record;

    const calls = trace?.toolCalls;
    if (calls && Object.keys(calls).length > 0) {
      slim._toolCalls = calls;
    }

    let graderDurationMs = 0;
    for (const score of record.scores ?? []) {
      graderDurationMs += score.durationMs ?? 0;
    }
    if (graderDurationMs > 0) {
      slim._graderDurationMs = graderDurationMs;
    }

    return slim;
  });
}
|
|
6378
|
+
/**
 * Respond with the list of runs found under `searchDir`.
 *
 * Each run reports the metadata returned by `listResultFiles`; `target` and
 * `experiment` are taken from the run's first lightweight record when they
 * are truthy.
 *
 * @param {object} c3 - Request context providing `json()`.
 * @param {{ searchDir: string }} ctx - Directory to search for result files.
 * @returns {*} The value returned by `c3.json({ runs })`.
 */
function handleRuns(c3, { searchDir }) {
  const describeRun = (meta) => {
    let target;
    let experiment;
    try {
      const records = loadLightweightResults(meta.path);
      if (records.length > 0) {
        ({ target, experiment } = records[0]);
      }
    } catch {
      // Best effort: a run whose records cannot be read is still listed,
      // just without target/experiment.
    }
    const run = {
      filename: meta.filename,
      path: meta.path,
      timestamp: meta.timestamp,
      test_count: meta.testCount,
      pass_rate: meta.passRate,
      avg_score: meta.avgScore,
      size_bytes: meta.sizeBytes
    };
    if (target) {
      run.target = target;
    }
    if (experiment) {
      run.experiment = experiment;
    }
    return run;
  };
  const runs = listResultFiles(searchDir).map(describeRun);
  return c3.json({ runs });
}
|
|
6406
|
+
/**
 * Respond with the slimmed results of the run named by the `filename` param.
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {{ searchDir: string }} ctx - Directory to search for result files.
 * @returns {*} `{ results, source }`, a 404 when the run is unknown, or a
 *   500 when loading it throws.
 */
function handleRunDetail(c3, { searchDir }) {
  const wanted = c3.req.param("filename");
  const meta = listResultFiles(searchDir).find((candidate) => candidate.filename === wanted);
  if (!meta) {
    return c3.json({ error: "Run not found" }, 404);
  }
  try {
    const rows = patchTestIds(loadManifestResults(meta.path));
    const results = stripHeavyFields(rows);
    return c3.json({ results, source: meta.filename });
  } catch {
    return c3.json({ error: "Failed to load run" }, 500);
  }
}
|
|
6417
|
+
/**
 * Respond with per-dataset pass/fail statistics for one run.
 *
 * Results are grouped by `dataset`, falling back to `target` and then to
 * "default". A result counts as passed when its `score` is at least the
 * configured `pass_threshold`.
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {{ searchDir: string, agentvDir: string }} ctx - Result and config directories.
 * @returns {*} `{ datasets }`, a 404 when the run is unknown, or a 500 when
 *   loading it throws.
 */
function handleRunDatasets(c3, { searchDir, agentvDir }) {
  const wanted = c3.req.param("filename");
  const meta = listResultFiles(searchDir).find((candidate) => candidate.filename === wanted);
  if (!meta) {
    return c3.json({ error: "Run not found" }, 404);
  }
  try {
    const rows = patchTestIds(loadManifestResults(meta.path));
    const { pass_threshold } = loadStudioConfig(agentvDir);
    const tally = new Map();
    for (const row of rows) {
      const key = row.dataset ?? row.target ?? "default";
      let bucket = tally.get(key);
      if (bucket == null) {
        bucket = { total: 0, passed: 0, scoreSum: 0 };
        tally.set(key, bucket);
      }
      bucket.total += 1;
      if (row.score >= pass_threshold) {
        bucket.passed += 1;
      }
      bucket.scoreSum += row.score;
    }
    const datasets = Array.from(tally, ([name, bucket]) => ({
      name,
      total: bucket.total,
      passed: bucket.passed,
      failed: bucket.total - bucket.passed,
      avg_score: bucket.total > 0 ? bucket.scoreSum / bucket.total : 0
    }));
    return c3.json({ datasets });
  } catch {
    return c3.json({ error: "Failed to load datasets" }, 500);
  }
}
|
|
6445
|
+
/**
 * Respond with per-category pass/fail statistics for one run.
 *
 * Results are grouped by `category` (falling back to `DEFAULT_CATEGORY`).
 * Each category also reports how many distinct datasets it covers, where a
 * result's dataset is `dataset`, then `target`, then "default".
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {{ searchDir: string, agentvDir: string }} ctx - Result and config directories.
 * @returns {*} `{ categories }`, a 404 when the run is unknown, or a 500
 *   when loading it throws.
 */
function handleRunCategories(c3, { searchDir, agentvDir }) {
  const wanted = c3.req.param("filename");
  const meta = listResultFiles(searchDir).find((candidate) => candidate.filename === wanted);
  if (!meta) {
    return c3.json({ error: "Run not found" }, 404);
  }
  try {
    const rows = patchTestIds(loadManifestResults(meta.path));
    const { pass_threshold } = loadStudioConfig(agentvDir);
    const tally = new Map();
    for (const row of rows) {
      const key = row.category ?? DEFAULT_CATEGORY;
      let bucket = tally.get(key);
      if (bucket == null) {
        bucket = { total: 0, passed: 0, scoreSum: 0, datasets: new Set() };
        tally.set(key, bucket);
      }
      bucket.total += 1;
      if (row.score >= pass_threshold) {
        bucket.passed += 1;
      }
      bucket.scoreSum += row.score;
      bucket.datasets.add(row.dataset ?? row.target ?? "default");
    }
    const categories = Array.from(tally, ([name, bucket]) => ({
      name,
      total: bucket.total,
      passed: bucket.passed,
      failed: bucket.total - bucket.passed,
      avg_score: bucket.total > 0 ? bucket.scoreSum / bucket.total : 0,
      dataset_count: bucket.datasets.size
    }));
    return c3.json({ categories });
  } catch {
    return c3.json({ error: "Failed to load categories" }, 500);
  }
}
|
|
6480
|
+
/**
 * Respond with per-dataset statistics for one category of one run.
 *
 * Only results whose `category` (falling back to `DEFAULT_CATEGORY`) equals
 * the requested category are counted. Within that subset, results are
 * grouped by `dataset`, then `target`, then "default". A result counts as
 * passed when its `score` is at least the configured `pass_threshold`.
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {{ searchDir: string, agentvDir: string }} ctx - Result and config directories.
 * @returns {*} `{ datasets }`, a 404 when the run is unknown, or a 500 when
 *   loading it throws.
 */
function handleCategoryDatasets(c3, { searchDir, agentvDir }) {
  const filename = c3.req.param("filename");
  const rawCategory = c3.req.param("category") ?? "";
  let category = rawCategory;
  try {
    category = decodeURIComponent(rawCategory);
  } catch {
    // decodeURIComponent throws URIError on a stray "%" (e.g. a category
    // literally named "100%"). Fall back to the value as received instead of
    // letting the exception escape the handler.
  }
  const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
  if (!meta) return c3.json({ error: "Run not found" }, 404);
  try {
    const loaded = patchTestIds(loadManifestResults(meta.path));
    const { pass_threshold } = loadStudioConfig(agentvDir);
    const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
    const datasetMap = new Map();
    for (const r of filtered) {
      const ds = r.dataset ?? r.target ?? "default";
      const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
      entry.total++;
      if (r.score >= pass_threshold) entry.passed++;
      entry.scoreSum += r.score;
      datasetMap.set(ds, entry);
    }
    const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
      name,
      total: entry.total,
      passed: entry.passed,
      failed: entry.total - entry.passed,
      avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
    }));
    return c3.json({ datasets });
  } catch {
    return c3.json({ error: "Failed to load datasets" }, 500);
  }
}
|
|
6510
|
+
/**
 * Respond with the full result record for one eval of one run.
 *
 * The eval is looked up by comparing each loaded result's `testId` with the
 * `evalId` route param.
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {{ searchDir: string }} ctx - Directory to search for result files.
 * @returns {*} `{ eval }`, a 404 when the run or eval is unknown, or a 500
 *   when loading the run throws.
 */
function handleEvalDetail(c3, { searchDir }) {
  const wantedRun = c3.req.param("filename");
  const wantedEval = c3.req.param("evalId");
  const meta = listResultFiles(searchDir).find((candidate) => candidate.filename === wantedRun);
  if (!meta) {
    return c3.json({ error: "Run not found" }, 404);
  }
  try {
    const rows = patchTestIds(loadManifestResults(meta.path));
    const match = rows.find((row) => row.testId === wantedEval);
    return match
      ? c3.json({ eval: match })
      : c3.json({ error: "Eval not found" }, 404);
  } catch {
    return c3.json({ error: "Failed to load eval" }, 500);
  }
}
|
|
6524
|
+
/**
 * Deepest directory shared by every entry of `dirs`, compared one path
 * segment at a time. Returns "." when the directories share no leading
 * segment.
 */
function commonArtifactDir(dirs) {
  const toSegments = (dir) => path10.normalize(dir).split(path10.sep).filter((segment) => segment !== "" && segment !== ".");
  let shared = toSegments(dirs[0]);
  for (const dir of dirs.slice(1)) {
    const segments = toSegments(dir);
    let depth = 0;
    while (depth < shared.length && depth < segments.length && shared[depth] === segments[depth]) {
      depth += 1;
    }
    shared = shared.slice(0, depth);
  }
  return shared.length > 0 ? shared.join(path10.sep) : ".";
}

/**
 * Respond with the artifact file tree of one eval of one run.
 *
 * The eval's manifest record is located by `test_id` (or `eval_id`). The
 * directories of its known artifact paths (grading, timing, input, output,
 * response) are reduced to their common directory, and that directory,
 * resolved against the manifest's own directory, is listed with
 * `buildFileTree`.
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {{ searchDir: string }} ctx - Directory to search for result files.
 * @returns {*} `{ files }` (empty when the record has no artifact paths), a
 *   404 when the run or eval is unknown, or a 500 on failure.
 */
function handleEvalFiles(c3, { searchDir }) {
  const filename = c3.req.param("filename");
  const evalId = c3.req.param("evalId");
  const meta = listResultFiles(searchDir).find((m) => m.filename === filename);
  if (!meta) return c3.json({ error: "Run not found" }, 404);
  try {
    const content = readFileSync9(meta.path, "utf8");
    const records = parseResultManifest(content);
    const record = records.find((r) => (r.test_id ?? r.eval_id) === evalId);
    if (!record) return c3.json({ error: "Eval not found" }, 404);
    const baseDir = path10.dirname(meta.path);
    const knownPaths = [
      record.grading_path,
      record.timing_path,
      record.input_path,
      record.output_path,
      record.response_path
    ].filter(Boolean);
    if (knownPaths.length === 0) return c3.json({ files: [] });
    const artifactDirs = knownPaths.map((p) => path10.dirname(p));
    // Segment-wise comparison: a string-prefix walk never terminates when the
    // directories share no ancestor, and treats "run/a" as a parent of "run/ab".
    const commonDir = commonArtifactDir(artifactDirs);
    const artifactAbsDir = path10.join(baseDir, commonDir);
    const files = buildFileTree(artifactAbsDir, baseDir);
    return c3.json({ files });
  } catch {
    return c3.json({ error: "Failed to load file tree" }, 500);
  }
}
|
|
6557
|
+
/**
 * Respond with the text content and inferred language of one artifact file.
 *
 * The file path is whatever follows `/runs/<filename>/evals/<evalId>/files/`
 * in the request path and is resolved against the directory containing the
 * run's manifest. Paths that resolve outside that directory are rejected.
 *
 * @param {object} c3 - Request context providing `req.param()`, `req.path` and `json()`.
 * @param {{ searchDir: string }} ctx - Directory to search for result files.
 * @returns {*} `{ content, language }`, or an error response: 404 (unknown
 *   run or file), 400 (no file path), 403 (path escapes the run directory),
 *   500 (read failure).
 */
function handleEvalFileContent(c3, { searchDir }) {
  const wantedRun = c3.req.param("filename");
  const wantedEval = c3.req.param("evalId");
  const meta = listResultFiles(searchDir).find((candidate) => candidate.filename === wantedRun);
  if (!meta) {
    return c3.json({ error: "Run not found" }, 404);
  }

  const prefix = `/runs/${wantedRun}/evals/${wantedEval}/files/`;
  const requestPath = c3.req.path;
  const prefixAt = requestPath.indexOf(prefix);
  const filePath = prefixAt < 0 ? "" : requestPath.slice(prefixAt + prefix.length);
  if (!filePath) {
    return c3.json({ error: "No file path specified" }, 400);
  }

  const root = path10.resolve(path10.dirname(meta.path));
  const absolutePath = path10.resolve(root, filePath);
  // Allowed only when the target is the run directory itself or below it.
  const insideRoot = absolutePath === root || absolutePath.startsWith(root + path10.sep);
  if (!insideRoot) {
    return c3.json({ error: "Path traversal not allowed" }, 403);
  }

  const isRegularFile = existsSync8(absolutePath) && statSync4(absolutePath).isFile();
  if (!isRegularFile) {
    return c3.json({ error: "File not found" }, 404);
  }

  try {
    const content = readFileSync9(absolutePath, "utf8");
    return c3.json({ content, language: inferLanguage(absolutePath) });
  } catch {
    return c3.json({ error: "Failed to read file" }, 500);
  }
}
|
|
6582
|
+
/**
 * Respond with statistics aggregated per experiment across all runs.
 *
 * Every lightweight record of every run is attributed to its `experiment`
 * (or "default"). Each experiment reports the number of distinct runs and
 * targets, the number of evals, how many passed (`score` at least the
 * configured `pass_threshold`), the resulting pass rate and the greatest
 * `timestamp` seen (or `null`).
 *
 * @param {object} c3 - Request context providing `json()`.
 * @param {{ searchDir: string, agentvDir: string }} ctx - Result and config directories.
 * @returns {*} The value returned by `c3.json({ experiments })`.
 */
function handleExperiments(c3, { searchDir, agentvDir }) {
  const runs = listResultFiles(searchDir);
  const { pass_threshold } = loadStudioConfig(agentvDir);
  const tally = new Map();
  for (const run of runs) {
    try {
      for (const record of loadLightweightResults(run.path)) {
        const key = record.experiment ?? "default";
        let bucket = tally.get(key);
        if (bucket == null) {
          bucket = {
            targets: new Set(),
            runFilenames: new Set(),
            evalCount: 0,
            passedCount: 0,
            lastTimestamp: ""
          };
          tally.set(key, bucket);
        }
        bucket.runFilenames.add(run.filename);
        if (record.target) {
          bucket.targets.add(record.target);
        }
        bucket.evalCount += 1;
        if (record.score >= pass_threshold) {
          bucket.passedCount += 1;
        }
        if (record.timestamp && record.timestamp > bucket.lastTimestamp) {
          bucket.lastTimestamp = record.timestamp;
        }
      }
    } catch {
      // Best effort: a run that cannot be read is left out of the totals.
    }
  }
  const experiments = Array.from(tally, ([name, bucket]) => ({
    name,
    run_count: bucket.runFilenames.size,
    target_count: bucket.targets.size,
    eval_count: bucket.evalCount,
    passed_count: bucket.passedCount,
    pass_rate: bucket.evalCount > 0 ? bucket.passedCount / bucket.evalCount : 0,
    last_run: bucket.lastTimestamp || null
  }));
  return c3.json({ experiments });
}
|
|
6621
|
+
/**
 * Respond with statistics aggregated per target across all runs.
 *
 * Every lightweight record of every run is attributed to its `target` (or
 * "default"). Each target reports the number of distinct runs and
 * experiments, the number of evals, how many passed (`score` at least the
 * configured `pass_threshold`) and the resulting pass rate.
 *
 * @param {object} c3 - Request context providing `json()`.
 * @param {{ searchDir: string, agentvDir: string }} ctx - Result and config directories.
 * @returns {*} The value returned by `c3.json({ targets })`.
 */
function handleTargets(c3, { searchDir, agentvDir }) {
  const runs = listResultFiles(searchDir);
  const { pass_threshold } = loadStudioConfig(agentvDir);
  const tally = new Map();
  for (const run of runs) {
    try {
      for (const record of loadLightweightResults(run.path)) {
        const key = record.target ?? "default";
        let bucket = tally.get(key);
        if (bucket == null) {
          bucket = {
            experiments: new Set(),
            runFilenames: new Set(),
            evalCount: 0,
            passedCount: 0
          };
          tally.set(key, bucket);
        }
        bucket.runFilenames.add(run.filename);
        if (record.experiment) {
          bucket.experiments.add(record.experiment);
        }
        bucket.evalCount += 1;
        if (record.score >= pass_threshold) {
          bucket.passedCount += 1;
        }
      }
    } catch {
      // Best effort: a run that cannot be read is left out of the totals.
    }
  }
  const targets = Array.from(tally, ([name, bucket]) => ({
    name,
    run_count: bucket.runFilenames.size,
    experiment_count: bucket.experiments.size,
    eval_count: bucket.evalCount,
    passed_count: bucket.passedCount,
    pass_rate: bucket.evalCount > 0 ? bucket.passedCount / bucket.evalCount : 0
  }));
  return c3.json({ targets });
}
|
|
6655
|
+
/**
 * Respond with the studio configuration loaded from `agentvDir`.
 *
 * @param {object} c3 - Request context providing `json()`.
 * @param {{ agentvDir: string }} ctx - Directory passed to `loadStudioConfig`.
 * @returns {*} The value returned by `c3.json(config)`.
 */
function handleConfig(c3, { agentvDir }) {
  const config = loadStudioConfig(agentvDir);
  return c3.json(config);
}
|
|
6658
|
+
/**
 * Respond with the stored feedback for `searchDir`.
 *
 * Feedback is read from `<searchDir>/.agentv/results` when that path exists,
 * otherwise from `searchDir` itself.
 *
 * @param {object} c3 - Request context providing `json()`.
 * @param {{ searchDir: string }} ctx - Root directory to read feedback for.
 * @returns {*} The value returned by `c3.json(feedback)`.
 */
function handleFeedbackRead(c3, { searchDir }) {
  const nestedResultsDir = path10.join(searchDir, ".agentv", "results");
  let feedbackDir = searchDir;
  if (existsSync8(nestedResultsDir)) {
    feedbackDir = nestedResultsDir;
  }
  return c3.json(readFeedback(feedbackDir));
}
|
|
6312
6662
|
function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
6313
6663
|
const searchDir = cwd ?? resultDir;
|
|
6314
6664
|
const agentvDir = path10.join(searchDir, ".agentv");
|
|
6665
|
+
const defaultCtx = { searchDir, agentvDir };
|
|
6315
6666
|
const app2 = new Hono();
|
|
6316
|
-
|
|
6667
|
+
/**
 * Run `handler` against the project named by the `projectId` route param.
 *
 * The project is looked up with `getProject`; the handler receives the
 * project's path as `searchDir` and its `.agentv` subdirectory as
 * `agentvDir`.
 *
 * @param {object} c3 - Request context providing `req.param()` and `json()`.
 * @param {Function} handler - Called as `handler(c3, { searchDir, agentvDir })`.
 * @returns {*} The handler's result, or a 404 response when the project is
 *   unknown or its path does not exist.
 */
function withProject(c3, handler) {
  const projectId = c3.req.param("projectId") ?? "";
  const project = getProject(projectId);
  const usable = Boolean(project) && existsSync8(project.path);
  if (!usable) {
    return c3.json({ error: "Project not found" }, 404);
  }
  const { path: projectRoot } = project;
  const ctx = {
    searchDir: projectRoot,
    agentvDir: path10.join(projectRoot, ".agentv")
  };
  return handler(c3, ctx);
}
|
|
6317
6677
|
app2.post("/api/config", async (c3) => {
|
|
6318
6678
|
try {
|
|
6319
6679
|
const body = await c3.req.json();
|
|
@@ -6328,60 +6688,100 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
6328
6688
|
return c3.json({ error: "Failed to save config" }, 500);
|
|
6329
6689
|
}
|
|
6330
6690
|
});
|
|
6331
|
-
|
|
6332
|
-
|
|
6333
|
-
|
|
6691
|
+
/**
 * Convert a project registry entry to its snake_case response shape.
 *
 * Only `id`, `name`, `path`, `addedAt` and `lastOpenedAt` are copied; the
 * last two are renamed to `added_at` and `last_opened_at`.
 *
 * @param {object} entry - Project registry entry.
 * @returns {{ id: *, name: *, path: *, added_at: *, last_opened_at: * }}
 */
function projectEntryToWire(entry) {
  const { id, name, path: projectPath, addedAt, lastOpenedAt } = entry;
  return {
    id,
    name,
    path: projectPath,
    added_at: addedAt,
    last_opened_at: lastOpenedAt
  };
}
|
|
6335
|
-
app2.get("/", (c3) => {
|
|
6336
|
-
const
|
|
6337
|
-
|
|
6338
|
-
|
|
6339
|
-
|
|
6340
|
-
|
|
6341
|
-
|
|
6342
|
-
|
|
6343
|
-
|
|
6344
|
-
|
|
6345
|
-
|
|
6346
|
-
|
|
6347
|
-
|
|
6348
|
-
try {
|
|
6349
|
-
const records = loadLightweightResults(m.path);
|
|
6350
|
-
if (records.length > 0) {
|
|
6351
|
-
target = records[0].target;
|
|
6352
|
-
experiment = records[0].experiment;
|
|
6353
|
-
}
|
|
6354
|
-
} catch {
|
|
6700
|
+
app2.get("/api/projects", (c3) => {
|
|
6701
|
+
const registry = loadProjectRegistry();
|
|
6702
|
+
const projects = registry.projects.map((p) => {
|
|
6703
|
+
let runCount = 0;
|
|
6704
|
+
let passRate = 0;
|
|
6705
|
+
let lastRun = null;
|
|
6706
|
+
try {
|
|
6707
|
+
const metas = listResultFiles(p.path);
|
|
6708
|
+
runCount = metas.length;
|
|
6709
|
+
if (metas.length > 0) {
|
|
6710
|
+
const totalPassRate = metas.reduce((sum, m) => sum + m.passRate, 0);
|
|
6711
|
+
passRate = totalPassRate / metas.length;
|
|
6712
|
+
lastRun = metas[0].timestamp;
|
|
6355
6713
|
}
|
|
6356
|
-
|
|
6357
|
-
|
|
6358
|
-
|
|
6359
|
-
|
|
6360
|
-
|
|
6361
|
-
|
|
6362
|
-
|
|
6363
|
-
|
|
6364
|
-
...target && { target },
|
|
6365
|
-
...experiment && { experiment }
|
|
6366
|
-
};
|
|
6367
|
-
})
|
|
6714
|
+
} catch {
|
|
6715
|
+
}
|
|
6716
|
+
return {
|
|
6717
|
+
...projectEntryToWire(p),
|
|
6718
|
+
run_count: runCount,
|
|
6719
|
+
pass_rate: passRate,
|
|
6720
|
+
last_run: lastRun
|
|
6721
|
+
};
|
|
6368
6722
|
});
|
|
6723
|
+
return c3.json({ projects });
|
|
6369
6724
|
});
|
|
6370
|
-
app2.
|
|
6371
|
-
|
|
6372
|
-
|
|
6373
|
-
|
|
6374
|
-
|
|
6375
|
-
return c3.json(
|
|
6725
|
+
app2.post("/api/projects", async (c3) => {
|
|
6726
|
+
try {
|
|
6727
|
+
const body = await c3.req.json();
|
|
6728
|
+
if (!body.path) return c3.json({ error: "Missing path" }, 400);
|
|
6729
|
+
const entry = addProject(body.path);
|
|
6730
|
+
return c3.json(projectEntryToWire(entry), 201);
|
|
6731
|
+
} catch (err2) {
|
|
6732
|
+
return c3.json({ error: err2.message }, 400);
|
|
6376
6733
|
}
|
|
6734
|
+
});
|
|
6735
|
+
app2.delete("/api/projects/:projectId", (c3) => {
|
|
6736
|
+
const removed = removeProject(c3.req.param("projectId") ?? "");
|
|
6737
|
+
if (!removed) return c3.json({ error: "Project not found" }, 404);
|
|
6738
|
+
return c3.json({ ok: true });
|
|
6739
|
+
});
|
|
6740
|
+
app2.get("/api/projects/:projectId/summary", (c3) => {
|
|
6741
|
+
const project = getProject(c3.req.param("projectId") ?? "");
|
|
6742
|
+
if (!project) return c3.json({ error: "Project not found" }, 404);
|
|
6743
|
+
try {
|
|
6744
|
+
const metas = listResultFiles(project.path);
|
|
6745
|
+
const runCount = metas.length;
|
|
6746
|
+
const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
|
|
6747
|
+
const lastRun = metas.length > 0 ? metas[0].timestamp : null;
|
|
6748
|
+
return c3.json({
|
|
6749
|
+
id: project.id,
|
|
6750
|
+
name: project.name,
|
|
6751
|
+
path: project.path,
|
|
6752
|
+
run_count: runCount,
|
|
6753
|
+
pass_rate: passRate,
|
|
6754
|
+
last_run: lastRun
|
|
6755
|
+
});
|
|
6756
|
+
} catch {
|
|
6757
|
+
return c3.json({ error: "Failed to read project" }, 500);
|
|
6758
|
+
}
|
|
6759
|
+
});
|
|
6760
|
+
app2.post("/api/projects/discover", async (c3) => {
|
|
6377
6761
|
try {
|
|
6378
|
-
const
|
|
6379
|
-
|
|
6380
|
-
|
|
6762
|
+
const body = await c3.req.json();
|
|
6763
|
+
if (!body.path) return c3.json({ error: "Missing path" }, 400);
|
|
6764
|
+
const discovered = discoverProjects(body.path);
|
|
6765
|
+
const registered = discovered.map((p) => projectEntryToWire(addProject(p)));
|
|
6766
|
+
return c3.json({ discovered: registered });
|
|
6381
6767
|
} catch (err2) {
|
|
6382
|
-
return c3.json({ error:
|
|
6768
|
+
return c3.json({ error: err2.message }, 400);
|
|
6383
6769
|
}
|
|
6384
6770
|
});
|
|
6771
|
+
app2.get("/api/config", (c3) => handleConfig(c3, defaultCtx));
|
|
6772
|
+
app2.get("/api/runs", (c3) => handleRuns(c3, defaultCtx));
|
|
6773
|
+
app2.get("/api/runs/:filename", (c3) => handleRunDetail(c3, defaultCtx));
|
|
6774
|
+
app2.get("/api/runs/:filename/datasets", (c3) => handleRunDatasets(c3, defaultCtx));
|
|
6775
|
+
app2.get("/api/runs/:filename/categories", (c3) => handleRunCategories(c3, defaultCtx));
|
|
6776
|
+
app2.get(
|
|
6777
|
+
"/api/runs/:filename/categories/:category/datasets",
|
|
6778
|
+
(c3) => handleCategoryDatasets(c3, defaultCtx)
|
|
6779
|
+
);
|
|
6780
|
+
app2.get("/api/runs/:filename/evals/:evalId", (c3) => handleEvalDetail(c3, defaultCtx));
|
|
6781
|
+
app2.get("/api/runs/:filename/evals/:evalId/files", (c3) => handleEvalFiles(c3, defaultCtx));
|
|
6782
|
+
app2.get("/api/runs/:filename/evals/:evalId/files/*", (c3) => handleEvalFileContent(c3, defaultCtx));
|
|
6783
|
+
app2.get("/api/experiments", (c3) => handleExperiments(c3, defaultCtx));
|
|
6784
|
+
app2.get("/api/targets", (c3) => handleTargets(c3, defaultCtx));
|
|
6385
6785
|
app2.get("/api/feedback", (c3) => {
|
|
6386
6786
|
const data = readFeedback(resultDir);
|
|
6387
6787
|
return c3.json(data);
|
|
@@ -6424,127 +6824,6 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
6424
6824
|
writeFeedback(resultDir, existing);
|
|
6425
6825
|
return c3.json(existing);
|
|
6426
6826
|
});
|
|
6427
|
-
app2.get("/api/runs/:filename/datasets", (c3) => {
|
|
6428
|
-
const filename = c3.req.param("filename");
|
|
6429
|
-
const metas = listResultFiles(searchDir);
|
|
6430
|
-
const meta = metas.find((m) => m.filename === filename);
|
|
6431
|
-
if (!meta) {
|
|
6432
|
-
return c3.json({ error: "Run not found" }, 404);
|
|
6433
|
-
}
|
|
6434
|
-
try {
|
|
6435
|
-
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
6436
|
-
const { pass_threshold } = loadStudioConfig(agentvDir);
|
|
6437
|
-
const datasetMap = /* @__PURE__ */ new Map();
|
|
6438
|
-
for (const r of loaded) {
|
|
6439
|
-
const ds = r.dataset ?? r.target ?? "default";
|
|
6440
|
-
const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
|
|
6441
|
-
entry.total++;
|
|
6442
|
-
if (r.score >= pass_threshold) entry.passed++;
|
|
6443
|
-
entry.scoreSum += r.score;
|
|
6444
|
-
datasetMap.set(ds, entry);
|
|
6445
|
-
}
|
|
6446
|
-
const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
|
|
6447
|
-
name,
|
|
6448
|
-
total: entry.total,
|
|
6449
|
-
passed: entry.passed,
|
|
6450
|
-
failed: entry.total - entry.passed,
|
|
6451
|
-
avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
|
|
6452
|
-
}));
|
|
6453
|
-
return c3.json({ datasets });
|
|
6454
|
-
} catch {
|
|
6455
|
-
return c3.json({ error: "Failed to load datasets" }, 500);
|
|
6456
|
-
}
|
|
6457
|
-
});
|
|
6458
|
-
app2.get("/api/runs/:filename/categories", (c3) => {
|
|
6459
|
-
const filename = c3.req.param("filename");
|
|
6460
|
-
const metas = listResultFiles(searchDir);
|
|
6461
|
-
const meta = metas.find((m) => m.filename === filename);
|
|
6462
|
-
if (!meta) {
|
|
6463
|
-
return c3.json({ error: "Run not found" }, 404);
|
|
6464
|
-
}
|
|
6465
|
-
try {
|
|
6466
|
-
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
6467
|
-
const { pass_threshold } = loadStudioConfig(agentvDir);
|
|
6468
|
-
const categoryMap = /* @__PURE__ */ new Map();
|
|
6469
|
-
for (const r of loaded) {
|
|
6470
|
-
const cat = r.category ?? DEFAULT_CATEGORY;
|
|
6471
|
-
const entry = categoryMap.get(cat) ?? {
|
|
6472
|
-
total: 0,
|
|
6473
|
-
passed: 0,
|
|
6474
|
-
scoreSum: 0,
|
|
6475
|
-
datasets: /* @__PURE__ */ new Set()
|
|
6476
|
-
};
|
|
6477
|
-
entry.total++;
|
|
6478
|
-
if (r.score >= pass_threshold) entry.passed++;
|
|
6479
|
-
entry.scoreSum += r.score;
|
|
6480
|
-
entry.datasets.add(r.dataset ?? r.target ?? "default");
|
|
6481
|
-
categoryMap.set(cat, entry);
|
|
6482
|
-
}
|
|
6483
|
-
const categories = [...categoryMap.entries()].map(([name, entry]) => ({
|
|
6484
|
-
name,
|
|
6485
|
-
total: entry.total,
|
|
6486
|
-
passed: entry.passed,
|
|
6487
|
-
failed: entry.total - entry.passed,
|
|
6488
|
-
avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0,
|
|
6489
|
-
dataset_count: entry.datasets.size
|
|
6490
|
-
}));
|
|
6491
|
-
return c3.json({ categories });
|
|
6492
|
-
} catch {
|
|
6493
|
-
return c3.json({ error: "Failed to load categories" }, 500);
|
|
6494
|
-
}
|
|
6495
|
-
});
|
|
6496
|
-
app2.get("/api/runs/:filename/categories/:category/datasets", (c3) => {
|
|
6497
|
-
const filename = c3.req.param("filename");
|
|
6498
|
-
const category = decodeURIComponent(c3.req.param("category"));
|
|
6499
|
-
const metas = listResultFiles(searchDir);
|
|
6500
|
-
const meta = metas.find((m) => m.filename === filename);
|
|
6501
|
-
if (!meta) {
|
|
6502
|
-
return c3.json({ error: "Run not found" }, 404);
|
|
6503
|
-
}
|
|
6504
|
-
try {
|
|
6505
|
-
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
6506
|
-
const { pass_threshold } = loadStudioConfig(agentvDir);
|
|
6507
|
-
const filtered = loaded.filter((r) => (r.category ?? DEFAULT_CATEGORY) === category);
|
|
6508
|
-
const datasetMap = /* @__PURE__ */ new Map();
|
|
6509
|
-
for (const r of filtered) {
|
|
6510
|
-
const ds = r.dataset ?? r.target ?? "default";
|
|
6511
|
-
const entry = datasetMap.get(ds) ?? { total: 0, passed: 0, scoreSum: 0 };
|
|
6512
|
-
entry.total++;
|
|
6513
|
-
if (r.score >= pass_threshold) entry.passed++;
|
|
6514
|
-
entry.scoreSum += r.score;
|
|
6515
|
-
datasetMap.set(ds, entry);
|
|
6516
|
-
}
|
|
6517
|
-
const datasets = [...datasetMap.entries()].map(([name, entry]) => ({
|
|
6518
|
-
name,
|
|
6519
|
-
total: entry.total,
|
|
6520
|
-
passed: entry.passed,
|
|
6521
|
-
failed: entry.total - entry.passed,
|
|
6522
|
-
avg_score: entry.total > 0 ? entry.scoreSum / entry.total : 0
|
|
6523
|
-
}));
|
|
6524
|
-
return c3.json({ datasets });
|
|
6525
|
-
} catch {
|
|
6526
|
-
return c3.json({ error: "Failed to load datasets" }, 500);
|
|
6527
|
-
}
|
|
6528
|
-
});
|
|
6529
|
-
app2.get("/api/runs/:filename/evals/:evalId", (c3) => {
|
|
6530
|
-
const filename = c3.req.param("filename");
|
|
6531
|
-
const evalId = c3.req.param("evalId");
|
|
6532
|
-
const metas = listResultFiles(searchDir);
|
|
6533
|
-
const meta = metas.find((m) => m.filename === filename);
|
|
6534
|
-
if (!meta) {
|
|
6535
|
-
return c3.json({ error: "Run not found" }, 404);
|
|
6536
|
-
}
|
|
6537
|
-
try {
|
|
6538
|
-
const loaded = patchTestIds(loadManifestResults(meta.path));
|
|
6539
|
-
const result = loaded.find((r) => r.testId === evalId);
|
|
6540
|
-
if (!result) {
|
|
6541
|
-
return c3.json({ error: "Eval not found" }, 404);
|
|
6542
|
-
}
|
|
6543
|
-
return c3.json({ eval: result });
|
|
6544
|
-
} catch {
|
|
6545
|
-
return c3.json({ error: "Failed to load eval" }, 500);
|
|
6546
|
-
}
|
|
6547
|
-
});
|
|
6548
6827
|
app2.get("/api/index", (c3) => {
|
|
6549
6828
|
const metas = listResultFiles(searchDir);
|
|
6550
6829
|
const entries2 = metas.map((m) => {
|
|
@@ -6565,204 +6844,49 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
6565
6844
|
});
|
|
6566
6845
|
return c3.json({ entries: entries2 });
|
|
6567
6846
|
});
|
|
6568
|
-
|
|
6569
|
-
|
|
6570
|
-
|
|
6571
|
-
|
|
6572
|
-
|
|
6573
|
-
|
|
6574
|
-
|
|
6575
|
-
|
|
6576
|
-
|
|
6577
|
-
|
|
6578
|
-
|
|
6579
|
-
|
|
6580
|
-
|
|
6581
|
-
|
|
6582
|
-
|
|
6583
|
-
|
|
6584
|
-
|
|
6585
|
-
|
|
6586
|
-
|
|
6587
|
-
|
|
6588
|
-
|
|
6589
|
-
|
|
6590
|
-
|
|
6591
|
-
|
|
6592
|
-
|
|
6593
|
-
|
|
6594
|
-
|
|
6595
|
-
|
|
6596
|
-
|
|
6597
|
-
|
|
6598
|
-
|
|
6599
|
-
|
|
6600
|
-
|
|
6601
|
-
".yml": "yaml",
|
|
6602
|
-
".log": "plaintext",
|
|
6603
|
-
".txt": "plaintext",
|
|
6604
|
-
".py": "python",
|
|
6605
|
-
".sh": "shell",
|
|
6606
|
-
".bash": "shell",
|
|
6607
|
-
".css": "css",
|
|
6608
|
-
".html": "html",
|
|
6609
|
-
".xml": "xml",
|
|
6610
|
-
".svg": "xml",
|
|
6611
|
-
".toml": "toml",
|
|
6612
|
-
".diff": "diff",
|
|
6613
|
-
".patch": "diff"
|
|
6614
|
-
};
|
|
6615
|
-
return langMap[ext] ?? "plaintext";
|
|
6847
|
+
app2.get("/api/projects/:projectId/config", (c3) => withProject(c3, handleConfig));
|
|
6848
|
+
app2.get("/api/projects/:projectId/runs", (c3) => withProject(c3, handleRuns));
|
|
6849
|
+
app2.get("/api/projects/:projectId/runs/:filename", (c3) => withProject(c3, handleRunDetail));
|
|
6850
|
+
app2.get(
|
|
6851
|
+
"/api/projects/:projectId/runs/:filename/datasets",
|
|
6852
|
+
(c3) => withProject(c3, handleRunDatasets)
|
|
6853
|
+
);
|
|
6854
|
+
app2.get(
|
|
6855
|
+
"/api/projects/:projectId/runs/:filename/categories",
|
|
6856
|
+
(c3) => withProject(c3, handleRunCategories)
|
|
6857
|
+
);
|
|
6858
|
+
app2.get(
|
|
6859
|
+
"/api/projects/:projectId/runs/:filename/categories/:category/datasets",
|
|
6860
|
+
(c3) => withProject(c3, handleCategoryDatasets)
|
|
6861
|
+
);
|
|
6862
|
+
app2.get(
|
|
6863
|
+
"/api/projects/:projectId/runs/:filename/evals/:evalId",
|
|
6864
|
+
(c3) => withProject(c3, handleEvalDetail)
|
|
6865
|
+
);
|
|
6866
|
+
app2.get(
|
|
6867
|
+
"/api/projects/:projectId/runs/:filename/evals/:evalId/files",
|
|
6868
|
+
(c3) => withProject(c3, handleEvalFiles)
|
|
6869
|
+
);
|
|
6870
|
+
app2.get(
|
|
6871
|
+
"/api/projects/:projectId/runs/:filename/evals/:evalId/files/*",
|
|
6872
|
+
(c3) => withProject(c3, handleEvalFileContent)
|
|
6873
|
+
);
|
|
6874
|
+
app2.get("/api/projects/:projectId/experiments", (c3) => withProject(c3, handleExperiments));
|
|
6875
|
+
app2.get("/api/projects/:projectId/targets", (c3) => withProject(c3, handleTargets));
|
|
6876
|
+
app2.get("/api/projects/:projectId/feedback", (c3) => withProject(c3, handleFeedbackRead));
|
|
6877
|
+
const studioDistPath = options?.studioDir ?? resolveStudioDistDir();
|
|
6878
|
+
if (!studioDistPath || !existsSync8(path10.join(studioDistPath, "index.html"))) {
|
|
6879
|
+
throw new Error('Studio dist not found. Run "bun run build" in apps/studio/ to build the SPA.');
|
|
6616
6880
|
}
|
|
6617
|
-
app2.get("/
|
|
6618
|
-
const
|
|
6619
|
-
|
|
6620
|
-
|
|
6621
|
-
const meta = metas.find((m) => m.filename === filename);
|
|
6622
|
-
if (!meta) {
|
|
6623
|
-
return c3.json({ error: "Run not found" }, 404);
|
|
6624
|
-
}
|
|
6625
|
-
try {
|
|
6626
|
-
const content = readFileSync9(meta.path, "utf8");
|
|
6627
|
-
const records = parseResultManifest(content);
|
|
6628
|
-
const record = records.find((r) => (r.test_id ?? r.eval_id) === evalId);
|
|
6629
|
-
if (!record) {
|
|
6630
|
-
return c3.json({ error: "Eval not found" }, 404);
|
|
6631
|
-
}
|
|
6632
|
-
const baseDir = path10.dirname(meta.path);
|
|
6633
|
-
const knownPaths = [
|
|
6634
|
-
record.grading_path,
|
|
6635
|
-
record.timing_path,
|
|
6636
|
-
record.input_path,
|
|
6637
|
-
record.output_path,
|
|
6638
|
-
record.response_path
|
|
6639
|
-
].filter((p) => !!p);
|
|
6640
|
-
if (knownPaths.length === 0) {
|
|
6641
|
-
return c3.json({ files: [] });
|
|
6642
|
-
}
|
|
6643
|
-
const artifactDirs = knownPaths.map((p) => path10.dirname(p));
|
|
6644
|
-
let commonDir = artifactDirs[0];
|
|
6645
|
-
for (const dir of artifactDirs) {
|
|
6646
|
-
while (!dir.startsWith(commonDir)) {
|
|
6647
|
-
commonDir = path10.dirname(commonDir);
|
|
6648
|
-
}
|
|
6649
|
-
}
|
|
6650
|
-
const artifactAbsDir = path10.join(baseDir, commonDir);
|
|
6651
|
-
const files = buildFileTree(artifactAbsDir, baseDir);
|
|
6652
|
-
return c3.json({ files });
|
|
6653
|
-
} catch {
|
|
6654
|
-
return c3.json({ error: "Failed to load file tree" }, 500);
|
|
6655
|
-
}
|
|
6656
|
-
});
|
|
6657
|
-
app2.get("/api/runs/:filename/evals/:evalId/files/*", (c3) => {
|
|
6658
|
-
const filename = c3.req.param("filename");
|
|
6659
|
-
const evalId = c3.req.param("evalId");
|
|
6660
|
-
const metas = listResultFiles(searchDir);
|
|
6661
|
-
const meta = metas.find((m) => m.filename === filename);
|
|
6662
|
-
if (!meta) {
|
|
6663
|
-
return c3.json({ error: "Run not found" }, 404);
|
|
6664
|
-
}
|
|
6665
|
-
const requestPath = c3.req.path;
|
|
6666
|
-
const prefix = `/api/runs/${filename}/evals/${evalId}/files/`;
|
|
6667
|
-
const filePath = requestPath.slice(prefix.length);
|
|
6668
|
-
if (!filePath) {
|
|
6669
|
-
return c3.json({ error: "No file path specified" }, 400);
|
|
6670
|
-
}
|
|
6671
|
-
const baseDir = path10.dirname(meta.path);
|
|
6672
|
-
const absolutePath = path10.resolve(baseDir, filePath);
|
|
6673
|
-
if (!absolutePath.startsWith(path10.resolve(baseDir) + path10.sep) && absolutePath !== path10.resolve(baseDir)) {
|
|
6674
|
-
return c3.json({ error: "Path traversal not allowed" }, 403);
|
|
6675
|
-
}
|
|
6676
|
-
if (!existsSync8(absolutePath) || !statSync4(absolutePath).isFile()) {
|
|
6677
|
-
return c3.json({ error: "File not found" }, 404);
|
|
6678
|
-
}
|
|
6679
|
-
try {
|
|
6680
|
-
const fileContent = readFileSync9(absolutePath, "utf8");
|
|
6681
|
-
const language = inferLanguage(absolutePath);
|
|
6682
|
-
return c3.json({ content: fileContent, language });
|
|
6683
|
-
} catch {
|
|
6684
|
-
return c3.json({ error: "Failed to read file" }, 500);
|
|
6685
|
-
}
|
|
6686
|
-
});
|
|
6687
|
-
app2.get("/api/experiments", (c3) => {
|
|
6688
|
-
const metas = listResultFiles(searchDir);
|
|
6689
|
-
const { pass_threshold } = loadStudioConfig(agentvDir);
|
|
6690
|
-
const experimentMap = /* @__PURE__ */ new Map();
|
|
6691
|
-
for (const m of metas) {
|
|
6692
|
-
try {
|
|
6693
|
-
const records = loadLightweightResults(m.path);
|
|
6694
|
-
for (const r of records) {
|
|
6695
|
-
const experiment = r.experiment ?? "default";
|
|
6696
|
-
const entry = experimentMap.get(experiment) ?? {
|
|
6697
|
-
targets: /* @__PURE__ */ new Set(),
|
|
6698
|
-
runFilenames: /* @__PURE__ */ new Set(),
|
|
6699
|
-
evalCount: 0,
|
|
6700
|
-
passedCount: 0,
|
|
6701
|
-
lastTimestamp: ""
|
|
6702
|
-
};
|
|
6703
|
-
entry.runFilenames.add(m.filename);
|
|
6704
|
-
if (r.target) entry.targets.add(r.target);
|
|
6705
|
-
entry.evalCount++;
|
|
6706
|
-
if (r.score >= pass_threshold) entry.passedCount++;
|
|
6707
|
-
if (r.timestamp && r.timestamp > entry.lastTimestamp) {
|
|
6708
|
-
entry.lastTimestamp = r.timestamp;
|
|
6709
|
-
}
|
|
6710
|
-
experimentMap.set(experiment, entry);
|
|
6711
|
-
}
|
|
6712
|
-
} catch {
|
|
6713
|
-
}
|
|
6714
|
-
}
|
|
6715
|
-
const experiments = [...experimentMap.entries()].map(([name, entry]) => ({
|
|
6716
|
-
name,
|
|
6717
|
-
run_count: entry.runFilenames.size,
|
|
6718
|
-
target_count: entry.targets.size,
|
|
6719
|
-
eval_count: entry.evalCount,
|
|
6720
|
-
passed_count: entry.passedCount,
|
|
6721
|
-
pass_rate: entry.evalCount > 0 ? entry.passedCount / entry.evalCount : 0,
|
|
6722
|
-
last_run: entry.lastTimestamp || null
|
|
6723
|
-
}));
|
|
6724
|
-
return c3.json({ experiments });
|
|
6725
|
-
});
|
|
6726
|
-
app2.get("/api/targets", (c3) => {
|
|
6727
|
-
const metas = listResultFiles(searchDir);
|
|
6728
|
-
const { pass_threshold } = loadStudioConfig(agentvDir);
|
|
6729
|
-
const targetMap = /* @__PURE__ */ new Map();
|
|
6730
|
-
for (const m of metas) {
|
|
6731
|
-
try {
|
|
6732
|
-
const records = loadLightweightResults(m.path);
|
|
6733
|
-
for (const r of records) {
|
|
6734
|
-
const target = r.target ?? "default";
|
|
6735
|
-
const entry = targetMap.get(target) ?? {
|
|
6736
|
-
experiments: /* @__PURE__ */ new Set(),
|
|
6737
|
-
runFilenames: /* @__PURE__ */ new Set(),
|
|
6738
|
-
evalCount: 0,
|
|
6739
|
-
passedCount: 0
|
|
6740
|
-
};
|
|
6741
|
-
entry.runFilenames.add(m.filename);
|
|
6742
|
-
if (r.experiment) entry.experiments.add(r.experiment);
|
|
6743
|
-
entry.evalCount++;
|
|
6744
|
-
if (r.score >= pass_threshold) entry.passedCount++;
|
|
6745
|
-
targetMap.set(target, entry);
|
|
6746
|
-
}
|
|
6747
|
-
} catch {
|
|
6748
|
-
}
|
|
6749
|
-
}
|
|
6750
|
-
const targets = [...targetMap.entries()].map(([name, entry]) => ({
|
|
6751
|
-
name,
|
|
6752
|
-
run_count: entry.runFilenames.size,
|
|
6753
|
-
experiment_count: entry.experiments.size,
|
|
6754
|
-
eval_count: entry.evalCount,
|
|
6755
|
-
passed_count: entry.passedCount,
|
|
6756
|
-
pass_rate: entry.evalCount > 0 ? entry.passedCount / entry.evalCount : 0
|
|
6757
|
-
}));
|
|
6758
|
-
return c3.json({ targets });
|
|
6881
|
+
app2.get("/", (c3) => {
|
|
6882
|
+
const indexPath = path10.join(studioDistPath, "index.html");
|
|
6883
|
+
if (existsSync8(indexPath)) return c3.html(readFileSync9(indexPath, "utf8"));
|
|
6884
|
+
return c3.notFound();
|
|
6759
6885
|
});
|
|
6760
6886
|
app2.get("/assets/*", (c3) => {
|
|
6761
6887
|
const assetPath = c3.req.path;
|
|
6762
6888
|
const filePath = path10.join(studioDistPath, assetPath);
|
|
6763
|
-
if (!existsSync8(filePath))
|
|
6764
|
-
return c3.notFound();
|
|
6765
|
-
}
|
|
6889
|
+
if (!existsSync8(filePath)) return c3.notFound();
|
|
6766
6890
|
const content = readFileSync9(filePath);
|
|
6767
6891
|
const ext = path10.extname(filePath);
|
|
6768
6892
|
const mimeTypes = {
|
|
@@ -6784,13 +6908,9 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
6784
6908
|
});
|
|
6785
6909
|
});
|
|
6786
6910
|
app2.get("*", (c3) => {
|
|
6787
|
-
if (c3.req.path.startsWith("/api/")) {
|
|
6788
|
-
return c3.json({ error: "Not found" }, 404);
|
|
6789
|
-
}
|
|
6911
|
+
if (c3.req.path.startsWith("/api/")) return c3.json({ error: "Not found" }, 404);
|
|
6790
6912
|
const indexPath = path10.join(studioDistPath, "index.html");
|
|
6791
|
-
if (existsSync8(indexPath))
|
|
6792
|
-
return c3.html(readFileSync9(indexPath, "utf8"));
|
|
6793
|
-
}
|
|
6913
|
+
if (existsSync8(indexPath)) return c3.html(readFileSync9(indexPath, "utf8"));
|
|
6794
6914
|
return c3.notFound();
|
|
6795
6915
|
});
|
|
6796
6916
|
return app2;
|
|
@@ -6814,18 +6934,6 @@ function resolveStudioDistDir() {
|
|
|
6814
6934
|
}
|
|
6815
6935
|
return void 0;
|
|
6816
6936
|
}
|
|
6817
|
-
function stripHeavyFields(results) {
|
|
6818
|
-
return results.map((r) => {
|
|
6819
|
-
const { requests, trace, ...rest } = r;
|
|
6820
|
-
const toolCalls = trace?.toolCalls && Object.keys(trace.toolCalls).length > 0 ? trace.toolCalls : void 0;
|
|
6821
|
-
const graderDurationMs = (r.scores ?? []).reduce((sum, s) => sum + (s.durationMs ?? 0), 0);
|
|
6822
|
-
return {
|
|
6823
|
-
...rest,
|
|
6824
|
-
...toolCalls && { _toolCalls: toolCalls },
|
|
6825
|
-
...graderDurationMs > 0 && { _graderDurationMs: graderDurationMs }
|
|
6826
|
-
};
|
|
6827
|
-
});
|
|
6828
|
-
}
|
|
6829
6937
|
var resultsServeCommand = command({
|
|
6830
6938
|
name: "studio",
|
|
6831
6939
|
description: "Start AgentV Studio \u2014 a local dashboard for reviewing evaluation results",
|
|
@@ -6846,11 +6954,66 @@ var resultsServeCommand = command({
|
|
|
6846
6954
|
long: "dir",
|
|
6847
6955
|
short: "d",
|
|
6848
6956
|
description: "Working directory (default: current directory)"
|
|
6957
|
+
}),
|
|
6958
|
+
multi: flag({
|
|
6959
|
+
long: "multi",
|
|
6960
|
+
description: "Launch in multi-project dashboard mode"
|
|
6961
|
+
}),
|
|
6962
|
+
add: option({
|
|
6963
|
+
type: optional(string),
|
|
6964
|
+
long: "add",
|
|
6965
|
+
description: "Register a project by path"
|
|
6966
|
+
}),
|
|
6967
|
+
remove: option({
|
|
6968
|
+
type: optional(string),
|
|
6969
|
+
long: "remove",
|
|
6970
|
+
description: "Unregister a project by ID"
|
|
6971
|
+
}),
|
|
6972
|
+
discover: option({
|
|
6973
|
+
type: optional(string),
|
|
6974
|
+
long: "discover",
|
|
6975
|
+
description: "Scan a directory tree for repos with .agentv/"
|
|
6849
6976
|
})
|
|
6850
6977
|
},
|
|
6851
|
-
handler: async ({ source, port, dir }) => {
|
|
6978
|
+
handler: async ({ source, port, dir, multi, add, remove, discover }) => {
|
|
6852
6979
|
const cwd = dir ?? process.cwd();
|
|
6853
6980
|
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
|
|
6981
|
+
if (add) {
|
|
6982
|
+
try {
|
|
6983
|
+
const entry = addProject(add);
|
|
6984
|
+
console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
|
|
6985
|
+
} catch (err2) {
|
|
6986
|
+
console.error(`Error: ${err2.message}`);
|
|
6987
|
+
process.exit(1);
|
|
6988
|
+
}
|
|
6989
|
+
return;
|
|
6990
|
+
}
|
|
6991
|
+
if (remove) {
|
|
6992
|
+
const removed = removeProject(remove);
|
|
6993
|
+
if (removed) {
|
|
6994
|
+
console.log(`Unregistered project: ${remove}`);
|
|
6995
|
+
} else {
|
|
6996
|
+
console.error(`Project not found: ${remove}`);
|
|
6997
|
+
process.exit(1);
|
|
6998
|
+
}
|
|
6999
|
+
return;
|
|
7000
|
+
}
|
|
7001
|
+
if (discover) {
|
|
7002
|
+
const discovered = discoverProjects(discover);
|
|
7003
|
+
if (discovered.length === 0) {
|
|
7004
|
+
console.log(`No projects with .agentv/ found under ${discover}`);
|
|
7005
|
+
return;
|
|
7006
|
+
}
|
|
7007
|
+
for (const p of discovered) {
|
|
7008
|
+
const entry = addProject(p);
|
|
7009
|
+
console.log(`Registered: ${entry.name} (${entry.id}) at ${entry.path}`);
|
|
7010
|
+
}
|
|
7011
|
+
console.log(`
|
|
7012
|
+
Discovered ${discovered.length} project(s).`);
|
|
7013
|
+
return;
|
|
7014
|
+
}
|
|
7015
|
+
const registry = loadProjectRegistry();
|
|
7016
|
+
const isMultiProject = multi || registry.projects.length > 0;
|
|
6854
7017
|
try {
|
|
6855
7018
|
let results = [];
|
|
6856
7019
|
let sourceFile;
|
|
@@ -6878,16 +7041,16 @@ var resultsServeCommand = command({
|
|
|
6878
7041
|
}
|
|
6879
7042
|
const resultDir = sourceFile ? path10.dirname(path10.resolve(sourceFile)) : cwd;
|
|
6880
7043
|
const app2 = createApp(results, resultDir, cwd, sourceFile);
|
|
6881
|
-
if (
|
|
7044
|
+
if (isMultiProject) {
|
|
7045
|
+
console.log(`Multi-project mode: ${registry.projects.length} project(s) registered`);
|
|
7046
|
+
} else if (results.length > 0 && sourceFile) {
|
|
6882
7047
|
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
6883
7048
|
} else {
|
|
6884
7049
|
console.log("No results found. Dashboard will show an empty state.");
|
|
6885
7050
|
console.log("Run an evaluation to see results: agentv eval <eval-file>");
|
|
6886
7051
|
}
|
|
6887
7052
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
6888
|
-
console.log(`
|
|
6889
|
-
console.log(`Result picker API: http://localhost:${listenPort}/api/runs`);
|
|
6890
|
-
console.log(`Feedback file: ${feedbackPath(resultDir)}`);
|
|
7053
|
+
console.log(`Projects API: http://localhost:${listenPort}/api/projects`);
|
|
6891
7054
|
console.log("Press Ctrl+C to stop");
|
|
6892
7055
|
const { serve: startServer } = await import("@hono/node-server");
|
|
6893
7056
|
startServer({
|
|
@@ -8451,4 +8614,4 @@ export {
|
|
|
8451
8614
|
preprocessArgv,
|
|
8452
8615
|
runCli
|
|
8453
8616
|
};
|
|
8454
|
-
//# sourceMappingURL=chunk-
|
|
8617
|
+
//# sourceMappingURL=chunk-YORCRL4G.js.map
|