agentv 4.11.2 → 4.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MIP46NEN.js → chunk-4MEGL2E3.js} +5 -5
- package/dist/{chunk-MIP46NEN.js.map → chunk-4MEGL2E3.js.map} +1 -1
- package/dist/{chunk-FQGY6QXQ.js → chunk-CXAO4VPP.js} +43 -43
- package/dist/chunk-CXAO4VPP.js.map +1 -0
- package/dist/{chunk-7TJ2PON3.js → chunk-VVWPD4CN.js} +104 -89
- package/dist/chunk-VVWPD4CN.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-HNSXNRVK.js → dist-D6EJ3O7Q.js} +20 -20
- package/dist/index.js +3 -3
- package/dist/{interactive-LRW3X5OF.js → interactive-SP2LWOQX.js} +3 -3
- package/dist/studio/assets/index-BdR2qr8G.js +65 -0
- package/dist/studio/assets/{index-VyDFrnoK.js → index-CkXzhDmw.js} +1 -1
- package/dist/studio/assets/index-XVVBVabi.css +1 -0
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-7TJ2PON3.js.map +0 -1
- package/dist/chunk-FQGY6QXQ.js.map +0 -1
- package/dist/studio/assets/index-Bi-KHfNm.js +0 -65
- package/dist/studio/assets/index-D_j-w4UO.css +0 -1
- /package/dist/{dist-HNSXNRVK.js.map → dist-D6EJ3O7Q.js.map} +0 -0
- /package/dist/{interactive-LRW3X5OF.js.map → interactive-SP2LWOQX.js.map} +0 -0
|
@@ -42,25 +42,25 @@ import {
|
|
|
42
42
|
validateFileReferences,
|
|
43
43
|
validateTargetsFile,
|
|
44
44
|
writeArtifactsFromResults
|
|
45
|
-
} from "./chunk-MIP46NEN.js";
|
|
45
|
+
} from "./chunk-4MEGL2E3.js";
|
|
46
46
|
import {
|
|
47
47
|
DEFAULT_CATEGORY,
|
|
48
48
|
DEFAULT_THRESHOLD,
|
|
49
|
-
|
|
49
|
+
addBenchmark,
|
|
50
50
|
createBuiltinRegistry,
|
|
51
51
|
deriveCategory,
|
|
52
|
+
discoverBenchmarks,
|
|
52
53
|
discoverClaudeSessions,
|
|
53
54
|
discoverCodexSessions,
|
|
54
55
|
discoverCopilotSessions,
|
|
55
|
-
discoverProjects,
|
|
56
56
|
executeScript,
|
|
57
57
|
getAgentvHome,
|
|
58
|
+
getBenchmark,
|
|
58
59
|
getOutputFilenames,
|
|
59
|
-
getProject,
|
|
60
60
|
getWorkspacePoolRoot,
|
|
61
61
|
isAgentSkillsFormat,
|
|
62
62
|
listTargetNames,
|
|
63
|
-
|
|
63
|
+
loadBenchmarkRegistry,
|
|
64
64
|
loadTestSuite,
|
|
65
65
|
normalizeLineEndings,
|
|
66
66
|
parseAgentSkillsEvals,
|
|
@@ -69,14 +69,14 @@ import {
|
|
|
69
69
|
parseCopilotEvents,
|
|
70
70
|
readTargetDefinitions,
|
|
71
71
|
readTranscriptFile,
|
|
72
|
-
|
|
72
|
+
removeBenchmark,
|
|
73
73
|
scanRepoDeps,
|
|
74
74
|
toCamelCaseDeep,
|
|
75
75
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
76
76
|
toTranscriptJsonLine,
|
|
77
77
|
transpileEvalYamlFile,
|
|
78
78
|
trimBaselineResult
|
|
79
|
-
} from "./chunk-FQGY6QXQ.js";
|
|
79
|
+
} from "./chunk-CXAO4VPP.js";
|
|
80
80
|
import {
|
|
81
81
|
__commonJS,
|
|
82
82
|
__require,
|
|
@@ -3912,7 +3912,7 @@ var evalRunCommand = command({
|
|
|
3912
3912
|
},
|
|
3913
3913
|
handler: async (args) => {
|
|
3914
3914
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
3915
|
-
const { launchInteractiveWizard } = await import("./interactive-LRW3X5OF.js");
|
|
3915
|
+
const { launchInteractiveWizard } = await import("./interactive-SP2LWOQX.js");
|
|
3916
3916
|
await launchInteractiveWizard();
|
|
3917
3917
|
return;
|
|
3918
3918
|
}
|
|
@@ -7252,13 +7252,14 @@ var resultsCommand = subcommands({
|
|
|
7252
7252
|
// src/commands/results/serve.ts
|
|
7253
7253
|
import { existsSync as existsSync12, readFileSync as readFileSync10, readdirSync as readdirSync4, statSync as statSync5, writeFileSync as writeFileSync4 } from "node:fs";
|
|
7254
7254
|
import path16 from "node:path";
|
|
7255
|
-
import { fileURLToPath as
|
|
7255
|
+
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
7256
7256
|
import { Hono } from "hono";
|
|
7257
7257
|
|
|
7258
7258
|
// src/commands/results/eval-runner.ts
|
|
7259
|
-
import { spawn } from "node:child_process";
|
|
7259
|
+
import { execFileSync, spawn } from "node:child_process";
|
|
7260
7260
|
import { existsSync as existsSync10 } from "node:fs";
|
|
7261
7261
|
import path14 from "node:path";
|
|
7262
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
7262
7263
|
var activeRuns = /* @__PURE__ */ new Map();
|
|
7263
7264
|
function generateRunId() {
|
|
7264
7265
|
const now = /* @__PURE__ */ new Date();
|
|
@@ -7342,16 +7343,27 @@ function resolveCliPath(cwd) {
|
|
|
7342
7343
|
];
|
|
7343
7344
|
for (const c4 of candidates) {
|
|
7344
7345
|
if (existsSync10(c4)) {
|
|
7345
|
-
return {
|
|
7346
|
+
return { binPath: "bun", args: [c4] };
|
|
7346
7347
|
}
|
|
7347
7348
|
}
|
|
7348
|
-
const currentDir = typeof __dirname !== "undefined" ? __dirname : path14.dirname(
|
|
7349
|
+
const currentDir = typeof __dirname !== "undefined" ? __dirname : path14.dirname(fileURLToPath2(import.meta.url));
|
|
7349
7350
|
const fromSrc = path14.resolve(currentDir, "../../../cli.ts");
|
|
7350
7351
|
const fromDist = path14.resolve(currentDir, "../../cli.js");
|
|
7351
|
-
if (existsSync10(fromSrc)) return {
|
|
7352
|
-
if (existsSync10(fromDist)) return {
|
|
7352
|
+
if (existsSync10(fromSrc)) return { binPath: "bun", args: [fromSrc] };
|
|
7353
|
+
if (existsSync10(fromDist)) return { binPath: "bun", args: [fromDist] };
|
|
7354
|
+
if (isCommandAvailable("agentv")) {
|
|
7355
|
+
return { binPath: "agentv", args: [] };
|
|
7356
|
+
}
|
|
7353
7357
|
return void 0;
|
|
7354
7358
|
}
|
|
7359
|
+
function isCommandAvailable(cmd) {
|
|
7360
|
+
try {
|
|
7361
|
+
execFileSync(process.platform === "win32" ? "where" : "which", [cmd], { stdio: "ignore" });
|
|
7362
|
+
return true;
|
|
7363
|
+
} catch {
|
|
7364
|
+
return false;
|
|
7365
|
+
}
|
|
7366
|
+
}
|
|
7355
7367
|
function registerEvalRoutes(app2, getCwd, options) {
|
|
7356
7368
|
const readOnly = options?.readOnly === true;
|
|
7357
7369
|
app2.get("/api/eval/discover", async (c4) => {
|
|
@@ -7409,7 +7421,7 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
7409
7421
|
};
|
|
7410
7422
|
activeRuns.set(runId, run2);
|
|
7411
7423
|
try {
|
|
7412
|
-
const child = spawn(cliPaths.
|
|
7424
|
+
const child = spawn(cliPaths.binPath, [...cliPaths.args, ...args], {
|
|
7413
7425
|
cwd,
|
|
7414
7426
|
stdio: ["ignore", "pipe", "pipe"],
|
|
7415
7427
|
env: { ...process.env }
|
|
@@ -7494,7 +7506,7 @@ Process error: ${err2.message}`;
|
|
|
7494
7506
|
const args = buildCliArgs(body);
|
|
7495
7507
|
return c4.json({ command: buildCliPreview(args) });
|
|
7496
7508
|
});
|
|
7497
|
-
app2.get("/api/
|
|
7509
|
+
app2.get("/api/benchmarks/:benchmarkId/eval/discover", async (c4) => {
|
|
7498
7510
|
const cwd = getCwd(c4);
|
|
7499
7511
|
try {
|
|
7500
7512
|
const files = await discoverEvalFiles(cwd);
|
|
@@ -7509,7 +7521,7 @@ Process error: ${err2.message}`;
|
|
|
7509
7521
|
return c4.json({ error: err2.message, eval_files: [] }, 500);
|
|
7510
7522
|
}
|
|
7511
7523
|
});
|
|
7512
|
-
app2.get("/api/
|
|
7524
|
+
app2.get("/api/benchmarks/:benchmarkId/eval/targets", async (c4) => {
|
|
7513
7525
|
const cwd = getCwd(c4);
|
|
7514
7526
|
try {
|
|
7515
7527
|
const names = await discoverTargetsInProject(cwd);
|
|
@@ -7518,7 +7530,7 @@ Process error: ${err2.message}`;
|
|
|
7518
7530
|
return c4.json({ error: err2.message, targets: [] }, 500);
|
|
7519
7531
|
}
|
|
7520
7532
|
});
|
|
7521
|
-
app2.post("/api/
|
|
7533
|
+
app2.post("/api/benchmarks/:benchmarkId/eval/run", async (c4) => {
|
|
7522
7534
|
const cwd = getCwd(c4);
|
|
7523
7535
|
let body;
|
|
7524
7536
|
try {
|
|
@@ -7546,7 +7558,7 @@ Process error: ${err2.message}`;
|
|
|
7546
7558
|
};
|
|
7547
7559
|
activeRuns.set(runId, run2);
|
|
7548
7560
|
try {
|
|
7549
|
-
const child = spawn(cliPaths.
|
|
7561
|
+
const child = spawn(cliPaths.binPath, [...cliPaths.args, ...args], {
|
|
7550
7562
|
cwd,
|
|
7551
7563
|
stdio: ["ignore", "pipe", "pipe"],
|
|
7552
7564
|
env: { ...process.env }
|
|
@@ -7583,7 +7595,7 @@ Process error: ${err2.message}`;
|
|
|
7583
7595
|
return c4.json({ error: err2.message }, 500);
|
|
7584
7596
|
}
|
|
7585
7597
|
});
|
|
7586
|
-
app2.get("/api/
|
|
7598
|
+
app2.get("/api/benchmarks/:benchmarkId/eval/status/:id", (c4) => {
|
|
7587
7599
|
const id = c4.req.param("id");
|
|
7588
7600
|
const run2 = activeRuns.get(id ?? "");
|
|
7589
7601
|
if (!run2) return c4.json({ error: "Run not found" }, 404);
|
|
@@ -7598,7 +7610,7 @@ Process error: ${err2.message}`;
|
|
|
7598
7610
|
stderr: run2.stderr.slice(-5e3)
|
|
7599
7611
|
});
|
|
7600
7612
|
});
|
|
7601
|
-
app2.get("/api/
|
|
7613
|
+
app2.get("/api/benchmarks/:benchmarkId/eval/runs", (c4) => {
|
|
7602
7614
|
const runs = [...activeRuns.values()].map((r) => ({
|
|
7603
7615
|
id: r.id,
|
|
7604
7616
|
status: r.status,
|
|
@@ -7610,7 +7622,7 @@ Process error: ${err2.message}`;
|
|
|
7610
7622
|
runs.sort((a, b) => b.started_at.localeCompare(a.started_at));
|
|
7611
7623
|
return c4.json({ runs });
|
|
7612
7624
|
});
|
|
7613
|
-
app2.post("/api/
|
|
7625
|
+
app2.post("/api/benchmarks/:benchmarkId/eval/preview", async (c4) => {
|
|
7614
7626
|
let body;
|
|
7615
7627
|
try {
|
|
7616
7628
|
body = await c4.req.json();
|
|
@@ -8164,14 +8176,14 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8164
8176
|
const defaultCtx = { searchDir, agentvDir };
|
|
8165
8177
|
const readOnly = options?.readOnly === true;
|
|
8166
8178
|
const app2 = new Hono();
|
|
8167
|
-
function
|
|
8168
|
-
const
|
|
8169
|
-
if (!
|
|
8179
|
+
function withBenchmark(c4, handler) {
|
|
8180
|
+
const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
|
|
8181
|
+
if (!benchmark || !existsSync12(benchmark.path)) {
|
|
8170
8182
|
return c4.json({ error: "Project not found" }, 404);
|
|
8171
8183
|
}
|
|
8172
8184
|
return handler(c4, {
|
|
8173
|
-
searchDir:
|
|
8174
|
-
agentvDir: path16.join(
|
|
8185
|
+
searchDir: benchmark.path,
|
|
8186
|
+
agentvDir: path16.join(benchmark.path, ".agentv")
|
|
8175
8187
|
});
|
|
8176
8188
|
}
|
|
8177
8189
|
app2.post("/api/config", async (c4) => {
|
|
@@ -8191,7 +8203,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8191
8203
|
return c4.json({ error: "Failed to save config" }, 500);
|
|
8192
8204
|
}
|
|
8193
8205
|
});
|
|
8194
|
-
function
|
|
8206
|
+
function benchmarkEntryToWire(entry) {
|
|
8195
8207
|
return {
|
|
8196
8208
|
id: entry.id,
|
|
8197
8209
|
name: entry.name,
|
|
@@ -8200,10 +8212,10 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8200
8212
|
last_opened_at: entry.lastOpenedAt
|
|
8201
8213
|
};
|
|
8202
8214
|
}
|
|
8203
|
-
app2.get("/api/
|
|
8204
|
-
const registry =
|
|
8205
|
-
const
|
|
8206
|
-
registry.
|
|
8215
|
+
app2.get("/api/benchmarks", async (c4) => {
|
|
8216
|
+
const registry = loadBenchmarkRegistry();
|
|
8217
|
+
const benchmarks = await Promise.all(
|
|
8218
|
+
registry.benchmarks.map(async (p) => {
|
|
8207
8219
|
let runCount = 0;
|
|
8208
8220
|
let passRate = 0;
|
|
8209
8221
|
let lastRun = null;
|
|
@@ -8218,48 +8230,48 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8218
8230
|
} catch {
|
|
8219
8231
|
}
|
|
8220
8232
|
return {
|
|
8221
|
-
...
|
|
8233
|
+
...benchmarkEntryToWire(p),
|
|
8222
8234
|
run_count: runCount,
|
|
8223
8235
|
pass_rate: passRate,
|
|
8224
8236
|
last_run: lastRun
|
|
8225
8237
|
};
|
|
8226
8238
|
})
|
|
8227
8239
|
);
|
|
8228
|
-
return c4.json({ projects });
|
|
8240
|
+
return c4.json({ projects: benchmarks });
|
|
8229
8241
|
});
|
|
8230
|
-
app2.post("/api/
|
|
8242
|
+
app2.post("/api/benchmarks", async (c4) => {
|
|
8231
8243
|
if (readOnly) {
|
|
8232
8244
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
8233
8245
|
}
|
|
8234
8246
|
try {
|
|
8235
8247
|
const body = await c4.req.json();
|
|
8236
8248
|
if (!body.path) return c4.json({ error: "Missing path" }, 400);
|
|
8237
|
-
const entry =
|
|
8238
|
-
return c4.json(
|
|
8249
|
+
const entry = addBenchmark(body.path);
|
|
8250
|
+
return c4.json(benchmarkEntryToWire(entry), 201);
|
|
8239
8251
|
} catch (err2) {
|
|
8240
8252
|
return c4.json({ error: err2.message }, 400);
|
|
8241
8253
|
}
|
|
8242
8254
|
});
|
|
8243
|
-
app2.delete("/api/
|
|
8255
|
+
app2.delete("/api/benchmarks/:benchmarkId", (c4) => {
|
|
8244
8256
|
if (readOnly) {
|
|
8245
8257
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
8246
8258
|
}
|
|
8247
|
-
const removed =
|
|
8259
|
+
const removed = removeBenchmark(c4.req.param("benchmarkId") ?? "");
|
|
8248
8260
|
if (!removed) return c4.json({ error: "Project not found" }, 404);
|
|
8249
8261
|
return c4.json({ ok: true });
|
|
8250
8262
|
});
|
|
8251
|
-
app2.get("/api/
|
|
8252
|
-
const
|
|
8253
|
-
if (!
|
|
8263
|
+
app2.get("/api/benchmarks/:benchmarkId/summary", async (c4) => {
|
|
8264
|
+
const benchmark = getBenchmark(c4.req.param("benchmarkId") ?? "");
|
|
8265
|
+
if (!benchmark) return c4.json({ error: "Project not found" }, 404);
|
|
8254
8266
|
try {
|
|
8255
|
-
const { runs: metas } = await listMergedResultFiles(
|
|
8267
|
+
const { runs: metas } = await listMergedResultFiles(benchmark.path);
|
|
8256
8268
|
const runCount = metas.length;
|
|
8257
8269
|
const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
|
|
8258
8270
|
const lastRun = metas.length > 0 ? metas[0].timestamp : null;
|
|
8259
8271
|
return c4.json({
|
|
8260
|
-
id:
|
|
8261
|
-
name:
|
|
8262
|
-
path:
|
|
8272
|
+
id: benchmark.id,
|
|
8273
|
+
name: benchmark.name,
|
|
8274
|
+
path: benchmark.path,
|
|
8263
8275
|
run_count: runCount,
|
|
8264
8276
|
pass_rate: passRate,
|
|
8265
8277
|
last_run: lastRun
|
|
@@ -8268,24 +8280,24 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8268
8280
|
return c4.json({ error: "Failed to read project" }, 500);
|
|
8269
8281
|
}
|
|
8270
8282
|
});
|
|
8271
|
-
app2.post("/api/
|
|
8283
|
+
app2.post("/api/benchmarks/discover", async (c4) => {
|
|
8272
8284
|
if (readOnly) {
|
|
8273
8285
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
8274
8286
|
}
|
|
8275
8287
|
try {
|
|
8276
8288
|
const body = await c4.req.json();
|
|
8277
8289
|
if (!body.path) return c4.json({ error: "Missing path" }, 400);
|
|
8278
|
-
const discovered =
|
|
8279
|
-
const registered = discovered.map((p) =>
|
|
8290
|
+
const discovered = discoverBenchmarks(body.path);
|
|
8291
|
+
const registered = discovered.map((p) => benchmarkEntryToWire(addBenchmark(p)));
|
|
8280
8292
|
return c4.json({ discovered: registered });
|
|
8281
8293
|
} catch (err2) {
|
|
8282
8294
|
return c4.json({ error: err2.message }, 400);
|
|
8283
8295
|
}
|
|
8284
8296
|
});
|
|
8285
|
-
app2.get("/api/
|
|
8286
|
-
const registry =
|
|
8297
|
+
app2.get("/api/benchmarks/all-runs", async (c4) => {
|
|
8298
|
+
const registry = loadBenchmarkRegistry();
|
|
8287
8299
|
const allRuns = [];
|
|
8288
|
-
for (const p of registry.
|
|
8300
|
+
for (const p of registry.benchmarks) {
|
|
8289
8301
|
try {
|
|
8290
8302
|
const { runs: metas } = await listMergedResultFiles(p.path);
|
|
8291
8303
|
for (const m of metas) {
|
|
@@ -8411,8 +8423,8 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8411
8423
|
return c4.json({ entries: entries2 });
|
|
8412
8424
|
});
|
|
8413
8425
|
app2.get(
|
|
8414
|
-
"/api/
|
|
8415
|
-
(c4) =>
|
|
8426
|
+
"/api/benchmarks/:benchmarkId/config",
|
|
8427
|
+
(c4) => withBenchmark(
|
|
8416
8428
|
c4,
|
|
8417
8429
|
(ctx, dataCtx) => handleConfig(ctx, dataCtx, {
|
|
8418
8430
|
readOnly,
|
|
@@ -8421,50 +8433,53 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8421
8433
|
)
|
|
8422
8434
|
);
|
|
8423
8435
|
app2.get(
|
|
8424
|
-
"/api/
|
|
8425
|
-
(c4) =>
|
|
8436
|
+
"/api/benchmarks/:benchmarkId/remote/status",
|
|
8437
|
+
(c4) => withBenchmark(
|
|
8426
8438
|
c4,
|
|
8427
8439
|
async (ctx, dataCtx) => ctx.json(await getRemoteResultsStatus(dataCtx.searchDir))
|
|
8428
8440
|
)
|
|
8429
8441
|
);
|
|
8430
8442
|
app2.post(
|
|
8431
|
-
"/api/
|
|
8432
|
-
(c4) =>
|
|
8443
|
+
"/api/benchmarks/:benchmarkId/remote/sync",
|
|
8444
|
+
(c4) => withBenchmark(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
|
|
8445
|
+
);
|
|
8446
|
+
app2.get("/api/benchmarks/:benchmarkId/runs", (c4) => withBenchmark(c4, handleRuns));
|
|
8447
|
+
app2.get("/api/benchmarks/:benchmarkId/runs/:filename", (c4) => withBenchmark(c4, handleRunDetail));
|
|
8448
|
+
app2.get(
|
|
8449
|
+
"/api/benchmarks/:benchmarkId/runs/:filename/suites",
|
|
8450
|
+
(c4) => withBenchmark(c4, handleRunSuites)
|
|
8433
8451
|
);
|
|
8434
|
-
app2.get("/api/projects/:projectId/runs", (c4) => withProject(c4, handleRuns));
|
|
8435
|
-
app2.get("/api/projects/:projectId/runs/:filename", (c4) => withProject(c4, handleRunDetail));
|
|
8436
|
-
app2.get("/api/projects/:projectId/runs/:filename/suites", (c4) => withProject(c4, handleRunSuites));
|
|
8437
8452
|
app2.get(
|
|
8438
|
-
"/api/
|
|
8439
|
-
(c4) =>
|
|
8453
|
+
"/api/benchmarks/:benchmarkId/runs/:filename/categories",
|
|
8454
|
+
(c4) => withBenchmark(c4, handleRunCategories)
|
|
8440
8455
|
);
|
|
8441
8456
|
app2.get(
|
|
8442
|
-
"/api/
|
|
8443
|
-
(c4) =>
|
|
8457
|
+
"/api/benchmarks/:benchmarkId/runs/:filename/categories/:category/suites",
|
|
8458
|
+
(c4) => withBenchmark(c4, handleCategorySuites)
|
|
8444
8459
|
);
|
|
8445
8460
|
app2.get(
|
|
8446
|
-
"/api/
|
|
8447
|
-
(c4) =>
|
|
8461
|
+
"/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId",
|
|
8462
|
+
(c4) => withBenchmark(c4, handleEvalDetail)
|
|
8448
8463
|
);
|
|
8449
8464
|
app2.get(
|
|
8450
|
-
"/api/
|
|
8451
|
-
(c4) =>
|
|
8465
|
+
"/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files",
|
|
8466
|
+
(c4) => withBenchmark(c4, handleEvalFiles)
|
|
8452
8467
|
);
|
|
8453
8468
|
app2.get(
|
|
8454
|
-
"/api/
|
|
8455
|
-
(c4) =>
|
|
8469
|
+
"/api/benchmarks/:benchmarkId/runs/:filename/evals/:evalId/files/*",
|
|
8470
|
+
(c4) => withBenchmark(c4, handleEvalFileContent)
|
|
8456
8471
|
);
|
|
8457
|
-
app2.get("/api/
|
|
8458
|
-
app2.get("/api/
|
|
8459
|
-
app2.get("/api/
|
|
8460
|
-
app2.get("/api/
|
|
8472
|
+
app2.get("/api/benchmarks/:benchmarkId/experiments", (c4) => withBenchmark(c4, handleExperiments));
|
|
8473
|
+
app2.get("/api/benchmarks/:benchmarkId/compare", (c4) => withBenchmark(c4, handleCompare));
|
|
8474
|
+
app2.get("/api/benchmarks/:benchmarkId/targets", (c4) => withBenchmark(c4, handleTargets));
|
|
8475
|
+
app2.get("/api/benchmarks/:benchmarkId/feedback", (c4) => withBenchmark(c4, handleFeedbackRead));
|
|
8461
8476
|
registerEvalRoutes(
|
|
8462
8477
|
app2,
|
|
8463
8478
|
(c4) => {
|
|
8464
|
-
const
|
|
8465
|
-
if (
|
|
8466
|
-
const
|
|
8467
|
-
if (
|
|
8479
|
+
const benchmarkId = c4.req.param("benchmarkId");
|
|
8480
|
+
if (benchmarkId) {
|
|
8481
|
+
const benchmark = getBenchmark(benchmarkId);
|
|
8482
|
+
if (benchmark) return benchmark.path;
|
|
8468
8483
|
}
|
|
8469
8484
|
return searchDir;
|
|
8470
8485
|
},
|
|
@@ -8512,7 +8527,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
8512
8527
|
return app2;
|
|
8513
8528
|
}
|
|
8514
8529
|
function resolveStudioDistDir() {
|
|
8515
|
-
const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(
|
|
8530
|
+
const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath3(import.meta.url));
|
|
8516
8531
|
const candidates = [
|
|
8517
8532
|
// From src/commands/results/ → sibling apps/studio/dist
|
|
8518
8533
|
path16.resolve(currentDir, "../../../../studio/dist"),
|
|
@@ -8584,7 +8599,7 @@ var resultsServeCommand = command({
|
|
|
8584
8599
|
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
|
|
8585
8600
|
if (add) {
|
|
8586
8601
|
try {
|
|
8587
|
-
const entry =
|
|
8602
|
+
const entry = addBenchmark(add);
|
|
8588
8603
|
console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
|
|
8589
8604
|
} catch (err2) {
|
|
8590
8605
|
console.error(`Error: ${err2.message}`);
|
|
@@ -8593,7 +8608,7 @@ var resultsServeCommand = command({
|
|
|
8593
8608
|
return;
|
|
8594
8609
|
}
|
|
8595
8610
|
if (remove) {
|
|
8596
|
-
const removed =
|
|
8611
|
+
const removed = removeBenchmark(remove);
|
|
8597
8612
|
if (removed) {
|
|
8598
8613
|
console.log(`Unregistered project: ${remove}`);
|
|
8599
8614
|
} else {
|
|
@@ -8603,21 +8618,21 @@ var resultsServeCommand = command({
|
|
|
8603
8618
|
return;
|
|
8604
8619
|
}
|
|
8605
8620
|
if (discover) {
|
|
8606
|
-
const discovered =
|
|
8621
|
+
const discovered = discoverBenchmarks(discover);
|
|
8607
8622
|
if (discovered.length === 0) {
|
|
8608
8623
|
console.log(`No projects with .agentv/ found under ${discover}`);
|
|
8609
8624
|
return;
|
|
8610
8625
|
}
|
|
8611
8626
|
for (const p of discovered) {
|
|
8612
|
-
const entry =
|
|
8627
|
+
const entry = addBenchmark(p);
|
|
8613
8628
|
console.log(`Registered: ${entry.name} (${entry.id}) at ${entry.path}`);
|
|
8614
8629
|
}
|
|
8615
8630
|
console.log(`
|
|
8616
8631
|
Discovered ${discovered.length} project(s).`);
|
|
8617
8632
|
return;
|
|
8618
8633
|
}
|
|
8619
|
-
const registry =
|
|
8620
|
-
const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.
|
|
8634
|
+
const registry = loadBenchmarkRegistry();
|
|
8635
|
+
const { isMultiProject, showMultiWarning } = resolveDashboardMode(registry.benchmarks.length, {
|
|
8621
8636
|
multi,
|
|
8622
8637
|
single
|
|
8623
8638
|
});
|
|
@@ -8652,7 +8667,7 @@ Discovered ${discovered.length} project(s).`);
|
|
|
8652
8667
|
);
|
|
8653
8668
|
}
|
|
8654
8669
|
if (isMultiProject) {
|
|
8655
|
-
console.log(`Multi-project mode: ${registry.
|
|
8670
|
+
console.log(`Multi-project mode: ${registry.benchmarks.length} project(s) registered`);
|
|
8656
8671
|
} else if (results.length > 0 && sourceFile) {
|
|
8657
8672
|
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
8658
8673
|
} else {
|
|
@@ -8660,7 +8675,7 @@ Discovered ${discovered.length} project(s).`);
|
|
|
8660
8675
|
console.log("Run an evaluation to see results: agentv eval <eval-file>");
|
|
8661
8676
|
}
|
|
8662
8677
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
8663
|
-
console.log(`
|
|
8678
|
+
console.log(`Benchmarks API: http://localhost:${listenPort}/api/benchmarks`);
|
|
8664
8679
|
console.log("Press Ctrl+C to stop");
|
|
8665
8680
|
const { serve: startServer } = await import("@hono/node-server");
|
|
8666
8681
|
startServer({
|
|
@@ -9835,4 +9850,4 @@ export {
|
|
|
9835
9850
|
preprocessArgv,
|
|
9836
9851
|
runCli
|
|
9837
9852
|
};
|
|
9838
|
-
//# sourceMappingURL=chunk-7TJ2PON3.js.map
|
|
9853
|
+
//# sourceMappingURL=chunk-VVWPD4CN.js.map
|