agentv 4.28.0 → 4.29.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-WTMNQKPV.js → artifact-writer-RQXU4LZV.js} +3 -3
- package/dist/{chunk-NL5H3TIC.js → chunk-FEDIWLKK.js} +7 -7
- package/dist/chunk-FEDIWLKK.js.map +1 -0
- package/dist/{chunk-2WS3BEPV.js → chunk-GLJVO5PK.js} +165 -126
- package/dist/chunk-GLJVO5PK.js.map +1 -0
- package/dist/{chunk-7T6AF75O.js → chunk-OS67VZUO.js} +2 -2
- package/dist/{chunk-WZVOY2W2.js → chunk-VZMGBDJD.js} +109 -52
- package/dist/chunk-VZMGBDJD.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/{dist-VYHXTFO3.js → dist-HLU6WIYL.js} +24 -24
- package/dist/index.js +4 -4
- package/dist/{interactive-7R2K3CBY.js → interactive-5LEM6ITD.js} +4 -4
- package/dist/studio/assets/index-CIpCCDKl.css +1 -0
- package/dist/studio/assets/{index-DLabAPXU.js → index-DWPeWzK5.js} +1 -1
- package/dist/studio/assets/{index-inPa17Qe.js → index-LnuhQgnU.js} +21 -21
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-2WS3BEPV.js.map +0 -1
- package/dist/chunk-NL5H3TIC.js.map +0 -1
- package/dist/chunk-WZVOY2W2.js.map +0 -1
- package/dist/studio/assets/index-BGFW04Lj.css +0 -1
- /package/dist/{artifact-writer-WTMNQKPV.js.map → artifact-writer-RQXU4LZV.js.map} +0 -0
- /package/dist/{chunk-7T6AF75O.js.map → chunk-OS67VZUO.js.map} +0 -0
- /package/dist/{dist-VYHXTFO3.js.map → dist-HLU6WIYL.js.map} +0 -0
- /package/dist/{interactive-7R2K3CBY.js.map → interactive-5LEM6ITD.js.map} +0 -0
|
@@ -45,7 +45,7 @@ import {
|
|
|
45
45
|
validateFileReferences,
|
|
46
46
|
validateTargetsFile,
|
|
47
47
|
validateWorkspacePaths
|
|
48
|
-
} from "./chunk-
|
|
48
|
+
} from "./chunk-FEDIWLKK.js";
|
|
49
49
|
import {
|
|
50
50
|
RESULT_INDEX_FILENAME,
|
|
51
51
|
aggregateRunDir,
|
|
@@ -53,27 +53,27 @@ import {
|
|
|
53
53
|
resolveRunManifestPath,
|
|
54
54
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
55
55
|
writeArtifactsFromResults
|
|
56
|
-
} from "./chunk-
|
|
56
|
+
} from "./chunk-OS67VZUO.js";
|
|
57
57
|
import {
|
|
58
58
|
DEFAULT_CATEGORY,
|
|
59
|
-
|
|
59
|
+
addProject,
|
|
60
60
|
deriveCategory,
|
|
61
61
|
discoverClaudeSessions,
|
|
62
62
|
discoverCodexSessions,
|
|
63
|
-
getBenchmark,
|
|
64
63
|
getOutputFilenames,
|
|
65
|
-
|
|
64
|
+
getProject,
|
|
65
|
+
loadProjectRegistry,
|
|
66
66
|
parseClaudeSession,
|
|
67
67
|
parseCodexSession,
|
|
68
68
|
readTranscriptFile,
|
|
69
|
-
|
|
69
|
+
removeProject,
|
|
70
70
|
runBeforeSessionHook,
|
|
71
71
|
scanRepoDeps,
|
|
72
|
-
|
|
72
|
+
syncProjects,
|
|
73
73
|
toTranscriptJsonLines,
|
|
74
74
|
transpileEvalYamlFile,
|
|
75
75
|
trimBaselineResult
|
|
76
|
-
} from "./chunk-
|
|
76
|
+
} from "./chunk-VZMGBDJD.js";
|
|
77
77
|
import {
|
|
78
78
|
DEFAULT_THRESHOLD,
|
|
79
79
|
createBuiltinRegistry,
|
|
@@ -4053,7 +4053,7 @@ var evalRunCommand = command({
|
|
|
4053
4053
|
},
|
|
4054
4054
|
handler: async (args) => {
|
|
4055
4055
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4056
|
-
const { launchInteractiveWizard } = await import("./interactive-
|
|
4056
|
+
const { launchInteractiveWizard } = await import("./interactive-5LEM6ITD.js");
|
|
4057
4057
|
await launchInteractiveWizard();
|
|
4058
4058
|
return;
|
|
4059
4059
|
}
|
|
@@ -4121,7 +4121,7 @@ var evalRunCommand = command({
|
|
|
4121
4121
|
// src/commands/eval/index.ts
|
|
4122
4122
|
var evalCommand = subcommands({
|
|
4123
4123
|
name: "eval",
|
|
4124
|
-
description: "Evaluation commands",
|
|
4124
|
+
description: "Evaluation commands. Shorthand: `agentv eval <eval-paths...>` aliases `agentv eval run <eval-paths...>`.",
|
|
4125
4125
|
cmds: {
|
|
4126
4126
|
run: evalRunCommand,
|
|
4127
4127
|
assert: evalAssertCommand,
|
|
@@ -9338,7 +9338,7 @@ import { Hono } from "hono";
|
|
|
9338
9338
|
|
|
9339
9339
|
// src/commands/results/eval-runner.ts
|
|
9340
9340
|
import { execFileSync as execFileSync2, spawn } from "node:child_process";
|
|
9341
|
-
import { existsSync as existsSync12 } from "node:fs";
|
|
9341
|
+
import { createWriteStream, existsSync as existsSync12, mkdirSync as mkdirSync3 } from "node:fs";
|
|
9342
9342
|
import path17 from "node:path";
|
|
9343
9343
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
9344
9344
|
var activeRuns = /* @__PURE__ */ new Map();
|
|
@@ -9365,6 +9365,14 @@ function getActiveRunTarget(indexJsonlPath) {
|
|
|
9365
9365
|
}
|
|
9366
9366
|
return void 0;
|
|
9367
9367
|
}
|
|
9368
|
+
function getActiveRunStatus(indexJsonlPath) {
|
|
9369
|
+
for (const run2 of activeRuns.values()) {
|
|
9370
|
+
if (run2.outputDir && path17.join(run2.outputDir, "index.jsonl") === indexJsonlPath) {
|
|
9371
|
+
return run2.status;
|
|
9372
|
+
}
|
|
9373
|
+
}
|
|
9374
|
+
return void 0;
|
|
9375
|
+
}
|
|
9368
9376
|
async function discoverTargetsInProject(cwd) {
|
|
9369
9377
|
const repoRoot = await findRepoRoot(cwd) ?? cwd;
|
|
9370
9378
|
let targetsFilePath;
|
|
@@ -9477,6 +9485,17 @@ function isCommandAvailable(cmd) {
|
|
|
9477
9485
|
return false;
|
|
9478
9486
|
}
|
|
9479
9487
|
}
|
|
9488
|
+
function openConsoleLogStream(outputDir) {
|
|
9489
|
+
try {
|
|
9490
|
+
mkdirSync3(outputDir, { recursive: true });
|
|
9491
|
+
const stream = createWriteStream(path17.join(outputDir, "console.log"), { flags: "w" });
|
|
9492
|
+
stream.on("error", () => {
|
|
9493
|
+
});
|
|
9494
|
+
return stream;
|
|
9495
|
+
} catch {
|
|
9496
|
+
return void 0;
|
|
9497
|
+
}
|
|
9498
|
+
}
|
|
9480
9499
|
function registerEvalRoutes(app2, getCwd, options) {
|
|
9481
9500
|
const readOnly = options?.readOnly === true;
|
|
9482
9501
|
app2.get("/api/eval/discover", async (c4) => {
|
|
@@ -9553,13 +9572,16 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
9553
9572
|
});
|
|
9554
9573
|
run2.process = child;
|
|
9555
9574
|
run2.status = "running";
|
|
9575
|
+
const logStream = openConsoleLogStream(outputDir);
|
|
9556
9576
|
child.stdout?.on("data", (chunk) => {
|
|
9577
|
+
logStream?.write(chunk);
|
|
9557
9578
|
run2.stdout += chunk.toString();
|
|
9558
9579
|
if (run2.stdout.length > 1e5) {
|
|
9559
9580
|
run2.stdout = run2.stdout.slice(-8e4);
|
|
9560
9581
|
}
|
|
9561
9582
|
});
|
|
9562
9583
|
child.stderr?.on("data", (chunk) => {
|
|
9584
|
+
logStream?.write(chunk);
|
|
9563
9585
|
run2.stderr += chunk.toString();
|
|
9564
9586
|
if (run2.stderr.length > 1e5) {
|
|
9565
9587
|
run2.stderr = run2.stderr.slice(-8e4);
|
|
@@ -9570,6 +9592,7 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
9570
9592
|
run2.status = code === 0 ? "finished" : "failed";
|
|
9571
9593
|
run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9572
9594
|
run2.process = void 0;
|
|
9595
|
+
logStream?.end();
|
|
9573
9596
|
pruneFinishedRuns();
|
|
9574
9597
|
});
|
|
9575
9598
|
child.on("error", (err2) => {
|
|
@@ -9578,6 +9601,10 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
9578
9601
|
Process error: ${err2.message}`;
|
|
9579
9602
|
run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9580
9603
|
run2.process = void 0;
|
|
9604
|
+
logStream?.write(`
|
|
9605
|
+
Process error: ${err2.message}
|
|
9606
|
+
`);
|
|
9607
|
+
logStream?.end();
|
|
9581
9608
|
});
|
|
9582
9609
|
return c4.json(
|
|
9583
9610
|
{
|
|
@@ -9649,7 +9676,7 @@ Process error: ${err2.message}`;
|
|
|
9649
9676
|
const args = buildCliArgs(body);
|
|
9650
9677
|
return c4.json({ command: buildCliPreview(args) });
|
|
9651
9678
|
});
|
|
9652
|
-
app2.get("/api/
|
|
9679
|
+
app2.get("/api/projects/:projectId/eval/discover", async (c4) => {
|
|
9653
9680
|
const cwd = getCwd(c4);
|
|
9654
9681
|
try {
|
|
9655
9682
|
const files = await discoverEvalFiles(cwd);
|
|
@@ -9664,7 +9691,7 @@ Process error: ${err2.message}`;
|
|
|
9664
9691
|
return c4.json({ error: err2.message, eval_files: [] }, 500);
|
|
9665
9692
|
}
|
|
9666
9693
|
});
|
|
9667
|
-
app2.get("/api/
|
|
9694
|
+
app2.get("/api/projects/:projectId/eval/targets", async (c4) => {
|
|
9668
9695
|
const cwd = getCwd(c4);
|
|
9669
9696
|
try {
|
|
9670
9697
|
const names = await discoverTargetsInProject(cwd);
|
|
@@ -9673,7 +9700,7 @@ Process error: ${err2.message}`;
|
|
|
9673
9700
|
return c4.json({ error: err2.message, targets: [] }, 500);
|
|
9674
9701
|
}
|
|
9675
9702
|
});
|
|
9676
|
-
app2.post("/api/
|
|
9703
|
+
app2.post("/api/projects/:projectId/eval/run", async (c4) => {
|
|
9677
9704
|
if (readOnly) {
|
|
9678
9705
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
9679
9706
|
}
|
|
@@ -9722,11 +9749,14 @@ Process error: ${err2.message}`;
|
|
|
9722
9749
|
});
|
|
9723
9750
|
run2.process = child;
|
|
9724
9751
|
run2.status = "running";
|
|
9752
|
+
const logStream = openConsoleLogStream(outputDir);
|
|
9725
9753
|
child.stdout?.on("data", (chunk) => {
|
|
9754
|
+
logStream?.write(chunk);
|
|
9726
9755
|
run2.stdout += chunk.toString();
|
|
9727
9756
|
if (run2.stdout.length > 1e5) run2.stdout = run2.stdout.slice(-8e4);
|
|
9728
9757
|
});
|
|
9729
9758
|
child.stderr?.on("data", (chunk) => {
|
|
9759
|
+
logStream?.write(chunk);
|
|
9730
9760
|
run2.stderr += chunk.toString();
|
|
9731
9761
|
if (run2.stderr.length > 1e5) run2.stderr = run2.stderr.slice(-8e4);
|
|
9732
9762
|
});
|
|
@@ -9735,6 +9765,7 @@ Process error: ${err2.message}`;
|
|
|
9735
9765
|
run2.status = code === 0 ? "finished" : "failed";
|
|
9736
9766
|
run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9737
9767
|
run2.process = void 0;
|
|
9768
|
+
logStream?.end();
|
|
9738
9769
|
pruneFinishedRuns();
|
|
9739
9770
|
});
|
|
9740
9771
|
child.on("error", (err2) => {
|
|
@@ -9743,6 +9774,10 @@ Process error: ${err2.message}`;
|
|
|
9743
9774
|
Process error: ${err2.message}`;
|
|
9744
9775
|
run2.finishedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9745
9776
|
run2.process = void 0;
|
|
9777
|
+
logStream?.write(`
|
|
9778
|
+
Process error: ${err2.message}
|
|
9779
|
+
`);
|
|
9780
|
+
logStream?.end();
|
|
9746
9781
|
});
|
|
9747
9782
|
return c4.json({ id: runId, status: run2.status, command: command2 }, 202);
|
|
9748
9783
|
} catch (err2) {
|
|
@@ -9752,7 +9787,7 @@ Process error: ${err2.message}`;
|
|
|
9752
9787
|
return c4.json({ error: err2.message }, 500);
|
|
9753
9788
|
}
|
|
9754
9789
|
});
|
|
9755
|
-
app2.post("/api/
|
|
9790
|
+
app2.post("/api/projects/:projectId/eval/run/:id/stop", (c4) => {
|
|
9756
9791
|
if (readOnly) {
|
|
9757
9792
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
9758
9793
|
}
|
|
@@ -9769,7 +9804,7 @@ Process error: ${err2.message}`;
|
|
|
9769
9804
|
}
|
|
9770
9805
|
return c4.json({ stopped: true, status: run2.status });
|
|
9771
9806
|
});
|
|
9772
|
-
app2.get("/api/
|
|
9807
|
+
app2.get("/api/projects/:projectId/eval/status/:id", (c4) => {
|
|
9773
9808
|
const id = c4.req.param("id");
|
|
9774
9809
|
const run2 = activeRuns.get(id ?? "");
|
|
9775
9810
|
if (!run2) return c4.json({ error: "Run not found" }, 404);
|
|
@@ -9784,7 +9819,7 @@ Process error: ${err2.message}`;
|
|
|
9784
9819
|
stderr: run2.stderr.slice(-5e3)
|
|
9785
9820
|
});
|
|
9786
9821
|
});
|
|
9787
|
-
app2.get("/api/
|
|
9822
|
+
app2.get("/api/projects/:projectId/eval/runs", (c4) => {
|
|
9788
9823
|
const runs = [...activeRuns.values()].map((r) => ({
|
|
9789
9824
|
id: r.id,
|
|
9790
9825
|
status: r.status,
|
|
@@ -9797,7 +9832,7 @@ Process error: ${err2.message}`;
|
|
|
9797
9832
|
runs.sort((a, b) => b.started_at.localeCompare(a.started_at));
|
|
9798
9833
|
return c4.json({ runs });
|
|
9799
9834
|
});
|
|
9800
|
-
app2.post("/api/
|
|
9835
|
+
app2.post("/api/projects/:projectId/eval/preview", async (c4) => {
|
|
9801
9836
|
let body;
|
|
9802
9837
|
try {
|
|
9803
9838
|
body = await c4.req.json();
|
|
@@ -9887,7 +9922,7 @@ function normalizeTags(tags) {
|
|
|
9887
9922
|
}
|
|
9888
9923
|
|
|
9889
9924
|
// src/commands/results/studio-config.ts
|
|
9890
|
-
import { existsSync as existsSync14, mkdirSync as
|
|
9925
|
+
import { existsSync as existsSync14, mkdirSync as mkdirSync4, readFileSync as readFileSync11, writeFileSync as writeFileSync5 } from "node:fs";
|
|
9891
9926
|
import path19 from "node:path";
|
|
9892
9927
|
import { stringify as stringifyYaml2 } from "yaml";
|
|
9893
9928
|
var DEFAULTS = {
|
|
@@ -9921,7 +9956,7 @@ function loadStudioConfig(agentvDir) {
|
|
|
9921
9956
|
}
|
|
9922
9957
|
function saveStudioConfig(agentvDir, config) {
|
|
9923
9958
|
if (!existsSync14(agentvDir)) {
|
|
9924
|
-
|
|
9959
|
+
mkdirSync4(agentvDir, { recursive: true });
|
|
9925
9960
|
}
|
|
9926
9961
|
const configPath = path19.join(agentvDir, "config.yaml");
|
|
9927
9962
|
let existing = {};
|
|
@@ -9977,14 +10012,11 @@ Serving most recent: ${metas[0].path}
|
|
|
9977
10012
|
}
|
|
9978
10013
|
return metas[0].path;
|
|
9979
10014
|
}
|
|
9980
|
-
function resolveDashboardMode(
|
|
10015
|
+
function resolveDashboardMode(projectCount, options) {
|
|
9981
10016
|
if (options.single === true) {
|
|
9982
|
-
return {
|
|
10017
|
+
return { projectDashboard: false };
|
|
9983
10018
|
}
|
|
9984
|
-
|
|
9985
|
-
return { isMultiBenchmark: true, showMultiWarning: true };
|
|
9986
|
-
}
|
|
9987
|
-
return { isMultiBenchmark: benchmarkCount > 1, showMultiWarning: false };
|
|
10019
|
+
return { projectDashboard: projectCount > 1 };
|
|
9988
10020
|
}
|
|
9989
10021
|
function feedbackPath(resultDir) {
|
|
9990
10022
|
return path20.join(resultDir, "feedback.json");
|
|
@@ -10096,6 +10128,7 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
|
|
|
10096
10128
|
}
|
|
10097
10129
|
} catch {
|
|
10098
10130
|
}
|
|
10131
|
+
const liveStatus = getActiveRunStatus(m.path);
|
|
10099
10132
|
const tagsEntry = readRunTags(m.path);
|
|
10100
10133
|
return {
|
|
10101
10134
|
filename: m.filename,
|
|
@@ -10109,11 +10142,30 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
|
|
|
10109
10142
|
source: m.source,
|
|
10110
10143
|
...target && { target },
|
|
10111
10144
|
...experiment && { experiment },
|
|
10112
|
-
...tagsEntry && { tags: tagsEntry.tags }
|
|
10145
|
+
...tagsEntry && { tags: tagsEntry.tags },
|
|
10146
|
+
...liveStatus && { status: liveStatus }
|
|
10113
10147
|
};
|
|
10114
10148
|
})
|
|
10115
10149
|
});
|
|
10116
10150
|
}
|
|
10151
|
+
async function handleRunLog(c4, { searchDir }) {
|
|
10152
|
+
const filename = c4.req.param("filename") ?? "";
|
|
10153
|
+
const meta = await findRunById(searchDir, filename);
|
|
10154
|
+
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
10155
|
+
if (meta.source === "remote") {
|
|
10156
|
+
return c4.json({ error: "Console log is not available for remote runs" }, 404);
|
|
10157
|
+
}
|
|
10158
|
+
const logPath = path20.join(path20.dirname(meta.path), "console.log");
|
|
10159
|
+
if (!existsSync15(logPath)) {
|
|
10160
|
+
return c4.json({ error: "Console log not found for this run" }, 404);
|
|
10161
|
+
}
|
|
10162
|
+
try {
|
|
10163
|
+
const content = readFileSync12(logPath, "utf8");
|
|
10164
|
+
return c4.text(content);
|
|
10165
|
+
} catch {
|
|
10166
|
+
return c4.json({ error: "Failed to read console log" }, 500);
|
|
10167
|
+
}
|
|
10168
|
+
}
|
|
10117
10169
|
async function handleRunDetail(c4, { searchDir }) {
|
|
10118
10170
|
const filename = c4.req.param("filename") ?? "";
|
|
10119
10171
|
const meta = await findRunById(searchDir, filename);
|
|
@@ -10523,8 +10575,8 @@ function handleConfig(c4, { agentvDir, searchDir }, options) {
|
|
|
10523
10575
|
return c4.json({
|
|
10524
10576
|
...loadStudioConfig(agentvDir),
|
|
10525
10577
|
read_only: options?.readOnly === true,
|
|
10526
|
-
|
|
10527
|
-
|
|
10578
|
+
project_name: path20.basename(searchDir),
|
|
10579
|
+
project_dashboard: options?.projectDashboard === true
|
|
10528
10580
|
});
|
|
10529
10581
|
}
|
|
10530
10582
|
function handleFeedbackRead(c4, { searchDir }) {
|
|
@@ -10581,14 +10633,14 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10581
10633
|
const defaultCtx = { searchDir, agentvDir };
|
|
10582
10634
|
const readOnly = options?.readOnly === true;
|
|
10583
10635
|
const app2 = new Hono();
|
|
10584
|
-
function
|
|
10585
|
-
const
|
|
10586
|
-
if (!
|
|
10587
|
-
return c4.json({ error: "
|
|
10636
|
+
function withProject(c4, handler) {
|
|
10637
|
+
const project = getProject(c4.req.param("projectId") ?? "");
|
|
10638
|
+
if (!project || !existsSync15(project.path)) {
|
|
10639
|
+
return c4.json({ error: "Project not found" }, 404);
|
|
10588
10640
|
}
|
|
10589
10641
|
return handler(c4, {
|
|
10590
|
-
searchDir:
|
|
10591
|
-
agentvDir: path20.join(
|
|
10642
|
+
searchDir: project.path,
|
|
10643
|
+
agentvDir: path20.join(project.path, ".agentv")
|
|
10592
10644
|
});
|
|
10593
10645
|
}
|
|
10594
10646
|
app2.post("/api/config", async (c4) => {
|
|
@@ -10608,7 +10660,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10608
10660
|
return c4.json({ error: "Failed to save config" }, 500);
|
|
10609
10661
|
}
|
|
10610
10662
|
});
|
|
10611
|
-
function
|
|
10663
|
+
function projectEntryToWire(entry) {
|
|
10612
10664
|
return {
|
|
10613
10665
|
id: entry.id,
|
|
10614
10666
|
name: entry.name,
|
|
@@ -10617,10 +10669,10 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10617
10669
|
last_opened_at: entry.lastOpenedAt
|
|
10618
10670
|
};
|
|
10619
10671
|
}
|
|
10620
|
-
app2.get("/api/
|
|
10621
|
-
const registry =
|
|
10622
|
-
const
|
|
10623
|
-
registry.
|
|
10672
|
+
app2.get("/api/projects", async (c4) => {
|
|
10673
|
+
const registry = loadProjectRegistry();
|
|
10674
|
+
const projects = await Promise.all(
|
|
10675
|
+
registry.projects.map(async (p) => {
|
|
10624
10676
|
let runCount = 0;
|
|
10625
10677
|
let passRate = 0;
|
|
10626
10678
|
let lastRun = null;
|
|
@@ -10635,52 +10687,52 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10635
10687
|
} catch {
|
|
10636
10688
|
}
|
|
10637
10689
|
return {
|
|
10638
|
-
...
|
|
10690
|
+
...projectEntryToWire(p),
|
|
10639
10691
|
run_count: runCount,
|
|
10640
10692
|
pass_rate: passRate,
|
|
10641
10693
|
last_run: lastRun
|
|
10642
10694
|
};
|
|
10643
10695
|
})
|
|
10644
10696
|
);
|
|
10645
|
-
return c4.json({
|
|
10697
|
+
return c4.json({ projects });
|
|
10646
10698
|
});
|
|
10647
|
-
app2.post("/api/
|
|
10699
|
+
app2.post("/api/projects", async (c4) => {
|
|
10648
10700
|
if (readOnly) {
|
|
10649
10701
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
10650
10702
|
}
|
|
10651
10703
|
try {
|
|
10652
10704
|
const body = await c4.req.json();
|
|
10653
10705
|
if (!body.path) return c4.json({ error: "Missing path" }, 400);
|
|
10654
|
-
const entry =
|
|
10655
|
-
return c4.json(
|
|
10706
|
+
const entry = addProject(body.path);
|
|
10707
|
+
return c4.json(projectEntryToWire(entry), 201);
|
|
10656
10708
|
} catch (err2) {
|
|
10657
10709
|
return c4.json({ error: err2.message }, 400);
|
|
10658
10710
|
}
|
|
10659
10711
|
});
|
|
10660
|
-
app2.get("/api/
|
|
10661
|
-
const
|
|
10662
|
-
if (!
|
|
10712
|
+
app2.get("/api/projects/:projectId/summary", async (c4) => {
|
|
10713
|
+
const project = getProject(c4.req.param("projectId") ?? "");
|
|
10714
|
+
if (!project) return c4.json({ error: "Project not found" }, 404);
|
|
10663
10715
|
try {
|
|
10664
|
-
const { runs: metas } = await listMergedResultFiles(
|
|
10716
|
+
const { runs: metas } = await listMergedResultFiles(project.path);
|
|
10665
10717
|
const runCount = metas.length;
|
|
10666
10718
|
const passRate = runCount > 0 ? metas.reduce((s, m) => s + m.passRate, 0) / runCount : 0;
|
|
10667
10719
|
const lastRun = metas.length > 0 ? metas[0].timestamp : null;
|
|
10668
10720
|
return c4.json({
|
|
10669
|
-
id:
|
|
10670
|
-
name:
|
|
10671
|
-
path:
|
|
10721
|
+
id: project.id,
|
|
10722
|
+
name: project.name,
|
|
10723
|
+
path: project.path,
|
|
10672
10724
|
run_count: runCount,
|
|
10673
10725
|
pass_rate: passRate,
|
|
10674
10726
|
last_run: lastRun
|
|
10675
10727
|
});
|
|
10676
10728
|
} catch {
|
|
10677
|
-
return c4.json({ error: "Failed to read
|
|
10729
|
+
return c4.json({ error: "Failed to read project" }, 500);
|
|
10678
10730
|
}
|
|
10679
10731
|
});
|
|
10680
|
-
app2.get("/api/
|
|
10681
|
-
const registry =
|
|
10732
|
+
app2.get("/api/projects/all-runs", async (c4) => {
|
|
10733
|
+
const registry = loadProjectRegistry();
|
|
10682
10734
|
const allRuns = [];
|
|
10683
|
-
for (const p of registry.
|
|
10735
|
+
for (const p of registry.projects) {
|
|
10684
10736
|
try {
|
|
10685
10737
|
const { runs: metas } = await listMergedResultFiles(p.path);
|
|
10686
10738
|
for (const m of metas) {
|
|
@@ -10706,8 +10758,8 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10706
10758
|
source: m.source,
|
|
10707
10759
|
...target && { target },
|
|
10708
10760
|
...experiment && { experiment },
|
|
10709
|
-
|
|
10710
|
-
|
|
10761
|
+
project_id: p.id,
|
|
10762
|
+
project_name: p.name
|
|
10711
10763
|
});
|
|
10712
10764
|
}
|
|
10713
10765
|
} catch {
|
|
@@ -10716,19 +10768,19 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10716
10768
|
allRuns.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
|
|
10717
10769
|
return c4.json({ runs: allRuns });
|
|
10718
10770
|
});
|
|
10719
|
-
app2.delete("/api/
|
|
10771
|
+
app2.delete("/api/projects/:projectId", (c4) => {
|
|
10720
10772
|
if (readOnly) {
|
|
10721
10773
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
10722
10774
|
}
|
|
10723
|
-
const removed =
|
|
10724
|
-
if (!removed) return c4.json({ error: "
|
|
10775
|
+
const removed = removeProject(c4.req.param("projectId") ?? "");
|
|
10776
|
+
if (!removed) return c4.json({ error: "Project not found" }, 404);
|
|
10725
10777
|
return c4.json({ ok: true });
|
|
10726
10778
|
});
|
|
10727
10779
|
app2.get(
|
|
10728
10780
|
"/api/config",
|
|
10729
10781
|
(c4) => handleConfig(c4, defaultCtx, {
|
|
10730
10782
|
readOnly,
|
|
10731
|
-
|
|
10783
|
+
projectDashboard: options?.projectDashboard
|
|
10732
10784
|
})
|
|
10733
10785
|
);
|
|
10734
10786
|
app2.get("/api/remote/status", async (c4) => c4.json(await getRemoteResultsStatus(searchDir)));
|
|
@@ -10747,6 +10799,7 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10747
10799
|
return handleRunTagsDelete(c4, defaultCtx);
|
|
10748
10800
|
});
|
|
10749
10801
|
app2.get("/api/runs/:filename", (c4) => handleRunDetail(c4, defaultCtx));
|
|
10802
|
+
app2.get("/api/runs/:filename/log", (c4) => handleRunLog(c4, defaultCtx));
|
|
10750
10803
|
app2.get("/api/runs/:filename/suites", (c4) => handleRunSuites(c4, defaultCtx));
|
|
10751
10804
|
app2.get("/api/runs/:filename/categories", (c4) => handleRunCategories(c4, defaultCtx));
|
|
10752
10805
|
app2.get(
|
|
@@ -10826,75 +10879,73 @@ function createApp(results, resultDir, cwd, sourceFile, options) {
|
|
|
10826
10879
|
return c4.json({ entries: entries2 });
|
|
10827
10880
|
});
|
|
10828
10881
|
app2.get(
|
|
10829
|
-
"/api/
|
|
10830
|
-
(c4) =>
|
|
10882
|
+
"/api/projects/:projectId/config",
|
|
10883
|
+
(c4) => withProject(
|
|
10831
10884
|
c4,
|
|
10832
10885
|
(ctx, dataCtx) => handleConfig(ctx, dataCtx, {
|
|
10833
10886
|
readOnly,
|
|
10834
|
-
|
|
10887
|
+
projectDashboard: options?.projectDashboard
|
|
10835
10888
|
})
|
|
10836
10889
|
)
|
|
10837
10890
|
);
|
|
10838
10891
|
app2.get(
|
|
10839
|
-
"/api/
|
|
10840
|
-
(c4) =>
|
|
10892
|
+
"/api/projects/:projectId/remote/status",
|
|
10893
|
+
(c4) => withProject(
|
|
10841
10894
|
c4,
|
|
10842
10895
|
async (ctx, dataCtx) => ctx.json(await getRemoteResultsStatus(dataCtx.searchDir))
|
|
10843
10896
|
)
|
|
10844
10897
|
);
|
|
10845
10898
|
app2.post(
|
|
10846
|
-
"/api/
|
|
10847
|
-
(c4) =>
|
|
10899
|
+
"/api/projects/:projectId/remote/sync",
|
|
10900
|
+
(c4) => withProject(c4, async (ctx, dataCtx) => ctx.json(await syncRemoteResults(dataCtx.searchDir)))
|
|
10848
10901
|
);
|
|
10849
|
-
app2.get("/api/
|
|
10850
|
-
app2.put("/api/
|
|
10902
|
+
app2.get("/api/projects/:projectId/runs", (c4) => withProject(c4, handleRuns));
|
|
10903
|
+
app2.put("/api/projects/:projectId/runs/:filename/tags", (c4) => {
|
|
10851
10904
|
if (readOnly) {
|
|
10852
10905
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
10853
10906
|
}
|
|
10854
|
-
return
|
|
10907
|
+
return withProject(c4, handleRunTagsPut);
|
|
10855
10908
|
});
|
|
10856
|
-
app2.delete("/api/
|
|
10909
|
+
app2.delete("/api/projects/:projectId/runs/:filename/tags", (c4) => {
|
|
10857
10910
|
if (readOnly) {
|
|
10858
10911
|
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
10859
10912
|
}
|
|
10860
|
-
return
|
|
10913
|
+
return withProject(c4, handleRunTagsDelete);
|
|
10861
10914
|
});
|
|
10862
|
-
app2.get("/api/
|
|
10863
|
-
app2.get(
|
|
10864
|
-
|
|
10865
|
-
(c4) => withBenchmark(c4, handleRunSuites)
|
|
10866
|
-
);
|
|
10915
|
+
app2.get("/api/projects/:projectId/runs/:filename", (c4) => withProject(c4, handleRunDetail));
|
|
10916
|
+
app2.get("/api/projects/:projectId/runs/:filename/log", (c4) => withProject(c4, handleRunLog));
|
|
10917
|
+
app2.get("/api/projects/:projectId/runs/:filename/suites", (c4) => withProject(c4, handleRunSuites));
|
|
10867
10918
|
app2.get(
|
|
10868
|
-
"/api/
|
|
10869
|
-
(c4) =>
|
|
10919
|
+
"/api/projects/:projectId/runs/:filename/categories",
|
|
10920
|
+
(c4) => withProject(c4, handleRunCategories)
|
|
10870
10921
|
);
|
|
10871
10922
|
app2.get(
|
|
10872
|
-
"/api/
|
|
10873
|
-
(c4) =>
|
|
10923
|
+
"/api/projects/:projectId/runs/:filename/categories/:category/suites",
|
|
10924
|
+
(c4) => withProject(c4, handleCategorySuites)
|
|
10874
10925
|
);
|
|
10875
10926
|
app2.get(
|
|
10876
|
-
"/api/
|
|
10877
|
-
(c4) =>
|
|
10927
|
+
"/api/projects/:projectId/runs/:filename/evals/:evalId",
|
|
10928
|
+
(c4) => withProject(c4, handleEvalDetail)
|
|
10878
10929
|
);
|
|
10879
10930
|
app2.get(
|
|
10880
|
-
"/api/
|
|
10881
|
-
(c4) =>
|
|
10931
|
+
"/api/projects/:projectId/runs/:filename/evals/:evalId/files",
|
|
10932
|
+
(c4) => withProject(c4, handleEvalFiles)
|
|
10882
10933
|
);
|
|
10883
10934
|
app2.get(
|
|
10884
|
-
"/api/
|
|
10885
|
-
(c4) =>
|
|
10935
|
+
"/api/projects/:projectId/runs/:filename/evals/:evalId/files/*",
|
|
10936
|
+
(c4) => withProject(c4, handleEvalFileContent)
|
|
10886
10937
|
);
|
|
10887
|
-
app2.get("/api/
|
|
10888
|
-
app2.get("/api/
|
|
10889
|
-
app2.get("/api/
|
|
10890
|
-
app2.get("/api/
|
|
10938
|
+
app2.get("/api/projects/:projectId/experiments", (c4) => withProject(c4, handleExperiments));
|
|
10939
|
+
app2.get("/api/projects/:projectId/compare", (c4) => withProject(c4, handleCompare));
|
|
10940
|
+
app2.get("/api/projects/:projectId/targets", (c4) => withProject(c4, handleTargets));
|
|
10941
|
+
app2.get("/api/projects/:projectId/feedback", (c4) => withProject(c4, handleFeedbackRead));
|
|
10891
10942
|
registerEvalRoutes(
|
|
10892
10943
|
app2,
|
|
10893
10944
|
(c4) => {
|
|
10894
|
-
const
|
|
10895
|
-
if (
|
|
10896
|
-
const
|
|
10897
|
-
if (
|
|
10945
|
+
const projectId = c4.req.param("projectId");
|
|
10946
|
+
if (projectId) {
|
|
10947
|
+
const project = getProject(projectId);
|
|
10948
|
+
if (project) return project.path;
|
|
10898
10949
|
}
|
|
10899
10950
|
return searchDir;
|
|
10900
10951
|
},
|
|
@@ -10981,36 +11032,32 @@ var resultsServeCommand = command({
|
|
|
10981
11032
|
short: "d",
|
|
10982
11033
|
description: "Working directory (default: current directory)"
|
|
10983
11034
|
}),
|
|
10984
|
-
multi: flag({
|
|
10985
|
-
long: "multi",
|
|
10986
|
-
description: "Launch in multi-benchmark dashboard mode (deprecated; use auto-detect or --single)"
|
|
10987
|
-
}),
|
|
10988
11035
|
single: flag({
|
|
10989
11036
|
long: "single",
|
|
10990
|
-
description: "Force single-
|
|
11037
|
+
description: "Force single-project dashboard mode"
|
|
10991
11038
|
}),
|
|
10992
11039
|
add: option({
|
|
10993
11040
|
type: optional(string),
|
|
10994
11041
|
long: "add",
|
|
10995
|
-
description: "Register a
|
|
11042
|
+
description: "Register a project by path"
|
|
10996
11043
|
}),
|
|
10997
11044
|
remove: option({
|
|
10998
11045
|
type: optional(string),
|
|
10999
11046
|
long: "remove",
|
|
11000
|
-
description: "Unregister a
|
|
11047
|
+
description: "Unregister a project by ID"
|
|
11001
11048
|
}),
|
|
11002
11049
|
readOnly: flag({
|
|
11003
11050
|
long: "read-only",
|
|
11004
11051
|
description: "Disable write operations and launch Studio in read-only leaderboard mode"
|
|
11005
11052
|
})
|
|
11006
11053
|
},
|
|
11007
|
-
handler: async ({ source, port, dir,
|
|
11054
|
+
handler: async ({ source, port, dir, single, add, remove, readOnly }) => {
|
|
11008
11055
|
const cwd = dir ?? process.cwd();
|
|
11009
11056
|
const listenPort = port ?? (process.env.PORT ? Number(process.env.PORT) : 3117);
|
|
11010
11057
|
if (add) {
|
|
11011
11058
|
try {
|
|
11012
|
-
const entry =
|
|
11013
|
-
console.log(`Registered
|
|
11059
|
+
const entry = addProject(add);
|
|
11060
|
+
console.log(`Registered project: ${entry.name} (${entry.id}) at ${entry.path}`);
|
|
11014
11061
|
} catch (err2) {
|
|
11015
11062
|
console.error(`Error: ${err2.message}`);
|
|
11016
11063
|
process.exit(1);
|
|
@@ -11018,11 +11065,11 @@ var resultsServeCommand = command({
|
|
|
11018
11065
|
return;
|
|
11019
11066
|
}
|
|
11020
11067
|
if (remove) {
|
|
11021
|
-
const removed =
|
|
11068
|
+
const removed = removeProject(remove);
|
|
11022
11069
|
if (removed) {
|
|
11023
|
-
console.log(`Unregistered
|
|
11070
|
+
console.log(`Unregistered project: ${remove}`);
|
|
11024
11071
|
} else {
|
|
11025
|
-
console.error(`
|
|
11072
|
+
console.error(`Project not found: ${remove}`);
|
|
11026
11073
|
process.exit(1);
|
|
11027
11074
|
}
|
|
11028
11075
|
return;
|
|
@@ -11032,12 +11079,9 @@ var resultsServeCommand = command({
|
|
|
11032
11079
|
if (yamlConfig?.required_version) {
|
|
11033
11080
|
await enforceRequiredVersion(yamlConfig.required_version);
|
|
11034
11081
|
}
|
|
11035
|
-
const registry =
|
|
11036
|
-
const {
|
|
11037
|
-
|
|
11038
|
-
{ multi, single }
|
|
11039
|
-
);
|
|
11040
|
-
await syncBenchmarks(registry.benchmarks);
|
|
11082
|
+
const registry = loadProjectRegistry();
|
|
11083
|
+
const { projectDashboard } = resolveDashboardMode(registry.projects.length, { single });
|
|
11084
|
+
await syncProjects(registry.projects);
|
|
11041
11085
|
try {
|
|
11042
11086
|
let results = [];
|
|
11043
11087
|
let sourceFile;
|
|
@@ -11061,15 +11105,10 @@ var resultsServeCommand = command({
|
|
|
11061
11105
|
const resultDir = sourceFile ? path20.dirname(path20.resolve(sourceFile)) : cwd;
|
|
11062
11106
|
const app2 = createApp(results, resultDir, cwd, sourceFile, {
|
|
11063
11107
|
readOnly,
|
|
11064
|
-
|
|
11108
|
+
projectDashboard
|
|
11065
11109
|
});
|
|
11066
|
-
if (
|
|
11067
|
-
console.
|
|
11068
|
-
"Warning: --multi is deprecated. Studio now auto-detects multi-benchmark mode when multiple benchmarks are registered. Use --single to force the single-benchmark view."
|
|
11069
|
-
);
|
|
11070
|
-
}
|
|
11071
|
-
if (isMultiBenchmark) {
|
|
11072
|
-
console.log(`Multi-benchmark mode: ${registry.benchmarks.length} benchmark(s) registered`);
|
|
11110
|
+
if (projectDashboard) {
|
|
11111
|
+
console.log(`Project dashboard: ${registry.projects.length} project(s) registered`);
|
|
11073
11112
|
} else if (results.length > 0 && sourceFile) {
|
|
11074
11113
|
console.log(`Serving ${results.length} result(s) from ${sourceFile}`);
|
|
11075
11114
|
} else {
|
|
@@ -11077,7 +11116,7 @@ var resultsServeCommand = command({
|
|
|
11077
11116
|
console.log("Run an evaluation to see results: agentv eval <eval-file>");
|
|
11078
11117
|
}
|
|
11079
11118
|
console.log(`Dashboard: http://localhost:${listenPort}`);
|
|
11080
|
-
console.log(`
|
|
11119
|
+
console.log(`Projects API: http://localhost:${listenPort}/api/projects`);
|
|
11081
11120
|
console.log("Press Ctrl+C to stop");
|
|
11082
11121
|
const { serve: startServer } = await import("@hono/node-server");
|
|
11083
11122
|
startServer({
|
|
@@ -12549,4 +12588,4 @@ export {
|
|
|
12549
12588
|
preprocessArgv,
|
|
12550
12589
|
runCli
|
|
12551
12590
|
};
|
|
12552
|
-
//# sourceMappingURL=chunk-
|
|
12591
|
+
//# sourceMappingURL=chunk-GLJVO5PK.js.map
|