agentv 4.25.5-next.1 → 4.26.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-XUJ3ATR2.js → chunk-4QP2SFRS.js} +64 -7
- package/dist/chunk-4QP2SFRS.js.map +1 -0
- package/dist/{chunk-WQTBGQ3M.js → chunk-63LUDTLO.js} +21 -5
- package/dist/chunk-63LUDTLO.js.map +1 -0
- package/dist/cli.js +2 -2
- package/dist/index.js +2 -2
- package/dist/{interactive-FMDTIIH7.js → interactive-CCOGDYRK.js} +46 -12
- package/dist/interactive-CCOGDYRK.js.map +1 -0
- package/dist/studio/assets/{index-BRKyoUlX.js → index-DRYeLvWd.js} +1 -1
- package/dist/studio/assets/index-DttU4JJk.css +1 -0
- package/dist/studio/assets/index-qd53QYOH.js +116 -0
- package/dist/studio/index.html +2 -2
- package/package.json +1 -1
- package/dist/chunk-WQTBGQ3M.js.map +0 -1
- package/dist/chunk-XUJ3ATR2.js.map +0 -1
- package/dist/interactive-FMDTIIH7.js.map +0 -1
- package/dist/studio/assets/index-DWr4P2nR.js +0 -116
- package/dist/studio/assets/index-DY-K3PKr.css +0 -1
|
@@ -45,7 +45,7 @@ import {
|
|
|
45
45
|
validateFileReferences,
|
|
46
46
|
validateTargetsFile,
|
|
47
47
|
validateWorkspacePaths
|
|
48
|
-
} from "./chunk-WQTBGQ3M.js";
|
|
48
|
+
} from "./chunk-63LUDTLO.js";
|
|
49
49
|
import {
|
|
50
50
|
RESULT_INDEX_FILENAME,
|
|
51
51
|
aggregateRunDir,
|
|
@@ -3997,7 +3997,7 @@ var evalRunCommand = command({
|
|
|
3997
3997
|
},
|
|
3998
3998
|
handler: async (args) => {
|
|
3999
3999
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4000
|
-
const { launchInteractiveWizard } = await import("./interactive-FMDTIIH7.js");
|
|
4000
|
+
const { launchInteractiveWizard } = await import("./interactive-CCOGDYRK.js");
|
|
4001
4001
|
await launchInteractiveWizard();
|
|
4002
4002
|
return;
|
|
4003
4003
|
}
|
|
@@ -9241,6 +9241,18 @@ async function discoverTargetsInProject(cwd) {
|
|
|
9241
9241
|
return [];
|
|
9242
9242
|
}
|
|
9243
9243
|
}
|
|
9244
|
+
function validateResumeOptions(req) {
|
|
9245
|
+
const modes = [];
|
|
9246
|
+
if (req.resume) modes.push("resume");
|
|
9247
|
+
if (req.rerun_failed) modes.push("rerun_failed");
|
|
9248
|
+
if (req.retry_errors?.trim()) {
|
|
9249
|
+
modes.push("retry_errors");
|
|
9250
|
+
}
|
|
9251
|
+
if (modes.length > 1) {
|
|
9252
|
+
return `resume, rerun_failed, and retry_errors are mutually exclusive (got: ${modes.join(", ")})`;
|
|
9253
|
+
}
|
|
9254
|
+
return void 0;
|
|
9255
|
+
}
|
|
9244
9256
|
function buildCliArgs(req) {
|
|
9245
9257
|
const args = ["eval"];
|
|
9246
9258
|
if (req.suite_filter?.trim()) {
|
|
@@ -9269,6 +9281,18 @@ function buildCliArgs(req) {
|
|
|
9269
9281
|
if (req.dry_run) {
|
|
9270
9282
|
args.push("--dry-run");
|
|
9271
9283
|
}
|
|
9284
|
+
if (req.output?.trim()) {
|
|
9285
|
+
args.push("--output", req.output.trim());
|
|
9286
|
+
}
|
|
9287
|
+
if (req.resume) {
|
|
9288
|
+
args.push("--resume");
|
|
9289
|
+
}
|
|
9290
|
+
if (req.rerun_failed) {
|
|
9291
|
+
args.push("--rerun-failed");
|
|
9292
|
+
}
|
|
9293
|
+
if (req.retry_errors?.trim()) {
|
|
9294
|
+
args.push("--retry-errors", req.retry_errors.trim());
|
|
9295
|
+
}
|
|
9272
9296
|
return args;
|
|
9273
9297
|
}
|
|
9274
9298
|
function buildCliPreview(args) {
|
|
@@ -9285,8 +9309,8 @@ function resolveCliPath(cwd) {
|
|
|
9285
9309
|
}
|
|
9286
9310
|
}
|
|
9287
9311
|
const currentDir = typeof __dirname !== "undefined" ? __dirname : path16.dirname(fileURLToPath2(import.meta.url));
|
|
9288
|
-
const fromSrc = path16.resolve(currentDir, "
|
|
9289
|
-
const fromDist = path16.resolve(currentDir, "
|
|
9312
|
+
const fromSrc = path16.resolve(currentDir, "../../cli.ts");
|
|
9313
|
+
const fromDist = path16.resolve(currentDir, "cli.js");
|
|
9290
9314
|
if (existsSync11(fromSrc)) return { binPath: "bun", args: [fromSrc] };
|
|
9291
9315
|
if (existsSync11(fromDist)) return { binPath: "bun", args: [fromDist] };
|
|
9292
9316
|
if (isCommandAvailable("agentv")) {
|
|
@@ -9342,6 +9366,10 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
9342
9366
|
if (!body.suite_filter?.trim() && (!body.test_ids || body.test_ids.length === 0)) {
|
|
9343
9367
|
return c4.json({ error: "Provide suite_filter or test_ids" }, 400);
|
|
9344
9368
|
}
|
|
9369
|
+
const resumeError = validateResumeOptions(body);
|
|
9370
|
+
if (resumeError) {
|
|
9371
|
+
return c4.json({ error: resumeError }, 400);
|
|
9372
|
+
}
|
|
9345
9373
|
const cliPaths = resolveCliPath(cwd);
|
|
9346
9374
|
if (!cliPaths) {
|
|
9347
9375
|
return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
|
|
@@ -9471,6 +9499,9 @@ Process error: ${err2.message}`;
|
|
|
9471
9499
|
}
|
|
9472
9500
|
});
|
|
9473
9501
|
app2.post("/api/benchmarks/:benchmarkId/eval/run", async (c4) => {
|
|
9502
|
+
if (readOnly) {
|
|
9503
|
+
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
9504
|
+
}
|
|
9474
9505
|
const cwd = getCwd(c4);
|
|
9475
9506
|
let body;
|
|
9476
9507
|
try {
|
|
@@ -9481,6 +9512,10 @@ Process error: ${err2.message}`;
|
|
|
9481
9512
|
if (!body.suite_filter?.trim() && (!body.test_ids || body.test_ids.length === 0)) {
|
|
9482
9513
|
return c4.json({ error: "Provide suite_filter or test_ids" }, 400);
|
|
9483
9514
|
}
|
|
9515
|
+
const resumeError = validateResumeOptions(body);
|
|
9516
|
+
if (resumeError) {
|
|
9517
|
+
return c4.json({ error: resumeError }, 400);
|
|
9518
|
+
}
|
|
9484
9519
|
const cliPaths = resolveCliPath(cwd);
|
|
9485
9520
|
if (!cliPaths) {
|
|
9486
9521
|
return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
|
|
@@ -9884,15 +9919,35 @@ async function handleRunDetail(c4, { searchDir }) {
|
|
|
9884
9919
|
if (!meta) return c4.json({ error: "Run not found" }, 404);
|
|
9885
9920
|
try {
|
|
9886
9921
|
const loaded = loadManifestResults(meta.path);
|
|
9922
|
+
const resumeMeta = meta.source === "local" ? deriveResumeMeta(searchDir, meta.path) : {};
|
|
9887
9923
|
return c4.json({
|
|
9888
9924
|
results: stripHeavyFields(loaded),
|
|
9889
9925
|
source: meta.source,
|
|
9890
|
-
source_label: meta.displayName
|
|
9926
|
+
source_label: meta.displayName,
|
|
9927
|
+
...resumeMeta
|
|
9891
9928
|
});
|
|
9892
9929
|
} catch {
|
|
9893
9930
|
return c4.json({ error: "Failed to load run" }, 500);
|
|
9894
9931
|
}
|
|
9895
9932
|
}
|
|
9933
|
+
function deriveResumeMeta(cwd, manifestPath) {
|
|
9934
|
+
const out = {};
|
|
9935
|
+
const runDir = path19.dirname(manifestPath);
|
|
9936
|
+
const relative3 = path19.relative(cwd, runDir);
|
|
9937
|
+
out.run_dir = relative3 !== "" && !relative3.startsWith("..") ? relative3 : runDir;
|
|
9938
|
+
try {
|
|
9939
|
+
const benchmarkPath = path19.join(runDir, "benchmark.json");
|
|
9940
|
+
if (existsSync14(benchmarkPath)) {
|
|
9941
|
+
const parsed = JSON.parse(readFileSync12(benchmarkPath, "utf8"));
|
|
9942
|
+
const evalFile = parsed.metadata?.eval_file;
|
|
9943
|
+
if (typeof evalFile === "string" && evalFile.trim()) {
|
|
9944
|
+
out.suite_filter = evalFile.trim();
|
|
9945
|
+
}
|
|
9946
|
+
}
|
|
9947
|
+
} catch {
|
|
9948
|
+
}
|
|
9949
|
+
return out;
|
|
9950
|
+
}
|
|
9896
9951
|
async function handleRunSuites(c4, { searchDir, agentvDir }) {
|
|
9897
9952
|
const filename = c4.req.param("filename") ?? "";
|
|
9898
9953
|
const meta = await findRunById(searchDir, filename);
|
|
@@ -11956,7 +12011,9 @@ function preprocessArgv(argv) {
|
|
|
11956
12011
|
const isTopLevel = !result.slice(0, evalIdx).some((arg) => TOP_LEVEL_COMMANDS.has(arg));
|
|
11957
12012
|
if (isTopLevel) {
|
|
11958
12013
|
const nextArg = result[evalIdx + 1];
|
|
11959
|
-
|
|
12014
|
+
const isHelp = nextArg === "--help" || nextArg === "-h";
|
|
12015
|
+
const isKnownSubcommand = nextArg !== void 0 && EVAL_SUBCOMMANDS.has(nextArg);
|
|
12016
|
+
if (!isHelp && !isKnownSubcommand) {
|
|
11960
12017
|
result.splice(evalIdx + 1, 0, "run");
|
|
11961
12018
|
}
|
|
11962
12019
|
}
|
|
@@ -11989,4 +12046,4 @@ export {
|
|
|
11989
12046
|
preprocessArgv,
|
|
11990
12047
|
runCli
|
|
11991
12048
|
};
|
|
11992
|
-
//# sourceMappingURL=chunk-XUJ3ATR2.js.map
|
|
12049
|
+
//# sourceMappingURL=chunk-4QP2SFRS.js.map
|