agentv 4.26.0-next.1 → 4.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-EX6SOLA5.js → artifact-writer-VRDVTNSX.js} +6 -4
- package/dist/{chunk-NLS5DS52.js → chunk-6BAB3XBI.js} +2 -2
- package/dist/{chunk-NLS5DS52.js.map → chunk-6BAB3XBI.js.map} +1 -1
- package/dist/{chunk-63LUDTLO.js → chunk-JA4WQNE6.js} +18 -9
- package/dist/chunk-JA4WQNE6.js.map +1 -0
- package/dist/{chunk-A5INWVY3.js → chunk-TWBKRGXR.js} +43 -7
- package/dist/chunk-TWBKRGXR.js.map +1 -0
- package/dist/{chunk-ZGEGRM5T.js → chunk-VOOYHYPR.js} +30 -5
- package/dist/chunk-VOOYHYPR.js.map +1 -0
- package/dist/{chunk-4QP2SFRS.js → chunk-XBUHMRX2.js} +68 -6
- package/dist/{chunk-4QP2SFRS.js.map → chunk-XBUHMRX2.js.map} +1 -1
- package/dist/cli.js +20 -5
- package/dist/cli.js.map +1 -1
- package/dist/{dist-SGFUYCY5.js → dist-COH43OLQ.js} +10 -4
- package/dist/index.js +5 -5
- package/dist/{interactive-CCOGDYRK.js → interactive-YMKWKPD7.js} +5 -5
- package/dist/studio/assets/index-BGFW04Lj.css +1 -0
- package/dist/studio/assets/{index-DRYeLvWd.js → index-DLabAPXU.js} +1 -1
- package/dist/studio/assets/{index-qd53QYOH.js → index-inPa17Qe.js} +17 -17
- package/dist/studio/index.html +2 -2
- package/dist/{ts-eval-loader-E6N374V2-VMFNVF5J.js → ts-eval-loader-BZ54W52K-KLIY3S3N.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-63LUDTLO.js.map +0 -1
- package/dist/chunk-A5INWVY3.js.map +0 -1
- package/dist/chunk-ZGEGRM5T.js.map +0 -1
- package/dist/studio/assets/index-DttU4JJk.css +0 -1
- /package/dist/{artifact-writer-EX6SOLA5.js.map → artifact-writer-VRDVTNSX.js.map} +0 -0
- /package/dist/{dist-SGFUYCY5.js.map → dist-COH43OLQ.js.map} +0 -0
- /package/dist/{interactive-CCOGDYRK.js.map → interactive-YMKWKPD7.js.map} +0 -0
- /package/dist/{ts-eval-loader-E6N374V2-VMFNVF5J.js.map → ts-eval-loader-BZ54W52K-KLIY3S3N.js.map} +0 -0
|
@@ -45,7 +45,7 @@ import {
|
|
|
45
45
|
validateFileReferences,
|
|
46
46
|
validateTargetsFile,
|
|
47
47
|
validateWorkspacePaths
|
|
48
|
-
} from "./chunk-63LUDTLO.js";
|
|
48
|
+
} from "./chunk-JA4WQNE6.js";
|
|
49
49
|
import {
|
|
50
50
|
RESULT_INDEX_FILENAME,
|
|
51
51
|
aggregateRunDir,
|
|
@@ -53,7 +53,7 @@ import {
|
|
|
53
53
|
resolveRunManifestPath,
|
|
54
54
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
55
55
|
writeArtifactsFromResults
|
|
56
|
-
} from "./chunk-A5INWVY3.js";
|
|
56
|
+
} from "./chunk-TWBKRGXR.js";
|
|
57
57
|
import {
|
|
58
58
|
DEFAULT_CATEGORY,
|
|
59
59
|
addBenchmark,
|
|
@@ -72,7 +72,7 @@ import {
|
|
|
72
72
|
toTranscriptJsonLines,
|
|
73
73
|
transpileEvalYamlFile,
|
|
74
74
|
trimBaselineResult
|
|
75
|
-
} from "./chunk-NLS5DS52.js";
|
|
75
|
+
} from "./chunk-6BAB3XBI.js";
|
|
76
76
|
import {
|
|
77
77
|
DEFAULT_THRESHOLD,
|
|
78
78
|
createBuiltinRegistry,
|
|
@@ -102,7 +102,7 @@ import {
|
|
|
102
102
|
runStartsWithAssertion,
|
|
103
103
|
toCamelCaseDeep,
|
|
104
104
|
toSnakeCaseDeep
|
|
105
|
-
} from "./chunk-ZGEGRM5T.js";
|
|
105
|
+
} from "./chunk-VOOYHYPR.js";
|
|
106
106
|
import {
|
|
107
107
|
__commonJS,
|
|
108
108
|
__require,
|
|
@@ -3997,7 +3997,7 @@ var evalRunCommand = command({
|
|
|
3997
3997
|
},
|
|
3998
3998
|
handler: async (args) => {
|
|
3999
3999
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4000
|
-
const { launchInteractiveWizard } = await import("./interactive-CCOGDYRK.js");
|
|
4000
|
+
const { launchInteractiveWizard } = await import("./interactive-YMKWKPD7.js");
|
|
4001
4001
|
await launchInteractiveWizard();
|
|
4002
4002
|
return;
|
|
4003
4003
|
}
|
|
@@ -9214,6 +9214,14 @@ function pruneFinishedRuns() {
|
|
|
9214
9214
|
}
|
|
9215
9215
|
}
|
|
9216
9216
|
}
|
|
9217
|
+
function getActiveRunTarget(indexJsonlPath) {
|
|
9218
|
+
for (const run2 of activeRuns.values()) {
|
|
9219
|
+
if (run2.outputDir && path16.join(run2.outputDir, "index.jsonl") === indexJsonlPath) {
|
|
9220
|
+
return run2.target;
|
|
9221
|
+
}
|
|
9222
|
+
}
|
|
9223
|
+
return void 0;
|
|
9224
|
+
}
|
|
9217
9225
|
async function discoverTargetsInProject(cwd) {
|
|
9218
9226
|
const repoRoot = await findRepoRoot(cwd) ?? cwd;
|
|
9219
9227
|
let targetsFilePath;
|
|
@@ -9375,12 +9383,18 @@ function registerEvalRoutes(app2, getCwd, options) {
|
|
|
9375
9383
|
return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
|
|
9376
9384
|
}
|
|
9377
9385
|
const args = buildCliArgs(body);
|
|
9386
|
+
const outputDir = body.output?.trim() ? path16.resolve(cwd, body.output.trim()) : buildDefaultRunDir(cwd);
|
|
9387
|
+
if (!body.output?.trim()) {
|
|
9388
|
+
args.push("--output", outputDir);
|
|
9389
|
+
}
|
|
9378
9390
|
const command2 = buildCliPreview(args);
|
|
9379
9391
|
const runId = generateRunId();
|
|
9380
9392
|
const run2 = {
|
|
9381
9393
|
id: runId,
|
|
9382
9394
|
status: "starting",
|
|
9383
9395
|
command: command2,
|
|
9396
|
+
target: body.target?.trim() || void 0,
|
|
9397
|
+
outputDir,
|
|
9384
9398
|
startedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9385
9399
|
stdout: "",
|
|
9386
9400
|
stderr: ""
|
|
@@ -9437,6 +9451,23 @@ Process error: ${err2.message}`;
|
|
|
9437
9451
|
return c4.json({ error: err2.message }, 500);
|
|
9438
9452
|
}
|
|
9439
9453
|
});
|
|
9454
|
+
app2.post("/api/eval/run/:id/stop", (c4) => {
|
|
9455
|
+
if (readOnly) {
|
|
9456
|
+
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
9457
|
+
}
|
|
9458
|
+
const id = c4.req.param("id");
|
|
9459
|
+
const run2 = activeRuns.get(id ?? "");
|
|
9460
|
+
if (!run2) return c4.json({ error: "Run not found" }, 404);
|
|
9461
|
+
if (run2.status === "finished" || run2.status === "failed" || !run2.process) {
|
|
9462
|
+
return c4.json({ stopped: false, reason: "already_terminal", status: run2.status });
|
|
9463
|
+
}
|
|
9464
|
+
try {
|
|
9465
|
+
run2.process.kill("SIGTERM");
|
|
9466
|
+
} catch (err2) {
|
|
9467
|
+
return c4.json({ error: err2.message }, 500);
|
|
9468
|
+
}
|
|
9469
|
+
return c4.json({ stopped: true, status: run2.status });
|
|
9470
|
+
});
|
|
9440
9471
|
app2.get("/api/eval/status/:id", (c4) => {
|
|
9441
9472
|
const id = c4.req.param("id");
|
|
9442
9473
|
const run2 = activeRuns.get(id ?? "");
|
|
@@ -9457,6 +9488,7 @@ Process error: ${err2.message}`;
|
|
|
9457
9488
|
id: r.id,
|
|
9458
9489
|
status: r.status,
|
|
9459
9490
|
command: r.command,
|
|
9491
|
+
target: r.target,
|
|
9460
9492
|
started_at: r.startedAt,
|
|
9461
9493
|
finished_at: r.finishedAt ?? null,
|
|
9462
9494
|
exit_code: r.exitCode ?? null
|
|
@@ -9521,12 +9553,18 @@ Process error: ${err2.message}`;
|
|
|
9521
9553
|
return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
|
|
9522
9554
|
}
|
|
9523
9555
|
const args = buildCliArgs(body);
|
|
9556
|
+
const outputDir = body.output?.trim() ? path16.resolve(cwd, body.output.trim()) : buildDefaultRunDir(cwd);
|
|
9557
|
+
if (!body.output?.trim()) {
|
|
9558
|
+
args.push("--output", outputDir);
|
|
9559
|
+
}
|
|
9524
9560
|
const command2 = buildCliPreview(args);
|
|
9525
9561
|
const runId = generateRunId();
|
|
9526
9562
|
const run2 = {
|
|
9527
9563
|
id: runId,
|
|
9528
9564
|
status: "starting",
|
|
9529
9565
|
command: command2,
|
|
9566
|
+
target: body.target?.trim() || void 0,
|
|
9567
|
+
outputDir,
|
|
9530
9568
|
startedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9531
9569
|
stdout: "",
|
|
9532
9570
|
stderr: ""
|
|
@@ -9571,6 +9609,23 @@ Process error: ${err2.message}`;
|
|
|
9571
9609
|
return c4.json({ error: err2.message }, 500);
|
|
9572
9610
|
}
|
|
9573
9611
|
});
|
|
9612
|
+
app2.post("/api/benchmarks/:benchmarkId/eval/run/:id/stop", (c4) => {
|
|
9613
|
+
if (readOnly) {
|
|
9614
|
+
return c4.json({ error: "Studio is running in read-only mode" }, 403);
|
|
9615
|
+
}
|
|
9616
|
+
const id = c4.req.param("id");
|
|
9617
|
+
const run2 = activeRuns.get(id ?? "");
|
|
9618
|
+
if (!run2) return c4.json({ error: "Run not found" }, 404);
|
|
9619
|
+
if (run2.status === "finished" || run2.status === "failed" || !run2.process) {
|
|
9620
|
+
return c4.json({ stopped: false, reason: "already_terminal", status: run2.status });
|
|
9621
|
+
}
|
|
9622
|
+
try {
|
|
9623
|
+
run2.process.kill("SIGTERM");
|
|
9624
|
+
} catch (err2) {
|
|
9625
|
+
return c4.json({ error: err2.message }, 500);
|
|
9626
|
+
}
|
|
9627
|
+
return c4.json({ stopped: true, status: run2.status });
|
|
9628
|
+
});
|
|
9574
9629
|
app2.get("/api/benchmarks/:benchmarkId/eval/status/:id", (c4) => {
|
|
9575
9630
|
const id = c4.req.param("id");
|
|
9576
9631
|
const run2 = activeRuns.get(id ?? "");
|
|
@@ -9591,6 +9646,7 @@ Process error: ${err2.message}`;
|
|
|
9591
9646
|
id: r.id,
|
|
9592
9647
|
status: r.status,
|
|
9593
9648
|
command: r.command,
|
|
9649
|
+
target: r.target,
|
|
9594
9650
|
started_at: r.startedAt,
|
|
9595
9651
|
finished_at: r.finishedAt ?? null,
|
|
9596
9652
|
exit_code: r.exitCode ?? null
|
|
@@ -9892,6 +9948,8 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
|
|
|
9892
9948
|
target = records[0].target;
|
|
9893
9949
|
experiment = records[0].experiment ?? experiment;
|
|
9894
9950
|
passRate = records.filter((r) => r.score >= passThreshold).length / records.length;
|
|
9951
|
+
} else {
|
|
9952
|
+
target = getActiveRunTarget(m.path);
|
|
9895
9953
|
}
|
|
9896
9954
|
} catch {
|
|
9897
9955
|
}
|
|
@@ -9943,6 +10001,10 @@ function deriveResumeMeta(cwd, manifestPath) {
|
|
|
9943
10001
|
if (typeof evalFile === "string" && evalFile.trim()) {
|
|
9944
10002
|
out.suite_filter = evalFile.trim();
|
|
9945
10003
|
}
|
|
10004
|
+
const planned = parsed.metadata?.planned_test_count;
|
|
10005
|
+
if (typeof planned === "number" && Number.isFinite(planned) && planned > 0) {
|
|
10006
|
+
out.planned_test_count = planned;
|
|
10007
|
+
}
|
|
9946
10008
|
}
|
|
9947
10009
|
} catch {
|
|
9948
10010
|
}
|
|
@@ -12046,4 +12108,4 @@ export {
|
|
|
12046
12108
|
preprocessArgv,
|
|
12047
12109
|
runCli
|
|
12048
12110
|
};
|
|
12049
|
-
//# sourceMappingURL=chunk-4QP2SFRS.js.map
|
|
12111
|
+
//# sourceMappingURL=chunk-XBUHMRX2.js.map
|