agentv 4.26.0-next.1 → 4.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. package/dist/{artifact-writer-EX6SOLA5.js → artifact-writer-VRDVTNSX.js} +6 -4
  2. package/dist/{chunk-NLS5DS52.js → chunk-6BAB3XBI.js} +2 -2
  3. package/dist/{chunk-NLS5DS52.js.map → chunk-6BAB3XBI.js.map} +1 -1
  4. package/dist/{chunk-63LUDTLO.js → chunk-JA4WQNE6.js} +18 -9
  5. package/dist/chunk-JA4WQNE6.js.map +1 -0
  6. package/dist/{chunk-A5INWVY3.js → chunk-TWBKRGXR.js} +43 -7
  7. package/dist/chunk-TWBKRGXR.js.map +1 -0
  8. package/dist/{chunk-ZGEGRM5T.js → chunk-VOOYHYPR.js} +30 -5
  9. package/dist/chunk-VOOYHYPR.js.map +1 -0
  10. package/dist/{chunk-4QP2SFRS.js → chunk-XBUHMRX2.js} +68 -6
  11. package/dist/{chunk-4QP2SFRS.js.map → chunk-XBUHMRX2.js.map} +1 -1
  12. package/dist/cli.js +20 -5
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{dist-SGFUYCY5.js → dist-COH43OLQ.js} +10 -4
  15. package/dist/index.js +5 -5
  16. package/dist/{interactive-CCOGDYRK.js → interactive-YMKWKPD7.js} +5 -5
  17. package/dist/studio/assets/index-BGFW04Lj.css +1 -0
  18. package/dist/studio/assets/{index-DRYeLvWd.js → index-DLabAPXU.js} +1 -1
  19. package/dist/studio/assets/{index-qd53QYOH.js → index-inPa17Qe.js} +17 -17
  20. package/dist/studio/index.html +2 -2
  21. package/dist/{ts-eval-loader-E6N374V2-VMFNVF5J.js → ts-eval-loader-BZ54W52K-KLIY3S3N.js} +2 -2
  22. package/package.json +1 -1
  23. package/dist/chunk-63LUDTLO.js.map +0 -1
  24. package/dist/chunk-A5INWVY3.js.map +0 -1
  25. package/dist/chunk-ZGEGRM5T.js.map +0 -1
  26. package/dist/studio/assets/index-DttU4JJk.css +0 -1
  27. package/dist/{artifact-writer-EX6SOLA5.js.map → artifact-writer-VRDVTNSX.js.map} +0 -0
  28. package/dist/{dist-SGFUYCY5.js.map → dist-COH43OLQ.js.map} +0 -0
  29. package/dist/{interactive-CCOGDYRK.js.map → interactive-YMKWKPD7.js.map} +0 -0
  30. package/dist/{ts-eval-loader-E6N374V2-VMFNVF5J.js.map → ts-eval-loader-BZ54W52K-KLIY3S3N.js.map} +0 -0
@@ -45,7 +45,7 @@ import {
45
45
  validateFileReferences,
46
46
  validateTargetsFile,
47
47
  validateWorkspacePaths
48
- } from "./chunk-63LUDTLO.js";
48
+ } from "./chunk-JA4WQNE6.js";
49
49
  import {
50
50
  RESULT_INDEX_FILENAME,
51
51
  aggregateRunDir,
@@ -53,7 +53,7 @@ import {
53
53
  resolveRunManifestPath,
54
54
  toSnakeCaseDeep as toSnakeCaseDeep2,
55
55
  writeArtifactsFromResults
56
- } from "./chunk-A5INWVY3.js";
56
+ } from "./chunk-TWBKRGXR.js";
57
57
  import {
58
58
  DEFAULT_CATEGORY,
59
59
  addBenchmark,
@@ -72,7 +72,7 @@ import {
72
72
  toTranscriptJsonLines,
73
73
  transpileEvalYamlFile,
74
74
  trimBaselineResult
75
- } from "./chunk-NLS5DS52.js";
75
+ } from "./chunk-6BAB3XBI.js";
76
76
  import {
77
77
  DEFAULT_THRESHOLD,
78
78
  createBuiltinRegistry,
@@ -102,7 +102,7 @@ import {
102
102
  runStartsWithAssertion,
103
103
  toCamelCaseDeep,
104
104
  toSnakeCaseDeep
105
- } from "./chunk-ZGEGRM5T.js";
105
+ } from "./chunk-VOOYHYPR.js";
106
106
  import {
107
107
  __commonJS,
108
108
  __require,
@@ -3997,7 +3997,7 @@ var evalRunCommand = command({
3997
3997
  },
3998
3998
  handler: async (args) => {
3999
3999
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4000
- const { launchInteractiveWizard } = await import("./interactive-CCOGDYRK.js");
4000
+ const { launchInteractiveWizard } = await import("./interactive-YMKWKPD7.js");
4001
4001
  await launchInteractiveWizard();
4002
4002
  return;
4003
4003
  }
@@ -9214,6 +9214,14 @@ function pruneFinishedRuns() {
9214
9214
  }
9215
9215
  }
9216
9216
  }
9217
+ function getActiveRunTarget(indexJsonlPath) {
9218
+ for (const run2 of activeRuns.values()) {
9219
+ if (run2.outputDir && path16.join(run2.outputDir, "index.jsonl") === indexJsonlPath) {
9220
+ return run2.target;
9221
+ }
9222
+ }
9223
+ return void 0;
9224
+ }
9217
9225
  async function discoverTargetsInProject(cwd) {
9218
9226
  const repoRoot = await findRepoRoot(cwd) ?? cwd;
9219
9227
  let targetsFilePath;
@@ -9375,12 +9383,18 @@ function registerEvalRoutes(app2, getCwd, options) {
9375
9383
  return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
9376
9384
  }
9377
9385
  const args = buildCliArgs(body);
9386
+ const outputDir = body.output?.trim() ? path16.resolve(cwd, body.output.trim()) : buildDefaultRunDir(cwd);
9387
+ if (!body.output?.trim()) {
9388
+ args.push("--output", outputDir);
9389
+ }
9378
9390
  const command2 = buildCliPreview(args);
9379
9391
  const runId = generateRunId();
9380
9392
  const run2 = {
9381
9393
  id: runId,
9382
9394
  status: "starting",
9383
9395
  command: command2,
9396
+ target: body.target?.trim() || void 0,
9397
+ outputDir,
9384
9398
  startedAt: (/* @__PURE__ */ new Date()).toISOString(),
9385
9399
  stdout: "",
9386
9400
  stderr: ""
@@ -9437,6 +9451,23 @@ Process error: ${err2.message}`;
9437
9451
  return c4.json({ error: err2.message }, 500);
9438
9452
  }
9439
9453
  });
9454
+ app2.post("/api/eval/run/:id/stop", (c4) => {
9455
+ if (readOnly) {
9456
+ return c4.json({ error: "Studio is running in read-only mode" }, 403);
9457
+ }
9458
+ const id = c4.req.param("id");
9459
+ const run2 = activeRuns.get(id ?? "");
9460
+ if (!run2) return c4.json({ error: "Run not found" }, 404);
9461
+ if (run2.status === "finished" || run2.status === "failed" || !run2.process) {
9462
+ return c4.json({ stopped: false, reason: "already_terminal", status: run2.status });
9463
+ }
9464
+ try {
9465
+ run2.process.kill("SIGTERM");
9466
+ } catch (err2) {
9467
+ return c4.json({ error: err2.message }, 500);
9468
+ }
9469
+ return c4.json({ stopped: true, status: run2.status });
9470
+ });
9440
9471
  app2.get("/api/eval/status/:id", (c4) => {
9441
9472
  const id = c4.req.param("id");
9442
9473
  const run2 = activeRuns.get(id ?? "");
@@ -9457,6 +9488,7 @@ Process error: ${err2.message}`;
9457
9488
  id: r.id,
9458
9489
  status: r.status,
9459
9490
  command: r.command,
9491
+ target: r.target,
9460
9492
  started_at: r.startedAt,
9461
9493
  finished_at: r.finishedAt ?? null,
9462
9494
  exit_code: r.exitCode ?? null
@@ -9521,12 +9553,18 @@ Process error: ${err2.message}`;
9521
9553
  return c4.json({ error: "Cannot locate agentv CLI entry point" }, 500);
9522
9554
  }
9523
9555
  const args = buildCliArgs(body);
9556
+ const outputDir = body.output?.trim() ? path16.resolve(cwd, body.output.trim()) : buildDefaultRunDir(cwd);
9557
+ if (!body.output?.trim()) {
9558
+ args.push("--output", outputDir);
9559
+ }
9524
9560
  const command2 = buildCliPreview(args);
9525
9561
  const runId = generateRunId();
9526
9562
  const run2 = {
9527
9563
  id: runId,
9528
9564
  status: "starting",
9529
9565
  command: command2,
9566
+ target: body.target?.trim() || void 0,
9567
+ outputDir,
9530
9568
  startedAt: (/* @__PURE__ */ new Date()).toISOString(),
9531
9569
  stdout: "",
9532
9570
  stderr: ""
@@ -9571,6 +9609,23 @@ Process error: ${err2.message}`;
9571
9609
  return c4.json({ error: err2.message }, 500);
9572
9610
  }
9573
9611
  });
9612
+ app2.post("/api/benchmarks/:benchmarkId/eval/run/:id/stop", (c4) => {
9613
+ if (readOnly) {
9614
+ return c4.json({ error: "Studio is running in read-only mode" }, 403);
9615
+ }
9616
+ const id = c4.req.param("id");
9617
+ const run2 = activeRuns.get(id ?? "");
9618
+ if (!run2) return c4.json({ error: "Run not found" }, 404);
9619
+ if (run2.status === "finished" || run2.status === "failed" || !run2.process) {
9620
+ return c4.json({ stopped: false, reason: "already_terminal", status: run2.status });
9621
+ }
9622
+ try {
9623
+ run2.process.kill("SIGTERM");
9624
+ } catch (err2) {
9625
+ return c4.json({ error: err2.message }, 500);
9626
+ }
9627
+ return c4.json({ stopped: true, status: run2.status });
9628
+ });
9574
9629
  app2.get("/api/benchmarks/:benchmarkId/eval/status/:id", (c4) => {
9575
9630
  const id = c4.req.param("id");
9576
9631
  const run2 = activeRuns.get(id ?? "");
@@ -9591,6 +9646,7 @@ Process error: ${err2.message}`;
9591
9646
  id: r.id,
9592
9647
  status: r.status,
9593
9648
  command: r.command,
9649
+ target: r.target,
9594
9650
  started_at: r.startedAt,
9595
9651
  finished_at: r.finishedAt ?? null,
9596
9652
  exit_code: r.exitCode ?? null
@@ -9892,6 +9948,8 @@ async function handleRuns(c4, { searchDir, agentvDir }) {
9892
9948
  target = records[0].target;
9893
9949
  experiment = records[0].experiment ?? experiment;
9894
9950
  passRate = records.filter((r) => r.score >= passThreshold).length / records.length;
9951
+ } else {
9952
+ target = getActiveRunTarget(m.path);
9895
9953
  }
9896
9954
  } catch {
9897
9955
  }
@@ -9943,6 +10001,10 @@ function deriveResumeMeta(cwd, manifestPath) {
9943
10001
  if (typeof evalFile === "string" && evalFile.trim()) {
9944
10002
  out.suite_filter = evalFile.trim();
9945
10003
  }
10004
+ const planned = parsed.metadata?.planned_test_count;
10005
+ if (typeof planned === "number" && Number.isFinite(planned) && planned > 0) {
10006
+ out.planned_test_count = planned;
10007
+ }
9946
10008
  }
9947
10009
  } catch {
9948
10010
  }
@@ -12046,4 +12108,4 @@ export {
12046
12108
  preprocessArgv,
12047
12109
  runCli
12048
12110
  };
12049
- //# sourceMappingURL=chunk-4QP2SFRS.js.map
12111
+ //# sourceMappingURL=chunk-XBUHMRX2.js.map