agentv 3.10.3 → 3.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/dist/{agentv-provider-NFFLXG5M-TJAWCWCX.js → agentv-provider-MIDKLYIH-6LIYKQRP.js} +2 -1
  2. package/dist/chunk-2QFWRIYL.js +186 -0
  3. package/dist/chunk-2QFWRIYL.js.map +1 -0
  4. package/dist/chunk-2RMPO6LY.js +747 -0
  5. package/dist/chunk-2RMPO6LY.js.map +1 -0
  6. package/dist/chunk-3Q7WIXT4.js +4846 -0
  7. package/dist/chunk-3Q7WIXT4.js.map +1 -0
  8. package/dist/chunk-73O2DCJP.js +1274 -0
  9. package/dist/chunk-73O2DCJP.js.map +1 -0
  10. package/dist/chunk-AUKF3Y3W.js +212 -0
  11. package/dist/chunk-AUKF3Y3W.js.map +1 -0
  12. package/dist/{chunk-XOSNETAV.js → chunk-BAUNAXHT.js} +1 -1
  13. package/dist/chunk-BRH7SIDP.js +133 -0
  14. package/dist/chunk-BRH7SIDP.js.map +1 -0
  15. package/dist/chunk-BXM4I3BM.js +526 -0
  16. package/dist/chunk-BXM4I3BM.js.map +1 -0
  17. package/dist/{chunk-U556GRI3.js → chunk-CKMAM2GD.js} +76 -34
  18. package/dist/chunk-CKMAM2GD.js.map +1 -0
  19. package/dist/chunk-FHTURHTY.js +546 -0
  20. package/dist/chunk-FHTURHTY.js.map +1 -0
  21. package/dist/chunk-GJFXQQWG.js +21 -0
  22. package/dist/chunk-GJFXQQWG.js.map +1 -0
  23. package/dist/chunk-HKMLG4KF.js +38 -0
  24. package/dist/chunk-HKMLG4KF.js.map +1 -0
  25. package/dist/chunk-JGU3PVA4.js +133 -0
  26. package/dist/chunk-JGU3PVA4.js.map +1 -0
  27. package/dist/chunk-LHU5FGVZ.js +4804 -0
  28. package/dist/chunk-LHU5FGVZ.js.map +1 -0
  29. package/dist/chunk-OL2WGI6E.js +149 -0
  30. package/dist/chunk-OL2WGI6E.js.map +1 -0
  31. package/dist/chunk-ONETZL6N.js +15 -0
  32. package/dist/chunk-ONETZL6N.js.map +1 -0
  33. package/dist/{chunk-JUQCB3ZW.js → chunk-OYD2NB55.js} +1292 -896
  34. package/dist/chunk-OYD2NB55.js.map +1 -0
  35. package/dist/chunk-QV4UGEN6.js +320 -0
  36. package/dist/chunk-QV4UGEN6.js.map +1 -0
  37. package/dist/chunk-QXLDKGF3.js +46 -0
  38. package/dist/chunk-QXLDKGF3.js.map +1 -0
  39. package/dist/chunk-U6VEM66A.js +63 -0
  40. package/dist/chunk-U6VEM66A.js.map +1 -0
  41. package/dist/chunk-UALXHIMX.js +48 -0
  42. package/dist/chunk-UALXHIMX.js.map +1 -0
  43. package/dist/chunk-UGXG73VF.js +55 -0
  44. package/dist/chunk-UGXG73VF.js.map +1 -0
  45. package/dist/chunk-UHP5KEDL.js +38 -0
  46. package/dist/chunk-UHP5KEDL.js.map +1 -0
  47. package/dist/{chunk-7LC3VNOC.js → chunk-V2S5CZU3.js} +1141 -60
  48. package/dist/chunk-V2S5CZU3.js.map +1 -0
  49. package/dist/chunk-WVSXFZWP.js +204 -0
  50. package/dist/chunk-WVSXFZWP.js.map +1 -0
  51. package/dist/chunk-XSUMCWKO.js +30 -0
  52. package/dist/chunk-XSUMCWKO.js.map +1 -0
  53. package/dist/chunk-XUO7ZEHU.js +181 -0
  54. package/dist/chunk-XUO7ZEHU.js.map +1 -0
  55. package/dist/chunk-YSGUX5JT.js +1002 -0
  56. package/dist/chunk-YSGUX5JT.js.map +1 -0
  57. package/dist/cli.js +5 -4
  58. package/dist/cli.js.map +1 -1
  59. package/dist/dist-3PCP5TNF-RYMVLILE.js +25785 -0
  60. package/dist/dist-3PCP5TNF-RYMVLILE.js.map +1 -0
  61. package/dist/dist-BOIN5LC5-T5UWUK43.js +76113 -0
  62. package/dist/dist-BOIN5LC5-T5UWUK43.js.map +1 -0
  63. package/dist/dist-LXPDQOBI-4V5J2WDS.js +13 -0
  64. package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +1 -0
  65. package/dist/{dist-2X7A3TTC.js → dist-VUPMLHIV.js} +4 -3
  66. package/dist/dist-es-4WSJUIYR-XKIX65IH.js +69 -0
  67. package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +1 -0
  68. package/dist/dist-es-7K7MKRME-CCMAZOQC.js +355 -0
  69. package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +1 -0
  70. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +191 -0
  71. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +1 -0
  72. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +164 -0
  73. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +1 -0
  74. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +355 -0
  75. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +1 -0
  76. package/dist/dist-es-L6R4FPI5-IKIRYN45.js +472 -0
  77. package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +1 -0
  78. package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +24 -0
  79. package/dist/dist-es-TRIVUKV4-2J47CDXR.js +85 -0
  80. package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +1 -0
  81. package/dist/dist-es-UEEUAV34-IZQDTAMW.js +16 -0
  82. package/dist/{esm-5Q4BZALM-5REQWAUV.js → esm-OJ2BXJK4-YKEI3Z7E.js} +3 -2
  83. package/dist/{esm-5Q4BZALM-5REQWAUV.js.map → esm-OJ2BXJK4-YKEI3Z7E.js.map} +1 -1
  84. package/dist/{esm-CZAWIY6F.js → esm-UYZ3HJBU.js} +2 -2
  85. package/dist/event-streams-NZADSH5J-6MOSNEV3.js +247 -0
  86. package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +1 -0
  87. package/dist/index.js +5 -4
  88. package/dist/{interactive-CSA4KIND.js → interactive-FZJANO4A.js} +5 -4
  89. package/dist/{interactive-CSA4KIND.js.map → interactive-FZJANO4A.js.map} +1 -1
  90. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +738 -0
  91. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +1 -0
  92. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +387 -0
  93. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +1 -0
  94. package/dist/{otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js → otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js} +2 -1
  95. package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +1 -0
  96. package/dist/signin-2ANR4DVS-K5VGBEJF.js +556 -0
  97. package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +1 -0
  98. package/dist/{simple-trace-file-exporter-S76DMABU-5FCJESD2.js → simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js} +2 -1
  99. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +1 -0
  100. package/dist/{src-ML4D2MC2.js → src-PXDA7QIS.js} +2 -2
  101. package/dist/src-SLOMUG7K-CV5JG263.js +1408 -0
  102. package/dist/src-SLOMUG7K-CV5JG263.js.map +1 -0
  103. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +708 -0
  104. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +1 -0
  105. package/dist/sts-X7JGSP4H-PDAAYDDH.js +2917 -0
  106. package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +1 -0
  107. package/dist/templates/.agentv/config.yaml +4 -13
  108. package/dist/templates/.agentv/targets.yaml +0 -16
  109. package/dist/undici-VAR2VUJI-6PAOUXZC.js +23388 -0
  110. package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +1 -0
  111. package/package.json +11 -3
  112. package/dist/chunk-7LC3VNOC.js.map +0 -1
  113. package/dist/chunk-JUQCB3ZW.js.map +0 -1
  114. package/dist/chunk-U556GRI3.js.map +0 -1
  115. package/dist/templates/.agentv/.env.example +0 -23
  116. /package/dist/{agentv-provider-NFFLXG5M-TJAWCWCX.js.map → agentv-provider-MIDKLYIH-6LIYKQRP.js.map} +0 -0
  117. /package/dist/{chunk-XOSNETAV.js.map → chunk-BAUNAXHT.js.map} +0 -0
  118. /package/dist/{dist-2X7A3TTC.js.map → dist-VUPMLHIV.js.map} +0 -0
  119. /package/dist/{esm-CZAWIY6F.js.map → dist-es-SRVEB5QV-Q4CTC2HX.js.map} +0 -0
  120. /package/dist/{otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map → dist-es-UEEUAV34-IZQDTAMW.js.map} +0 -0
  121. /package/dist/{simple-trace-file-exporter-S76DMABU-5FCJESD2.js.map → esm-UYZ3HJBU.js.map} +0 -0
  122. /package/dist/{src-ML4D2MC2.js.map → src-PXDA7QIS.js.map} +0 -0
@@ -27,12 +27,12 @@ import {
27
27
  subscribeToCopilotCliLogEntries,
28
28
  subscribeToCopilotSdkLogEntries,
29
29
  subscribeToPiLogEntries
30
- } from "./chunk-JUQCB3ZW.js";
30
+ } from "./chunk-OYD2NB55.js";
31
31
 
32
32
  // package.json
33
33
  var package_default = {
34
34
  name: "agentv",
35
- version: "3.10.3",
35
+ version: "3.11.1",
36
36
  description: "CLI entry point for AgentV",
37
37
  type: "module",
38
38
  repository: {
@@ -63,18 +63,26 @@ var package_default = {
63
63
  "@ai-sdk/openai": "^3.0.0",
64
64
  "@anthropic-ai/claude-agent-sdk": "^0.2.49",
65
65
  "@github/copilot-sdk": "^0.1.25",
66
+ "@hono/node-server": "^1.19.11",
66
67
  "@inquirer/prompts": "^8.2.1",
67
- "@mariozechner/pi-agent-core": "^0.54.2",
68
- "@mariozechner/pi-ai": "^0.54.2",
69
68
  "@openai/codex-sdk": "^0.104.0",
70
69
  "cmd-ts": "^0.14.3",
71
70
  dotenv: "^16.4.5",
72
71
  "fast-glob": "^3.3.3",
72
+ hono: "^4.12.9",
73
73
  json5: "^2.2.3",
74
74
  micromatch: "^4.0.8",
75
75
  semver: "^7.7.4",
76
76
  yaml: "^2.6.1"
77
77
  },
78
+ peerDependencies: {
79
+ "@mariozechner/pi-coding-agent": "^0.62.0"
80
+ },
81
+ peerDependenciesMeta: {
82
+ "@mariozechner/pi-coding-agent": {
83
+ optional: true
84
+ }
85
+ },
78
86
  devDependencies: {
79
87
  "@agentv/core": "workspace:*",
80
88
  "@types/semver": "^7.7.1",
@@ -202,7 +210,7 @@ async function discoverTargetsFile(options) {
202
210
  }
203
211
 
204
212
  // src/commands/eval/run-eval.ts
205
- import { constants as constants4 } from "node:fs";
213
+ import { constants as constants4, mkdirSync } from "node:fs";
206
214
  import { access as access4 } from "node:fs/promises";
207
215
  import path13 from "node:path";
208
216
  import { pathToFileURL } from "node:url";
@@ -478,6 +486,33 @@ function buildBenchmarkArtifact(results, evalFile = "") {
478
486
  notes
479
487
  };
480
488
  }
489
+ function buildAggregateGradingArtifact(results) {
490
+ const assertions = [];
491
+ for (const result of results) {
492
+ if (!result.assertions) continue;
493
+ const testId = result.testId ?? "unknown";
494
+ for (const a of result.assertions) {
495
+ assertions.push({
496
+ test_id: testId,
497
+ text: a.text,
498
+ passed: a.passed,
499
+ evidence: a.evidence ?? ""
500
+ });
501
+ }
502
+ }
503
+ const passed = assertions.filter((a) => a.passed).length;
504
+ const failed = assertions.filter((a) => !a.passed).length;
505
+ const total = assertions.length;
506
+ return {
507
+ assertions,
508
+ summary: {
509
+ passed,
510
+ failed,
511
+ total,
512
+ pass_rate: total > 0 ? Math.round(passed / total * 1e3) / 1e3 : 0
513
+ }
514
+ };
515
+ }
481
516
  function toCamelCase(str) {
482
517
  return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
483
518
  }
@@ -518,6 +553,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
518
553
  const gradingDir = path3.join(outputDir, "grading");
519
554
  const timingPath = path3.join(outputDir, "timing.json");
520
555
  const benchmarkPath = path3.join(outputDir, "benchmark.json");
556
+ const aggregateGradingPath = path3.join(outputDir, "grading.json");
521
557
  await mkdir(gradingDir, { recursive: true });
522
558
  for (const result of results) {
523
559
  const grading = buildGradingArtifact(result);
@@ -532,7 +568,10 @@ async function writeArtifactsFromResults(results, outputDir, options) {
532
568
  const benchmark = buildBenchmarkArtifact(results, options?.evalFile);
533
569
  await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
534
570
  `, "utf8");
535
- return { gradingDir, timingPath, benchmarkPath };
571
+ const aggregateGrading = buildAggregateGradingArtifact(results);
572
+ await writeFile(aggregateGradingPath, `${JSON.stringify(aggregateGrading, null, 2)}
573
+ `, "utf8");
574
+ return { gradingDir, timingPath, benchmarkPath, aggregateGradingPath };
536
575
  }
537
576
 
538
577
  // src/commands/eval/benchmark-writer.ts
@@ -1646,20 +1685,6 @@ async function createOutputWriter(filePath, format) {
1646
1685
  }
1647
1686
  }
1648
1687
  }
1649
- function getDefaultExtension(format) {
1650
- switch (format) {
1651
- case "jsonl":
1652
- return ".jsonl";
1653
- case "yaml":
1654
- return ".yaml";
1655
- case "html":
1656
- return ".html";
1657
- default: {
1658
- const exhaustiveCheck = format;
1659
- throw new Error(`Unsupported output format: ${exhaustiveCheck}`);
1660
- }
1661
- }
1662
- }
1663
1688
  var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".jsonl", ".json", ".xml", ".yaml", ".yml", ".html", ".htm"]);
1664
1689
  function createWriterFromPath(filePath) {
1665
1690
  const ext = path10.extname(filePath).toLowerCase();
@@ -1866,6 +1891,12 @@ async function loadNonErrorResults(jsonlPath) {
1866
1891
  import { mkdir as mkdir7, readFile as readFile2, writeFile as writeFile6 } from "node:fs/promises";
1867
1892
  import path11 from "node:path";
1868
1893
  var CACHE_FILENAME = "cache.json";
1894
+ function resolveRunCacheFile(cache) {
1895
+ if (cache.lastRunDir) {
1896
+ return path11.join(cache.lastRunDir, "results.jsonl");
1897
+ }
1898
+ return cache.lastResultFile ?? "";
1899
+ }
1869
1900
  function cachePath(cwd) {
1870
1901
  return path11.join(cwd, ".agentv", CACHE_FILENAME);
1871
1902
  }
@@ -1877,11 +1908,11 @@ async function loadRunCache(cwd) {
1877
1908
  return void 0;
1878
1909
  }
1879
1910
  }
1880
- async function saveRunCache(cwd, resultFile) {
1911
+ async function saveRunCache(cwd, runDir) {
1881
1912
  const dir = path11.join(cwd, ".agentv");
1882
1913
  await mkdir7(dir, { recursive: true });
1883
1914
  const cache = {
1884
- lastResultFile: resultFile,
1915
+ lastRunDir: runDir,
1885
1916
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1886
1917
  };
1887
1918
  await writeFile6(cachePath(cwd), `${JSON.stringify(cache, null, 2)}
@@ -3787,10 +3818,10 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
3787
3818
  // Precedence: CLI > YAML config > TS config
3788
3819
  otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0) ?? (config?.execution?.otelFile ? resolveTimestampPlaceholder(config.execution.otelFile) : void 0),
3789
3820
  traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0) ?? (config?.execution?.traceFile ? resolveTimestampPlaceholder(config.execution.traceFile) : void 0),
3790
- exportOtel: normalizeBoolean(rawOptions.exportOtel),
3791
- otelBackend: normalizeString(rawOptions.otelBackend),
3792
- otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent),
3793
- otelGroupTurns: normalizeBoolean(rawOptions.otelGroupTurns),
3821
+ exportOtel: normalizeBoolean(rawOptions.exportOtel) || yamlExecution?.export_otel === true,
3822
+ otelBackend: normalizeString(rawOptions.otelBackend) ?? yamlExecution?.otel_backend,
3823
+ otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent) || yamlExecution?.otel_capture_content === true,
3824
+ otelGroupTurns: normalizeBoolean(rawOptions.otelGroupTurns) || yamlExecution?.otel_group_turns === true,
3794
3825
  retryErrors: normalizeString(rawOptions.retryErrors),
3795
3826
  workspaceMode,
3796
3827
  workspacePath,
@@ -3808,11 +3839,12 @@ async function ensureFileExists(filePath, description) {
3808
3839
  throw new Error(`${description} not found: ${filePath}`);
3809
3840
  }
3810
3841
  }
3811
- function buildDefaultOutputPath(cwd, format) {
3842
+ function buildDefaultOutputPath(cwd) {
3812
3843
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
3813
- const baseName = "eval";
3814
- const extension = getDefaultExtension(format);
3815
- return path13.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
3844
+ const dirName = `eval_${timestamp}`;
3845
+ const runDir = path13.join(cwd, ".agentv", "results", "raw", dirName);
3846
+ mkdirSync(runDir, { recursive: true });
3847
+ return path13.join(runDir, "results.jsonl");
3816
3848
  }
3817
3849
  function createProgressReporter(maxWorkers, options) {
3818
3850
  const display = new ProgressDisplay(maxWorkers, options);
@@ -4155,7 +4187,7 @@ async function runEvalCommand(input) {
4155
4187
  const useFileExport = !!(options.otelFile || options.traceFile);
4156
4188
  if (options.exportOtel || useFileExport) {
4157
4189
  try {
4158
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-2X7A3TTC.js");
4190
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VUPMLHIV.js");
4159
4191
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4160
4192
  let headers = {};
4161
4193
  if (options.otelBackend) {
@@ -4196,7 +4228,7 @@ async function runEvalCommand(input) {
4196
4228
  otelExporter = null;
4197
4229
  }
4198
4230
  }
4199
- const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
4231
+ const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd);
4200
4232
  const extraOutputPaths = options.outputPaths.map((p) => path13.resolve(p));
4201
4233
  const allOutputPaths = extraOutputPaths.length > 0 ? [outputPath, ...extraOutputPaths] : [outputPath];
4202
4234
  const uniqueOutputPaths = [...new Set(allOutputPaths)];
@@ -4430,7 +4462,15 @@ Results written to: ${outputPath}`);
4430
4462
  console.log(` ${p}`);
4431
4463
  }
4432
4464
  }
4433
- await saveRunCache(cwd, outputPath).catch(() => void 0);
4465
+ const runDir = path13.dirname(outputPath);
4466
+ await saveRunCache(cwd, runDir).catch(() => void 0);
4467
+ if (outputPath.endsWith(".jsonl")) {
4468
+ const { writeFile: writeFile7 } = await import("node:fs/promises");
4469
+ const gradingPath = path13.join(path13.dirname(outputPath), "grading.json");
4470
+ const aggregateGrading = buildAggregateGradingArtifact(allResults);
4471
+ await writeFile7(gradingPath, `${JSON.stringify(aggregateGrading, null, 2)}
4472
+ `, "utf8");
4473
+ }
4434
4474
  }
4435
4475
  if (summary.executionErrorCount > 0 && !options.retryErrors) {
4436
4476
  const evalFileArgs = resolvedTestFiles.map((f) => path13.relative(cwd, f)).join(" ");
@@ -4488,7 +4528,9 @@ export {
4488
4528
  buildGradingArtifact,
4489
4529
  buildTimingArtifact,
4490
4530
  buildBenchmarkArtifact,
4531
+ buildAggregateGradingArtifact,
4491
4532
  parseJsonlResults,
4533
+ resolveRunCacheFile,
4492
4534
  loadRunCache,
4493
4535
  detectFileType,
4494
4536
  validateEvalFile,
@@ -4500,4 +4542,4 @@ export {
4500
4542
  selectTarget,
4501
4543
  runEvalCommand
4502
4544
  };
4503
- //# sourceMappingURL=chunk-U556GRI3.js.map
4545
+ //# sourceMappingURL=chunk-CKMAM2GD.js.map