agentv 4.7.0 → 4.8.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -32,12 +32,12 @@ import {
32
32
  subscribeToCopilotCliLogEntries,
33
33
  subscribeToCopilotSdkLogEntries,
34
34
  subscribeToPiLogEntries
35
- } from "./chunk-I6UE4LHZ.js";
35
+ } from "./chunk-H4GQXK5M.js";
36
36
 
37
37
  // package.json
38
38
  var package_default = {
39
39
  name: "agentv",
40
- version: "4.7.0",
40
+ version: "4.8.0",
41
41
  description: "CLI entry point for AgentV",
42
42
  type: "module",
43
43
  repository: {
@@ -1712,8 +1712,9 @@ var JunitWriter = class _JunitWriter {
1712
1712
  }
1713
1713
  return ` <testcase name="${escapeXml(r.testId)}" classname="${escapeXml(suiteName)}" time="${time}">${inner}</testcase>`;
1714
1714
  });
1715
+ const suiteTime = results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1e3;
1715
1716
  suiteXmls.push(
1716
- ` <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}">
1717
+ ` <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}" time="${suiteTime.toFixed(3)}">
1717
1718
  ${testCases.join("\n")}
1718
1719
  </testsuite>`
1719
1720
  );
@@ -1723,8 +1724,9 @@ ${testCases.join("\n")}
1723
1724
  const totalFailures = this.results.filter(
1724
1725
  (r) => r.executionStatus !== "execution_error" && r.score < this.threshold
1725
1726
  ).length;
1727
+ const totalTime = this.results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1e3;
1726
1728
  const xml = `<?xml version="1.0" encoding="UTF-8"?>
1727
- <testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}">
1729
+ <testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}" time="${totalTime.toFixed(3)}">
1728
1730
  ${suiteXmls.join("\n")}
1729
1731
  </testsuites>
1730
1732
  `;
@@ -1823,17 +1825,6 @@ function createWriterFromPath(filePath, options) {
1823
1825
  );
1824
1826
  }
1825
1827
  }
1826
- async function createMultiWriter(filePaths, options) {
1827
- const writers = await Promise.all(filePaths.map((fp) => createWriterFromPath(fp, options)));
1828
- return {
1829
- async append(result) {
1830
- await Promise.all(writers.map((w) => w.append(result)));
1831
- },
1832
- async close() {
1833
- await Promise.all(writers.map((w) => w.close()));
1834
- }
1835
- };
1836
- }
1837
1828
 
1838
1829
  // src/commands/eval/progress-display.ts
1839
1830
  var ANSI_BOLD = "\x1B[1m";
@@ -3141,7 +3132,8 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
3141
3132
  "log_dir",
3142
3133
  "log_format",
3143
3134
  "system_prompt",
3144
- "workspace_template"
3135
+ "workspace_template",
3136
+ "byok"
3145
3137
  ]);
3146
3138
  var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
3147
3139
  ...COMMON_SETTINGS,
@@ -4015,15 +4007,12 @@ function trimOutputMessages(output, outputMessages) {
4015
4007
  return sliced.map((m) => ({ role: m.role, content: m.content }));
4016
4008
  }
4017
4009
  function normalizeOptions(rawOptions, config, yamlExecution) {
4018
- const cliFormat = normalizeString(rawOptions.outputFormat);
4019
- const configFormat = config?.output?.format;
4020
- const formatStr = cliFormat ?? configFormat ?? "jsonl";
4021
- const format = formatStr === "yaml" ? "yaml" : "jsonl";
4022
4010
  const cliWorkers = normalizeOptionalNumber(rawOptions.workers);
4023
4011
  const configWorkers = config?.execution?.workers;
4024
4012
  const workers = cliWorkers ?? configWorkers ?? 0;
4025
- const rawOutputPaths = rawOptions.output;
4026
- const outputPaths = Array.isArray(rawOutputPaths) ? rawOutputPaths.filter((v) => typeof v === "string" && v.trim().length > 0) : [];
4013
+ const cliOutputDir = normalizeString(rawOptions.output);
4014
+ const rawExportPaths = rawOptions.export;
4015
+ const exportPaths = Array.isArray(rawExportPaths) ? rawExportPaths.filter((v) => typeof v === "string" && v.trim().length > 0) : [];
4027
4016
  const rawTarget = rawOptions.target;
4028
4017
  let cliTargets = [];
4029
4018
  let singleTarget;
@@ -4065,9 +4054,9 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4065
4054
  targetsPath: normalizeString(rawOptions.targets),
4066
4055
  filter: normalizeFilter(rawOptions.filter),
4067
4056
  workers: workers > 0 ? workers : void 0,
4057
+ outputDir: cliOutputDir,
4068
4058
  outPath: cliOut ?? configOut,
4069
- outputPaths,
4070
- format,
4059
+ exportPaths,
4071
4060
  dryRun: normalizeBoolean(rawOptions.dryRun),
4072
4061
  dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
4073
4062
  dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
@@ -4484,13 +4473,48 @@ async function runEvalCommand(input) {
4484
4473
  if (options.verbose) {
4485
4474
  console.log(`Repository root: ${repoRoot}`);
4486
4475
  }
4487
- const usesDefaultArtifactWorkspace = !options.outPath;
4488
- const outputPath = options.outPath ? path15.resolve(options.outPath) : buildDefaultOutputPath(cwd);
4476
+ if (options.outPath) {
4477
+ console.warn("Warning: --out is deprecated. Use --output <dir> to set the artifact directory.");
4478
+ }
4479
+ if (options.artifacts) {
4480
+ console.warn(
4481
+ "Warning: --artifacts is deprecated. Use --output <dir> to set the artifact directory."
4482
+ );
4483
+ }
4484
+ if (options.benchmarkJson) {
4485
+ console.warn(
4486
+ "Warning: --benchmark-json is deprecated. benchmark.json is always written to the artifact directory."
4487
+ );
4488
+ }
4489
+ if (normalizeString(input.rawOptions.outputFormat)) {
4490
+ console.warn(
4491
+ "Warning: --output-format is deprecated. The artifact directory always uses JSONL."
4492
+ );
4493
+ }
4494
+ const explicitDir = options.outputDir ?? options.artifacts;
4495
+ let runDir;
4496
+ let outputPath;
4497
+ let usesDefaultArtifactWorkspace;
4498
+ if (explicitDir) {
4499
+ runDir = path15.resolve(explicitDir);
4500
+ mkdirSync(runDir, { recursive: true });
4501
+ outputPath = path15.join(runDir, "index.jsonl");
4502
+ usesDefaultArtifactWorkspace = true;
4503
+ } else if (options.outPath) {
4504
+ outputPath = path15.resolve(options.outPath);
4505
+ runDir = path15.dirname(outputPath);
4506
+ mkdirSync(runDir, { recursive: true });
4507
+ usesDefaultArtifactWorkspace = false;
4508
+ } else {
4509
+ outputPath = buildDefaultOutputPath(cwd);
4510
+ runDir = path15.dirname(outputPath);
4511
+ usesDefaultArtifactWorkspace = true;
4512
+ }
4489
4513
  let otelExporter = null;
4490
4514
  const useFileExport = !!options.otelFile;
4491
4515
  if (options.exportOtel || useFileExport) {
4492
4516
  try {
4493
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-XRVHRBJF.js");
4517
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-QXVR2ZRH.js");
4494
4518
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4495
4519
  let headers = {};
4496
4520
  if (options.otelBackend) {
@@ -4531,16 +4555,11 @@ async function runEvalCommand(input) {
4531
4555
  }
4532
4556
  }
4533
4557
  const primaryWritePath = outputPath;
4534
- const extraOutputPaths = options.outputPaths.map((p) => path15.resolve(p));
4535
- const allOutputPaths = extraOutputPaths.length > 0 ? [primaryWritePath, ...extraOutputPaths] : [primaryWritePath];
4536
- const uniqueOutputPaths = [...new Set(allOutputPaths)];
4537
- const reportedOutputPaths = extraOutputPaths.length > 0 ? [outputPath, ...extraOutputPaths] : [outputPath];
4538
- const uniqueReportedOutputPaths = [...new Set(reportedOutputPaths)];
4539
- if (uniqueOutputPaths.length === 1) {
4540
- console.log(`Output path: ${outputPath}`);
4541
- } else {
4542
- console.log("Output paths:");
4543
- for (const p of uniqueReportedOutputPaths) {
4558
+ const resolvedExportPaths = options.exportPaths.map((p) => path15.resolve(p));
4559
+ console.log(`Artifact directory: ${runDir}`);
4560
+ if (resolvedExportPaths.length > 0) {
4561
+ console.log("Export files:");
4562
+ for (const p of resolvedExportPaths) {
4544
4563
  console.log(` ${p}`);
4545
4564
  }
4546
4565
  }
@@ -4621,12 +4640,7 @@ async function runEvalCommand(input) {
4621
4640
  throw new Error("--threshold must be between 0 and 1");
4622
4641
  }
4623
4642
  const writerOptions = resolvedThreshold !== void 0 ? { threshold: resolvedThreshold } : void 0;
4624
- let outputWriter;
4625
- if (uniqueOutputPaths.length === 1) {
4626
- outputWriter = await createOutputWriter(primaryWritePath, options.format);
4627
- } else {
4628
- outputWriter = await createMultiWriter(uniqueOutputPaths, writerOptions);
4629
- }
4643
+ const outputWriter = await createOutputWriter(primaryWritePath, "jsonl");
4630
4644
  const isMatrixMode = Array.from(fileMetadata.values()).some((meta) => meta.selections.length > 1);
4631
4645
  let totalEvalCount = 0;
4632
4646
  for (const meta of fileMetadata.values()) {
@@ -4694,7 +4708,7 @@ async function runEvalCommand(input) {
4694
4708
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
4695
4709
  let transcriptProviderFactory;
4696
4710
  if (options.transcript) {
4697
- const { TranscriptProvider } = await import("./dist-XRVHRBJF.js");
4711
+ const { TranscriptProvider } = await import("./dist-QXVR2ZRH.js");
4698
4712
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
4699
4713
  const totalTests = [...fileMetadata.values()].reduce(
4700
4714
  (sum, meta) => sum + meta.testCases.length,
@@ -4808,18 +4822,17 @@ async function runEvalCommand(input) {
4808
4822
  await writeBenchmarkJson(benchmarkPath, allResults);
4809
4823
  console.log(`Benchmark written to: ${benchmarkPath}`);
4810
4824
  }
4811
- if (usesDefaultArtifactWorkspace) {
4825
+ if (usesDefaultArtifactWorkspace && allResults.length > 0) {
4812
4826
  const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
4813
- const workspaceDir = path15.dirname(outputPath);
4814
4827
  const {
4815
4828
  testArtifactDir,
4816
4829
  timingPath,
4817
4830
  benchmarkPath: workspaceBenchmarkPath,
4818
4831
  indexPath
4819
- } = await writeArtifactsFromResults(allResults, workspaceDir, {
4832
+ } = await writeArtifactsFromResults(allResults, runDir, {
4820
4833
  evalFile
4821
4834
  });
4822
- console.log(`Artifact workspace written to: ${workspaceDir}`);
4835
+ console.log(`Artifact workspace written to: ${runDir}`);
4823
4836
  console.log(` Index: ${indexPath}`);
4824
4837
  console.log(
4825
4838
  ` Per-test artifacts: ${testArtifactDir} (${allResults.length} test directories)`
@@ -4827,24 +4840,17 @@ async function runEvalCommand(input) {
4827
4840
  console.log(` Timing: ${timingPath}`);
4828
4841
  console.log(` Benchmark: ${workspaceBenchmarkPath}`);
4829
4842
  }
4830
- if (options.artifacts) {
4831
- const artifactsDir = path15.resolve(options.artifacts);
4832
- const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
4833
- const {
4834
- testArtifactDir,
4835
- indexPath,
4836
- timingPath,
4837
- benchmarkPath: abp
4838
- } = await writeArtifactsFromResults(allResults, artifactsDir, {
4839
- evalFile
4840
- });
4841
- console.log(`Artifacts written to: ${artifactsDir}`);
4842
- console.log(` Index: ${indexPath}`);
4843
+ if (resolvedExportPaths.length > 0 && allResults.length > 0) {
4844
+ for (const exportPath of resolvedExportPaths) {
4845
+ const writer = await createWriterFromPath(exportPath, writerOptions);
4846
+ for (const result of allResults) {
4847
+ await writer.append(result);
4848
+ }
4849
+ await writer.close();
4850
+ }
4843
4851
  console.log(
4844
- ` Per-test artifacts: ${testArtifactDir} (${allResults.length} test directories)`
4852
+ `Export file(s) written: ${resolvedExportPaths.map((p) => path15.relative(cwd, p)).join(", ")}`
4845
4853
  );
4846
- console.log(` Timing: ${timingPath}`);
4847
- console.log(` Benchmark: ${abp}`);
4848
4854
  }
4849
4855
  const failedWithWorkspaces = allResults.filter(
4850
4856
  (r) => r.workspacePath && (r.error || r.score < 0.5)
@@ -4856,15 +4862,8 @@ async function runEvalCommand(input) {
4856
4862
  }
4857
4863
  }
4858
4864
  if (allResults.length > 0) {
4859
- if (uniqueReportedOutputPaths.length === 1) {
4860
- console.log(`
4865
+ console.log(`
4861
4866
  Results written to: ${outputPath}`);
4862
- } else {
4863
- console.log("\nResults written to:");
4864
- for (const p of uniqueReportedOutputPaths) {
4865
- console.log(` ${p}`);
4866
- }
4867
- }
4868
4867
  await saveRunCache(cwd, outputPath).catch(() => void 0);
4869
4868
  }
4870
4869
  if (summary.executionErrorCount > 0 && !options.retryErrors) {
@@ -4985,4 +4984,4 @@ export {
4985
4984
  getCategories,
4986
4985
  filterByCategory
4987
4986
  };
4988
- //# sourceMappingURL=chunk-AX4CQS45.js.map
4987
+ //# sourceMappingURL=chunk-QBZJSQXV.js.map