agentv 3.13.1 → 3.13.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -27,12 +27,12 @@ import {
27
27
  subscribeToCopilotCliLogEntries,
28
28
  subscribeToCopilotSdkLogEntries,
29
29
  subscribeToPiLogEntries
30
- } from "./chunk-K747KGDP.js";
30
+ } from "./chunk-D3LNJUUB.js";
31
31
 
32
32
  // package.json
33
33
  var package_default = {
34
34
  name: "agentv",
35
- version: "3.13.1",
35
+ version: "3.13.3",
36
36
  description: "CLI entry point for AgentV",
37
37
  type: "module",
38
38
  repository: {
@@ -1644,13 +1644,15 @@ function escapeXml(str) {
1644
1644
  var JunitWriter = class _JunitWriter {
1645
1645
  filePath;
1646
1646
  results = [];
1647
+ threshold;
1647
1648
  closed = false;
1648
- constructor(filePath) {
1649
+ constructor(filePath, options) {
1649
1650
  this.filePath = filePath;
1651
+ this.threshold = options?.threshold ?? 0.5;
1650
1652
  }
1651
- static async open(filePath) {
1653
+ static async open(filePath, options) {
1652
1654
  await mkdir5(path9.dirname(filePath), { recursive: true });
1653
- return new _JunitWriter(filePath);
1655
+ return new _JunitWriter(filePath, options);
1654
1656
  }
1655
1657
  async append(result) {
1656
1658
  if (this.closed) {
@@ -1675,7 +1677,7 @@ var JunitWriter = class _JunitWriter {
1675
1677
  }
1676
1678
  const suiteXmls = [];
1677
1679
  for (const [suiteName, results] of grouped) {
1678
- const failures = results.filter((r) => r.score < 0.5).length;
1680
+ const failures = results.filter((r) => r.score < this.threshold).length;
1679
1681
  const errors = results.filter((r) => r.error !== void 0).length;
1680
1682
  const testCases = results.map((r) => {
1681
1683
  const time = r.durationMs ? (r.durationMs / 1e3).toFixed(3) : "0.000";
@@ -1684,7 +1686,7 @@ var JunitWriter = class _JunitWriter {
1684
1686
  inner = `
1685
1687
  <error message="${escapeXml(r.error)}">${escapeXml(r.error)}</error>
1686
1688
  `;
1687
- } else if (r.score < 0.5) {
1689
+ } else if (r.score < this.threshold) {
1688
1690
  const message = `score=${r.score.toFixed(3)}`;
1689
1691
  const failedAssertions = r.assertions.filter((a) => !a.passed);
1690
1692
  const detail = [
@@ -1704,7 +1706,7 @@ ${testCases.join("\n")}
1704
1706
  );
1705
1707
  }
1706
1708
  const totalTests = this.results.length;
1707
- const totalFailures = this.results.filter((r) => r.score < 0.5).length;
1709
+ const totalFailures = this.results.filter((r) => r.score < this.threshold).length;
1708
1710
  const totalErrors = this.results.filter((r) => r.error !== void 0).length;
1709
1711
  const xml = `<?xml version="1.0" encoding="UTF-8"?>
1710
1712
  <testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}">
@@ -1785,7 +1787,7 @@ async function createOutputWriter(filePath, format) {
1785
1787
  }
1786
1788
  }
1787
1789
  var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".jsonl", ".json", ".xml", ".yaml", ".yml", ".html", ".htm"]);
1788
- function createWriterFromPath(filePath) {
1790
+ function createWriterFromPath(filePath, options) {
1789
1791
  const ext = path11.extname(filePath).toLowerCase();
1790
1792
  switch (ext) {
1791
1793
  case ".jsonl":
@@ -1793,7 +1795,7 @@ function createWriterFromPath(filePath) {
1793
1795
  case ".json":
1794
1796
  return JsonWriter.open(filePath);
1795
1797
  case ".xml":
1796
- return JunitWriter.open(filePath);
1798
+ return JunitWriter.open(filePath, { threshold: options?.threshold });
1797
1799
  case ".yaml":
1798
1800
  case ".yml":
1799
1801
  return YamlWriter.open(filePath);
@@ -1806,8 +1808,8 @@ function createWriterFromPath(filePath) {
1806
1808
  );
1807
1809
  }
1808
1810
  }
1809
- async function createMultiWriter(filePaths) {
1810
- const writers = await Promise.all(filePaths.map((fp) => createWriterFromPath(fp)));
1811
+ async function createMultiWriter(filePaths, options) {
1812
+ const writers = await Promise.all(filePaths.map((fp) => createWriterFromPath(fp, options)));
1811
1813
  return {
1812
1814
  async append(result) {
1813
1815
  await Promise.all(writers.map((w) => w.append(result)));
@@ -2385,6 +2387,12 @@ function formatMatrixSummary(results) {
2385
2387
  lines.push(`${"Average".padEnd(testIdColWidth)} ${avgCells.join(" ")}`);
2386
2388
  return lines.join("\n");
2387
2389
  }
2390
+ function formatThresholdSummary(meanScore, threshold) {
2391
+ const passed = meanScore >= threshold;
2392
+ const verdict = passed ? "PASS" : "FAIL";
2393
+ const message = `Suite score: ${meanScore.toFixed(2)} (threshold: ${threshold.toFixed(2)}) \u2014 ${verdict}`;
2394
+ return { passed, message };
2395
+ }
2388
2396
 
2389
2397
  // ../../packages/core/dist/evaluation/validation/index.js
2390
2398
  import { readFile as readFile3 } from "node:fs/promises";
@@ -4048,7 +4056,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4048
4056
  artifacts: normalizeString(rawOptions.artifacts),
4049
4057
  graderTarget: normalizeString(rawOptions.graderTarget),
4050
4058
  model: normalizeString(rawOptions.model),
4051
- outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages))
4059
+ outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages)),
4060
+ threshold: normalizeOptionalNumber(rawOptions.threshold)
4052
4061
  };
4053
4062
  }
4054
4063
  async function ensureFileExists(filePath, description) {
@@ -4194,7 +4203,8 @@ async function prepareFileMetadata(params) {
4194
4203
  yamlCache: suite.cacheConfig?.enabled,
4195
4204
  yamlCachePath: suite.cacheConfig?.cachePath,
4196
4205
  totalBudgetUsd: suite.totalBudgetUsd,
4197
- failOnError: suite.failOnError
4206
+ failOnError: suite.failOnError,
4207
+ threshold: suite.threshold
4198
4208
  };
4199
4209
  }
4200
4210
  async function runWithLimit(items, limit, task) {
@@ -4350,6 +4360,9 @@ async function runSingleEvalFile(params) {
4350
4360
  }
4351
4361
  async function runEvalCommand(input) {
4352
4362
  const cwd = process.cwd();
4363
+ if (!process.env.AGENTV_RUN_TIMESTAMP) {
4364
+ process.env.AGENTV_RUN_TIMESTAMP = (/* @__PURE__ */ new Date()).toISOString().replace(/:/g, "-").replace(/\./g, "-");
4365
+ }
4353
4366
  let config = null;
4354
4367
  try {
4355
4368
  config = await loadTsConfig(cwd);
@@ -4408,7 +4421,7 @@ async function runEvalCommand(input) {
4408
4421
  const useFileExport = !!options.otelFile;
4409
4422
  if (options.exportOtel || useFileExport) {
4410
4423
  try {
4411
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-LCZDS36N.js");
4424
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-KPMR7RBT.js");
4412
4425
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4413
4426
  let headers = {};
4414
4427
  if (options.otelBackend) {
@@ -4454,12 +4467,9 @@ async function runEvalCommand(input) {
4454
4467
  const uniqueOutputPaths = [...new Set(allOutputPaths)];
4455
4468
  const reportedOutputPaths = extraOutputPaths.length > 0 ? [outputPath, ...extraOutputPaths] : [outputPath];
4456
4469
  const uniqueReportedOutputPaths = [...new Set(reportedOutputPaths)];
4457
- let outputWriter;
4458
4470
  if (uniqueOutputPaths.length === 1) {
4459
- outputWriter = await createOutputWriter(primaryWritePath, options.format);
4460
4471
  console.log(`Output path: ${outputPath}`);
4461
4472
  } else {
4462
- outputWriter = await createMultiWriter(uniqueOutputPaths);
4463
4473
  console.log("Output paths:");
4464
4474
  for (const p of uniqueReportedOutputPaths) {
4465
4475
  console.log(` ${p}`);
@@ -4518,6 +4528,18 @@ async function runEvalCommand(input) {
4518
4528
  if (cacheEnabled) {
4519
4529
  console.log(`Response cache: enabled${yamlCachePath ? ` (${yamlCachePath})` : ""}`);
4520
4530
  }
4531
+ const yamlThreshold = firstMeta?.threshold;
4532
+ const resolvedThreshold = options.threshold ?? yamlThreshold;
4533
+ if (resolvedThreshold !== void 0 && (resolvedThreshold < 0 || resolvedThreshold > 1)) {
4534
+ throw new Error("--threshold must be between 0 and 1");
4535
+ }
4536
+ const writerOptions = resolvedThreshold !== void 0 ? { threshold: resolvedThreshold } : void 0;
4537
+ let outputWriter;
4538
+ if (uniqueOutputPaths.length === 1) {
4539
+ outputWriter = await createOutputWriter(primaryWritePath, options.format);
4540
+ } else {
4541
+ outputWriter = await createMultiWriter(uniqueOutputPaths, writerOptions);
4542
+ }
4521
4543
  const isMatrixMode = Array.from(fileMetadata.values()).some((meta) => meta.selections.length > 1);
4522
4544
  let totalEvalCount = 0;
4523
4545
  for (const meta of fileMetadata.values()) {
@@ -4641,6 +4663,13 @@ async function runEvalCommand(input) {
4641
4663
  }
4642
4664
  const summary = calculateEvaluationSummary(allResults);
4643
4665
  console.log(formatEvaluationSummary(summary));
4666
+ let thresholdFailed = false;
4667
+ if (resolvedThreshold !== void 0) {
4668
+ const thresholdResult = formatThresholdSummary(summary.mean, resolvedThreshold);
4669
+ console.log(`
4670
+ ${thresholdResult.message}`);
4671
+ thresholdFailed = !thresholdResult.passed;
4672
+ }
4644
4673
  if (isMatrixMode && allResults.length > 0) {
4645
4674
  console.log(formatMatrixSummary(allResults));
4646
4675
  }
@@ -4722,7 +4751,8 @@ Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed t
4722
4751
  executionErrorCount: summary.executionErrorCount,
4723
4752
  outputPath,
4724
4753
  testFiles: resolvedTestFiles,
4725
- target: options.target
4754
+ target: options.target,
4755
+ thresholdFailed
4726
4756
  };
4727
4757
  } finally {
4728
4758
  unsubscribeCodexLogs();
@@ -4781,4 +4811,4 @@ export {
4781
4811
  selectTarget,
4782
4812
  runEvalCommand
4783
4813
  };
4784
- //# sourceMappingURL=chunk-LSXO22CF.js.map
4814
+ //# sourceMappingURL=chunk-PACTPWEN.js.map