npm - agentv - Versions diffs - 3.9.0 → 3.9.2 - Mend

agentv 3.9.0 → 3.9.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (19)

package/dist/{chunk-Q2YWV4QM.js → chunk-6ZAFWUBT.js} +29 -18
package/dist/chunk-6ZAFWUBT.js.map +1 -0
package/dist/{chunk-GC5P5HHZ.js → chunk-JGMJL2LV.js} +76 -42
package/dist/chunk-JGMJL2LV.js.map +1 -0
package/dist/{chunk-TXDPYXHY.js → chunk-OIVGGWJ3.js} +102 -43
package/dist/chunk-OIVGGWJ3.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-PIOSPBKX.js → dist-PUPHGVKL.js} +4 -2
package/dist/index.js +3 -3
package/dist/{interactive-3VTDK5NX.js → interactive-BD56NB23.js} +3 -3
package/dist/templates/.agentv/config.yaml +4 -13
package/dist/templates/.agentv/targets.yaml +0 -16
package/dist/templates/{.agentv/.env.example → .env.example} +11 -9
package/package.json +1 -1
package/dist/chunk-GC5P5HHZ.js.map +0 -1
package/dist/chunk-Q2YWV4QM.js.map +0 -1
package/dist/chunk-TXDPYXHY.js.map +0 -1
/package/dist/{dist-PIOSPBKX.js.map → dist-PUPHGVKL.js.map} +0 -0
/package/dist/{interactive-3VTDK5NX.js.map → interactive-BD56NB23.js.map} +0 -0

package/dist/{chunk-GC5P5HHZ.js → chunk-JGMJL2LV.js} RENAMED Viewed

@@ -27,12 +27,12 @@ import {
   subscribeToCopilotCliLogEntries,
   subscribeToCopilotSdkLogEntries,
   subscribeToPiLogEntries
-} from "./chunk-TXDPYXHY.js";
+} from "./chunk-OIVGGWJ3.js";
 // package.json
 var package_default = {
   name: "agentv",
-  version: "3.9.0",
+  version: "3.9.2",
   description: "CLI entry point for AgentV",
   type: "module",
   repository: {
@@ -204,7 +204,7 @@ async function discoverTargetsFile(options) {
 // src/commands/eval/run-eval.ts
 import { constants as constants4 } from "node:fs";
 import { access as access4 } from "node:fs/promises";
-import path12 from "node:path";
+import path13 from "node:path";
 import { pathToFileURL } from "node:url";
 // src/version-check.ts
@@ -282,7 +282,7 @@ function computePassRate(result) {
     const passed = scores.filter((s) => s.score >= PASS_THRESHOLD).length;
     return passed / scores.length;
   }
-  return result.score >= PASS_THRESHOLD ? 1 : 0;
+  return (result.score ?? 0) >= PASS_THRESHOLD ? 1 : 0;
 }
 function countToolCalls(result) {
   const toolCalls = {};
@@ -323,6 +323,7 @@ function parseWorkspaceChanges(fileChanges) {
   };
 }
 function buildAssertions(result) {
+  if (!result.assertions) return [];
   return result.assertions.map((a) => ({
     text: a.text,
     passed: a.passed,
@@ -403,8 +404,8 @@ function buildBenchmarkArtifact(results, evalFile = "") {
   const targetSet = /* @__PURE__ */ new Set();
   const testIdSet = /* @__PURE__ */ new Set();
   for (const result of results) {
-    targetSet.add(result.target);
-    testIdSet.add(result.testId);
+    targetSet.add(result.target ?? "unknown");
+    testIdSet.add(result.testId ?? "unknown");
   }
   const targets = [...targetSet].sort();
   const testIds = [...testIdSet].sort();
@@ -452,7 +453,9 @@ function buildBenchmarkArtifact(results, evalFile = "") {
       perEvaluatorSummary[key] = computeStats(scores);
     }
   }
-  const errorCount = results.filter((r) => r.executionStatus === "execution_error").length;
+  const errorCount = results.filter(
+    (r) => r.executionStatus != null && r.executionStatus === "execution_error"
+  ).length;
   if (errorCount > 0) {
     notes.push(
       `${errorCount} test(s) had execution errors and are included in pass_rate as failures`
@@ -518,7 +521,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
   await mkdir(gradingDir, { recursive: true });
   for (const result of results) {
     const grading = buildGradingArtifact(result);
-    const safeTestId = result.testId.replace(/[/\\:*?"<>|]/g, "_");
+    const safeTestId = (result.testId ?? "unknown").replace(/[/\\:*?"<>|]/g, "_");
     const gradingPath = path3.join(gradingDir, `${safeTestId}.json`);
     await writeFile(gradingPath, `${JSON.stringify(grading, null, 2)}
 `, "utf8");
@@ -1766,12 +1769,12 @@ var ProgressDisplay = class {
   }
   addLogPaths(paths, provider) {
     const newPaths = [];
-    for (const path13 of paths) {
-      if (this.logPathSet.has(path13)) {
+    for (const path14 of paths) {
+      if (this.logPathSet.has(path14)) {
         continue;
       }
-      this.logPathSet.add(path13);
-      newPaths.push(path13);
+      this.logPathSet.add(path14);
+      newPaths.push(path14);
     }
     if (newPaths.length === 0) {
       return;
@@ -1784,8 +1787,8 @@ var ProgressDisplay = class {
       this.hasPrintedLogHeader = true;
     }
     const startIndex = this.logPaths.length - newPaths.length;
-    newPaths.forEach((path13, offset) => {
-      console.log(`${startIndex + offset + 1}. ${path13}`);
+    newPaths.forEach((path14, offset) => {
+      console.log(`${startIndex + offset + 1}. ${path14}`);
     });
   }
   finish() {
@@ -1859,6 +1862,32 @@ async function loadNonErrorResults(jsonlPath) {
   return results;
 }
+// src/commands/eval/run-cache.ts
+import { mkdir as mkdir7, readFile as readFile2, writeFile as writeFile6 } from "node:fs/promises";
+import path11 from "node:path";
+var CACHE_FILENAME = "cache.json";
+function cachePath(cwd) {
+  return path11.join(cwd, ".agentv", CACHE_FILENAME);
+}
+async function loadRunCache(cwd) {
+  try {
+    const content = await readFile2(cachePath(cwd), "utf-8");
+    return JSON.parse(content);
+  } catch {
+    return void 0;
+  }
+}
+async function saveRunCache(cwd, resultFile) {
+  const dir = path11.join(cwd, ".agentv");
+  await mkdir7(dir, { recursive: true });
+  const cache = {
+    lastResultFile: resultFile,
+    timestamp: (/* @__PURE__ */ new Date()).toISOString()
+  };
+  await writeFile6(cachePath(cwd), `${JSON.stringify(cache, null, 2)}
+`, "utf-8");
+}
 // src/commands/eval/statistics.ts
 var HISTOGRAM_BREAKPOINTS = [0, 0.2, 0.4, 0.6, 0.8, 1];
 function computeMean(values) {
@@ -2107,13 +2136,13 @@ function formatMatrixSummary(results) {
 }
 // ../../packages/core/dist/evaluation/validation/index.js
-import { readFile as readFile2 } from "node:fs/promises";
-import path11 from "node:path";
+import { readFile as readFile3 } from "node:fs/promises";
+import path12 from "node:path";
 import { parse } from "yaml";
 import { readFile as readFile22 } from "node:fs/promises";
 import path22 from "node:path";
 import { parse as parse2 } from "yaml";
-import { readFile as readFile3 } from "node:fs/promises";
+import { readFile as readFile32 } from "node:fs/promises";
 import path32 from "node:path";
 import { parse as parse3 } from "yaml";
 import { readFile as readFile4 } from "node:fs/promises";
@@ -2126,7 +2155,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
 var SCHEMA_CONFIG_V2 = "agentv-config-v2";
 async function detectFileType(filePath) {
   try {
-    const content = await readFile2(filePath, "utf8");
+    const content = await readFile3(filePath, "utf8");
     const parsed = parse(content);
     if (typeof parsed !== "object" || parsed === null) {
       return inferFileTypeFromPath(filePath);
@@ -2151,8 +2180,8 @@ async function detectFileType(filePath) {
   }
 }
 function inferFileTypeFromPath(filePath) {
-  const normalized = path11.normalize(filePath).replace(/\\/g, "/");
-  const basename = path11.basename(filePath);
+  const normalized = path12.normalize(filePath).replace(/\\/g, "/");
+  const basename = path12.basename(filePath);
   if (normalized.includes("/.agentv/")) {
     if (basename === "config.yaml" || basename === "config.yml") {
       return "config";
@@ -3008,7 +3037,7 @@ async function validateTargetsFile(filePath) {
   const absolutePath = path32.resolve(filePath);
   let parsed;
   try {
-    const content = await readFile3(absolutePath, "utf8");
+    const content = await readFile32(absolutePath, "utf8");
     parsed = parse3(content);
   } catch (error) {
     errors.push({
@@ -3784,7 +3813,7 @@ function buildDefaultOutputPath(cwd, format) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const baseName = "eval";
   const extension = getDefaultExtension(format);
-  return path12.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
+  return path13.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
 }
 function createProgressReporter(maxWorkers, options) {
   const display = new ProgressDisplay(maxWorkers, options);
@@ -3798,7 +3827,7 @@ function createProgressReporter(maxWorkers, options) {
   };
 }
 function makeEvalKey(testFilePath, evalId) {
-  return `${path12.resolve(testFilePath)}::${evalId}`;
+  return `${path13.resolve(testFilePath)}::${evalId}`;
 }
 function createDisplayIdTracker() {
   const map = /* @__PURE__ */ new Map();
@@ -3913,6 +3942,7 @@ async function prepareFileMetadata(params) {
     selections,
     trialsConfig: suite.trials,
     suiteTargets,
+    yamlWorkers: suite.workers,
     yamlCache: suite.cacheConfig?.enabled,
     yamlCachePath: suite.cacheConfig?.cachePath,
     totalBudgetUsd: suite.totalBudgetUsd,
@@ -3942,6 +3972,7 @@ async function runSingleEvalFile(params) {
     cache,
     evaluationRunner,
     workersOverride,
+    yamlWorkers,
     progressReporter,
     seenEvalCases,
     displayIdTracker,
@@ -3963,7 +3994,7 @@ async function runSingleEvalFile(params) {
   }
   const agentTimeoutMs = options.agentTimeoutSeconds != null ? Math.max(0, options.agentTimeoutSeconds) * 1e3 : void 0;
   const workerPreference = workersOverride ?? options.workers;
-  let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
+  let resolvedWorkers = workerPreference ?? yamlWorkers ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
   if (resolvedWorkers < 1 || resolvedWorkers > 50) {
     throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
   }
@@ -4078,7 +4109,7 @@ async function runEvalCommand(input) {
     );
   }
   const repoRoot = await findRepoRoot(cwd);
-  const yamlConfig = await loadConfig(path12.join(cwd, "_"), repoRoot);
+  const yamlConfig = await loadConfig(path13.join(cwd, "_"), repoRoot);
   if (yamlConfig?.required_version) {
     await enforceRequiredVersion(yamlConfig.required_version, {
       strict: normalizeBoolean(input.rawOptions.strict)
@@ -4090,7 +4121,7 @@ async function runEvalCommand(input) {
   }
   let retryNonErrorResults;
   if (options.retryErrors) {
-    const retryPath = path12.resolve(options.retryErrors);
+    const retryPath = path13.resolve(options.retryErrors);
     await ensureFileExists(retryPath, "Retry-errors JSONL file");
     const errorIds = await loadErrorTestIds(retryPath);
     if (errorIds.length === 0) {
@@ -4103,7 +4134,7 @@ async function runEvalCommand(input) {
     retryNonErrorResults = await loadNonErrorResults(retryPath);
   }
   if (options.workspacePath) {
-    const resolvedWorkspace = path12.resolve(options.workspacePath);
+    const resolvedWorkspace = path13.resolve(options.workspacePath);
     try {
       const { stat: stat2 } = await import("node:fs/promises");
       const stats = await stat2(resolvedWorkspace);
@@ -4125,7 +4156,7 @@ async function runEvalCommand(input) {
   const useFileExport = !!(options.otelFile || options.traceFile);
   if (options.exportOtel || useFileExport) {
     try {
-      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-PIOSPBKX.js");
+      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-PUPHGVKL.js");
       let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
       let headers = {};
       if (options.otelBackend) {
@@ -4149,8 +4180,8 @@ async function runEvalCommand(input) {
         headers,
         captureContent,
         groupTurns: options.otelGroupTurns,
-        otlpFilePath: options.otelFile ? path12.resolve(options.otelFile) : void 0,
-        traceFilePath: options.traceFile ? path12.resolve(options.traceFile) : void 0
+        otlpFilePath: options.otelFile ? path13.resolve(options.otelFile) : void 0,
+        traceFilePath: options.traceFile ? path13.resolve(options.traceFile) : void 0
       });
       const initialized = await otelExporter.init();
       if (!initialized) {
@@ -4166,8 +4197,8 @@ async function runEvalCommand(input) {
       otelExporter = null;
     }
   }
-  const outputPath = options.outPath ? path12.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
-  const extraOutputPaths = options.outputPaths.map((p) => path12.resolve(p));
+  const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
+  const extraOutputPaths = options.outputPaths.map((p) => path13.resolve(p));
   const allOutputPaths = extraOutputPaths.length > 0 ? [outputPath, ...extraOutputPaths] : [outputPath];
   const uniqueOutputPaths = [...new Set(allOutputPaths)];
   let outputWriter;
@@ -4181,12 +4212,12 @@ async function runEvalCommand(input) {
       console.log(`  ${p}`);
     }
   }
-  const resolvedTestFiles = input.testFiles.map((file) => path12.resolve(file));
+  const resolvedTestFiles = input.testFiles.map((file) => path13.resolve(file));
   if (options.otelFile) {
-    console.log(`OTLP JSON file: ${path12.resolve(options.otelFile)}`);
+    console.log(`OTLP JSON file: ${path13.resolve(options.otelFile)}`);
   }
   if (options.traceFile) {
-    console.log(`Trace file: ${path12.resolve(options.traceFile)}`);
+    console.log(`Trace file: ${path13.resolve(options.traceFile)}`);
   }
   const evaluationRunner = await resolveEvaluationRunner();
   const allResults = [];
@@ -4232,7 +4263,7 @@ async function runEvalCommand(input) {
     cliNoCache: options.noCache,
     yamlCache: yamlCacheEnabled
   });
-  const cache = cacheEnabled ? new ResponseCache(yamlCachePath ? path12.resolve(yamlCachePath) : void 0) : void 0;
+  const cache = cacheEnabled ? new ResponseCache(yamlCachePath ? path13.resolve(yamlCachePath) : void 0) : void 0;
   const useCache = cacheEnabled;
   if (cacheEnabled) {
     console.log(`Response cache: enabled${yamlCachePath ? ` (${yamlCachePath})` : ""}`);
@@ -4329,6 +4360,7 @@ async function runEvalCommand(input) {
             cache,
             evaluationRunner,
             workersOverride: perFileWorkers,
+            yamlWorkers: targetPrep.yamlWorkers,
             progressReporter,
             seenEvalCases,
             displayIdTracker,
@@ -4363,12 +4395,12 @@ async function runEvalCommand(input) {
       console.log(formatMatrixSummary(allResults));
     }
     if (options.benchmarkJson && allResults.length > 0) {
-      const benchmarkPath = path12.resolve(options.benchmarkJson);
+      const benchmarkPath = path13.resolve(options.benchmarkJson);
       await writeBenchmarkJson(benchmarkPath, allResults);
       console.log(`Benchmark written to: ${benchmarkPath}`);
     }
     if (options.artifacts && allResults.length > 0) {
-      const artifactsDir = path12.resolve(options.artifacts);
+      const artifactsDir = path13.resolve(options.artifacts);
       const evalFile = resolvedTestFiles.length === 1 ? resolvedTestFiles[0] : "";
       const {
         gradingDir,
@@ -4399,11 +4431,12 @@ Results written to: ${outputPath}`);
           console.log(`  ${p}`);
         }
       }
+      await saveRunCache(cwd, outputPath).catch(() => void 0);
     }
     if (summary.executionErrorCount > 0 && !options.retryErrors) {
-      const evalFileArgs = resolvedTestFiles.map((f) => path12.relative(cwd, f)).join(" ");
+      const evalFileArgs = resolvedTestFiles.map((f) => path13.relative(cwd, f)).join(" ");
       const targetFlag = options.target ? ` --target ${options.target}` : "";
-      const relativeOutputPath = path12.relative(cwd, outputPath);
+      const relativeOutputPath = path13.relative(cwd, outputPath);
       console.log(
         `
 Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed tests with:
@@ -4435,7 +4468,7 @@ async function resolveEvaluationRunner() {
   if (!overridePath) {
     return runEvaluation;
   }
-  const resolved = path12.isAbsolute(overridePath) ? overridePath : path12.resolve(process.cwd(), overridePath);
+  const resolved = path13.isAbsolute(overridePath) ? overridePath : path13.resolve(process.cwd(), overridePath);
   const moduleUrl = pathToFileURL(resolved).href;
   const mod = await import(moduleUrl);
   const candidate = mod.runEvaluation;
@@ -4457,6 +4490,7 @@ export {
   buildTimingArtifact,
   buildBenchmarkArtifact,
   parseJsonlResults,
+  loadRunCache,
   detectFileType,
   validateEvalFile,
   validateTargetsFile,
@@ -4467,4 +4501,4 @@ export {
   selectTarget,
   runEvalCommand
 };
-//# sourceMappingURL=chunk-GC5P5HHZ.js.map
+//# sourceMappingURL=chunk-JGMJL2LV.js.map