npm - agentv - Versions diffs - 3.10.2 → 3.11.0 - Mend

agentv 3.10.2 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/dist/{chunk-F7LAJMTO.js → chunk-JEW3FEO7.js} RENAMED Viewed

@@ -27,12 +27,12 @@ import {
   subscribeToCopilotCliLogEntries,
   subscribeToCopilotSdkLogEntries,
   subscribeToPiLogEntries
-} from "./chunk-KGK5NUFG.js";
+} from "./chunk-EZGWZVVK.js";
 // package.json
 var package_default = {
   name: "agentv",
-  version: "3.10.2",
+  version: "3.11.0",
   description: "CLI entry point for AgentV",
   type: "module",
   repository: {
@@ -63,6 +63,7 @@ var package_default = {
     "@ai-sdk/openai": "^3.0.0",
     "@anthropic-ai/claude-agent-sdk": "^0.2.49",
     "@github/copilot-sdk": "^0.1.25",
+    "@hono/node-server": "^1.19.11",
     "@inquirer/prompts": "^8.2.1",
     "@mariozechner/pi-agent-core": "^0.54.2",
     "@mariozechner/pi-ai": "^0.54.2",
@@ -70,6 +71,7 @@ var package_default = {
     "cmd-ts": "^0.14.3",
     dotenv: "^16.4.5",
     "fast-glob": "^3.3.3",
+    hono: "^4.12.9",
     json5: "^2.2.3",
     micromatch: "^4.0.8",
     semver: "^7.7.4",
@@ -202,7 +204,7 @@ async function discoverTargetsFile(options) {
 }
 // src/commands/eval/run-eval.ts
-import { constants as constants4 } from "node:fs";
+import { constants as constants4, mkdirSync } from "node:fs";
 import { access as access4 } from "node:fs/promises";
 import path13 from "node:path";
 import { pathToFileURL } from "node:url";
@@ -478,6 +480,33 @@ function buildBenchmarkArtifact(results, evalFile = "") {
     notes
   };
 }
+function buildAggregateGradingArtifact(results) {
+  const assertions = [];
+  for (const result of results) {
+    if (!result.assertions) continue;
+    const testId = result.testId ?? "unknown";
+    for (const a of result.assertions) {
+      assertions.push({
+        test_id: testId,
+        text: a.text,
+        passed: a.passed,
+        evidence: a.evidence ?? ""
+      });
+    }
+  }
+  const passed = assertions.filter((a) => a.passed).length;
+  const failed = assertions.filter((a) => !a.passed).length;
+  const total = assertions.length;
+  return {
+    assertions,
+    summary: {
+      passed,
+      failed,
+      total,
+      pass_rate: total > 0 ? Math.round(passed / total * 1e3) / 1e3 : 0
+    }
+  };
+}
 function toCamelCase(str) {
   return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
 }
@@ -518,6 +547,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
   const gradingDir = path3.join(outputDir, "grading");
   const timingPath = path3.join(outputDir, "timing.json");
   const benchmarkPath = path3.join(outputDir, "benchmark.json");
+  const aggregateGradingPath = path3.join(outputDir, "grading.json");
   await mkdir(gradingDir, { recursive: true });
   for (const result of results) {
     const grading = buildGradingArtifact(result);
@@ -532,7 +562,10 @@ async function writeArtifactsFromResults(results, outputDir, options) {
   const benchmark = buildBenchmarkArtifact(results, options?.evalFile);
   await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
 `, "utf8");
-  return { gradingDir, timingPath, benchmarkPath };
+  const aggregateGrading = buildAggregateGradingArtifact(results);
+  await writeFile(aggregateGradingPath, `${JSON.stringify(aggregateGrading, null, 2)}
+`, "utf8");
+  return { gradingDir, timingPath, benchmarkPath, aggregateGradingPath };
 }
 // src/commands/eval/benchmark-writer.ts
@@ -1646,20 +1679,6 @@ async function createOutputWriter(filePath, format) {
     }
   }
 }
-function getDefaultExtension(format) {
-  switch (format) {
-    case "jsonl":
-      return ".jsonl";
-    case "yaml":
-      return ".yaml";
-    case "html":
-      return ".html";
-    default: {
-      const exhaustiveCheck = format;
-      throw new Error(`Unsupported output format: ${exhaustiveCheck}`);
-    }
-  }
-}
 var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".jsonl", ".json", ".xml", ".yaml", ".yml", ".html", ".htm"]);
 function createWriterFromPath(filePath) {
   const ext = path10.extname(filePath).toLowerCase();
@@ -1866,6 +1885,12 @@ async function loadNonErrorResults(jsonlPath) {
 import { mkdir as mkdir7, readFile as readFile2, writeFile as writeFile6 } from "node:fs/promises";
 import path11 from "node:path";
 var CACHE_FILENAME = "cache.json";
+function resolveRunCacheFile(cache) {
+  if (cache.lastRunDir) {
+    return path11.join(cache.lastRunDir, "results.jsonl");
+  }
+  return cache.lastResultFile ?? "";
+}
 function cachePath(cwd) {
   return path11.join(cwd, ".agentv", CACHE_FILENAME);
 }
@@ -1877,11 +1902,11 @@ async function loadRunCache(cwd) {
     return void 0;
   }
 }
-async function saveRunCache(cwd, resultFile) {
+async function saveRunCache(cwd, runDir) {
   const dir = path11.join(cwd, ".agentv");
   await mkdir7(dir, { recursive: true });
   const cache = {
-    lastResultFile: resultFile,
+    lastRunDir: runDir,
     timestamp: (/* @__PURE__ */ new Date()).toISOString()
   };
   await writeFile6(cachePath(cwd), `${JSON.stringify(cache, null, 2)}
@@ -3787,10 +3812,10 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
     // Precedence: CLI > YAML config > TS config
     otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0) ?? (config?.execution?.otelFile ? resolveTimestampPlaceholder(config.execution.otelFile) : void 0),
     traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0) ?? (config?.execution?.traceFile ? resolveTimestampPlaceholder(config.execution.traceFile) : void 0),
-    exportOtel: normalizeBoolean(rawOptions.exportOtel),
-    otelBackend: normalizeString(rawOptions.otelBackend),
-    otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent),
-    otelGroupTurns: normalizeBoolean(rawOptions.otelGroupTurns),
+    exportOtel: normalizeBoolean(rawOptions.exportOtel) || yamlExecution?.export_otel === true,
+    otelBackend: normalizeString(rawOptions.otelBackend) ?? yamlExecution?.otel_backend,
+    otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent) || yamlExecution?.otel_capture_content === true,
+    otelGroupTurns: normalizeBoolean(rawOptions.otelGroupTurns) || yamlExecution?.otel_group_turns === true,
     retryErrors: normalizeString(rawOptions.retryErrors),
     workspaceMode,
     workspacePath,
@@ -3808,11 +3833,12 @@ async function ensureFileExists(filePath, description) {
     throw new Error(`${description} not found: ${filePath}`);
   }
 }
-function buildDefaultOutputPath(cwd, format) {
+function buildDefaultOutputPath(cwd) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
-  const baseName = "eval";
-  const extension = getDefaultExtension(format);
-  return path13.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
+  const dirName = `eval_${timestamp}`;
+  const runDir = path13.join(cwd, ".agentv", "results", "raw", dirName);
+  mkdirSync(runDir, { recursive: true });
+  return path13.join(runDir, "results.jsonl");
 }
 function createProgressReporter(maxWorkers, options) {
   const display = new ProgressDisplay(maxWorkers, options);
@@ -4155,7 +4181,7 @@ async function runEvalCommand(input) {
   const useFileExport = !!(options.otelFile || options.traceFile);
   if (options.exportOtel || useFileExport) {
     try {
-      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-3QUJEJUT.js");
+      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-QERRYDSC.js");
       let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
       let headers = {};
       if (options.otelBackend) {
@@ -4196,7 +4222,7 @@ async function runEvalCommand(input) {
       otelExporter = null;
     }
   }
-  const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
+  const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd);
   const extraOutputPaths = options.outputPaths.map((p) => path13.resolve(p));
   const allOutputPaths = extraOutputPaths.length > 0 ? [outputPath, ...extraOutputPaths] : [outputPath];
   const uniqueOutputPaths = [...new Set(allOutputPaths)];
@@ -4430,7 +4456,15 @@ Results written to: ${outputPath}`);
           console.log(`  ${p}`);
         }
       }
-      await saveRunCache(cwd, outputPath).catch(() => void 0);
+      const runDir = path13.dirname(outputPath);
+      await saveRunCache(cwd, runDir).catch(() => void 0);
+      if (outputPath.endsWith(".jsonl")) {
+        const { writeFile: writeFile7 } = await import("node:fs/promises");
+        const gradingPath = path13.join(path13.dirname(outputPath), "grading.json");
+        const aggregateGrading = buildAggregateGradingArtifact(allResults);
+        await writeFile7(gradingPath, `${JSON.stringify(aggregateGrading, null, 2)}
+`, "utf8");
+      }
     }
     if (summary.executionErrorCount > 0 && !options.retryErrors) {
       const evalFileArgs = resolvedTestFiles.map((f) => path13.relative(cwd, f)).join(" ");
@@ -4488,7 +4522,9 @@ export {
   buildGradingArtifact,
   buildTimingArtifact,
   buildBenchmarkArtifact,
+  buildAggregateGradingArtifact,
   parseJsonlResults,
+  resolveRunCacheFile,
   loadRunCache,
   detectFileType,
   validateEvalFile,
@@ -4500,4 +4536,4 @@ export {
   selectTarget,
   runEvalCommand
 };
-//# sourceMappingURL=chunk-F7LAJMTO.js.map
+//# sourceMappingURL=chunk-JEW3FEO7.js.map