npm - agentv - Versions diffs - 3.12.0 → 3.13.1 - Mend

agentv 3.12.0 → 3.13.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +9 -10
package/dist/{chunk-UYBLUYHN.js → chunk-K747KGDP.js} +64 -49
package/dist/chunk-K747KGDP.js.map +1 -0
package/dist/{chunk-VLOFRXH4.js → chunk-LSXO22CF.js} +17 -43
package/dist/chunk-LSXO22CF.js.map +1 -0
package/dist/{chunk-2ELQ6F3C.js → chunk-UK7UMQOX.js} +29 -34
package/dist/chunk-UK7UMQOX.js.map +1 -0
package/dist/cli.js +3 -4
package/dist/cli.js.map +1 -1
package/dist/{dist-L6R5HJ72.js → dist-LCZDS36N.js} +2 -6
package/dist/index.js +3 -4
package/dist/{interactive-5X62YEEX.js → interactive-76ZJVPI7.js} +3 -4
package/dist/{interactive-5X62YEEX.js.map → interactive-76ZJVPI7.js.map} +1 -1
package/package.json +1 -1
package/dist/chunk-2ELQ6F3C.js.map +0 -1
package/dist/chunk-NR7QVL75.js +0 -122
package/dist/chunk-NR7QVL75.js.map +0 -1
package/dist/chunk-UYBLUYHN.js.map +0 -1
package/dist/chunk-VLOFRXH4.js.map +0 -1
package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js +0 -9
package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map +0 -1
package/dist/{dist-L6R5HJ72.js.map → dist-LCZDS36N.js.map} +0 -0

package/dist/{chunk-2ELQ6F3C.js → chunk-UK7UMQOX.js} RENAMED Viewed

@@ -1,8 +1,8 @@
 import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
 import {
   HtmlWriter,
-  LEGACY_RESULTS_FILENAME,
   RESULT_INDEX_FILENAME,
+  RESULT_RUNS_DIRNAME,
   detectFileType,
   findRepoRoot,
   loadLightweightResults,
@@ -22,7 +22,7 @@ import {
   validateFileReferences,
   validateTargetsFile,
   writeArtifactsFromResults
-} from "./chunk-VLOFRXH4.js";
+} from "./chunk-LSXO22CF.js";
 import {
   createBuiltinRegistry,
   executeScript,
@@ -39,7 +39,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-UYBLUYHN.js";
+} from "./chunk-K747KGDP.js";
 import {
   __commonJS,
   __esm,
@@ -3389,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
       for (const assertion of test.assertions) {
         lines.push(`      - name: ${assertion.name}`);
         lines.push(`        type: ${assertion.type}`);
-        if ((assertion.type === "llm-grader" || assertion.type === "llm-judge") && "prompt" in assertion) {
+        if (assertion.type === "llm-grader" && "prompt" in assertion) {
           const prompt = assertion.prompt;
           lines.push(`        prompt: "${prompt.replace(/"/g, '\\"')}"`);
         }
@@ -3746,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
           if (item.outcome) criteria.push(item.outcome);
         }
       }
-    } else if (type === "llm-grader" || type === "llm_grader" || type === "llm-judge" || type === "llm_judge") {
+    } else if (type === "llm-grader" || type === "llm_grader") {
       const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
       criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
-    } else if (type === "code-grader" || type === "code_grader" || type === "code-judge" || type === "code_judge") {
+    } else if (type === "code-grader" || type === "code_grader") {
       const name = entry.name ?? type;
       const desc = bag.description ?? entry.description;
       criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
@@ -4126,11 +4126,6 @@ var evalRunCommand = command({
       long: "otel-file",
       description: "Write OTLP JSON trace to file (importable by OTel backends)"
     }),
-    traceFile: option({
-      type: optional(string),
-      long: "trace-file",
-      description: "Write human-readable trace JSONL to file"
-    }),
     exportOtel: flag({
       long: "export-otel",
       description: "Export evaluation traces via OTLP/HTTP to configured endpoint"
@@ -4185,7 +4180,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-5X62YEEX.js");
+      const { launchInteractiveWizard } = await import("./interactive-76ZJVPI7.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4211,7 +4206,6 @@ var evalRunCommand = command({
       workspacePath: args.workspacePath,
       trace: false,
       otelFile: args.otelFile,
-      traceFile: args.traceFile,
       exportOtel: args.exportOtel,
       otelBackend: args.otelBackend,
       otelCaptureContent: args.otelCaptureContent,
@@ -4767,7 +4761,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
   let hasCodeGraders = false;
   let hasLlmGraders = false;
   for (const assertion of assertions) {
-    if (assertion.type === "code-grader" || assertion.type === "code-judge") {
+    if (assertion.type === "code-grader") {
       if (!hasCodeGraders) {
         await mkdir3(codeGradersDir, { recursive: true });
         hasCodeGraders = true;
@@ -4780,7 +4774,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
         weight: config.weight ?? 1,
         config: config.config ?? {}
       });
-    } else if (assertion.type === "llm-grader" || assertion.type === "llm-judge") {
+    } else if (assertion.type === "llm-grader") {
       if (!hasLlmGraders) {
         await mkdir3(llmGradersDir, { recursive: true });
         hasLlmGraders = true;
@@ -4866,12 +4860,6 @@ function loadResultFile(filePath) {
   return loadJsonlRecords(resolvedFilePath);
 }
 function resolveTraceResultPath(filePath) {
-  if (path6.basename(filePath) === LEGACY_RESULTS_FILENAME) {
-    return filePath;
-  }
-  if (!filePath.endsWith(".jsonl") && !filePath.endsWith(".json")) {
-    return resolveWorkspaceOrFilePath(filePath);
-  }
   return resolveWorkspaceOrFilePath(filePath);
 }
 function loadJsonlRecords(filePath) {
@@ -4945,7 +4933,9 @@ function loadOtlpTraceFile(filePath) {
     }
   }
   const roots = spans.filter((span) => !span.parentSpanId || !spanMap.has(span.parentSpanId));
-  return roots.map((root, index) => {
+  const supportedRoots = roots.filter(isAgentvEvalRoot);
+  const candidateRoots = supportedRoots.length > 0 ? supportedRoots : roots;
+  return candidateRoots.map((root, index) => {
     const descendants = collectChildSpans(root.spanId, childMap);
     const rootAttrs = parseOtlpAttributes(root.attributes);
     const parsedDescendants = descendants.map((span) => ({
@@ -5032,18 +5022,24 @@ function loadOtlpTraceFile(filePath) {
       } : void 0,
       spans: traceSummary?.spans,
       output: stringAttr(rootAttrs.agentv_output_text),
-      scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
+      scores: root.events?.filter(
+        (event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
+      ).map((event) => {
         const attrs = parseOtlpAttributes(event.attributes);
-        const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
+        const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
         return {
           name,
-          type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
-          score: numberAttr(attrs.agentv_evaluator_score) ?? 0
+          type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
+          score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
         };
       })
     };
   });
 }
+function isAgentvEvalRoot(span) {
+  const attrs = parseOtlpAttributes(span.attributes);
+  return span.name === "agentv.eval" || numberAttr(attrs.agentv_score) !== void 0 || typeof stringAttr(attrs.agentv_test_id) === "string";
+}
 function collectChildSpans(spanId, childMap) {
   if (!spanId) return [];
   const direct = childMap.get(spanId) ?? [];
@@ -5138,13 +5134,13 @@ function toTraceSummary(result) {
 }
 function listResultFiles(cwd, limit) {
   const baseDir = path6.join(cwd, ".agentv", "results");
-  const rawDir = path6.join(baseDir, "raw");
+  const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
   const files = [];
   try {
-    const entries2 = readdirSync2(rawDir, { withFileTypes: true });
+    const entries2 = readdirSync2(runsDir, { withFileTypes: true });
     for (const entry of entries2) {
       if (entry.isDirectory()) {
-        const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
+        const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
         if (primaryPath) {
           files.push({ filePath: primaryPath, displayName: entry.name });
         }
@@ -5152,7 +5148,7 @@ function listResultFiles(cwd, limit) {
     }
     for (const entry of entries2) {
       if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
-        files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
+        files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
       }
     }
   } catch {
@@ -5317,8 +5313,7 @@ var resultsExportCommand = command({
       const { results } = await loadResults(source, cwd);
       const outputDir = out ? path7.isAbsolute(out) ? out : path7.resolve(cwd, out) : deriveOutputDir(cwd, sourceFile);
       await writeArtifactsFromResults(results, outputDir, {
-        evalFile: sourceFile,
-        writeLegacyResults: false
+        evalFile: sourceFile
       });
       console.log(`Exported ${results.length} test(s) to ${outputDir}`);
       for (const result of results) {
@@ -6640,7 +6635,7 @@ var traceScoreCommand = command({
       );
       if (!hasTrace) {
         console.error(
-          `${c2.red}Error:${c2.reset} Source lacks trace metrics. Export a trace file with ${c2.bold}--trace-file${c2.reset} or ${c2.bold}--otel-file${c2.reset}.`
+          `${c2.red}Error:${c2.reset} Source lacks trace metrics. Use an OTLP trace export via ${c2.bold}--otel-file${c2.reset} or a run manifest with summary metrics in ${c2.bold}index.jsonl${c2.reset}.`
         );
         process.exit(1);
       }
@@ -7761,4 +7756,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-2ELQ6F3C.js.map
+//# sourceMappingURL=chunk-UK7UMQOX.js.map