npm - agentv - Versions diffs - 3.13.0 → 3.13.2 - Mend

agentv 3.13.0 → 3.13.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +5 -5
package/dist/{chunk-6H4IAXQH.js → chunk-4Z5E5CYT.js} +54 -22
package/dist/chunk-4Z5E5CYT.js.map +1 -0
package/dist/{chunk-7OHZAFND.js → chunk-D3LNJUUB.js} +67 -35
package/dist/chunk-D3LNJUUB.js.map +1 -0
package/dist/{chunk-DJU4C6NS.js → chunk-X2343WOK.js} +31 -19
package/dist/chunk-X2343WOK.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-SMKOBBFB.js → dist-KPMR7RBT.js} +4 -2
package/dist/index.js +3 -3
package/dist/{interactive-RV664PCR.js → interactive-HVKLYGRX.js} +3 -3
package/dist/templates/.agentv/.env.example +23 -0
package/dist/templates/.agentv/config.yaml +13 -4
package/dist/templates/.agentv/targets.yaml +16 -0
package/package.json +1 -1
package/dist/chunk-6H4IAXQH.js.map +0 -1
package/dist/chunk-7OHZAFND.js.map +0 -1
package/dist/chunk-DJU4C6NS.js.map +0 -1
/package/dist/{dist-SMKOBBFB.js.map → dist-KPMR7RBT.js.map} +0 -0
/package/dist/{interactive-RV664PCR.js.map → interactive-HVKLYGRX.js.map} +0 -0

package/dist/{chunk-DJU4C6NS.js → chunk-X2343WOK.js} RENAMED Viewed

@@ -2,6 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
 import {
   HtmlWriter,
   RESULT_INDEX_FILENAME,
+  RESULT_RUNS_DIRNAME,
   detectFileType,
   findRepoRoot,
   loadLightweightResults,
@@ -21,7 +22,7 @@ import {
   validateFileReferences,
   validateTargetsFile,
   writeArtifactsFromResults
-} from "./chunk-6H4IAXQH.js";
+} from "./chunk-4Z5E5CYT.js";
 import {
   createBuiltinRegistry,
   executeScript,
@@ -38,7 +39,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-7OHZAFND.js";
+} from "./chunk-D3LNJUUB.js";
 import {
   __commonJS,
   __esm,
@@ -3388,7 +3389,7 @@ function convertEvalsJsonToYaml(inputPath) {
       for (const assertion of test.assertions) {
         lines.push(`      - name: ${assertion.name}`);
         lines.push(`        type: ${assertion.type}`);
-        if ((assertion.type === "llm-grader" || assertion.type === "llm-judge") && "prompt" in assertion) {
+        if (assertion.type === "llm-grader" && "prompt" in assertion) {
           const prompt = assertion.prompt;
           lines.push(`        prompt: "${prompt.replace(/"/g, '\\"')}"`);
         }
@@ -3745,10 +3746,10 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
           if (item.outcome) criteria.push(item.outcome);
         }
       }
-    } else if (type === "llm-grader" || type === "llm_grader" || type === "llm-judge" || type === "llm_judge") {
+    } else if (type === "llm-grader" || type === "llm_grader") {
       const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
       criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
-    } else if (type === "code-grader" || type === "code_grader" || type === "code-judge" || type === "code_judge") {
+    } else if (type === "code-grader" || type === "code_grader") {
       const name = entry.name ?? type;
       const desc = bag.description ?? entry.description;
       criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
@@ -4175,11 +4176,16 @@ var evalRunCommand = command({
       type: optional(string),
       long: "output-messages",
       description: 'Number of trailing messages to include in results output (default: 1, or "all")'
+    }),
+    threshold: option({
+      type: optional(number),
+      long: "threshold",
+      description: "Suite-level quality gate: exit 1 if mean score falls below this value (0-1)"
     })
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-RV664PCR.js");
+      const { launchInteractiveWizard } = await import("./interactive-HVKLYGRX.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4215,9 +4221,13 @@ var evalRunCommand = command({
       artifacts: args.artifacts,
       graderTarget: args.graderTarget,
       model: args.model,
-      outputMessages: args.outputMessages
+      outputMessages: args.outputMessages,
+      threshold: args.threshold
     };
-    await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
+    const result = await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
+    if (result?.thresholdFailed) {
+      process.exit(1);
+    }
   }
 });
@@ -4760,7 +4770,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
   let hasCodeGraders = false;
   let hasLlmGraders = false;
   for (const assertion of assertions) {
-    if (assertion.type === "code-grader" || assertion.type === "code-judge") {
+    if (assertion.type === "code-grader") {
       if (!hasCodeGraders) {
         await mkdir3(codeGradersDir, { recursive: true });
         hasCodeGraders = true;
@@ -4773,7 +4783,7 @@ async function writeGraderConfigs(testDir, assertions, evalDir) {
         weight: config.weight ?? 1,
         config: config.config ?? {}
       });
-    } else if (assertion.type === "llm-grader" || assertion.type === "llm-judge") {
+    } else if (assertion.type === "llm-grader") {
       if (!hasLlmGraders) {
         await mkdir3(llmGradersDir, { recursive: true });
         hasLlmGraders = true;
@@ -5021,13 +5031,15 @@ function loadOtlpTraceFile(filePath) {
       } : void 0,
       spans: traceSummary?.spans,
       output: stringAttr(rootAttrs.agentv_output_text),
-      scores: root.events?.filter((event) => event.name?.startsWith("agentv.evaluator.")).map((event) => {
+      scores: root.events?.filter(
+        (event) => event.name?.startsWith("agentv.grader.") || event.name?.startsWith("agentv.evaluator.")
+      ).map((event) => {
         const attrs = parseOtlpAttributes(event.attributes);
-        const name = event.name?.replace(/^agentv\.evaluator\./, "") ?? "unknown";
+        const name = event.name?.replace(/^agentv\.grader\./, "").replace(/^agentv\.evaluator\./, "") ?? "unknown";
         return {
           name,
-          type: stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
-          score: numberAttr(attrs.agentv_evaluator_score) ?? 0
+          type: stringAttr(attrs.agentv_grader_type) ?? stringAttr(attrs.agentv_evaluator_type) ?? "unknown",
+          score: numberAttr(attrs.agentv_grader_score) ?? numberAttr(attrs.agentv_evaluator_score) ?? 0
         };
       })
     };
@@ -5131,13 +5143,13 @@ function toTraceSummary(result) {
 }
 function listResultFiles(cwd, limit) {
   const baseDir = path6.join(cwd, ".agentv", "results");
-  const rawDir = path6.join(baseDir, "raw");
+  const runsDir = path6.join(baseDir, RESULT_RUNS_DIRNAME);
   const files = [];
   try {
-    const entries2 = readdirSync2(rawDir, { withFileTypes: true });
+    const entries2 = readdirSync2(runsDir, { withFileTypes: true });
     for (const entry of entries2) {
       if (entry.isDirectory()) {
-        const primaryPath = resolveExistingRunPrimaryPath(path6.join(rawDir, entry.name));
+        const primaryPath = resolveExistingRunPrimaryPath(path6.join(runsDir, entry.name));
         if (primaryPath) {
           files.push({ filePath: primaryPath, displayName: entry.name });
         }
@@ -5145,7 +5157,7 @@ function listResultFiles(cwd, limit) {
     }
     for (const entry of entries2) {
       if (!entry.isDirectory() && entry.name.endsWith(".jsonl")) {
-        files.push({ filePath: path6.join(rawDir, entry.name), displayName: entry.name });
+        files.push({ filePath: path6.join(runsDir, entry.name), displayName: entry.name });
       }
     }
   } catch {
@@ -7753,4 +7765,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-DJU4C6NS.js.map
+//# sourceMappingURL=chunk-X2343WOK.js.map