npm - agentv - Versions diffs - 3.7.0 → 3.9.0 - Mend

agentv 3.7.0 → 3.9.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (17)

package/README.md +1 -1
package/dist/{chunk-7YS6YNJZ.js → chunk-GC5P5HHZ.js} +127 -46
package/dist/chunk-GC5P5HHZ.js.map +1 -0
package/dist/{chunk-TR6H437M.js → chunk-Q2YWV4QM.js} +21 -21
package/dist/chunk-Q2YWV4QM.js.map +1 -0
package/dist/{chunk-XGG64VIY.js → chunk-TXDPYXHY.js} +636 -892
package/dist/chunk-TXDPYXHY.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-VP6AXX6B.js → dist-PIOSPBKX.js} +2 -4
package/dist/index.js +3 -3
package/dist/{interactive-F6XECJ33.js → interactive-3VTDK5NX.js} +3 -3
package/package.json +1 -1
package/dist/chunk-7YS6YNJZ.js.map +0 -1
package/dist/chunk-TR6H437M.js.map +0 -1
package/dist/chunk-XGG64VIY.js.map +0 -1
package/dist/{dist-VP6AXX6B.js.map → dist-PIOSPBKX.js.map} +0 -0
package/dist/{interactive-F6XECJ33.js.map → interactive-3VTDK5NX.js.map} +0 -0

package/dist/{chunk-TR6H437M.js → chunk-Q2YWV4QM.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-7YS6YNJZ.js";
+} from "./chunk-GC5P5HHZ.js";
 import {
   createBuiltinRegistry,
   createProvider,
@@ -34,7 +34,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-XGG64VIY.js";
+} from "./chunk-TXDPYXHY.js";
 import {
   __commonJS,
   __esm,
@@ -3714,7 +3714,6 @@ async function getPromptEvalInput(evalPath, testId) {
   return {
     test_id: evalCase.id,
     input: resolveMessages(evalCase.input, fileMap),
-    guideline_paths: evalCase.guideline_paths,
     criteria: evalCase.criteria
   };
 }
@@ -3739,9 +3738,8 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
   if (inputText) {
     lines.push(`Input: "${inputText}"`);
   }
-  const filePaths = evalCase.file_paths.filter((p) => !evalCase.guideline_paths.includes(p));
-  if (filePaths.length > 0) {
-    lines.push(`Files: ${filePaths.join(", ")}`);
+  if (evalCase.file_paths.length > 0) {
+    lines.push(`Files: ${evalCase.file_paths.join(", ")}`);
   }
   if (evalCase.reference_answer) {
     lines.push(`Expected: "${evalCase.reference_answer}"`);
@@ -3973,7 +3971,6 @@ var evalAssertCommand = command({
         criteria: "",
         expected_output: [],
         reference_answer: "",
-        guideline_files: [],
         input_files: [],
         trace: null,
         token_usage: null,
@@ -4180,11 +4177,16 @@ var evalRunCommand = command({
       type: optional(string),
       long: "model",
       description: 'Override model for the grader target (e.g., "openai:gpt-5-mini")'
+    }),
+    outputMessages: option({
+      type: optional(string),
+      long: "output-messages",
+      description: 'Number of trailing messages to include in results output (default: 1, or "all")'
     })
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-F6XECJ33.js");
+      const { launchInteractiveWizard } = await import("./interactive-3VTDK5NX.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4220,7 +4222,8 @@ var evalRunCommand = command({
       benchmarkJson: args.benchmarkJson,
       artifacts: args.artifacts,
       graderTarget: args.graderTarget,
-      model: args.model
+      model: args.model,
+      outputMessages: args.outputMessages
     };
     await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
   }
@@ -4706,10 +4709,9 @@ function exportResults(sourceFile, content, outputDir) {
   const outputsDir = path8.join(outputDir, "outputs");
   mkdirSync2(outputsDir, { recursive: true });
   for (const result of patched) {
-    const outputText = result.outputText;
-    if (outputText) {
+    if (result.output && result.output.length > 0) {
       const id = safeTestId(result);
-      writeFileSync3(path8.join(outputsDir, `${id}.txt`), outputText);
+      writeFileSync3(path8.join(outputsDir, `${id}.txt`), JSON.stringify(result.output, null, 2));
     }
   }
 }
@@ -5021,7 +5023,6 @@ function toTraceSummary(raw) {
   return toCamelCaseDeep(raw.trace);
 }
 function extractCandidate(raw) {
-  if (raw.output_text !== void 0) return raw.output_text;
   if (raw.output !== void 0)
     return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
   return "";
@@ -5033,7 +5034,6 @@ function buildEvalTest(raw) {
     input: [],
     input_segments: [],
     expected_output: [],
-    guideline_paths: [],
     file_paths: [],
     criteria: ""
   };
@@ -5071,7 +5071,7 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
       target: { kind: "custom", name: raw.target ?? "unknown", config: {} },
       provider: stubProvider,
       attempt: 1,
-      promptInputs: { question: "", guidelines: "" },
+      promptInputs: { question: "" },
       now: /* @__PURE__ */ new Date(),
       output: Array.isArray(output) ? output : void 0,
       trace,
@@ -5325,7 +5325,7 @@ function formatResultDetail(result, index, tree) {
   }
   const scoreColor = result.score >= 0.9 ? c2.green : result.score >= 0.5 ? c2.yellow : c2.red;
   lines.push(
-    `${c2.bold}${testId}${c2.reset}  ${scoreColor}${formatScore(result.score)}${c2.reset}${result.target ? `  ${c2.dim}target: ${result.target}${c2.reset}` : ""}${result.dataset ? `  ${c2.dim}dataset: ${result.dataset}${c2.reset}` : ""}`
+    `${c2.bold}${testId}${c2.reset}  ${scoreColor}${formatScore(result.score)}${c2.reset}${result.target ? `  ${c2.dim}target: ${result.target}${c2.reset}` : ""}${result.eval_set ? `  ${c2.dim}eval-set: ${result.eval_set}${c2.reset}` : ""}`
   );
   if (result.error) {
     lines.push(`  ${c2.red}Error: ${result.error}${c2.reset}`);
@@ -5499,8 +5499,8 @@ function groupResults(results, groupBy2) {
       case "target":
         key = result.target ?? "unknown";
         break;
-      case "dataset":
-        key = result.dataset ?? "unknown";
+      case "eval-set":
+        key = result.eval_set ?? "unknown";
         break;
       case "test-id":
         key = result.test_id ?? result.eval_id ?? "unknown";
@@ -5582,10 +5582,10 @@ var traceStatsCommand = command({
       description: "Path to JSONL result file"
     }),
     groupBy: option({
-      type: optional(oneOf(["target", "dataset", "test-id"])),
+      type: optional(oneOf(["target", "eval-set", "test-id"])),
       long: "group-by",
       short: "g",
-      description: "Group statistics by: target, dataset, or test-id"
+      description: "Group statistics by: target, eval-set, or test-id"
     }),
     format: option({
       type: optional(oneOf(["table", "json"])),
@@ -6276,4 +6276,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-TR6H437M.js.map
+//# sourceMappingURL=chunk-Q2YWV4QM.js.map