npm - agentv - Versions diffs - 3.8.0 → 3.9.1 - Mend

agentv 3.8.0 → 3.9.1

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (17)

package/README.md +1 -1
package/dist/{chunk-YZRGQ6ZS.js → chunk-FNIEABNM.js} +12 -16
package/dist/chunk-FNIEABNM.js.map +1 -0
package/dist/{chunk-ASYRKFAI.js → chunk-FRA6PDLZ.js} +10 -38
package/dist/chunk-FRA6PDLZ.js.map +1 -0
package/dist/{chunk-F4UDJ7LG.js → chunk-X24J6HCV.js} +382 -621
package/dist/chunk-X24J6HCV.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-4AQUJJAP.js → dist-LPIGPS52.js} +4 -4
package/dist/index.js +3 -3
package/dist/{interactive-OPQGDF77.js → interactive-O7HENH55.js} +3 -3
package/package.json +1 -1
package/dist/chunk-ASYRKFAI.js.map +0 -1
package/dist/chunk-F4UDJ7LG.js.map +0 -1
package/dist/chunk-YZRGQ6ZS.js.map +0 -1
package/dist/{dist-4AQUJJAP.js.map → dist-LPIGPS52.js.map} +0 -0
package/dist/{interactive-OPQGDF77.js.map → interactive-O7HENH55.js.map} +0 -0

package/README.md CHANGED Viewed

@@ -164,7 +164,7 @@ For large-scale evaluations, AgentV supports JSONL (JSON Lines) format as an alt
 Optional sidecar YAML metadata file (`dataset.eval.yaml` alongside `dataset.jsonl`):
 ```yaml
 description: Math evaluation dataset
-dataset: math-tests
+name: math-tests
 execution:
   target: azure-llm
 assertions:

package/dist/{chunk-YZRGQ6ZS.js → chunk-FNIEABNM.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-ASYRKFAI.js";
+} from "./chunk-FRA6PDLZ.js";
 import {
   createBuiltinRegistry,
   createProvider,
@@ -34,7 +34,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-F4UDJ7LG.js";
+} from "./chunk-X24J6HCV.js";
 import {
   __commonJS,
   __esm,
@@ -3714,7 +3714,6 @@ async function getPromptEvalInput(evalPath, testId) {
   return {
     test_id: evalCase.id,
     input: resolveMessages(evalCase.input, fileMap),
-    guideline_paths: evalCase.guideline_paths,
     criteria: evalCase.criteria
   };
 }
@@ -3739,9 +3738,8 @@ async function getPromptEvalGradingBrief(evalPath, testId) {
   if (inputText) {
     lines.push(`Input: "${inputText}"`);
   }
-  const filePaths = evalCase.file_paths.filter((p) => !evalCase.guideline_paths.includes(p));
-  if (filePaths.length > 0) {
-    lines.push(`Files: ${filePaths.join(", ")}`);
+  if (evalCase.file_paths.length > 0) {
+    lines.push(`Files: ${evalCase.file_paths.join(", ")}`);
   }
   if (evalCase.reference_answer) {
     lines.push(`Expected: "${evalCase.reference_answer}"`);
@@ -3973,7 +3971,6 @@ var evalAssertCommand = command({
         criteria: "",
         expected_output: [],
         reference_answer: "",
-        guideline_files: [],
         input_files: [],
         trace: null,
         token_usage: null,
@@ -4189,7 +4186,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-OPQGDF77.js");
+      const { launchInteractiveWizard } = await import("./interactive-O7HENH55.js");
       await launchInteractiveWizard();
       return;
     }
@@ -5037,7 +5034,6 @@ function buildEvalTest(raw) {
     input: [],
     input_segments: [],
     expected_output: [],
-    guideline_paths: [],
     file_paths: [],
     criteria: ""
   };
@@ -5075,7 +5071,7 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
       target: { kind: "custom", name: raw.target ?? "unknown", config: {} },
       provider: stubProvider,
       attempt: 1,
-      promptInputs: { question: "", guidelines: "" },
+      promptInputs: { question: "" },
       now: /* @__PURE__ */ new Date(),
       output: Array.isArray(output) ? output : void 0,
       trace,
@@ -5329,7 +5325,7 @@ function formatResultDetail(result, index, tree) {
   }
   const scoreColor = result.score >= 0.9 ? c2.green : result.score >= 0.5 ? c2.yellow : c2.red;
   lines.push(
-    `${c2.bold}${testId}${c2.reset}  ${scoreColor}${formatScore(result.score)}${c2.reset}${result.target ? `  ${c2.dim}target: ${result.target}${c2.reset}` : ""}${result.dataset ? `  ${c2.dim}dataset: ${result.dataset}${c2.reset}` : ""}`
+    `${c2.bold}${testId}${c2.reset}  ${scoreColor}${formatScore(result.score)}${c2.reset}${result.target ? `  ${c2.dim}target: ${result.target}${c2.reset}` : ""}${result.eval_set ? `  ${c2.dim}eval-set: ${result.eval_set}${c2.reset}` : ""}`
   );
   if (result.error) {
     lines.push(`  ${c2.red}Error: ${result.error}${c2.reset}`);
@@ -5503,8 +5499,8 @@ function groupResults(results, groupBy2) {
       case "target":
         key = result.target ?? "unknown";
         break;
-      case "dataset":
-        key = result.dataset ?? "unknown";
+      case "eval-set":
+        key = result.eval_set ?? "unknown";
         break;
       case "test-id":
         key = result.test_id ?? result.eval_id ?? "unknown";
@@ -5586,10 +5582,10 @@ var traceStatsCommand = command({
       description: "Path to JSONL result file"
     }),
     groupBy: option({
-      type: optional(oneOf(["target", "dataset", "test-id"])),
+      type: optional(oneOf(["target", "eval-set", "test-id"])),
       long: "group-by",
       short: "g",
-      description: "Group statistics by: target, dataset, or test-id"
+      description: "Group statistics by: target, eval-set, or test-id"
     }),
     format: option({
       type: optional(oneOf(["table", "json"])),
@@ -6280,4 +6276,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-YZRGQ6ZS.js.map
+//# sourceMappingURL=chunk-FNIEABNM.js.map