npm - agentv - Versions diffs - 3.4.0 → 3.5.0 - Mend

agentv 3.4.0 → 3.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (21) hide show

package/README.md CHANGED Viewed

@@ -238,21 +238,19 @@ import json, sys
 data = json.load(sys.stdin)
 answer = data.get("answer", "")
-hits = []
-misses = []
+assertions = []
 if "42" in answer:
-    hits.append("Answer contains correct value (42)")
+    assertions.append({"text": "Answer contains correct value (42)", "passed": True})
 else:
-    misses.append("Answer does not contain expected value (42)")
+    assertions.append({"text": "Answer does not contain expected value (42)", "passed": False})
-score = 1.0 if hits else 0.0
+passed = sum(1 for a in assertions if a["passed"])
+score = 1.0 if passed == len(assertions) else 0.0
 print(json.dumps({
     "score": score,
-    "hits": hits,
-    "misses": misses,
-    "reasoning": f"Passed {len(hits)} check(s)"
+    "assertions": assertions,
 }))
 ```

package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} RENAMED Viewed

@@ -1,9 +1,9 @@
 import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
 import {
   AgentvProvider
-} from "./chunk-AR3QEKXH.js";
+} from "./chunk-BJV6MDBE.js";
 import "./chunk-5H446C7X.js";
 export {
   AgentvProvider
 };
-//# sourceMappingURL=agentv-provider-HDSAUUEF-LUBMM7TH.js.map
+//# sourceMappingURL=agentv-provider-NFFLXG5M-TJAWCWCX.js.map

package/dist/{chunk-A7ZDUB46.js → chunk-5GG6DDP5.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-RE5I3U2S.js";
+} from "./chunk-RLL4QGNL.js";
 import {
   createBuiltinRegistry,
   createProvider,
@@ -34,7 +34,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-GOZV2HN2.js";
+} from "./chunk-D6G4N2H2.js";
 import {
   __commonJS,
   __esm,
@@ -4185,7 +4185,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-WXXTZ7PD.js");
+      const { launchInteractiveWizard } = await import("./interactive-J7SUWZH2.js");
       await launchInteractiveWizard();
       return;
     }
@@ -5089,9 +5089,7 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
       originalScore: raw.score,
       newScore: score.score,
       verdict: score.verdict,
-      hits: score.hits,
-      misses: score.misses,
-      reasoning: score.reasoning
+      assertions: score.assertions
     });
   }
   return scored;
@@ -5110,7 +5108,9 @@ function renderTable(scored, assertSpec) {
   lines.push(cols.map((col) => "\u2500".repeat(col.width)).join("\u2500\u2500"));
   for (const r of scored) {
     const verdictColor = r.verdict === "pass" ? c2.green : c2.red;
-    const detail = r.misses.length > 0 ? r.misses[0].slice(0, 48) : r.hits.length > 0 ? r.hits[0].slice(0, 48) : r.reasoning?.slice(0, 48) ?? "";
+    const failed = r.assertions.filter((a) => !a.passed);
+    const passed = r.assertions.filter((a) => a.passed);
+    const detail = failed.length > 0 ? failed[0].text.slice(0, 48) : passed.length > 0 ? passed[0].text.slice(0, 48) : "";
     const row = [
       padRight2(r.testId.slice(0, 24), cols[0].width),
       padLeft2(formatScore(r.originalScore), cols[1].width),
@@ -5332,11 +5332,17 @@ function formatResultDetail(result, index, tree) {
   if (result.error) {
     lines.push(`  ${c2.red}Error: ${result.error}${c2.reset}`);
   }
-  if (result.hits && result.hits.length > 0) {
-    lines.push(`  ${c2.green}\u2713 Hits:${c2.reset} ${result.hits.join(", ")}`);
-  }
-  if (result.misses && result.misses.length > 0) {
-    lines.push(`  ${c2.red}\u2717 Misses:${c2.reset} ${result.misses.join(", ")}`);
+  if (result.assertions && result.assertions.length > 0) {
+    const passed = result.assertions.filter((a) => a.passed);
+    const failed = result.assertions.filter((a) => !a.passed);
+    if (passed.length > 0)
+      lines.push(
+        `  ${c2.green}\u2713 Passed:${c2.reset} ${passed.map((a) => a.text).join(", ")}`
+      );
+    if (failed.length > 0)
+      lines.push(
+        `  ${c2.red}\u2717 Failed:${c2.reset} ${failed.map((a) => a.text).join(", ")}`
+      );
   }
   if (result.scores && result.scores.length > 0) {
     lines.push(`  ${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
@@ -5344,10 +5350,14 @@ function formatResultDetail(result, index, tree) {
   if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
     lines.push(`  ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result)}`);
   }
-  if (result.reasoning) {
-    const maxLen = 200;
-    const truncated = result.reasoning.length > maxLen ? `${result.reasoning.slice(0, maxLen)}...` : result.reasoning;
-    lines.push(`  ${c2.dim}Reasoning: ${truncated}${c2.reset}`);
+  if (result.assertions && result.assertions.length > 0) {
+    const withEvidence = result.assertions.filter((a) => a.evidence);
+    if (withEvidence.length > 0) {
+      const maxLen = 200;
+      const evidence = withEvidence[0].evidence;
+      const truncated = evidence.length > maxLen ? `${evidence.slice(0, maxLen)}...` : evidence;
+      lines.push(`  ${c2.dim}Evidence: ${truncated}${c2.reset}`);
+    }
   }
   return lines.join("\n");
 }
@@ -6268,4 +6278,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-A7ZDUB46.js.map
+//# sourceMappingURL=chunk-5GG6DDP5.js.map