npm - agentv - Versions diffs - 3.4.0 → 3.6.0 - Mend

agentv 3.4.0 → 3.6.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +20 -11
package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +2 -2
package/dist/{chunk-AR3QEKXH.js → chunk-BJV6MDBE.js} +3 -3
package/dist/{chunk-AR3QEKXH.js.map → chunk-BJV6MDBE.js.map} +1 -1
package/dist/{chunk-A7ZDUB46.js → chunk-IP5BO54H.js} +35 -26
package/dist/chunk-IP5BO54H.js.map +1 -0
package/dist/{chunk-GOZV2HN2.js → chunk-K4RXLQWV.js} +453 -494
package/dist/chunk-K4RXLQWV.js.map +1 -0
package/dist/{chunk-RE5I3U2S.js → chunk-UU5N43YS.js} +27 -46
package/dist/chunk-UU5N43YS.js.map +1 -0
package/dist/cli.js +4 -4
package/dist/{dist-AFDYFH6Y.js → dist-VWEFBDZ5.js} +3 -5
package/dist/index.js +4 -4
package/dist/{interactive-WXXTZ7PD.js → interactive-5S4ILY2Y.js} +4 -4
package/dist/templates/.agentv/.env.example +9 -11
package/dist/templates/.agentv/config.yaml +0 -5
package/dist/templates/.agentv/targets.yaml +16 -0
package/package.json +1 -1
package/dist/chunk-A7ZDUB46.js.map +0 -1
package/dist/chunk-GOZV2HN2.js.map +0 -1
package/dist/chunk-RE5I3U2S.js.map +0 -1
/package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
/package/dist/{dist-AFDYFH6Y.js.map → dist-VWEFBDZ5.js.map} +0 -0
/package/dist/{interactive-WXXTZ7PD.js.map → interactive-5S4ILY2Y.js.map} +0 -0

package/dist/{chunk-A7ZDUB46.js → chunk-IP5BO54H.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-RE5I3U2S.js";
+} from "./chunk-UU5N43YS.js";
 import {
   createBuiltinRegistry,
   createProvider,
@@ -34,7 +34,7 @@ import {
   toSnakeCaseDeep as toSnakeCaseDeep2,
   transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-GOZV2HN2.js";
+} from "./chunk-K4RXLQWV.js";
 import {
   __commonJS,
   __esm,
@@ -3493,9 +3493,9 @@ var ASSERTION_TEMPLATES = {
   default: `#!/usr/bin/env bun
 import { defineAssertion } from '@agentv/eval';
-export default defineAssertion(({ answer }) => {
+export default defineAssertion(({ outputText }) => {
   // TODO: Implement your assertion logic
-  const pass = answer.length > 0;
+  const pass = outputText.length > 0;
   return {
     pass,
     reasoning: pass ? 'Output has content' : 'Output is empty',
@@ -3505,9 +3505,9 @@ export default defineAssertion(({ answer }) => {
   score: `#!/usr/bin/env bun
 import { defineAssertion } from '@agentv/eval';
-export default defineAssertion(({ answer }) => {
+export default defineAssertion(({ outputText }) => {
   // TODO: Implement your scoring logic (0.0 to 1.0)
-  const score = answer.length > 0 ? 1.0 : 0.0;
+  const score = outputText.length > 0 ? 1.0 : 0.0;
   return {
     pass: score >= 0.5,
     score,
@@ -3967,7 +3967,6 @@ var evalAssertCommand = command({
     }
     const payload = JSON.stringify(
       {
-        answer: resolvedOutput,
         output: [{ role: "assistant", content: resolvedOutput }],
         input: [{ role: "user", content: resolvedInput }],
         question: resolvedInput,
@@ -4185,7 +4184,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-WXXTZ7PD.js");
+      const { launchInteractiveWizard } = await import("./interactive-5S4ILY2Y.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4707,10 +4706,10 @@ function exportResults(sourceFile, content, outputDir) {
   const outputsDir = path8.join(outputDir, "outputs");
   mkdirSync2(outputsDir, { recursive: true });
   for (const result of patched) {
-    const answer = result.answer;
-    if (answer) {
+    const outputText = result.outputText;
+    if (outputText) {
       const id = safeTestId(result);
-      writeFileSync3(path8.join(outputsDir, `${id}.txt`), answer);
+      writeFileSync3(path8.join(outputsDir, `${id}.txt`), outputText);
     }
   }
 }
@@ -5022,7 +5021,7 @@ function toTraceSummary(raw) {
   return toCamelCaseDeep(raw.trace);
 }
 function extractCandidate(raw) {
-  if (raw.answer !== void 0) return raw.answer;
+  if (raw.output_text !== void 0) return raw.output_text;
   if (raw.output !== void 0)
     return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
   return "";
@@ -5089,9 +5088,7 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
       originalScore: raw.score,
       newScore: score.score,
       verdict: score.verdict,
-      hits: score.hits,
-      misses: score.misses,
-      reasoning: score.reasoning
+      assertions: score.assertions
     });
   }
   return scored;
@@ -5110,7 +5107,9 @@ function renderTable(scored, assertSpec) {
   lines.push(cols.map((col) => "\u2500".repeat(col.width)).join("\u2500\u2500"));
   for (const r of scored) {
     const verdictColor = r.verdict === "pass" ? c2.green : c2.red;
-    const detail = r.misses.length > 0 ? r.misses[0].slice(0, 48) : r.hits.length > 0 ? r.hits[0].slice(0, 48) : r.reasoning?.slice(0, 48) ?? "";
+    const failed = r.assertions.filter((a) => !a.passed);
+    const passed = r.assertions.filter((a) => a.passed);
+    const detail = failed.length > 0 ? failed[0].text.slice(0, 48) : passed.length > 0 ? passed[0].text.slice(0, 48) : "";
     const row = [
       padRight2(r.testId.slice(0, 24), cols[0].width),
       padLeft2(formatScore(r.originalScore), cols[1].width),
@@ -5332,11 +5331,17 @@ function formatResultDetail(result, index, tree) {
   if (result.error) {
     lines.push(`  ${c2.red}Error: ${result.error}${c2.reset}`);
   }
-  if (result.hits && result.hits.length > 0) {
-    lines.push(`  ${c2.green}\u2713 Hits:${c2.reset} ${result.hits.join(", ")}`);
-  }
-  if (result.misses && result.misses.length > 0) {
-    lines.push(`  ${c2.red}\u2717 Misses:${c2.reset} ${result.misses.join(", ")}`);
+  if (result.assertions && result.assertions.length > 0) {
+    const passed = result.assertions.filter((a) => a.passed);
+    const failed = result.assertions.filter((a) => !a.passed);
+    if (passed.length > 0)
+      lines.push(
+        `  ${c2.green}\u2713 Passed:${c2.reset} ${passed.map((a) => a.text).join(", ")}`
+      );
+    if (failed.length > 0)
+      lines.push(
+        `  ${c2.red}\u2717 Failed:${c2.reset} ${failed.map((a) => a.text).join(", ")}`
+      );
   }
   if (result.scores && result.scores.length > 0) {
     lines.push(`  ${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
@@ -5344,10 +5349,14 @@ function formatResultDetail(result, index, tree) {
   if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
     lines.push(`  ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result)}`);
   }
-  if (result.reasoning) {
-    const maxLen = 200;
-    const truncated = result.reasoning.length > maxLen ? `${result.reasoning.slice(0, maxLen)}...` : result.reasoning;
-    lines.push(`  ${c2.dim}Reasoning: ${truncated}${c2.reset}`);
+  if (result.assertions && result.assertions.length > 0) {
+    const withEvidence = result.assertions.filter((a) => a.evidence);
+    if (withEvidence.length > 0) {
+      const maxLen = 200;
+      const evidence = withEvidence[0].evidence;
+      const truncated = evidence.length > maxLen ? `${evidence.slice(0, maxLen)}...` : evidence;
+      lines.push(`  ${c2.dim}Evidence: ${truncated}${c2.reset}`);
+    }
   }
   return lines.join("\n");
 }
@@ -6268,4 +6277,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-A7ZDUB46.js.map
+//# sourceMappingURL=chunk-IP5BO54H.js.map