npm - agentv - Versions diffs - 2.12.0 → 2.14.0-next.1 - Mend

agentv 2.12.0 → 2.14.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +14 -14
package/dist/{chunk-YBJX5CP6.js → chunk-K2APOWTE.js} +213 -29
package/dist/chunk-K2APOWTE.js.map +1 -0
package/dist/{chunk-LUHCYBMD.js → chunk-OQN2GDEU.js} +251 -164
package/dist/chunk-OQN2GDEU.js.map +1 -0
package/dist/{chunk-6KU2ZUFJ.js → chunk-ZSSGXZX6.js} +39 -77
package/dist/chunk-ZSSGXZX6.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-OPPA4P5R.js → dist-QR5OZ4DH.js} +4 -2
package/dist/index.js +3 -3
package/dist/{interactive-TOUKPSHP.js → interactive-WF6UO63B.js} +3 -3
package/package.json +4 -2
package/dist/chunk-6KU2ZUFJ.js.map +0 -1
package/dist/chunk-LUHCYBMD.js.map +0 -1
package/dist/chunk-YBJX5CP6.js.map +0 -1
package/dist/{dist-OPPA4P5R.js.map → dist-QR5OZ4DH.js.map} +0 -0
package/dist/{interactive-TOUKPSHP.js.map → interactive-WF6UO63B.js.map} +0 -0

package/dist/{chunk-6KU2ZUFJ.js → chunk-ZSSGXZX6.js} RENAMED Viewed

@@ -2,6 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
 import {
   detectFileType,
   findRepoRoot,
+  package_default,
   resolveEvalPaths,
   runEvalCommand,
   selectTarget,
@@ -10,7 +11,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-YBJX5CP6.js";
+} from "./chunk-K2APOWTE.js";
 import {
   RepoManager,
   assembleLlmJudgePrompt,
@@ -25,7 +26,7 @@ import {
   toCamelCaseDeep,
   toSnakeCaseDeep,
   trimBaselineResult
-} from "./chunk-LUHCYBMD.js";
+} from "./chunk-OQN2GDEU.js";
 import {
   __commonJS,
   __esm,
@@ -2875,56 +2876,6 @@ function oneOf(literals) {
   };
 }
-// package.json
-var package_default = {
-  name: "agentv",
-  version: "2.12.0",
-  description: "CLI entry point for AgentV",
-  type: "module",
-  repository: {
-    type: "git",
-    url: "https://github.com/EntityProcess/agentv.git"
-  },
-  homepage: "https://github.com/EntityProcess/agentv#readme",
-  bugs: {
-    url: "https://github.com/EntityProcess/agentv/issues"
-  },
-  bin: {
-    agentv: "./dist/cli.js"
-  },
-  files: ["dist", "README.md"],
-  scripts: {
-    dev: "bun src/cli.ts",
-    build: "tsup && bun run copy-readme",
-    "copy-readme": `bun -e "import { cpSync } from 'fs'; cpSync('../../README.md', 'README.md')"`,
-    prepublishOnly: "bun run copy-readme",
-    typecheck: "tsc --noEmit",
-    lint: "biome check .",
-    format: "biome format --write .",
-    fix: "biome check --write .",
-    test: "bun test",
-    "test:watch": "bun test --watch"
-  },
-  dependencies: {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.49",
-    "@github/copilot-sdk": "^0.1.25",
-    "@inquirer/prompts": "^8.2.1",
-    "@mariozechner/pi-agent-core": "^0.54.2",
-    "@mariozechner/pi-ai": "^0.54.2",
-    "@openai/codex-sdk": "^0.104.0",
-    "cmd-ts": "^0.14.3",
-    dotenv: "^16.4.5",
-    "fast-glob": "^3.3.3",
-    json5: "^2.2.3",
-    micromatch: "^4.0.8",
-    yaml: "^2.6.1"
-  },
-  devDependencies: {
-    "@agentv/core": "workspace:*",
-    execa: "^9.3.0"
-  }
-};
 // src/commands/cache/add.ts
 import { existsSync } from "node:fs";
 import { join, resolve } from "node:path";
@@ -3533,7 +3484,7 @@ tests:
     input: "Hello, how are you?"
     expected_output: "I'm doing well, thank you for asking!"
     assert:
-      - type: llm_judge
+      - type: llm-judge
         rubric:
           accuracy:
             weight: 0.6
@@ -3812,7 +3763,7 @@ var evalPromptJudgeCommand = command({
       });
       outputs.push({
         name: "default_llm_judge",
-        type: "llm_judge",
+        type: "llm-judge",
         status: "prompt_ready",
         prompt: {
           system_prompt: assembly.systemPrompt,
@@ -3830,7 +3781,7 @@ var evalPromptJudgeCommand = command({
 });
 async function processEvaluator(config, evalCase, candidate, promptInputs) {
   switch (config.type) {
-    case "code": {
+    case "code-judge": {
       const codeConfig = config;
       const script = codeConfig.command ?? codeConfig.script ?? [];
       const scriptCwd = codeConfig.resolvedCwd ?? codeConfig.cwd;
@@ -3855,14 +3806,14 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
         const parsed = JSON.parse(stdout);
         return {
           name: codeConfig.name,
-          type: "code_judge",
+          type: "code-judge",
           status: "completed",
           result: parsed
         };
       } catch (error) {
         return {
           name: codeConfig.name,
-          type: "code_judge",
+          type: "code-judge",
           status: "completed",
           result: {
             score: 0,
@@ -3871,7 +3822,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
         };
       }
     }
-    case "llm_judge": {
+    case "llm-judge": {
       const llmConfig = config;
       const assembly = assembleLlmJudgePrompt({
         evalCase,
@@ -3881,7 +3832,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
       });
       return {
         name: llmConfig.name,
-        type: "llm_judge",
+        type: "llm-judge",
         status: "prompt_ready",
         prompt: {
           system_prompt: assembly.systemPrompt,
@@ -3947,7 +3898,7 @@ var evalPromptOverviewCommand = command({
       "",
       "The output contains an `evaluators` array. Each evaluator has a `status`:",
       "",
-      '- **`"completed"`** \u2014 Score is final (code_judge ran deterministically). Read `result.score` (0.0\u20131.0).',
+      '- **`"completed"`** \u2014 Score is final (code-judge ran deterministically). Read `result.score` (0.0\u20131.0).',
       '- **`"prompt_ready"`** \u2014 LLM grading required. Send `prompt.system_prompt` as system and',
       "  `prompt.user_prompt` as user to your LLM. Parse the JSON response to get `score`, `hits`, `misses`.",
       ""
@@ -4123,11 +4074,20 @@ var evalRunCommand = command({
     otelGroupTurns: flag({
       long: "otel-group-turns",
       description: "Group messages into turn spans for multi-turn evaluations (requires --export-otel)"
+    }),
+    retryErrors: option({
+      type: optional(string),
+      long: "retry-errors",
+      description: "Path to previous output JSONL \u2014 re-run only execution_error test cases"
+    }),
+    strict: flag({
+      long: "strict",
+      description: "Exit with error on version mismatch (instead of warning)"
     })
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-TOUKPSHP.js");
+      const { launchInteractiveWizard } = await import("./interactive-WF6UO63B.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4157,7 +4117,9 @@ var evalRunCommand = command({
       exportOtel: args.exportOtel,
       otelBackend: args.otelBackend,
       otelCaptureContent: args.otelCaptureContent,
-      otelGroupTurns: args.otelGroupTurns
+      otelGroupTurns: args.otelGroupTurns,
+      retryErrors: args.retryErrors,
+      strict: args.strict
     };
     await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
   }
@@ -4758,12 +4720,12 @@ var traceListCommand = command({
 var SUPPORTED_TYPES = [
   "contains",
   "regex",
-  "is_json",
+  "is-json",
   "equals",
   "latency",
   "cost",
-  "token_usage",
-  "execution_metrics"
+  "token-usage",
+  "execution-metrics"
 ];
 function parseKeyValues(s) {
   const result = {};
@@ -4777,7 +4739,7 @@ function parseKeyValues(s) {
 }
 function parseAssertSpec(spec) {
   const colonIdx = spec.indexOf(":");
-  const type = colonIdx === -1 ? spec : spec.slice(0, colonIdx);
+  const type = (colonIdx === -1 ? spec : spec.slice(0, colonIdx)).replace(/_/g, "-");
   const params = colonIdx === -1 ? "" : spec.slice(colonIdx + 1);
   switch (type) {
     case "contains":
@@ -4786,8 +4748,8 @@ function parseAssertSpec(spec) {
     case "regex":
       if (!params) throw new Error("regex requires a pattern: regex:<pattern>");
       return { name: "regex", type: "regex", value: params };
-    case "is_json":
-      return { name: "is_json", type: "is_json" };
+    case "is-json":
+      return { name: "is-json", type: "is-json" };
     case "equals":
       if (!params) throw new Error("equals requires a value: equals:<value>");
       return { name: "equals", type: "equals", value: params };
@@ -4803,19 +4765,19 @@ function parseAssertSpec(spec) {
         throw new Error("cost requires a budget in USD: cost:<usd>");
       return { name: "cost", type: "cost", budget };
     }
-    case "token_usage": {
+    case "token-usage": {
       const kv = parseKeyValues(params);
-      const config = { name: "token_usage", type: "token_usage" };
+      const config = { name: "token-usage", type: "token-usage" };
       if (kv.max_total) config.max_total = Number(kv.max_total);
       if (kv.max_input) config.max_input = Number(kv.max_input);
       if (kv.max_output) config.max_output = Number(kv.max_output);
       return config;
     }
-    case "execution_metrics": {
+    case "execution-metrics": {
       const kv = parseKeyValues(params);
       const config = {
-        name: "execution_metrics",
-        type: "execution_metrics"
+        name: "execution-metrics",
+        type: "execution-metrics"
       };
       if (kv.max_tool_calls) config.max_tool_calls = Number(kv.max_tool_calls);
       if (kv.max_llm_calls) config.max_llm_calls = Number(kv.max_llm_calls);
@@ -4861,7 +4823,7 @@ var stubProvider = {
   }
 };
 var stubLlmJudge = {
-  kind: "llm_judge",
+  kind: "llm-judge",
   evaluate() {
     throw new Error("trace score does not support LLM-based evaluators");
   }
@@ -4955,7 +4917,7 @@ var traceScoreCommand = command({
       type: string,
       long: "assert",
       short: "a",
-      description: "Evaluator spec: contains:<val>, regex:<pat>, is_json, equals:<val>, latency:<ms>, cost:<usd>, token_usage:<params>, execution_metrics:<params>"
+      description: "Evaluator spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
     }),
     testId: option({
       type: optional(string),
@@ -4990,7 +4952,7 @@ var traceScoreCommand = command({
       console.error(`${c2.yellow}Warning:${c2.reset} No results found in ${file}`);
       process.exit(0);
     }
-    const traceRequired = ["latency", "cost", "token_usage", "execution_metrics"].includes(
+    const traceRequired = ["latency", "cost", "token-usage", "execution-metrics"].includes(
       evaluatorConfig.type
     );
     if (traceRequired) {
@@ -5831,4 +5793,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-6KU2ZUFJ.js.map
+//# sourceMappingURL=chunk-ZSSGXZX6.js.map