npm - agentv - Versions diffs - 2.13.0 → 2.14.1 - Mend

agentv 2.13.0 → 2.14.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (16)

package/README.md +14 -14
package/dist/{chunk-UWDI4UVN.js → chunk-5646K2XJ.js} +15 -14
package/dist/{chunk-UWDI4UVN.js.map → chunk-5646K2XJ.js.map} +1 -1
package/dist/{chunk-FSBZM3HT.js → chunk-OQN2GDEU.js} +188 -162
package/dist/chunk-OQN2GDEU.js.map +1 -0
package/dist/{chunk-M6JYP6A6.js → chunk-YVWP4Z3W.js} +26 -26
package/dist/chunk-YVWP4Z3W.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-CCUHG3SN.js → dist-QR5OZ4DH.js} +2 -2
package/dist/index.js +3 -3
package/dist/{interactive-P3D5O673.js → interactive-Z6ZV5OGM.js} +3 -3
package/package.json +1 -1
package/dist/chunk-FSBZM3HT.js.map +0 -1
package/dist/chunk-M6JYP6A6.js.map +0 -1
package/dist/{dist-CCUHG3SN.js.map → dist-QR5OZ4DH.js.map} +0 -0
package/dist/{interactive-P3D5O673.js.map → interactive-Z6ZV5OGM.js.map} +0 -0

package/dist/{chunk-M6JYP6A6.js → chunk-YVWP4Z3W.js} RENAMED Viewed

@@ -11,7 +11,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-UWDI4UVN.js";
+} from "./chunk-5646K2XJ.js";
 import {
   RepoManager,
   assembleLlmJudgePrompt,
@@ -26,7 +26,7 @@ import {
   toCamelCaseDeep,
   toSnakeCaseDeep,
   trimBaselineResult
-} from "./chunk-FSBZM3HT.js";
+} from "./chunk-OQN2GDEU.js";
 import {
   __commonJS,
   __esm,
@@ -3484,7 +3484,7 @@ tests:
     input: "Hello, how are you?"
     expected_output: "I'm doing well, thank you for asking!"
     assert:
-      - type: llm_judge
+      - type: llm-judge
         rubric:
           accuracy:
             weight: 0.6
@@ -3763,7 +3763,7 @@ var evalPromptJudgeCommand = command({
       });
       outputs.push({
         name: "default_llm_judge",
-        type: "llm_judge",
+        type: "llm-judge",
         status: "prompt_ready",
         prompt: {
           system_prompt: assembly.systemPrompt,
@@ -3781,7 +3781,7 @@ var evalPromptJudgeCommand = command({
 });
 async function processEvaluator(config, evalCase, candidate, promptInputs) {
   switch (config.type) {
-    case "code": {
+    case "code-judge": {
       const codeConfig = config;
       const script = codeConfig.command ?? codeConfig.script ?? [];
       const scriptCwd = codeConfig.resolvedCwd ?? codeConfig.cwd;
@@ -3806,14 +3806,14 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
         const parsed = JSON.parse(stdout);
         return {
           name: codeConfig.name,
-          type: "code_judge",
+          type: "code-judge",
           status: "completed",
           result: parsed
         };
       } catch (error) {
         return {
           name: codeConfig.name,
-          type: "code_judge",
+          type: "code-judge",
           status: "completed",
           result: {
             score: 0,
@@ -3822,7 +3822,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
         };
       }
     }
-    case "llm_judge": {
+    case "llm-judge": {
       const llmConfig = config;
       const assembly = assembleLlmJudgePrompt({
         evalCase,
@@ -3832,7 +3832,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
       });
       return {
         name: llmConfig.name,
-        type: "llm_judge",
+        type: "llm-judge",
         status: "prompt_ready",
         prompt: {
           system_prompt: assembly.systemPrompt,
@@ -3898,7 +3898,7 @@ var evalPromptOverviewCommand = command({
       "",
       "The output contains an `evaluators` array. Each evaluator has a `status`:",
       "",
-      '- **`"completed"`** \u2014 Score is final (code_judge ran deterministically). Read `result.score` (0.0\u20131.0).',
+      '- **`"completed"`** \u2014 Score is final (code-judge ran deterministically). Read `result.score` (0.0\u20131.0).',
       '- **`"prompt_ready"`** \u2014 LLM grading required. Send `prompt.system_prompt` as system and',
       "  `prompt.user_prompt` as user to your LLM. Parse the JSON response to get `score`, `hits`, `misses`.",
       ""
@@ -4087,7 +4087,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-P3D5O673.js");
+      const { launchInteractiveWizard } = await import("./interactive-Z6ZV5OGM.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4720,12 +4720,12 @@ var traceListCommand = command({
 var SUPPORTED_TYPES = [
   "contains",
   "regex",
-  "is_json",
+  "is-json",
   "equals",
   "latency",
   "cost",
-  "token_usage",
-  "execution_metrics"
+  "token-usage",
+  "execution-metrics"
 ];
 function parseKeyValues(s) {
   const result = {};
@@ -4739,7 +4739,7 @@ function parseKeyValues(s) {
 }
 function parseAssertSpec(spec) {
   const colonIdx = spec.indexOf(":");
-  const type = colonIdx === -1 ? spec : spec.slice(0, colonIdx);
+  const type = (colonIdx === -1 ? spec : spec.slice(0, colonIdx)).replace(/_/g, "-");
   const params = colonIdx === -1 ? "" : spec.slice(colonIdx + 1);
   switch (type) {
     case "contains":
@@ -4748,8 +4748,8 @@ function parseAssertSpec(spec) {
     case "regex":
       if (!params) throw new Error("regex requires a pattern: regex:<pattern>");
       return { name: "regex", type: "regex", value: params };
-    case "is_json":
-      return { name: "is_json", type: "is_json" };
+    case "is-json":
+      return { name: "is-json", type: "is-json" };
     case "equals":
       if (!params) throw new Error("equals requires a value: equals:<value>");
       return { name: "equals", type: "equals", value: params };
@@ -4765,19 +4765,19 @@ function parseAssertSpec(spec) {
         throw new Error("cost requires a budget in USD: cost:<usd>");
       return { name: "cost", type: "cost", budget };
     }
-    case "token_usage": {
+    case "token-usage": {
       const kv = parseKeyValues(params);
-      const config = { name: "token_usage", type: "token_usage" };
+      const config = { name: "token-usage", type: "token-usage" };
       if (kv.max_total) config.max_total = Number(kv.max_total);
       if (kv.max_input) config.max_input = Number(kv.max_input);
       if (kv.max_output) config.max_output = Number(kv.max_output);
       return config;
     }
-    case "execution_metrics": {
+    case "execution-metrics": {
       const kv = parseKeyValues(params);
       const config = {
-        name: "execution_metrics",
-        type: "execution_metrics"
+        name: "execution-metrics",
+        type: "execution-metrics"
       };
       if (kv.max_tool_calls) config.max_tool_calls = Number(kv.max_tool_calls);
       if (kv.max_llm_calls) config.max_llm_calls = Number(kv.max_llm_calls);
@@ -4823,7 +4823,7 @@ var stubProvider = {
   }
 };
 var stubLlmJudge = {
-  kind: "llm_judge",
+  kind: "llm-judge",
   evaluate() {
     throw new Error("trace score does not support LLM-based evaluators");
   }
@@ -4917,7 +4917,7 @@ var traceScoreCommand = command({
       type: string,
       long: "assert",
       short: "a",
-      description: "Evaluator spec: contains:<val>, regex:<pat>, is_json, equals:<val>, latency:<ms>, cost:<usd>, token_usage:<params>, execution_metrics:<params>"
+      description: "Evaluator spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
     }),
     testId: option({
       type: optional(string),
@@ -4952,7 +4952,7 @@ var traceScoreCommand = command({
       console.error(`${c2.yellow}Warning:${c2.reset} No results found in ${file}`);
       process.exit(0);
     }
-    const traceRequired = ["latency", "cost", "token_usage", "execution_metrics"].includes(
+    const traceRequired = ["latency", "cost", "token-usage", "execution-metrics"].includes(
       evaluatorConfig.type
     );
     if (traceRequired) {
@@ -5793,4 +5793,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-M6JYP6A6.js.map
+//# sourceMappingURL=chunk-YVWP4Z3W.js.map