npm - agentv - Versions diffs - 2.18.4 → 3.0.0-next.1 - Mend

agentv 2.18.4 → 3.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/README.md +62 -36
package/dist/agentv-provider-5CJVBBGG-2XVZBW7L.js +9 -0
package/dist/{chunk-RMUVJ44Z.js → chunk-5WIB7A27.js} +598 -403
package/dist/chunk-5WIB7A27.js.map +1 -0
package/dist/chunk-6GSYTMXD.js +31520 -0
package/dist/chunk-6GSYTMXD.js.map +1 -0
package/dist/{chunk-KSUL3F3R.js → chunk-DY4ZDTTO.js} +1018 -140
package/dist/chunk-DY4ZDTTO.js.map +1 -0
package/dist/chunk-HF4X7ALN.js +24299 -0
package/dist/chunk-HF4X7ALN.js.map +1 -0
package/dist/{chunk-FV32QHPB.js → chunk-XOSNETAV.js} +1 -1
package/dist/cli.js +5 -4
package/dist/cli.js.map +1 -1
package/dist/{dist-EDQZMZH2.js → dist-WN2QIOQR.js} +27 -11
package/dist/{esm-DX3WQKEN.js → esm-CZAWIY6F.js} +2 -2
package/dist/esm-CZAWIY6F.js.map +1 -0
package/dist/index.js +5 -4
package/dist/{interactive-J4IBXJF7.js → interactive-B432TCRZ.js} +5 -4
package/dist/{interactive-J4IBXJF7.js.map → interactive-B432TCRZ.js.map} +1 -1
package/dist/{src-2N5EJ2N6.js → src-ML4D2MC2.js} +2 -2
package/dist/templates/.agentv/config.yaml +0 -5
package/dist/templates/.agentv/targets.yaml +8 -11
package/package.json +2 -2
package/dist/chunk-KSUL3F3R.js.map +0 -1
package/dist/chunk-RMUVJ44Z.js.map +0 -1
package/dist/chunk-YTHTGLMT.js +0 -49786
package/dist/chunk-YTHTGLMT.js.map +0 -1
/package/dist/{dist-EDQZMZH2.js.map → agentv-provider-5CJVBBGG-2XVZBW7L.js.map} +0 -0
/package/dist/{chunk-FV32QHPB.js.map → chunk-XOSNETAV.js.map} +0 -0
/package/dist/{esm-DX3WQKEN.js.map → dist-WN2QIOQR.js.map} +0 -0
/package/dist/{src-2N5EJ2N6.js.map → src-ML4D2MC2.js.map} +0 -0

package/dist/{chunk-RMUVJ44Z.js → chunk-5WIB7A27.js} RENAMED

@@ -1,5 +1,6 @@
 import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
 import {
+  HtmlWriter,
   detectFileType,
   findRepoRoot,
   package_default,
@@ -11,23 +12,25 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-KSUL3F3R.js";
+} from "./chunk-DY4ZDTTO.js";
 import {
-  assembleLlmJudgePrompt,
-  buildPromptInputs,
   createBuiltinRegistry,
   createProvider,
   executeScript,
   generateRubrics,
   getAgentvHome,
+  getOutputFilenames,
   getWorkspacePoolRoot,
+  isAgentSkillsFormat,
   loadTestById,
   loadTests,
   normalizeLineEndings,
+  parseAgentSkillsEvals,
   toCamelCaseDeep,
   toSnakeCaseDeep as toSnakeCaseDeep2,
+  transpileEvalYamlFile,
   trimBaselineResult
-} from "./chunk-YTHTGLMT.js";
+} from "./chunk-HF4X7ALN.js";
 import {
   __commonJS,
   __esm,
@@ -3296,6 +3299,16 @@ var compareCommand = command({
 import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
 import path from "node:path";
 import { stringify as stringifyYaml } from "yaml";
+async function convertJsonlToHtml(inputPath, outputPath) {
+  const content = readFileSync2(inputPath, "utf8");
+  const lines = content.trim().split("\n").filter((line) => line.trim());
+  const writer = await HtmlWriter.open(outputPath);
+  for (const line of lines) {
+    await writer.append(JSON.parse(line));
+  }
+  await writer.close();
+  return lines.length;
+}
 function convertJsonlToYaml(inputPath, outputPath) {
   const content = readFileSync2(inputPath, "utf8");
   const lines = content.trim().split("\n").filter((line) => line.trim());
@@ -3315,35 +3328,157 @@ function convertJsonlToYaml(inputPath, outputPath) {
   writeFileSync(outputPath, yamlOutput);
   return lines.length;
 }
+function convertEvalsJsonToYaml(inputPath) {
+  const content = readFileSync2(inputPath, "utf8");
+  const parsed = JSON.parse(content);
+  if (!isAgentSkillsFormat(parsed)) {
+    throw new Error(`Not a valid Agent Skills evals.json: missing 'evals' array`);
+  }
+  const tests = parseAgentSkillsEvals(parsed, inputPath, path.dirname(path.resolve(inputPath)));
+  const lines = [];
+  lines.push("# Converted from Agent Skills evals.json");
+  lines.push("# See: https://agentskills.io/skill-creation/evaluating-skills");
+  lines.push("#");
+  lines.push("# AgentV features you can add:");
+  lines.push("#   - type: is_json, contains, regex for deterministic evaluators");
+  lines.push("#   - type: code-grader for custom scoring scripts");
+  lines.push("#   - Multi-turn conversations via input message arrays");
+  lines.push("#   - Composite evaluators with weighted scoring");
+  lines.push("#   - Workspace isolation with repos and hooks");
+  lines.push("");
+  if (parsed.skill_name) {
+    lines.push(`description: "Evals for ${parsed.skill_name} skill"`);
+    lines.push("");
+  }
+  lines.push("tests:");
+  for (const test of tests) {
+    lines.push(`  - id: "${test.id}"`);
+    lines.push("");
+    if (test.criteria) {
+      lines.push("    criteria: |-");
+      for (const line of test.criteria.split("\n")) {
+        lines.push(`      ${line}`);
+      }
+      lines.push("");
+    }
+    lines.push("    input:");
+    for (const msg of test.input) {
+      lines.push(`      - role: ${msg.role}`);
+      if (typeof msg.content === "string" && msg.content.includes("\n")) {
+        lines.push("        content: |-");
+        for (const line of msg.content.split("\n")) {
+          lines.push(`          ${line}`);
+        }
+      } else {
+        lines.push(
+          `        content: "${typeof msg.content === "string" ? msg.content.replace(/"/g, '\\"') : msg.content}"`
+        );
+      }
+    }
+    lines.push("");
+    if (test.expected_output && test.expected_output.length > 0) {
+      lines.push("    expected_output:");
+      for (const msg of test.expected_output) {
+        lines.push(`      - role: ${msg.role}`);
+        if (typeof msg.content === "string" && msg.content.includes("\n")) {
+          lines.push("        content: |-");
+          for (const line of msg.content.split("\n")) {
+            lines.push(`          ${line}`);
+          }
+        } else {
+          lines.push(
+            `        content: "${typeof msg.content === "string" ? msg.content.replace(/"/g, '\\"') : msg.content}"`
+          );
+        }
+      }
+      lines.push("");
+    }
+    if (test.assertions && test.assertions.length > 0) {
+      lines.push("    # Promoted from evals.json assertions[]");
+      lines.push("    # Replace with type: is_json, contains, or regex for deterministic checks");
+      lines.push("    assertions:");
+      for (const assertion of test.assertions) {
+        lines.push(`      - name: ${assertion.name}`);
+        lines.push(`        type: ${assertion.type}`);
+        if ((assertion.type === "llm-grader" || assertion.type === "llm-judge") && "prompt" in assertion) {
+          const prompt = assertion.prompt;
+          lines.push(`        prompt: "${prompt.replace(/"/g, '\\"')}"`);
+        }
+      }
+      lines.push("");
+    }
+    if (test.file_paths && test.file_paths.length > 0) {
+      lines.push("    # TODO: Configure workspace.repos or file references for these files:");
+      const agentSkillsFiles = test.metadata?.agent_skills_files;
+      if (agentSkillsFiles) {
+        for (const file of agentSkillsFiles) {
+          lines.push(`    #   - ${file}`);
+        }
+      }
+      lines.push("");
+    }
+  }
+  return `${lines.join("\n")}
+`;
+}
 var convertCommand = command({
   name: "convert",
-  description: "Convert evaluation results from JSONL to YAML format",
+  description: "Convert between evaluation formats (JSONL\u2192YAML, JSONL\u2192HTML, evals.json\u2192EVAL.yaml)",
   args: {
     input: positional({
       type: string,
       displayName: "input",
-      description: "Path to input JSONL file"
+      description: "Path to input file (.jsonl or .json)"
     }),
     out: option({
       type: optional(string),
       long: "out",
       short: "o",
-      description: "Output file path (defaults to input path with .yaml extension)"
+      description: "Output file path (defaults to stdout for evals.json, .yaml or .html for JSONL)"
     })
   },
   handler: async ({ input, out }) => {
-    if (!input.endsWith(".jsonl")) {
-      console.error("Error: Input file must be a .jsonl file");
-      process.exit(1);
+    const ext = path.extname(input).toLowerCase();
+    if (ext === ".json") {
+      try {
+        const yaml = convertEvalsJsonToYaml(input);
+        if (out) {
+          writeFileSync(out, yaml);
+          console.log(`Converted to ${path.resolve(out)}`);
+        } else {
+          process.stdout.write(yaml);
+        }
+      } catch (error) {
+        console.error(`Error: ${error.message}`);
+        process.exit(1);
+      }
+      return;
     }
-    const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
-    try {
-      const count = convertJsonlToYaml(input, outputPath);
-      console.log(`Converted ${count} records to ${path.resolve(outputPath)}`);
-    } catch (error) {
-      console.error(`Error: ${error.message}`);
-      process.exit(1);
+    if (ext === ".jsonl") {
+      const outExt = out ? path.extname(out).toLowerCase() : ".yaml";
+      if (outExt === ".html" || outExt === ".htm") {
+        const outputPath2 = out ?? input.replace(/\.jsonl$/, ".html");
+        try {
+          const count = await convertJsonlToHtml(input, outputPath2);
+          console.log(`Converted ${count} records to ${path.resolve(outputPath2)}`);
+        } catch (error) {
+          console.error(`Error: ${error.message}`);
+          process.exit(1);
+        }
+        return;
+      }
+      const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
+      try {
+        const count = convertJsonlToYaml(input, outputPath);
+        console.log(`Converted ${count} records to ${path.resolve(outputPath)}`);
+      } catch (error) {
+        console.error(`Error: ${error.message}`);
+        process.exit(1);
+      }
+      return;
     }
+    console.error(`Error: Unsupported input format '${ext}'. Supported: .json, .jsonl`);
+    process.exit(1);
   }
 });
@@ -3387,7 +3522,7 @@ tests:
     criteria: Agent responds correctly
     input: "Hello, how are you?"
     expected_output: "I'm doing well"
-    assert:
+    assertions:
       - type: contains
         value: "well"
 `,
@@ -3400,8 +3535,8 @@ tests:
     criteria: Agent responds correctly and completely
     input: "Hello, how are you?"
     expected_output: "I'm doing well, thank you for asking!"
-    assert:
-      - type: llm-judge
+    assertions:
+      - type: llm-grader
         rubric:
           accuracy:
             weight: 0.6
@@ -3470,7 +3605,7 @@ var createAssertionCommand = command({
     console.log(`Created ${path2.relative(process.cwd(), filePath)} (template: ${templateName})`);
     console.log(`
 Use in EVAL.yaml:
-  assert:
+  assertions:
     - type: ${name}`);
   }
 });
@@ -3559,38 +3694,104 @@ var createCommand = subcommands({
   }
 });
-// src/commands/eval/commands/prompt/input.ts
-var evalPromptInputCommand = command({
-  name: "input",
-  description: "Output task input JSON for a single test",
-  args: {
-    evalPath: positional({
-      type: string,
-      displayName: "eval-path",
-      description: "Path to evaluation .yaml file"
-    }),
-    testId: option({
-      type: string,
-      long: "test-id",
-      description: "Test ID"
-    })
-  },
-  handler: async (args) => {
-    const cwd = process.cwd();
-    const repoRoot = await findRepoRoot(cwd);
-    const evalCase = await loadTestById(args.evalPath, repoRoot, args.testId);
-    const fileMap = buildFileMap(evalCase.input_segments, evalCase.file_paths);
-    const resolvedMessages = resolveMessages(evalCase.input, fileMap);
-    const output = {
-      test_id: evalCase.id,
-      input: resolvedMessages,
-      guideline_paths: evalCase.guideline_paths,
-      criteria: evalCase.criteria
-    };
-    process.stdout.write(JSON.stringify(output, null, 2));
-    process.stdout.write("\n");
+// src/commands/eval/commands/prompt/accessors.ts
+async function listPromptEvalTestIds(evalPath) {
+  const repoRoot = await findRepoRoot(process.cwd());
+  const tests = await loadTests(evalPath, repoRoot);
+  return {
+    eval_path: evalPath,
+    test_ids: tests.map((test) => test.id).sort()
+  };
+}
+async function getPromptEvalInput(evalPath, testId) {
+  const repoRoot = await findRepoRoot(process.cwd());
+  const evalCase = await loadTestById(evalPath, repoRoot, testId);
+  const fileMap = buildFileMap(evalCase.input_segments, evalCase.file_paths);
+  return {
+    test_id: evalCase.id,
+    input: resolveMessages(evalCase.input, fileMap),
+    guideline_paths: evalCase.guideline_paths,
+    criteria: evalCase.criteria
+  };
+}
+async function getPromptEvalExpectedOutput(evalPath, testId) {
+  const repoRoot = await findRepoRoot(process.cwd());
+  const evalCase = await loadTestById(evalPath, repoRoot, testId);
+  return {
+    test_id: evalCase.id,
+    criteria: evalCase.criteria,
+    expected_output: evalCase.expected_output,
+    reference_answer: evalCase.reference_answer,
+    assertions: evalCase.assertions ?? []
+  };
+}
+async function getPromptEvalGradingBrief(evalPath, testId) {
+  const repoRoot = await findRepoRoot(process.cwd());
+  const evalCase = await loadTestById(evalPath, repoRoot, testId);
+  const fileMap = buildFileMap(evalCase.input_segments, evalCase.file_paths);
+  const resolvedInput = resolveMessages(evalCase.input, fileMap);
+  const lines = [];
+  const inputText = extractTextFromMessages(resolvedInput);
+  if (inputText) {
+    lines.push(`Input: "${inputText}"`);
+  }
+  const filePaths = evalCase.file_paths.filter((p) => !evalCase.guideline_paths.includes(p));
+  if (filePaths.length > 0) {
+    lines.push(`Files: ${filePaths.join(", ")}`);
+  }
+  if (evalCase.reference_answer) {
+    lines.push(`Expected: "${evalCase.reference_answer}"`);
+  }
+  const criteria = [];
+  if (evalCase.criteria) {
+    criteria.push(evalCase.criteria);
+  }
+  for (const assertion of evalCase.assertions ?? []) {
+    const entry = assertion;
+    const type = entry.type;
+    const bag = entry.config ?? {};
+    if (type === "contains") {
+      criteria.push(`Output contains '${entry.value}'`);
+    } else if (type === "rubrics") {
+      const items = entry.criteria ?? bag.criteria;
+      if (Array.isArray(items)) {
+        for (const item of items) {
+          if (item.outcome) criteria.push(item.outcome);
+        }
+      }
+    } else if (type === "llm-grader" || type === "llm_grader" || type === "llm-judge" || type === "llm_judge") {
+      const prompt = entry.prompt ?? bag.prompt ?? bag.criteria;
+      criteria.push(`[llm-grader] ${typeof prompt === "string" ? prompt : ""}`);
+    } else if (type === "code-grader" || type === "code_grader" || type === "code-judge" || type === "code_judge") {
+      const name = entry.name ?? type;
+      const desc = bag.description ?? entry.description;
+      criteria.push(`[code-grader] ${name}${desc ? `: ${desc}` : ""}`);
+    } else if (type === "skill-trigger") {
+      const trigger = entry.should_trigger !== false;
+      criteria.push(`[skill-trigger] should_trigger: ${trigger} for ${entry.skill}`);
+    } else if (type) {
+      criteria.push(`[${type}] ${entry.value ?? bag.criteria ?? bag.prompt ?? ""}`);
+    }
   }
-});
+  if (criteria.length > 0) {
+    lines.push("Criteria:");
+    for (const c3 of criteria) {
+      lines.push(`  - ${c3}`);
+    }
+  }
+  return lines.join("\n");
+}
+function extractTextFromMessages(messages) {
+  for (const msg of messages) {
+    if (msg.role !== "user") continue;
+    if (typeof msg.content === "string") return msg.content;
+    if (Array.isArray(msg.content)) {
+      const textBlocks = msg.content.filter((b) => b.type === "text").map((b) => b.value);
+      if (textBlocks.length > 0) return textBlocks.join(" ");
+    }
+  }
+  return "";
+}
 function buildFileMap(inputSegments, allFilePaths) {
   const map = /* @__PURE__ */ new Map();
   for (const segment of inputSegments) {
@@ -3602,7 +3803,7 @@ function buildFileMap(inputSegments, allFilePaths) {
     get(key) {
       const direct = map.get(key);
       if (direct) return direct;
-      return allFilePaths.find((p) => p.endsWith(`/${key}`) || p === key);
+      return allFilePaths.find((filePath) => filePath.endsWith(`/${key}`) || filePath === key);
     },
     has(key) {
       return this.get(key) !== void 0;
@@ -3638,291 +3839,61 @@ function resolveMessages(messages, fileMap) {
   });
 }
-// src/commands/eval/commands/prompt/judge.ts
-import { readFile } from "node:fs/promises";
-var evalPromptJudgeCommand = command({
-  name: "judge",
-  description: "Run code judges and output LLM judge prompts for a single test",
+// src/commands/eval/commands/prompt/index.ts
+var evalPromptEvalSubcommand = command({
+  name: "eval",
+  description: "Extract eval prompt data for agents",
   args: {
-    evalPath: positional({
-      type: string,
-      displayName: "eval-path",
-      description: "Path to evaluation .yaml file"
+    list: flag({
+      long: "list",
+      description: "List available test IDs"
+    }),
+    input: flag({
+      long: "input",
+      description: "Extract the test input payload for a single test"
+    }),
+    expectedOutput: flag({
+      long: "expected-output",
+      description: "Extract expected output and grading context for a single test"
+    }),
+    gradingBrief: flag({
+      long: "grading-brief",
+      description: "Output human-readable grading brief with typed criteria"
     }),
     testId: option({
-      type: string,
+      type: optional(string),
       long: "test-id",
-      description: "Test ID"
+      description: "Test ID (required for --input and --expected-output)"
     }),
-    answerFile: option({
+    evalPath: positional({
       type: string,
-      long: "answer-file",
-      description: "Path to file containing the candidate answer"
+      displayName: "eval-path",
+      description: "Path to evaluation .yaml, .json, or .jsonl file"
     })
   },
-  handler: async (args) => {
-    const cwd = process.cwd();
-    const repoRoot = await findRepoRoot(cwd);
-    const evalCase = await loadTestById(args.evalPath, repoRoot, args.testId);
-    const candidate = (await readFile(args.answerFile, "utf8")).trim();
-    const promptInputs = await buildPromptInputs(evalCase);
-    const evaluators = evalCase.evaluators ?? [];
-    const outputs = [];
-    for (const config of evaluators) {
-      const output = await processEvaluator(config, evalCase, candidate, promptInputs);
-      outputs.push(output);
-    }
-    if (outputs.length === 0) {
-      const assembly = assembleLlmJudgePrompt({
-        evalCase,
-        candidate,
-        promptInputs
-      });
-      outputs.push({
-        name: "default_llm_judge",
-        type: "llm-judge",
-        status: "prompt_ready",
-        prompt: {
-          system_prompt: assembly.systemPrompt,
-          user_prompt: assembly.userPrompt
-        }
-      });
-    }
-    const result = {
-      test_id: evalCase.id,
-      evaluators: outputs
-    };
-    process.stdout.write(JSON.stringify(result, null, 2));
-    process.stdout.write("\n");
-  }
-});
-async function processEvaluator(config, evalCase, candidate, promptInputs) {
-  switch (config.type) {
-    case "code-judge": {
-      const codeConfig = config;
-      const script = codeConfig.command ?? codeConfig.script ?? [];
-      const scriptCwd = codeConfig.resolvedCwd ?? codeConfig.cwd;
-      const payload = {
-        question: evalCase.question,
-        criteria: evalCase.criteria,
-        expectedOutput: evalCase.expected_output,
-        referenceAnswer: evalCase.reference_answer,
-        answer: candidate,
-        output: null,
-        guidelineFiles: evalCase.guideline_paths,
-        inputFiles: evalCase.file_paths.filter((p) => !evalCase.guideline_paths.includes(p)),
-        input: evalCase.input,
-        trace: null,
-        fileChanges: null,
-        workspacePath: null,
-        config: codeConfig.config ?? null
-      };
-      try {
-        const inputPayload = JSON.stringify(toSnakeCaseDeep2(payload), null, 2);
-        const stdout = await executeScript(script, inputPayload, 6e4, scriptCwd);
-        const parsed = JSON.parse(stdout);
-        return {
-          name: codeConfig.name,
-          type: "code-judge",
-          status: "completed",
-          result: parsed
-        };
-      } catch (error) {
-        return {
-          name: codeConfig.name,
-          type: "code-judge",
-          status: "completed",
-          result: {
-            score: 0,
-            error: error instanceof Error ? error.message : String(error)
-          }
-        };
-      }
-    }
-    case "llm-judge": {
-      const llmConfig = config;
-      const assembly = assembleLlmJudgePrompt({
-        evalCase,
-        candidate,
-        promptInputs,
-        evaluatorConfig: llmConfig
-      });
-      return {
-        name: llmConfig.name,
-        type: "llm-judge",
-        status: "prompt_ready",
-        prompt: {
-          system_prompt: assembly.systemPrompt,
-          user_prompt: assembly.userPrompt
-        }
-      };
-    }
-    default: {
-      return {
-        name: config.name,
-        type: config.type,
-        status: "prompt_ready",
-        result: {
-          message: `Evaluator type "${config.type}" requires the full eval pipeline. Use \`agentv eval\` instead.`
-        }
-      };
+  handler: async ({ evalPath, expectedOutput, gradingBrief, input, list, testId }) => {
+    const selectedModes = [list, input, expectedOutput, gradingBrief].filter(Boolean).length;
+    if (selectedModes !== 1) {
+      throw new Error(
+        "Specify exactly one of --list, --input, --expected-output, or --grading-brief."
+      );
     }
-  }
-}
-// src/commands/eval/commands/prompt/overview.ts
-function getEvalMode() {
-  const mode = process.env.AGENTV_PROMPT_EVAL_MODE ?? "agent";
-  if (mode !== "agent" && mode !== "cli") {
-    throw new Error(`Invalid AGENTV_PROMPT_EVAL_MODE="${mode}". Valid values: agent, cli`);
-  }
-  return mode;
-}
-async function generateOverviewPrompt(evalPaths) {
-  const cwd = process.cwd();
-  const resolvedPaths = await resolveEvalPaths(evalPaths, cwd);
-  const repoRoot = await findRepoRoot(cwd);
-  const mode = getEvalMode();
-  const fileEntries = [];
-  for (const evalPath of resolvedPaths) {
-    const tests = await loadTests(evalPath, repoRoot);
-    fileEntries.push({ path: evalPath, tests });
-  }
-  const totalCases = fileEntries.reduce((sum, e) => sum + e.tests.length, 0);
-  if (mode === "cli") {
-    return generateCliModePrompt(fileEntries, totalCases);
-  }
-  return generateAgentModePrompt(fileEntries, totalCases);
-}
-function generateAgentModePrompt(fileEntries, totalCases) {
-  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").slice(0, -1);
-  const lines = [
-    "# AgentV Eval Orchestration",
-    "",
-    "**Mode: agent** \u2014 You orchestrate the evaluation using agents. No API keys needed.",
-    "",
-    `You are orchestrating ${totalCases} evaluation case${totalCases === 1 ? "" : "s"}.`,
-    "",
-    "## Setup",
-    "",
-    `- **Results file:** \`.agentv/results/eval_${timestamp}.jsonl\``,
-    "- **Temp answers:** `.agentv/tmp/`",
-    "",
-    "Ensure both directories exist before starting.",
-    "",
-    "## For each test case",
-    "",
-    "Run these two agents **sequentially**:",
-    "",
-    "### 1. Dispatch `eval-candidate` agent",
-    "",
-    "Parameters:",
-    "- `eval-path`: Path to the eval YAML file",
-    "- `test-id`: The test case ID",
-    "- `answer-file`: `.agentv/tmp/eval_<test-id>.txt`",
-    "",
-    "The agent retrieves the task input, acts as the candidate LLM, and saves its response.",
-    "",
-    "### 2. Dispatch `eval-judge` agent (after candidate completes)",
-    "",
-    "Parameters:",
-    "- `eval-path`: Path to the eval YAML file",
-    "- `test-id`: The test case ID",
-    "- `answer-file`: `.agentv/tmp/eval_<test-id>.txt`",
-    `- \`results-file\`: \`.agentv/results/eval_${timestamp}.jsonl\``,
-    "",
-    "The agent runs evaluators, scores the response, and appends results to the JSONL file.",
-    ""
-  ];
-  for (const { path: evalPath, tests } of fileEntries) {
-    lines.push(`## ${evalPath}`);
-    lines.push("");
-    for (const evalCase of tests) {
-      const evaluatorSummary = describeEvaluators(evalCase);
-      lines.push(`### ${evalCase.id}`);
-      lines.push(`Criteria: ${evalCase.criteria}`);
-      if (evaluatorSummary) {
-        lines.push(`Evaluators: ${evaluatorSummary}`);
+    if (gradingBrief) {
+      if (!testId) {
+        throw new Error("--test-id is required with --grading-brief.");
       }
-      lines.push("");
-      lines.push("**1. Dispatch `eval-candidate` agent:**");
-      lines.push(`- eval-path: \`${evalPath}\``);
-      lines.push(`- test-id: \`${evalCase.id}\``);
-      lines.push(`- answer-file: \`.agentv/tmp/eval_${evalCase.id}.txt\``);
-      lines.push("");
-      lines.push("**2. Dispatch `eval-judge` agent** (after candidate completes):");
-      lines.push(`- eval-path: \`${evalPath}\``);
-      lines.push(`- test-id: \`${evalCase.id}\``);
-      lines.push(`- answer-file: \`.agentv/tmp/eval_${evalCase.id}.txt\``);
-      lines.push(`- results-file: \`.agentv/results/eval_${timestamp}.jsonl\``);
-      lines.push("");
+      const brief = await getPromptEvalGradingBrief(evalPath, testId);
+      process.stdout.write(brief);
+      process.stdout.write("\n");
+      return;
     }
-  }
-  return lines.join("\n");
-}
-function generateCliModePrompt(fileEntries, totalCases) {
-  const evalPathArgs = fileEntries.map((e) => e.path).join(" ");
-  const lines = [
-    "# AgentV Eval Orchestration",
-    "",
-    "**Mode: cli** \u2014 Run the evaluation end-to-end using the CLI.",
-    "",
-    `You are orchestrating ${totalCases} evaluation case${totalCases === 1 ? "" : "s"}.`,
-    "",
-    "## Run the evaluation",
-    "",
-    "```bash",
-    `agentv eval ${evalPathArgs}`,
-    "```",
-    "",
-    "Results are written to `.agentv/results/`. The output path is printed in the CLI output.",
-    "Parse the JSONL file for per-test scores, hits, and misses.",
-    ""
-  ];
-  for (const { path: evalPath, tests } of fileEntries) {
-    lines.push(`## ${evalPath}`);
-    lines.push("");
-    for (const evalCase of tests) {
-      const evaluatorSummary = describeEvaluators(evalCase);
-      lines.push(`### ${evalCase.id}`);
-      lines.push(`Criteria: ${evalCase.criteria}`);
-      if (evaluatorSummary) {
-        lines.push(`Evaluators: ${evaluatorSummary}`);
-      }
-      lines.push("");
+    if ((input || expectedOutput) && !testId) {
+      throw new Error("--test-id is required with --input and --expected-output.");
     }
-  }
-  return lines.join("\n");
-}
-var evalPromptOverviewCommand = command({
-  name: "overview",
-  description: "Output orchestration prompt for host agent to run evals",
-  args: {
-    evalPaths: restPositionals({
-      type: string,
-      displayName: "eval-paths",
-      description: "Path(s) or glob(s) to evaluation .yaml file(s)"
-    })
-  },
-  handler: async (args) => {
-    const output = await generateOverviewPrompt(args.evalPaths);
-    process.stdout.write(output);
-  }
-});
-function describeEvaluators(evalCase) {
-  const configs = evalCase.evaluators;
-  if (!configs || configs.length === 0) return void 0;
-  return configs.map((c3) => `${c3.name} (${c3.type})`).join(", ");
-}
-// src/commands/eval/commands/prompt/index.ts
-var evalPromptEvalSubcommand = subcommands({
-  name: "eval",
-  description: "Eval prompt commands (overview, input, judge)",
-  cmds: {
-    overview: evalPromptOverviewCommand,
-    input: evalPromptInputCommand,
-    judge: evalPromptJudgeCommand
+    const requiredTestId = testId ?? "";
+    const output = list ? await listPromptEvalTestIds(evalPath) : input ? await getPromptEvalInput(evalPath, requiredTestId) : await getPromptEvalExpectedOutput(evalPath, requiredTestId);
+    process.stdout.write(JSON.stringify(output, null, 2));
+    process.stdout.write("\n");
   }
 });
 var evalPromptCommand = subcommands({
@@ -3933,6 +3904,120 @@ var evalPromptCommand = subcommands({
   }
 });
+// src/commands/eval/commands/assert.ts
+import { readFileSync as readFileSync3 } from "node:fs";
+import path3 from "node:path";
+import fg from "fast-glob";
+var evalAssertCommand = command({
+  name: "assert",
+  description: "Run a single code-grader assertion from .agentv/graders/ and print the score",
+  args: {
+    graderName: positional({
+      type: string,
+      displayName: "name",
+      description: "Assertion name (matches filename without extension in .agentv/graders/)"
+    }),
+    agentOutput: option({
+      type: optional(string),
+      long: "agent-output",
+      description: "The agent's full response text"
+    }),
+    agentInput: option({
+      type: optional(string),
+      long: "agent-input",
+      description: "The original user prompt"
+    }),
+    file: option({
+      type: optional(string),
+      long: "file",
+      description: "Path to JSON file with { output, input } fields"
+    })
+  },
+  handler: async ({ graderName, agentOutput: output, agentInput: input, file }) => {
+    let resolvedOutput;
+    let resolvedInput;
+    if (file) {
+      const content = JSON.parse(readFileSync3(path3.resolve(file), "utf8"));
+      resolvedOutput = content.output ?? "";
+      resolvedInput = content.input ?? "";
+    } else {
+      if (output === void 0) {
+        console.error("Error: --agent-output is required (or use --file)");
+        process.exit(1);
+      }
+      resolvedOutput = output;
+      resolvedInput = input ?? "";
+    }
+    if (!/^[a-zA-Z0-9_-]+$/.test(graderName)) {
+      console.error(
+        `Error: Invalid grader name '${graderName}' \u2014 only letters, digits, hyphens, and underscores allowed`
+      );
+      process.exit(1);
+    }
+    const scriptPath = await findGraderScript(graderName, process.cwd());
+    if (!scriptPath) {
+      console.error(
+        `Error: Grader '${graderName}' not found in .agentv/graders/ (or .agentv/judges/)`
+      );
+      process.exit(1);
+    }
+    const payload = JSON.stringify(
+      {
+        answer: resolvedOutput,
+        output: [{ role: "assistant", content: resolvedOutput }],
+        input: [{ role: "user", content: resolvedInput }],
+        question: resolvedInput,
+        criteria: "",
+        expected_output: [],
+        reference_answer: "",
+        guideline_files: [],
+        input_files: [],
+        trace: null,
+        token_usage: null,
+        cost_usd: null,
+        duration_ms: null,
+        start_time: null,
+        end_time: null,
+        file_changes: null,
+        workspace_path: null,
+        config: null,
+        metadata: {}
+      },
+      null,
+      2
+    );
+    try {
+      const stdout = await executeScript(["bun", "run", scriptPath], payload);
+      const parsed = JSON.parse(stdout);
+      const score = typeof parsed.score === "number" ? parsed.score : 0;
+      process.stdout.write(JSON.stringify(parsed, null, 2));
+      process.stdout.write("\n");
+      process.exit(score >= 0.5 ? 0 : 1);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.error(`Error: ${message}`);
+      process.exit(1);
+    }
+  }
+});
+async function findGraderScript(graderName, startDir) {
+  let dir = path3.resolve(startDir);
+  const root = path3.parse(dir).root;
+  while (dir !== root) {
+    for (const subdir of ["graders", "judges"]) {
+      const gradersDir = path3.join(dir, ".agentv", subdir);
+      const found = await fg([`${graderName}.{ts,js,mts,mjs}`], {
+        cwd: gradersDir,
+        absolute: true,
+        onlyFiles: true
+      });
+      if (found.length > 0) return found[0];
+    }
+    dir = path3.dirname(dir);
+  }
+  return null;
+}
 // src/commands/eval/commands/run.ts
 var evalRunCommand = command({
   name: "eval",
@@ -3972,12 +4057,12 @@ var evalRunCommand = command({
       type: array(string),
       long: "output",
       short: "o",
-      description: "Output file path(s). Format inferred from extension: .jsonl, .json, .xml, .yaml"
+      description: "Output file path(s). Format inferred from extension: .jsonl, .json, .xml, .yaml, .html"
     }),
     outputFormat: option({
       type: optional(string),
       long: "output-format",
-      description: "Output format: 'jsonl' or 'yaml' (default: jsonl)"
+      description: "Output format: 'jsonl', 'yaml', or 'html' (default: jsonl)"
     }),
     dryRun: flag({
       long: "dry-run",
@@ -4068,11 +4153,31 @@ var evalRunCommand = command({
     strict: flag({
       long: "strict",
       description: "Exit with error on version mismatch (instead of warning)"
+    }),
+    benchmarkJson: option({
+      type: optional(string),
+      long: "benchmark-json",
+      description: "Write Agent Skills benchmark.json to the specified path"
+    }),
+    artifacts: option({
+      type: optional(string),
+      long: "artifacts",
+      description: "Write companion artifacts (grading/<test>.json, timing.json, benchmark.json) to the specified directory"
+    }),
+    graderTarget: option({
+      type: optional(string),
+      long: "grader-target",
+      description: 'Override grader target for all evaluators (e.g., "agentv", or a target name from targets.yaml)'
+    }),
+    model: option({
+      type: optional(string),
+      long: "model",
+      description: 'Override model for the grader target (e.g., "openai:gpt-5-mini")'
     })
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-J4IBXJF7.js");
+      const { launchInteractiveWizard } = await import("./interactive-B432TCRZ.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4104,15 +4209,30 @@ var evalRunCommand = command({
       otelCaptureContent: args.otelCaptureContent,
       otelGroupTurns: args.otelGroupTurns,
       retryErrors: args.retryErrors,
-      strict: args.strict
+      strict: args.strict,
+      benchmarkJson: args.benchmarkJson,
+      artifacts: args.artifacts,
+      graderTarget: args.graderTarget,
+      model: args.model
     };
     await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
   }
 });
+// src/commands/eval/index.ts
+// Parent `eval` command: groups the eval-related commands and exposes them
+// as the `run`, `prompt` and `assert` subcommands.
+var evalCommand = subcommands({
+  name: "eval",
+  description: "Evaluation commands",
+  cmds: {
+    run: evalRunCommand,
+    prompt: evalPromptCommand,
+    assert: evalAssertCommand
+  }
+});
 // src/commands/generate/rubrics.ts
-import { readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
-import path3 from "node:path";
+import { readFile, writeFile as writeFile2 } from "node:fs/promises";
+import path4 from "node:path";
 import { pathToFileURL } from "node:url";
 import { isMap, isSeq, parseDocument } from "yaml";
 function isJsonObject(value) {
@@ -4124,7 +4244,7 @@ function asString(value) {
 async function loadRubricGenerator() {
   const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
   if (customGenerator) {
-    const generatorPath = path3.resolve(customGenerator);
+    const generatorPath = path4.resolve(customGenerator);
     const generatorUrl = pathToFileURL(generatorPath).href;
     const module = await import(generatorUrl);
     return module.generateRubrics;
@@ -4134,8 +4254,8 @@ async function loadRubricGenerator() {
 async function generateRubricsCommand(options) {
   const { file, target: targetOverride, verbose } = options;
   console.log(`Generating rubrics for: ${file}`);
-  const absolutePath = path3.resolve(file);
-  const content = await readFile2(absolutePath, "utf8");
+  const absolutePath = path4.resolve(file);
+  const content = await readFile(absolutePath, "utf8");
   const doc = parseDocument(content);
   const parsed = doc.toJSON();
   if (!isJsonObject(parsed)) {
@@ -4293,23 +4413,23 @@ var generateCommand = subcommands({
 // src/commands/init/index.ts
 import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
-import path5 from "node:path";
+import path6 from "node:path";
 import * as readline from "node:readline/promises";
 // src/templates/index.ts
-import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
-import path4 from "node:path";
+import { readFileSync as readFileSync4, readdirSync, statSync } from "node:fs";
+import path5 from "node:path";
 import { fileURLToPath } from "node:url";
 function getAgentvTemplates() {
   return getTemplatesFromDir(".agentv");
 }
 function getTemplatesFromDir(subdir) {
-  const currentDir = path4.dirname(fileURLToPath(import.meta.url));
+  const currentDir = path5.dirname(fileURLToPath(import.meta.url));
   let templatesDir;
-  if (currentDir.includes(`${path4.sep}dist`)) {
-    templatesDir = path4.join(currentDir, "templates", subdir);
+  if (currentDir.includes(`${path5.sep}dist`)) {
+    templatesDir = path5.join(currentDir, "templates", subdir);
   } else {
-    templatesDir = path4.join(currentDir, subdir);
+    templatesDir = path5.join(currentDir, subdir);
   }
   return readTemplatesRecursively(templatesDir, "");
 }
@@ -4317,15 +4437,15 @@ function readTemplatesRecursively(dir, relativePath) {
   const templates = [];
   const entries2 = readdirSync(dir);
   for (const entry of entries2) {
-    const fullPath = path4.join(dir, entry);
+    const fullPath = path5.join(dir, entry);
     const stat3 = statSync(fullPath);
-    const entryRelativePath = relativePath ? path4.join(relativePath, entry) : entry;
+    const entryRelativePath = relativePath ? path5.join(relativePath, entry) : entry;
     if (stat3.isDirectory()) {
       templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
     } else {
-      const content = readFileSync3(fullPath, "utf-8");
+      const content = readFileSync4(fullPath, "utf-8");
       templates.push({
-        path: entryRelativePath.split(path4.sep).join("/"),
+        path: entryRelativePath.split(path5.sep).join("/"),
         // Normalize to forward slashes
         content
       });
@@ -4354,23 +4474,23 @@ async function promptYesNo(message) {
   }
 }
 async function initCommand(options = {}) {
-  const targetPath = path5.resolve(options.targetPath ?? ".");
-  const agentvDir = path5.join(targetPath, ".agentv");
+  const targetPath = path6.resolve(options.targetPath ?? ".");
+  const agentvDir = path6.join(targetPath, ".agentv");
   const agentvTemplates = getAgentvTemplates();
   const envTemplate = agentvTemplates.find((t) => t.path === ".env.example");
   const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.example");
   const existingFiles = [];
   if (envTemplate) {
-    const envFilePath = path5.join(targetPath, ".env.example");
+    const envFilePath = path6.join(targetPath, ".env.example");
     if (existsSync(envFilePath)) {
       existingFiles.push(".env.example");
     }
   }
   if (existsSync(agentvDir)) {
     for (const template of otherAgentvTemplates) {
-      const targetFilePath = path5.join(agentvDir, template.path);
+      const targetFilePath = path6.join(agentvDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path5.relative(targetPath, targetFilePath));
+        existingFiles.push(path6.relative(targetPath, targetFilePath));
       }
     }
   }
@@ -4392,18 +4512,18 @@ async function initCommand(options = {}) {
     mkdirSync(agentvDir, { recursive: true });
   }
   if (envTemplate) {
-    const envFilePath = path5.join(targetPath, ".env.example");
+    const envFilePath = path6.join(targetPath, ".env.example");
     writeFileSync2(envFilePath, envTemplate.content, "utf-8");
     console.log("Created .env.example");
   }
   for (const template of otherAgentvTemplates) {
-    const targetFilePath = path5.join(agentvDir, template.path);
-    const targetDirPath = path5.dirname(targetFilePath);
+    const targetFilePath = path6.join(agentvDir, template.path);
+    const targetDirPath = path6.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
     writeFileSync2(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
+    console.log(`Created ${path6.relative(targetPath, targetFilePath)}`);
   }
   console.log("\nAgentV initialized successfully!");
   console.log("\nFiles installed to root:");
@@ -4411,7 +4531,7 @@ async function initCommand(options = {}) {
     console.log("  - .env.example");
   }
   console.log(`
-Files installed to ${path5.relative(targetPath, agentvDir)}:`);
+Files installed to ${path6.relative(targetPath, agentvDir)}:`);
   for (const t of otherAgentvTemplates) {
     console.log(`  - ${t.path}`);
   }
@@ -4530,8 +4650,8 @@ var selfCommand = subcommands({
 });
 // src/commands/trace/utils.ts
-import { readFileSync as readFileSync4, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
-import path6 from "node:path";
+import { readFileSync as readFileSync5, readdirSync as readdirSync2, statSync as statSync2 } from "node:fs";
+import path7 from "node:path";
 var colors2 = {
   reset: "\x1B[0m",
   bold: "\x1B[1m",
@@ -4557,7 +4677,7 @@ function padLeft2(str, len) {
   return " ".repeat(Math.max(0, len - plainLen)) + str;
 }
 function loadResultFile(filePath) {
-  const content = readFileSync4(filePath, "utf8");
+  const content = readFileSync5(filePath, "utf8");
   const lines = content.trim().split("\n").filter((line) => line.trim());
   return lines.map((line, i) => {
     const record = JSON.parse(line);
@@ -4568,7 +4688,7 @@ function loadResultFile(filePath) {
   });
 }
 function listResultFiles(cwd, limit) {
-  const resultsDir = path6.join(cwd, ".agentv", "results");
+  const resultsDir = path7.join(cwd, ".agentv", "results");
   let files;
   try {
     files = readdirSync2(resultsDir).filter((f) => f.endsWith(".jsonl"));
@@ -4581,7 +4701,7 @@ function listResultFiles(cwd, limit) {
   }
   const metas = [];
   for (const filename of files) {
-    const filePath = path6.join(resultsDir, filename);
+    const filePath = path7.join(resultsDir, filename);
     try {
       const stat3 = statSync2(filePath);
       const results = loadResultFile(filePath);
@@ -4807,8 +4927,8 @@ var stubProvider = {
     throw new Error("trace score does not support LLM-based evaluators");
   }
 };
-var stubLlmJudge = {
-  kind: "llm-judge",
+var stubLlmGrader = {
+  kind: "llm-grader",
   evaluate() {
     throw new Error("trace score does not support LLM-based evaluators");
   }
@@ -4816,7 +4936,7 @@ var stubLlmJudge = {
 async function runScore(results, evaluatorConfig, testIdFilter) {
   const registry = createBuiltinRegistry();
   const dispatchContext = {
-    llmJudge: stubLlmJudge,
+    llmGrader: stubLlmGrader,
     registry
   };
   const evaluator = await registry.create(evaluatorConfig, dispatchContext);
@@ -5380,8 +5500,70 @@ var traceCommand = subcommands({
   }
 });
+// src/commands/transpile/index.ts
+import { writeFileSync as writeFileSync3 } from "node:fs";
+import path8 from "node:path";
+/**
+ * `transpile` command: converts an EVAL.yaml file to Agent Skills evals.json.
+ *
+ * The transpile result exposes `warnings` (each printed with console.warn)
+ * and `files`, a map-like collection of skill -> evals.json content. Output
+ * goes to stdout when `--stdout` is set (single-file results only), otherwise
+ * one file per entry is written into `--out-dir`, or into the input file's
+ * directory when `--out-dir` is omitted. Failures exit the process with code 1.
+ */
+var transpileCommand = command({
+  name: "transpile",
+  description: "Convert an EVAL.yaml file to Agent Skills evals.json format",
+  args: {
+    input: positional({
+      type: string,
+      displayName: "input",
+      description: "Path to EVAL.yaml file"
+    }),
+    outDir: option({
+      type: optional(string),
+      long: "out-dir",
+      short: "d",
+      description: "Output directory (defaults to directory of input file)"
+    }),
+    stdout: flag({
+      long: "stdout",
+      description: "Write to stdout instead of file(s) (only valid for single-skill output)"
+    })
+  },
+  handler: async ({ input, outDir, stdout }) => {
+    let result;
+    try {
+      result = transpileEvalYamlFile(path8.resolve(input));
+    } catch (error) {
+      // A thrown value is not guaranteed to be an Error instance; fall back to
+      // its string form instead of printing "Error: undefined".
+      const message = error instanceof Error ? error.message : String(error);
+      console.error(`Error: ${message}`);
+      process.exit(1);
+    }
+    for (const warning of result.warnings) {
+      console.warn(`Warning: ${warning}`);
+    }
+    if (result.files.size === 0) {
+      console.error("Error: No output produced (no tests found)");
+      process.exit(1);
+    }
+    if (stdout) {
+      // stdout can carry only one JSON document, so reject multi-file results.
+      if (result.files.size > 1) {
+        console.error(
+          "Error: --stdout is only valid when input produces a single evals.json (multi-skill input produces multiple files)"
+        );
+        process.exit(1);
+      }
+      const [file] = result.files.values();
+      process.stdout.write(JSON.stringify(file, null, 2));
+      process.stdout.write("\n");
+      return;
+    }
+    const outputDir = outDir ? path8.resolve(outDir) : path8.dirname(path8.resolve(input));
+    const fileNames = getOutputFilenames(result);
+    for (const [skill, evalsJson] of result.files) {
+      // Fall back to the default name when no per-skill file name is provided.
+      const fileName = fileNames.get(skill) ?? "evals.json";
+      const outputPath = path8.join(outputDir, fileName);
+      writeFileSync3(outputPath, `${JSON.stringify(evalsJson, null, 2)}
+`);
+      console.log(`Transpiled to ${outputPath}`);
+    }
+  }
+});
 // src/commands/trim/index.ts
-import { readFileSync as readFileSync5, writeFileSync as writeFileSync3 } from "node:fs";
+import { readFileSync as readFileSync6, writeFileSync as writeFileSync4 } from "node:fs";
 var trimCommand = command({
   name: "trim",
   description: "Trim evaluation results for baseline storage (strips debug/audit fields)",
@@ -5400,7 +5582,7 @@ var trimCommand = command({
   },
   handler: async ({ input, out }) => {
     try {
-      const content = readFileSync5(input, "utf8");
+      const content = readFileSync6(input, "utf8");
       const lines = content.trim().split("\n").filter((line) => line.trim());
       const trimmedLines = lines.map((line) => {
         const record = JSON.parse(line);
@@ -5412,7 +5594,7 @@ var trimCommand = command({
       const output = `${trimmedLines.join("\n")}
 `;
       if (out) {
-        writeFileSync3(out, output, "utf8");
+        writeFileSync4(out, output, "utf8");
         console.error(`Trimmed ${lines.length} record(s) \u2192 ${out}`);
       } else {
         process.stdout.write(output);
@@ -5507,7 +5689,7 @@ function isTTY() {
 // src/commands/validate/validate-files.ts
 import { constants } from "node:fs";
 import { access, readdir, stat } from "node:fs/promises";
-import path7 from "node:path";
+import path9 from "node:path";
 async function validateFiles(paths) {
   const filePaths = await expandPaths(paths);
   const results = [];
@@ -5525,7 +5707,7 @@ async function validateFiles(paths) {
   };
 }
 async function validateSingleFile(filePath) {
-  const absolutePath = path7.resolve(filePath);
+  const absolutePath = path9.resolve(filePath);
   const fileType = await detectFileType(absolutePath);
   let result;
   if (fileType === "eval") {
@@ -5550,7 +5732,7 @@ async function validateSingleFile(filePath) {
 async function expandPaths(paths) {
   const expanded = [];
   for (const inputPath of paths) {
-    const absolutePath = path7.resolve(inputPath);
+    const absolutePath = path9.resolve(inputPath);
     try {
       await access(absolutePath, constants.F_OK);
     } catch {
@@ -5574,7 +5756,7 @@ async function findYamlFiles(dirPath) {
   try {
     const entries2 = await readdir(dirPath, { withFileTypes: true });
     for (const entry of entries2) {
-      const fullPath = path7.join(dirPath, entry.name);
+      const fullPath = path9.join(dirPath, entry.name);
       if (entry.isDirectory()) {
         if (entry.name === "node_modules" || entry.name.startsWith(".")) {
           continue;
@@ -5591,7 +5773,7 @@ async function findYamlFiles(dirPath) {
   return results;
 }
 function isYamlFile(filePath) {
-  const ext = path7.extname(filePath).toLowerCase();
+  const ext = path9.extname(filePath).toLowerCase();
   return ext === ".yaml" || ext === ".yml";
 }
@@ -5630,8 +5812,8 @@ var validateCommand = command({
 // src/commands/workspace/clean.ts
 import { existsSync as existsSync2 } from "node:fs";
-import { readFile as readFile3, readdir as readdir2, rm } from "node:fs/promises";
-import path8 from "node:path";
+import { readFile as readFile2, readdir as readdir2, rm } from "node:fs/promises";
+import path10 from "node:path";
 async function confirm(message) {
   const readline2 = await import("node:readline");
   const rl = readline2.createInterface({ input: process.stdin, output: process.stdout });
@@ -5667,10 +5849,10 @@ var cleanCommand = command({
       const poolDirs = entries2.filter((e) => e.isDirectory());
       const matchingDirs = [];
       for (const dir of poolDirs) {
-        const poolDir = path8.join(poolRoot, dir.name);
-        const metadataPath = path8.join(poolDir, "metadata.json");
+        const poolDir = path10.join(poolRoot, dir.name);
+        const metadataPath = path10.join(poolDir, "metadata.json");
         try {
-          const raw = await readFile3(metadataPath, "utf-8");
+          const raw = await readFile2(metadataPath, "utf-8");
           const metadata = JSON.parse(raw);
           const hasRepo = metadata.repos?.some((r) => {
             if (r.source.type === "git" && r.source.url) {
@@ -5699,7 +5881,7 @@ var cleanCommand = command({
       }
       for (const dir of matchingDirs) {
         await rm(dir, { recursive: true, force: true });
-        console.log(`Removed: ${path8.basename(dir).slice(0, 12)}...`);
+        console.log(`Removed: ${path10.basename(dir).slice(0, 12)}...`);
       }
       console.log("Done.");
     } else {
@@ -5718,14 +5900,14 @@ var cleanCommand = command({
 // src/commands/workspace/list.ts
 import { existsSync as existsSync3 } from "node:fs";
-import { readFile as readFile4, readdir as readdir3, stat as stat2 } from "node:fs/promises";
-import path9 from "node:path";
+import { readFile as readFile3, readdir as readdir3, stat as stat2 } from "node:fs/promises";
+import path11 from "node:path";
 async function getDirectorySize(dirPath) {
   let totalSize = 0;
   try {
     const entries2 = await readdir3(dirPath, { withFileTypes: true });
     for (const entry of entries2) {
-      const fullPath = path9.join(dirPath, entry.name);
+      const fullPath = path11.join(dirPath, entry.name);
       if (entry.isDirectory()) {
         totalSize += await getDirectorySize(fullPath);
       } else {
@@ -5760,14 +5942,14 @@ var listCommand = command({
       return;
     }
     for (const dir of poolDirs) {
-      const poolDir = path9.join(poolRoot, dir.name);
+      const poolDir = path11.join(poolRoot, dir.name);
       const fingerprint = dir.name;
       const poolEntries = await readdir3(poolDir, { withFileTypes: true });
       const slots = poolEntries.filter((e) => e.isDirectory() && e.name.startsWith("slot-"));
-      const metadataPath = path9.join(poolDir, "metadata.json");
+      const metadataPath = path11.join(poolDir, "metadata.json");
       let metadata = null;
       try {
-        const raw = await readFile4(metadataPath, "utf-8");
+        const raw = await readFile3(metadataPath, "utf-8");
         metadata = JSON.parse(raw);
       } catch {
       }
@@ -5804,16 +5986,16 @@ var workspaceCommand = subcommands({
 // src/update-check.ts
 import { spawn as spawn2 } from "node:child_process";
-import { readFile as readFile5 } from "node:fs/promises";
+import { readFile as readFile4 } from "node:fs/promises";
 import { join } from "node:path";
 var CHECK_INTERVAL_MS = 24 * 60 * 60 * 1e3;
 var AGENTV_DIR = getAgentvHome();
 var CACHE_FILE = "version-check.json";
 var NPM_REGISTRY_URL = "https://registry.npmjs.org/agentv/latest";
-async function getCachedUpdateInfo(path10) {
-  const filePath = path10 ?? join(AGENTV_DIR, CACHE_FILE);
+async function getCachedUpdateInfo(path12) {
+  const filePath = path12 ?? join(AGENTV_DIR, CACHE_FILE);
   try {
-    const raw = await readFile5(filePath, "utf-8");
+    const raw = await readFile4(filePath, "utf-8");
     const data = JSON.parse(raw);
     if (typeof data.latestVersion === "string" && typeof data.lastCheckedAt === "string") {
       return data;
@@ -5894,7 +6076,7 @@ var app = subcommands({
   description: "AgentV CLI",
   version: package_default.version,
   cmds: {
-    eval: evalRunCommand,
+    eval: evalCommand,
     prompt: evalPromptCommand,
     compare: compareCommand,
     convert: convertCommand,
@@ -5903,26 +6085,29 @@ var app = subcommands({
     init: initCmdTsCommand,
     self: selfCommand,
     trace: traceCommand,
+    transpile: transpileCommand,
     trim: trimCommand,
     validate: validateCommand,
     workspace: workspaceCommand
   }
 });
-var PROMPT_EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["overview", "input", "judge"]);
+// Subcommand names accepted directly after `eval`. preprocessArgv() inserts
+// "run" when the argument following `eval` is neither one of these nor a
+// help flag.
+var EVAL_SUBCOMMANDS = /* @__PURE__ */ new Set(["run", "prompt", "assert"]);
+// Command names that, when one appears before "eval" in argv, make
+// preprocessArgv() leave that "eval" token untouched.
+// NOTE(review): presumably mirrors the non-eval keys of the `app` command
+// map — confirm whenever a top-level command is added or removed.
+var TOP_LEVEL_COMMANDS = /* @__PURE__ */ new Set([
+  "prompt",
+  "compare",
+  "convert",
+  "create",
+  "generate",
+  "init",
+  "self",
+  "trace",
+  "transpile",
+  "trim",
+  "validate",
+  "workspace"
+]);
 function preprocessArgv(argv) {
   const result = [...argv];
-  const promptIndex = result.indexOf("prompt");
-  if (promptIndex !== -1) {
-    const nextArg = result[promptIndex + 1];
-    if (nextArg !== "eval") {
-      result.splice(promptIndex + 1, 0, "eval");
-    }
-    const evalIdx = promptIndex + 1;
-    const subSubArg = result[evalIdx + 1];
-    if (subSubArg === void 0 || !PROMPT_EVAL_SUBCOMMANDS.has(subSubArg)) {
-      result.splice(evalIdx + 1, 0, "overview");
-    }
-  }
   for (let i = 0; i < result.length; i++) {
     if (result[i] === "--eval-id") {
       result[i] = "--test-id";
@@ -5930,6 +6115,16 @@ function preprocessArgv(argv) {
       result[i] = `--test-id=${result[i].slice("--eval-id=".length)}`;
     }
   }
+  const evalIdx = result.indexOf("eval");
+  if (evalIdx !== -1) {
+    const isTopLevel = !result.slice(0, evalIdx).some((arg) => TOP_LEVEL_COMMANDS.has(arg));
+    if (isTopLevel) {
+      const nextArg = result[evalIdx + 1];
+      if (nextArg !== void 0 && !EVAL_SUBCOMMANDS.has(nextArg) && nextArg !== "--help" && nextArg !== "-h") {
+        result.splice(evalIdx + 1, 0, "run");
+      }
+    }
+  }
   return result;
 }
 async function runCli(argv = process.argv) {
@@ -5951,4 +6146,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-RMUVJ44Z.js.map
+//# sourceMappingURL=chunk-5WIB7A27.js.map