npm - agentv - Versions diffs - 0.21.0 → 0.21.3 - Mend

agentv 0.21.0 → 0.21.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/{chunk-MA3MJNJH.js → chunk-A5T7W63L.js} +481 -420
package/dist/chunk-A5T7W63L.js.map +1 -0
package/dist/cli.js +5 -2
package/dist/cli.js.map +1 -1
package/dist/index.js +3 -3
package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +3 -3
package/package.json +8 -5
package/dist/chunk-MA3MJNJH.js.map +0 -1

package/dist/{chunk-MA3MJNJH.js → chunk-A5T7W63L.js} RENAMED Viewed

@@ -142,11 +142,20 @@ var require_dist = __commonJS({
 // src/index.ts
 import { readFileSync as readFileSync2 } from "node:fs";
-import { Command } from "commander";
+import { binary, run, subcommands as subcommands2 } from "cmd-ts";
 // src/commands/eval/index.ts
 import { stat as stat4 } from "node:fs/promises";
 import path19 from "node:path";
+import {
+  command,
+  flag,
+  number as number4,
+  option,
+  optional as optional2,
+  restPositionals,
+  string as string4
+} from "cmd-ts";
 import fg from "fast-glob";
 // src/commands/eval/run-eval.ts
@@ -155,7 +164,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
 import path18 from "node:path";
 import { pathToFileURL } from "node:url";
-// ../../packages/core/dist/chunk-BO7KG7JX.js
+// ../../packages/core/dist/chunk-B2J23S7D.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -1039,8 +1048,8 @@ var ZodType = class {
   promise() {
     return ZodPromise.create(this, this._def);
   }
-  or(option) {
-    return ZodUnion.create([this, option], this._def);
+  or(option4) {
+    return ZodUnion.create([this, option4], this._def);
   }
   and(incoming) {
     return ZodIntersection.create(this, incoming, this._def);
@@ -2890,7 +2899,7 @@ var ZodUnion = class extends ZodType {
       return INVALID;
     }
     if (ctx.common.async) {
-      return Promise.all(options.map(async (option) => {
+      return Promise.all(options.map(async (option4) => {
         const childCtx = {
           ...ctx,
           common: {
@@ -2900,7 +2909,7 @@ var ZodUnion = class extends ZodType {
           parent: null
         };
         return {
-          result: await option._parseAsync({
+          result: await option4._parseAsync({
             data: ctx.data,
             path: ctx.path,
             parent: childCtx
@@ -2911,7 +2920,7 @@ var ZodUnion = class extends ZodType {
     } else {
       let dirty = void 0;
       const issues = [];
-      for (const option of options) {
+      for (const option4 of options) {
         const childCtx = {
           ...ctx,
           common: {
@@ -2920,7 +2929,7 @@ var ZodUnion = class extends ZodType {
           },
           parent: null
         };
-        const result = option._parseSync({
+        const result = option4._parseSync({
           data: ctx.data,
           path: ctx.path,
           parent: childCtx
@@ -3001,8 +3010,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
     }
     const discriminator = this.discriminator;
     const discriminatorValue = ctx.data[discriminator];
-    const option = this.optionsMap.get(discriminatorValue);
-    if (!option) {
+    const option4 = this.optionsMap.get(discriminatorValue);
+    if (!option4) {
       addIssueToContext(ctx, {
         code: ZodIssueCode.invalid_union_discriminator,
         options: Array.from(this.optionsMap.keys()),
@@ -3011,13 +3020,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
       return INVALID;
     }
     if (ctx.common.async) {
-      return option._parseAsync({
+      return option4._parseAsync({
         data: ctx.data,
         path: ctx.path,
         parent: ctx
       });
     } else {
-      return option._parseSync({
+      return option4._parseSync({
         data: ctx.data,
         path: ctx.path,
         parent: ctx
@@ -4201,7 +4210,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-BO7KG7JX.js
+// ../../packages/core/dist/chunk-B2J23S7D.js
 async function fileExists(filePath) {
   try {
     await access(filePath, constants.F_OK);
@@ -4577,9 +4586,9 @@ function resolveVSCodeConfig(target, env, insiders) {
   const dryRunSource = target.dry_run ?? target.dryRun;
   const subagentRootSource = target.subagent_root ?? target.subagentRoot;
   const defaultCommand = insiders ? "code-insiders" : "code";
-  const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
+  const command5 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
   return {
-    command,
+    command: command5,
     waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
     dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
     subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
@@ -8081,7 +8090,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
   defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
   defineLazy(inst._zod, "values", () => {
     if (def.options.every((o) => o._zod.values)) {
-      return new Set(def.options.flatMap((option) => Array.from(option._zod.values)));
+      return new Set(def.options.flatMap((option4) => Array.from(option4._zod.values)));
     }
     return void 0;
   });
@@ -8095,8 +8104,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
   inst._zod.parse = (payload, ctx) => {
     let async = false;
     const results = [];
-    for (const option of def.options) {
-      const result = option._zod.run({
+    for (const option4 of def.options) {
+      const result = option4._zod.run({
         value: payload.value,
         issues: []
       }, ctx);
@@ -8121,10 +8130,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
   const _super = inst._zod.parse;
   defineLazy(inst._zod, "propValues", () => {
     const propValues = {};
-    for (const option of def.options) {
-      const pv = option._zod.propValues;
+    for (const option4 of def.options) {
+      const pv = option4._zod.propValues;
       if (!pv || Object.keys(pv).length === 0)
-        throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option)}"`);
+        throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option4)}"`);
       for (const [k, v] of Object.entries(pv)) {
         if (!propValues[k])
           propValues[k] = /* @__PURE__ */ new Set();
@@ -15328,8 +15337,8 @@ function isTransforming(_schema, _ctx) {
       return false;
     }
     case "union": {
-      for (const option of def.options) {
-        if (isTransforming(option, ctx))
+      for (const option4 of def.options) {
+        if (isTransforming(option4, ctx))
           return true;
       }
       return false;
@@ -34920,25 +34929,25 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
       }
     }
     const _model = asString2(rawEvaluator.model);
+    const rawRubrics = rawEvaluator.rubrics;
+    const parsedRubrics = Array.isArray(rawRubrics) ? rawRubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
+      id: asString2(rubric.id) ?? `rubric-${index + 1}`,
+      description: asString2(rubric.description) ?? "",
+      weight: typeof rubric.weight === "number" ? rubric.weight : 1,
+      required: typeof rubric.required === "boolean" ? rubric.required : true
+    })).filter((r) => r.description.length > 0) : void 0;
     if (typeValue === "rubric") {
-      const rubrics = rawEvaluator.rubrics;
-      if (!Array.isArray(rubrics)) {
+      if (!parsedRubrics) {
         logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
         continue;
       }
-      const parsedRubrics = rubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
-        id: asString2(rubric.id) ?? `rubric-${index + 1}`,
-        description: asString2(rubric.description) ?? "",
-        weight: typeof rubric.weight === "number" ? rubric.weight : 1,
-        required: typeof rubric.required === "boolean" ? rubric.required : true
-      })).filter((r) => r.description.length > 0);
       if (parsedRubrics.length === 0) {
         logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
         continue;
       }
       evaluators.push({
         name: name16,
-        type: "rubric",
+        type: "llm_judge",
         rubrics: parsedRubrics
       });
       continue;
@@ -34947,7 +34956,8 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
       name: name16,
       type: "llm_judge",
       prompt,
-      promptPath
+      promptPath,
+      ...parsedRubrics && parsedRubrics.length > 0 ? { rubrics: parsedRubrics } : {}
     });
   }
   return evaluators.length > 0 ? evaluators : void 0;
@@ -35488,7 +35498,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       if (rubricItems.length > 0) {
         const rubricEvaluator = {
           name: "rubric",
-          type: "rubric",
+          type: "llm_judge",
           rubrics: rubricItems
         };
         evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
@@ -35887,7 +35897,7 @@ async function withRetry(fn, retryConfig, signal) {
 }
 var execAsync2 = promisify2(execWithCallback);
 var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
-async function defaultCommandRunner(command, options) {
+async function defaultCommandRunner(command5, options) {
   const execOptions = {
     cwd: options.cwd,
     env: options.env,
@@ -35897,7 +35907,7 @@ async function defaultCommandRunner(command, options) {
     shell: process.platform === "win32" ? "powershell.exe" : void 0
   };
   try {
-    const { stdout, stderr } = await execAsync2(command, execOptions);
+    const { stdout, stderr } = await execAsync2(command5, execOptions);
     return {
       stdout,
       stderr,
@@ -37321,144 +37331,6 @@ function createProvider(target) {
     }
   }
 }
-var rubricCheckResultSchema = external_exports.object({
-  id: external_exports.string().describe("The ID of the rubric item being checked"),
-  satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
-  reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
-});
-var rubricEvaluationSchema = external_exports.object({
-  checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
-  overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
-});
-var RubricEvaluator = class {
-  kind = "rubric";
-  config;
-  resolveJudgeProvider;
-  constructor(options) {
-    this.config = options.config;
-    this.resolveJudgeProvider = options.resolveJudgeProvider;
-  }
-  async evaluate(context) {
-    const judgeProvider = await this.resolveJudgeProvider(context);
-    if (!judgeProvider) {
-      throw new Error("No judge provider available for rubric evaluation");
-    }
-    if (!this.config.rubrics || this.config.rubrics.length === 0) {
-      throw new Error(
-        `No rubrics found for evaluator "${this.config.name}". Run "agentv generate rubrics" first.`
-      );
-    }
-    const prompt = this.buildPrompt(context, this.config.rubrics);
-    const model = judgeProvider.asLanguageModel?.();
-    if (!model) {
-      throw new Error("Judge provider does not support language model interface");
-    }
-    const system = `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
-You must return a valid JSON object matching this schema:
-{
-  "checks": [
-    {
-      "id": "string (rubric id)",
-      "satisfied": boolean,
-      "reasoning": "string (brief explanation)"
-    }
-  ],
-  "overall_reasoning": "string (summary)"
-}`;
-    let result;
-    let lastError;
-    for (let attempt = 1; attempt <= 3; attempt++) {
-      try {
-        const { text: text2 } = await generateText({
-          model,
-          system,
-          prompt
-        });
-        const cleaned = text2.replace(/```json\n?|```/g, "").trim();
-        result = rubricEvaluationSchema.parse(JSON.parse(cleaned));
-        break;
-      } catch (e) {
-        lastError = e instanceof Error ? e : new Error(String(e));
-      }
-    }
-    if (!result) {
-      throw new Error(
-        `Failed to parse rubric evaluation result after 3 attempts: ${lastError?.message}`
-      );
-    }
-    const { score, verdict, hits, misses } = this.calculateScore(result, this.config.rubrics);
-    return {
-      score,
-      verdict,
-      hits,
-      misses,
-      expectedAspectCount: this.config.rubrics.length,
-      reasoning: result.overall_reasoning,
-      evaluatorRawRequest: {
-        prompt
-      }
-    };
-  }
-  buildPrompt(context, rubrics) {
-    const parts = [
-      "You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
-      "",
-      "[[ ## question ## ]]",
-      context.evalCase.question,
-      "",
-      "[[ ## expected_outcome ## ]]",
-      context.evalCase.expected_outcome,
-      ""
-    ];
-    if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
-      parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
-    }
-    parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
-    for (const rubric of rubrics) {
-      const requiredLabel = rubric.required ? " (REQUIRED)" : "";
-      const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
-      parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
-    }
-    parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
-    return parts.join("\n");
-  }
-  calculateScore(result, rubrics) {
-    const rubricMap = new Map(rubrics.map((r) => [r.id, r]));
-    const hits = [];
-    const misses = [];
-    let totalWeight = 0;
-    let earnedWeight = 0;
-    let failedRequired = false;
-    for (const check2 of result.checks) {
-      const rubric = rubricMap.get(check2.id);
-      if (!rubric) {
-        continue;
-      }
-      totalWeight += rubric.weight;
-      if (check2.satisfied) {
-        earnedWeight += rubric.weight;
-        hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
-      } else {
-        misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
-        if (rubric.required) {
-          failedRequired = true;
-        }
-      }
-    }
-    const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
-    let verdict;
-    if (failedRequired) {
-      verdict = "fail";
-    } else if (score >= 0.8) {
-      verdict = "pass";
-    } else if (score >= 0.6) {
-      verdict = "borderline";
-    } else {
-      verdict = "fail";
-    }
-    return { score, verdict, hits, misses };
-  }
-};
 var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
 Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -37476,6 +37348,21 @@ Be concise and focused in your evaluation. Provide succinct, specific feedback r
 [[ ## candidate_answer ## ]]
 {{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
+var freeformEvaluationSchema = external_exports.object({
+  score: external_exports.number().min(0).max(1).describe("Score between 0.0 and 1.0"),
+  hits: external_exports.array(external_exports.string()).describe("Brief specific achievements").optional(),
+  misses: external_exports.array(external_exports.string()).describe("Brief failures or omissions").optional(),
+  reasoning: external_exports.string().describe("Concise explanation (1-2 sentences)").optional()
+});
+var rubricCheckResultSchema = external_exports.object({
+  id: external_exports.string().describe("The ID of the rubric item being checked"),
+  satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
+  reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
+});
+var rubricEvaluationSchema = external_exports.object({
+  checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
+  overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
+});
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -37493,9 +37380,13 @@ var LlmJudgeEvaluator = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
-    return this.evaluateWithPrompt(context, judgeProvider);
+    const config2 = context.evaluator;
+    if (config2?.type === "llm_judge" && config2.rubrics && config2.rubrics.length > 0) {
+      return this.evaluateWithRubrics(context, judgeProvider, config2.rubrics);
+    }
+    return this.evaluateFreeform(context, judgeProvider);
   }
-  async evaluateWithPrompt(context, judgeProvider) {
+  async evaluateFreeform(context, judgeProvider) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const variables = {
       [TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
@@ -37512,34 +37403,132 @@ var LlmJudgeEvaluator = class {
     const systemPrompt = buildOutputSchema();
     const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
     const userPrompt = substituteVariables(evaluatorTemplate, variables);
-    const response = await judgeProvider.invoke({
-      question: userPrompt,
-      systemPrompt,
-      evalCaseId: context.evalCase.id,
-      attempt: context.attempt,
-      maxOutputTokens: this.maxOutputTokens,
-      temperature: this.temperature
-    });
-    const parsed = parseQualityResponse(response);
-    const score = clampScore(parsed.score ?? 0);
-    const hits = Array.isArray(parsed.hits) ? parsed.hits.filter(isNonEmptyString).slice(0, 4) : [];
-    const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
-    const reasoning = parsed.reasoning ?? response.reasoning;
-    const expectedAspectCount = Math.max(hits.length + misses.length, 1);
     const evaluatorRawRequest = {
       userPrompt,
       systemPrompt,
       target: judgeProvider.targetName
     };
+    try {
+      const { data, providerResponse } = await this.runWithRetry({
+        context,
+        judgeProvider,
+        systemPrompt,
+        userPrompt,
+        schema: freeformEvaluationSchema
+      });
+      const score = clampScore(data.score);
+      const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
+      const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
+      const reasoning = data.reasoning ?? providerResponse?.reasoning;
+      const expectedAspectCount = Math.max(hits.length + misses.length, 1);
+      return {
+        score,
+        verdict: scoreToVerdict(score),
+        hits,
+        misses,
+        expectedAspectCount,
+        reasoning,
+        evaluatorRawRequest
+      };
+    } catch {
+      return {
+        score: 0,
+        verdict: "fail",
+        hits: [],
+        misses: [],
+        expectedAspectCount: 1,
+        evaluatorRawRequest
+      };
+    }
+  }
+  async evaluateWithRubrics(context, judgeProvider, rubrics) {
+    if (!rubrics || rubrics.length === 0) {
+      throw new Error(
+        `No rubrics found for evaluator "${context.evaluator?.name ?? "llm_judge"}". Run "agentv generate rubrics" first.`
+      );
+    }
+    const prompt = this.buildRubricPrompt(context, rubrics);
+    const systemPrompt = buildRubricOutputSchema();
+    const evaluatorRawRequest = {
+      userPrompt: prompt,
+      systemPrompt,
+      target: judgeProvider.targetName
+    };
+    const { data } = await this.runWithRetry({
+      context,
+      judgeProvider,
+      systemPrompt,
+      userPrompt: prompt,
+      schema: rubricEvaluationSchema
+    });
+    const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
     return {
       score,
+      verdict,
       hits,
       misses,
-      expectedAspectCount,
-      reasoning,
+      expectedAspectCount: rubrics.length,
+      reasoning: data.overall_reasoning,
       evaluatorRawRequest
     };
   }
+  buildRubricPrompt(context, rubrics) {
+    const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
+    const parts = [
+      "You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
+      "",
+      "[[ ## question ## ]]",
+      formattedQuestion,
+      "",
+      "[[ ## expected_outcome ## ]]",
+      context.evalCase.expected_outcome,
+      ""
+    ];
+    if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
+      parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
+    }
+    parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
+    for (const rubric of rubrics) {
+      const requiredLabel = rubric.required ? " (REQUIRED)" : "";
+      const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
+      parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
+    }
+    parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
+    return parts.join("\n");
+  }
+  async runWithRetry(options) {
+    const { context, judgeProvider, systemPrompt, userPrompt, schema } = options;
+    let lastError;
+    for (let attempt = 1; attempt <= 3; attempt++) {
+      try {
+        const model = judgeProvider.asLanguageModel?.();
+        if (model) {
+          const { text: text2 } = await generateText({
+            model,
+            system: systemPrompt,
+            prompt: userPrompt,
+            ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
+            ...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
+          });
+          const data2 = schema.parse(parseJsonFromText(text2));
+          return { data: data2 };
+        }
+        const response = await judgeProvider.invoke({
+          question: userPrompt,
+          systemPrompt,
+          evalCaseId: context.evalCase.id,
+          attempt: context.attempt,
+          maxOutputTokens: this.maxOutputTokens,
+          temperature: this.temperature
+        });
+        const data = schema.parse(parseJsonFromText(response.text ?? ""));
+        return { data, providerResponse: response };
+      } catch (e) {
+        lastError = e instanceof Error ? e : new Error(String(e));
+      }
+    }
+    throw new Error(`Failed to parse evaluator response after 3 attempts: ${lastError?.message}`);
+  }
 };
 function buildOutputSchema() {
   return [
@@ -37553,6 +37542,29 @@ function buildOutputSchema() {
     "}"
   ].join("\n");
 }
+function buildRubricOutputSchema() {
+  return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
+You must return a valid JSON object matching this schema:
+{
+  "checks": [
+    {
+      "id": "string (rubric id)",
+      "satisfied": boolean,
+      "reasoning": "string (brief explanation)"
+    }
+  ],
+  "overall_reasoning": "string (summary)"
+}`;
+}
+function scoreToVerdict(score) {
+  if (score >= 0.8) {
+    return "pass";
+  }
+  if (score >= 0.6) {
+    return "borderline";
+  }
+  return "fail";
+}
 function clampScore(value) {
   if (Number.isNaN(value) || !Number.isFinite(value)) {
     return 0;
@@ -37565,71 +37577,15 @@ function clampScore(value) {
   }
   return value;
 }
-function parseQualityResponse(response) {
-  const text2 = typeof response.text === "string" ? response.text.trim() : "";
-  if (text2.length === 0) {
-    return {};
-  }
-  const direct = attemptParseJson(text2);
-  if (direct && validateQualityJson(direct)) {
-    return direct;
-  }
-  const extracted = extractJsonBlob(text2);
-  if (extracted) {
-    const parsed = attemptParseJson(extracted);
-    if (parsed && validateQualityJson(parsed)) {
-      return parsed;
-    }
-  }
-  return {};
-}
-function attemptParseJson(text2) {
-  try {
-    const parsed = JSON.parse(text2);
-    const score = typeof parsed.score === "number" ? parsed.score : void 0;
-    const hits = parsed.hits;
-    const misses = parsed.misses;
-    const reasoning = typeof parsed.reasoning === "string" ? parsed.reasoning : void 0;
-    return { score, hits, misses, reasoning };
-  } catch {
-    return void 0;
-  }
-}
-function validateQualityJson(parsed) {
-  if (typeof parsed.score !== "number") {
-    return false;
-  }
-  if (Number.isNaN(parsed.score) || !Number.isFinite(parsed.score)) {
-    return false;
-  }
-  if (parsed.score < 0 || parsed.score > 1) {
-    return false;
-  }
-  if (parsed.hits !== void 0) {
-    if (!Array.isArray(parsed.hits)) {
-      return false;
-    }
-    if (!parsed.hits.every((item) => typeof item === "string")) {
-      return false;
-    }
-  }
-  if (parsed.misses !== void 0) {
-    if (!Array.isArray(parsed.misses)) {
-      return false;
-    }
-    if (!parsed.misses.every((item) => typeof item === "string")) {
-      return false;
-    }
-  }
-  if (parsed.reasoning !== void 0 && typeof parsed.reasoning !== "string") {
-    return false;
-  }
-  return true;
-}
 function extractJsonBlob(text2) {
   const match = text2.match(/\{[\s\S]*\}/);
   return match?.[0];
 }
+function parseJsonFromText(text2) {
+  const cleaned = typeof text2 === "string" ? text2.replace(/```json\n?|```/g, "").trim() : "";
+  const blob = extractJsonBlob(cleaned) ?? cleaned;
+  return JSON.parse(blob);
+}
 function isNonEmptyString(value) {
   return typeof value === "string" && value.trim().length > 0;
 }
@@ -37666,6 +37622,7 @@ var CodeEvaluator = class {
       const reasoning = typeof parsed?.reasoning === "string" ? parsed.reasoning : void 0;
       return {
         score,
+        verdict: scoreToVerdict(score),
         hits,
         misses,
         expectedAspectCount: hits.length + misses.length || 1,
@@ -37679,6 +37636,7 @@ var CodeEvaluator = class {
       const message = error40 instanceof Error ? error40.message : String(error40);
       return {
         score: 0,
+        verdict: "fail",
         hits: [],
         misses: [`Code evaluator failed: ${message}`],
         expectedAspectCount: 1,
@@ -37692,6 +37650,33 @@ var CodeEvaluator = class {
     }
   }
 };
+function calculateRubricScore(result, rubrics) {
+  const rubricMap = new Map(rubrics.map((rubric) => [rubric.id, rubric]));
+  const hits = [];
+  const misses = [];
+  let totalWeight = 0;
+  let earnedWeight = 0;
+  let failedRequired = false;
+  for (const check2 of result.checks) {
+    const rubric = rubricMap.get(check2.id);
+    if (!rubric) {
+      continue;
+    }
+    totalWeight += rubric.weight;
+    if (check2.satisfied) {
+      earnedWeight += rubric.weight;
+      hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
+    } else {
+      misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
+      if (rubric.required) {
+        failedRequired = true;
+      }
+    }
+  }
+  const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
+  const verdict = failedRequired ? "fail" : scoreToVerdict(score);
+  return { score, verdict, hits, misses };
+}
 async function executeScript(scriptPath, input, agentTimeoutMs, cwd) {
   const { spawn: spawn22 } = await import("node:child_process");
   return await new Promise((resolve2, reject) => {
@@ -37821,7 +37806,7 @@ function pLimit(concurrency) {
     activeCount--;
     resumeNext();
   };
-  const run = async (function_, resolve2, arguments_) => {
+  const run2 = async (function_, resolve2, arguments_) => {
     const result = (async () => function_(...arguments_))();
     resolve2(result);
     try {
@@ -37834,7 +37819,7 @@ function pLimit(concurrency) {
     new Promise((internalResolve) => {
       queue.enqueue(internalResolve);
     }).then(
-      run.bind(void 0, function_, resolve2, arguments_)
+      run2.bind(void 0, function_, resolve2, arguments_)
     );
     (async () => {
       await Promise.resolve();
@@ -38417,7 +38402,6 @@ async function runEvaluatorList(options) {
           reasoning: score2.reasoning,
           evaluator_provider_request: score2.evaluatorRawRequest
         });
-        continue;
       }
       if (evaluator.type === "code") {
         const codeEvaluator = new CodeEvaluator({
@@ -38445,44 +38429,12 @@ async function runEvaluatorList(options) {
           reasoning: score2.reasoning,
           evaluator_provider_request: score2.evaluatorRawRequest
         });
-        continue;
-      }
-      if (evaluator.type === "rubric") {
-        const rubricEvaluator = new RubricEvaluator({
-          config: evaluator,
-          resolveJudgeProvider: async (context) => {
-            if (context.judgeProvider) {
-              return context.judgeProvider;
-            }
-            return judgeProvider;
-          }
-        });
-        const score2 = await rubricEvaluator.evaluate({
-          evalCase,
-          candidate,
-          target,
-          provider,
-          attempt,
-          promptInputs,
-          now,
-          judgeProvider
-        });
-        scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
-        evaluatorResults.push({
-          name: evaluator.name,
-          type: evaluator.type,
-          score: score2.score,
-          verdict: score2.verdict,
-          hits: score2.hits,
-          misses: score2.misses,
-          reasoning: score2.reasoning,
-          evaluator_provider_request: score2.evaluatorRawRequest
-        });
       }
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       const fallbackScore = {
         score: 0,
+        verdict: "fail",
         hits: [],
         misses: [`Evaluator '${evaluator.name}' failed: ${message}`],
         expectedAspectCount: 1,
@@ -38497,6 +38449,7 @@ async function runEvaluatorList(options) {
         name: evaluator.name ?? "unknown",
         type: evaluator.type ?? "unknown",
         score: 0,
+        verdict: "fail",
         hits: [],
         misses: [`Evaluator '${evaluator.name ?? "unknown"}' failed: ${message}`],
         reasoning: message
@@ -38515,6 +38468,7 @@ async function runEvaluatorList(options) {
   const reasoning = reasoningParts.length > 0 ? reasoningParts.join(" | ") : void 0;
   const score = {
     score: aggregateScore,
+    verdict: scoreToVerdict2(aggregateScore),
     hits,
     misses,
     expectedAspectCount,
@@ -38565,6 +38519,15 @@ async function resolveCustomPrompt(config2) {
 function isNonEmptyString2(value) {
   return typeof value === "string" && value.trim().length > 0;
 }
+function scoreToVerdict2(score) {
+  if (score >= 0.8) {
+    return "pass";
+  }
+  if (score >= 0.6) {
+    return "borderline";
+  }
+  return "fail";
+}
 function filterEvalCases(evalCases, evalId) {
   if (!evalId) {
     return evalCases;
@@ -38778,9 +38741,6 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
   }
   return parts.join("\n");
 }
-function createAgentKernel() {
-  return { status: "stub" };
-}
 // src/commands/eval/env.ts
 import { constants as constants4 } from "node:fs";
@@ -39513,17 +39473,18 @@ function formatEvaluationSummary(summary) {
 // ../../packages/core/dist/evaluation/validation/index.js
 import { readFile as readFile7 } from "node:fs/promises";
+import path16 from "node:path";
 import { parse as parse6 } from "yaml";
 import { readFile as readFile23 } from "node:fs/promises";
-import path16 from "node:path";
+import path23 from "node:path";
 import { parse as parse23 } from "yaml";
 import { readFile as readFile33 } from "node:fs/promises";
-import path23 from "node:path";
+import path33 from "node:path";
 import { parse as parse33 } from "yaml";
 import { readFile as readFile43 } from "node:fs/promises";
 import { parse as parse42 } from "yaml";
 import { readFile as readFile52 } from "node:fs/promises";
-import path33 from "node:path";
+import path43 from "node:path";
 import { parse as parse52 } from "yaml";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
 var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
@@ -39533,12 +39494,12 @@ async function detectFileType(filePath) {
     const content = await readFile7(filePath, "utf8");
     const parsed = parse6(content);
     if (typeof parsed !== "object" || parsed === null) {
-      return "unknown";
+      return inferFileTypeFromPath(filePath);
     }
     const record2 = parsed;
     const schema = record2.$schema;
     if (typeof schema !== "string") {
-      return "unknown";
+      return inferFileTypeFromPath(filePath);
     }
     switch (schema) {
       case SCHEMA_EVAL_V2:
@@ -39548,18 +39509,31 @@ async function detectFileType(filePath) {
       case SCHEMA_CONFIG_V22:
         return "config";
       default:
-        return "unknown";
+        return inferFileTypeFromPath(filePath);
     }
   } catch {
-    return "unknown";
+    return inferFileTypeFromPath(filePath);
   }
 }
+function inferFileTypeFromPath(filePath) {
+  const normalized = path16.normalize(filePath).replace(/\\/g, "/");
+  const basename = path16.basename(filePath);
+  if (normalized.includes("/.agentv/")) {
+    if (basename === "config.yaml" || basename === "config.yml") {
+      return "config";
+    }
+    if (basename === "targets.yaml" || basename === "targets.yml") {
+      return "targets";
+    }
+  }
+  return "eval";
+}
 function isObject2(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 async function validateEvalFile(filePath) {
   const errors = [];
-  const absolutePath = path16.resolve(filePath);
+  const absolutePath = path23.resolve(filePath);
   let parsed;
   try {
     const content = await readFile23(absolutePath, "utf8");
@@ -39908,7 +39882,7 @@ function validateUnknownSettings(target, provider, absolutePath, location, error
 }
 async function validateTargetsFile(filePath) {
   const errors = [];
-  const absolutePath = path23.resolve(filePath);
+  const absolutePath = path33.resolve(filePath);
   let parsed;
   try {
     const content = await readFile33(absolutePath, "utf8");
@@ -40187,8 +40161,8 @@ async function validateConfigFile(filePath) {
     }
     const config2 = parsed;
     const schema = config2.$schema;
-    if (schema !== SCHEMA_CONFIG_V222) {
-      const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
+    if (schema !== void 0 && schema !== SCHEMA_CONFIG_V222) {
+      const message = `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}' or omit the field.`;
       errors.push({
         severity: "error",
         filePath,
@@ -40250,7 +40224,7 @@ function isObject3(value) {
 }
 async function validateFileReferences(evalFilePath) {
   const errors = [];
-  const absolutePath = path33.resolve(evalFilePath);
+  const absolutePath = path43.resolve(evalFilePath);
   const gitRoot = await findGitRoot(absolutePath);
   if (!gitRoot) {
     errors.push({
@@ -40607,12 +40581,12 @@ function buildDefaultOutputPath(cwd, format) {
   const extension = getDefaultExtension(format);
   return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
 }
-function resolvePromptDirectory(option, cwd) {
-  if (option === void 0) {
+function resolvePromptDirectory(option4, cwd) {
+  if (option4 === void 0) {
     return void 0;
   }
-  if (typeof option === "string" && option.trim().length > 0) {
-    return path18.resolve(cwd, option);
+  if (typeof option4 === "string" && option4.trim().length > 0) {
+    return path18.resolve(cwd, option4);
   }
   return path18.join(cwd, ".agentv", "prompts");
 }
@@ -40916,56 +40890,119 @@ async function resolveEvaluationRunner() {
 }
 // src/commands/eval/index.ts
-function parseInteger(value, fallback) {
-  const parsed = Number.parseInt(value, 10);
-  if (Number.isNaN(parsed)) {
-    return fallback;
-  }
-  return parsed;
-}
-function registerEvalCommand(program) {
-  program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
-    "--workers <count>",
-    "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
-    (value) => parseInteger(value, 1)
-  ).option("--out <path>", "Write results to the specified path").option(
-    "--output-format <format>",
-    "Output format: 'jsonl' or 'yaml' (default: jsonl)",
-    "jsonl"
-  ).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
-    "--dry-run-delay <ms>",
-    "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
-    (value) => parseInteger(value, 0),
-    0
-  ).option(
-    "--dry-run-delay-min <ms>",
-    "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
-    (value) => parseInteger(value, 0),
-    0
-  ).option(
-    "--dry-run-delay-max <ms>",
-    "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
-    (value) => parseInteger(value, 0),
-    0
-  ).option(
-    "--agent-timeout <seconds>",
-    "Timeout in seconds for provider responses (default: 120)",
-    (value) => parseInteger(value, 120),
-    120
-  ).option(
-    "--max-retries <count>",
-    "Retry count for timeout recoveries (default: 2)",
-    (value) => parseInteger(value, 2),
-    2
-  ).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
-    "--dump-prompts [dir]",
-    "Persist prompt payloads for debugging (optional custom directory)"
-  ).action(async (evalPaths, rawOptions) => {
-    const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
+var evalCommand = command({
+  name: "eval",
+  description: "Run eval suites and report results",
+  args: {
+    evalPaths: restPositionals({
+      type: string4,
+      displayName: "eval-paths",
+      description: "Path(s) or glob(s) to evaluation .yaml file(s)"
+    }),
+    target: option({
+      type: string4,
+      long: "target",
+      description: "Override target name from targets.yaml",
+      defaultValue: () => "default"
+    }),
+    targets: option({
+      type: optional2(string4),
+      long: "targets",
+      description: "Path to targets.yaml (overrides discovery)"
+    }),
+    evalId: option({
+      type: optional2(string4),
+      long: "eval-id",
+      description: "Run only the eval case with this identifier"
+    }),
+    workers: option({
+      type: number4,
+      long: "workers",
+      description: "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
+      defaultValue: () => 1
+    }),
+    out: option({
+      type: optional2(string4),
+      long: "out",
+      description: "Write results to the specified path"
+    }),
+    outputFormat: option({
+      type: string4,
+      long: "output-format",
+      description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
+      defaultValue: () => "jsonl"
+    }),
+    dryRun: flag({
+      long: "dry-run",
+      description: "Use mock provider responses instead of real LLM calls"
+    }),
+    dryRunDelay: option({
+      type: number4,
+      long: "dry-run-delay",
+      description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
+      defaultValue: () => 0
+    }),
+    dryRunDelayMin: option({
+      type: number4,
+      long: "dry-run-delay-min",
+      description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
+      defaultValue: () => 0
+    }),
+    dryRunDelayMax: option({
+      type: number4,
+      long: "dry-run-delay-max",
+      description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
+      defaultValue: () => 0
+    }),
+    agentTimeout: option({
+      type: number4,
+      long: "agent-timeout",
+      description: "Timeout in seconds for provider responses (default: 120)",
+      defaultValue: () => 120
+    }),
+    maxRetries: option({
+      type: number4,
+      long: "max-retries",
+      description: "Retry count for timeout recoveries (default: 2)",
+      defaultValue: () => 2
+    }),
+    cache: flag({
+      long: "cache",
+      description: "Enable in-memory provider response cache"
+    }),
+    verbose: flag({
+      long: "verbose",
+      description: "Enable verbose logging"
+    }),
+    dumpPrompts: option({
+      type: optional2(string4),
+      long: "dump-prompts",
+      description: "Directory path for persisting prompt payloads for debugging"
+    })
+  },
+  handler: async (args) => {
+    const resolvedPaths = await resolveEvalPaths(args.evalPaths, process.cwd());
+    const dumpPrompts = args.dumpPrompts !== void 0 ? args.dumpPrompts === "." ? true : args.dumpPrompts : void 0;
+    const rawOptions = {
+      target: args.target,
+      targets: args.targets,
+      evalId: args.evalId,
+      workers: args.workers,
+      out: args.out,
+      outputFormat: args.outputFormat,
+      dryRun: args.dryRun,
+      dryRunDelay: args.dryRunDelay,
+      dryRunDelayMin: args.dryRunDelayMin,
+      dryRunDelayMax: args.dryRunDelayMax,
+      agentTimeout: args.agentTimeout,
+      maxRetries: args.maxRetries,
+      cache: args.cache,
+      verbose: args.verbose,
+      dumpPrompts
+    };
     await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
-  });
-  return program;
-}
+  }
+});
 async function resolveEvalPaths(evalPaths, cwd) {
   const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
   if (normalizedInputs.length === 0) {
@@ -41013,6 +41050,9 @@ async function resolveEvalPaths(evalPaths, cwd) {
   return sorted;
 }
+// src/commands/generate/index.ts
+import { command as command2, flag as flag2, option as option2, optional as optional3, positional as positional2, string as string5, subcommands } from "cmd-ts";
 // src/commands/generate/rubrics.ts
 import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
 import path20 from "node:path";
@@ -41154,29 +41194,53 @@ function extractQuestion(evalCase) {
 }
 // src/commands/generate/index.ts
-function registerGenerateCommand(program) {
-  const generate = program.command("generate").description("Generate evaluation artifacts");
-  generate.command("rubrics <file>").description("Generate rubrics from expected_outcome in YAML eval file").option(
-    "-t, --target <target>",
-    "Override target for rubric generation (default: file target or openai:gpt-4o)"
-  ).option("-v, --verbose", "Show detailed progress").action(async (file2, options) => {
+var rubricsCommand = command2({
+  name: "rubrics",
+  description: "Generate rubrics from expected_outcome in YAML eval file",
+  args: {
+    file: positional2({
+      type: string5,
+      displayName: "file",
+      description: "Path to YAML eval file"
+    }),
+    target: option2({
+      type: optional3(string5),
+      long: "target",
+      short: "t",
+      description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
+    }),
+    verbose: flag2({
+      long: "verbose",
+      short: "v",
+      description: "Show detailed progress"
+    })
+  },
+  handler: async ({ file: file2, target, verbose }) => {
     try {
       await generateRubricsCommand({
         file: file2,
-        target: options.target,
-        verbose: options.verbose
+        target,
+        verbose
       });
     } catch (error40) {
       console.error(`Error: ${error40.message}`);
       process.exit(1);
     }
-  });
-}
+  }
+});
+var generateCommand = subcommands({
+  name: "generate",
+  description: "Generate evaluation artifacts",
+  cmds: {
+    rubrics: rubricsCommand
+  }
+});
 // src/commands/init/index.ts
 import { existsSync, mkdirSync, writeFileSync } from "node:fs";
 import path24 from "node:path";
 import * as readline from "node:readline/promises";
+import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
 // src/templates/index.ts
 import { readFileSync, readdirSync, statSync } from "node:fs";
@@ -41355,15 +41419,28 @@ Files installed to ${path24.relative(targetPath, claudeDir)}:`);
   console.log("  2. Configure targets in .agentv/targets.yaml");
   console.log("  3. Create eval files using the schema and prompt templates");
 }
+var initCmdTsCommand = command3({
+  name: "init",
+  description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
+  args: {
+    path: option3({
+      type: optional4(string6),
+      long: "path",
+      description: "Target directory for initialization (default: current directory)"
+    })
+  },
+  handler: async ({ path: targetPath }) => {
+    try {
+      await initCommand({ targetPath });
+    } catch (error40) {
+      console.error(`Error: ${error40.message}`);
+      process.exit(1);
+    }
+  }
+});
-// src/commands/status.ts
-function registerStatusCommand(program) {
-  program.command("status").description("Show the latest AgentV kernel status").action(() => {
-    const kernel = createAgentKernel();
-    console.log(`Kernel status: ${kernel.status}`);
-  });
-  return program;
-}
+// src/commands/validate/index.ts
+import { command as command4, restPositionals as restPositionals2, string as string7 } from "cmd-ts";
 // src/commands/validate/format-output.ts
 var ANSI_RED3 = "\x1B[31m";
@@ -41468,20 +41545,6 @@ async function validateFiles(paths) {
 async function validateSingleFile(filePath) {
   const absolutePath = path25.resolve(filePath);
   const fileType = await detectFileType(absolutePath);
-  if (fileType === "unknown") {
-    return {
-      valid: false,
-      filePath: absolutePath,
-      fileType: "unknown",
-      errors: [
-        {
-          severity: "error",
-          filePath: absolutePath,
-          message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
-        }
-      ]
-    };
-  }
   let result;
   if (fileType === "eval") {
     result = await validateEvalFile(absolutePath);
@@ -41551,7 +41614,7 @@ function isYamlFile(filePath) {
 }
 // src/commands/validate/index.ts
-async function runValidateCommand(paths, _options) {
+async function runValidateCommand(paths) {
   if (paths.length === 0) {
     console.error("Error: No paths specified. Usage: agentv validate <paths...>");
     process.exit(1);
@@ -41563,47 +41626,45 @@ async function runValidateCommand(paths, _options) {
     process.exit(1);
   }
 }
-function registerValidateCommand(program) {
-  program.command("validate").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to validate").action(async (paths, _options) => {
+var validateCommand = command4({
+  name: "validate",
+  description: "Validate AgentV eval and targets YAML files",
+  args: {
+    paths: restPositionals2({
+      type: string7,
+      displayName: "paths",
+      description: "Files or directories to validate"
+    })
+  },
+  handler: async ({ paths }) => {
     try {
-      await runValidateCommand(paths, _options);
+      await runValidateCommand(paths);
     } catch (error40) {
       console.error(`Error: ${error40.message}`);
       process.exit(1);
     }
-  });
-  return program;
-}
+  }
+});
 // src/index.ts
 var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
-function createProgram() {
-  const program = new Command();
-  program.name("agentv").description("AgentV CLI scaffolding").version(packageJson.version);
-  registerStatusCommand(program);
-  registerEvalCommand(program);
-  registerValidateCommand(program);
-  registerGenerateCommand(program);
-  program.command("init [path]").description(
-    "Initialize AgentV in your project (installs prompt templates and schema to .github)"
-  ).action(async (targetPath) => {
-    try {
-      await initCommand({ targetPath });
-    } catch (error40) {
-      console.error(`Error: ${error40.message}`);
-      process.exit(1);
-    }
-  });
-  return program;
-}
+var app = subcommands2({
+  name: "agentv",
+  description: "AgentV CLI",
+  version: packageJson.version,
+  cmds: {
+    eval: evalCommand,
+    validate: validateCommand,
+    generate: generateCommand,
+    init: initCmdTsCommand
+  }
+});
 async function runCli(argv = process.argv) {
-  const program = createProgram();
-  await program.parseAsync(argv);
-  return program;
+  await run(binary(app), argv);
 }
 export {
-  createProgram,
+  app,
   runCli
 };
-//# sourceMappingURL=chunk-MA3MJNJH.js.map
+//# sourceMappingURL=chunk-A5T7W63L.js.map