npm - agentv - Versions diffs - 0.20.1 → 0.21.0 - Mend

agentv 0.20.1 → 0.21.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (7)

package/README.md +53 -0
package/dist/{chunk-GDGNKNKP.js → chunk-MA3MJNJH.js} +556 -87
package/dist/chunk-MA3MJNJH.js.map +1 -0
package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/package.json +2 -5
package/dist/chunk-GDGNKNKP.js.map +0 -1

package/dist/{chunk-GDGNKNKP.js → chunk-MA3MJNJH.js} RENAMED Viewed

@@ -155,7 +155,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
 import path18 from "node:path";
 import { pathToFileURL } from "node:url";
-// ../../packages/core/dist/chunk-SVY324GN.js
+// ../../packages/core/dist/chunk-BO7KG7JX.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -638,8 +638,8 @@ function getErrorMap() {
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
 var makeIssue = (params) => {
-  const { data, path: path25, errorMaps, issueData } = params;
-  const fullPath = [...path25, ...issueData.path || []];
+  const { data, path: path26, errorMaps, issueData } = params;
+  const fullPath = [...path26, ...issueData.path || []];
   const fullIssue = {
     ...issueData,
     path: fullPath
@@ -755,11 +755,11 @@ var errorUtil;
 // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
 var ParseInputLazyPath = class {
-  constructor(parent, value, path25, key2) {
+  constructor(parent, value, path26, key2) {
     this._cachedPath = [];
     this.parent = parent;
     this.data = value;
-    this._path = path25;
+    this._path = path26;
     this._key = key2;
   }
   get path() {
@@ -4201,7 +4201,7 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-SVY324GN.js
+// ../../packages/core/dist/chunk-BO7KG7JX.js
 async function fileExists(filePath) {
   try {
     await access(filePath, constants.F_OK);
@@ -5976,10 +5976,10 @@ function assignProp(target, prop, value) {
     configurable: true
   });
 }
-function getElementAtPath(obj, path25) {
-  if (!path25)
+function getElementAtPath(obj, path26) {
+  if (!path26)
     return obj;
-  return path25.reduce((acc, key2) => acc?.[key2], obj);
+  return path26.reduce((acc, key2) => acc?.[key2], obj);
 }
 function promiseAllObject(promisesObj) {
   const keys = Object.keys(promisesObj);
@@ -6299,11 +6299,11 @@ function aborted(x, startIndex = 0) {
   }
   return false;
 }
-function prefixIssues(path25, issues) {
+function prefixIssues(path26, issues) {
   return issues.map((iss) => {
     var _a17;
     (_a17 = iss).path ?? (_a17.path = []);
-    iss.path.unshift(path25);
+    iss.path.unshift(path26);
     return iss;
   });
 }
@@ -6440,7 +6440,7 @@ function treeifyError(error40, _mapper) {
     return issue2.message;
   };
   const result = { errors: [] };
-  const processError = (error41, path25 = []) => {
+  const processError = (error41, path26 = []) => {
     var _a17, _b8;
     for (const issue2 of error41.issues) {
       if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6450,7 +6450,7 @@ function treeifyError(error40, _mapper) {
       } else if (issue2.code === "invalid_element") {
         processError({ issues: issue2.issues }, issue2.path);
       } else {
-        const fullpath = [...path25, ...issue2.path];
+        const fullpath = [...path26, ...issue2.path];
         if (fullpath.length === 0) {
           result.errors.push(mapper(issue2));
           continue;
@@ -6480,9 +6480,9 @@ function treeifyError(error40, _mapper) {
   processError(error40);
   return result;
 }
-function toDotPath(path25) {
+function toDotPath(path26) {
   const segs = [];
-  for (const seg of path25) {
+  for (const seg of path26) {
     if (typeof seg === "number")
       segs.push(`[${seg}]`);
     else if (typeof seg === "symbol")
@@ -26035,14 +26035,14 @@ function createAzure(options = {}) {
     description: "Azure OpenAI resource name"
   });
   const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
-  const url2 = ({ path: path25, modelId }) => {
+  const url2 = ({ path: path26, modelId }) => {
     var _a24;
     const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
     let fullUrl;
     if (options.useDeploymentBasedUrls) {
-      fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path25}`);
+      fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path26}`);
     } else {
-      fullUrl = new URL(`${baseUrlPrefix}/v1${path25}`);
+      fullUrl = new URL(`${baseUrlPrefix}/v1${path26}`);
     }
     fullUrl.searchParams.set("api-version", apiVersion);
     return fullUrl.toString();
@@ -34553,7 +34553,7 @@ function isTestMessage(value) {
   }
   return candidate.content.every(isJsonObject);
 }
-var EVALUATOR_KIND_VALUES = ["code", "llm_judge"];
+var EVALUATOR_KIND_VALUES = ["code", "llm_judge", "rubric"];
 var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
 function isEvaluatorKind(value) {
   return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
@@ -34920,6 +34920,29 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
       }
     }
     const _model = asString2(rawEvaluator.model);
+    if (typeValue === "rubric") {
+      const rubrics = rawEvaluator.rubrics;
+      if (!Array.isArray(rubrics)) {
+        logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
+        continue;
+      }
+      const parsedRubrics = rubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
+        id: asString2(rubric.id) ?? `rubric-${index + 1}`,
+        description: asString2(rubric.description) ?? "",
+        weight: typeof rubric.weight === "number" ? rubric.weight : 1,
+        required: typeof rubric.required === "boolean" ? rubric.required : true
+      })).filter((r) => r.description.length > 0);
+      if (parsedRubrics.length === 0) {
+        logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
+        continue;
+      }
+      evaluators.push({
+        name: name16,
+        type: "rubric",
+        rubrics: parsedRubrics
+      });
+      continue;
+    }
     evaluators.push({
       name: name16,
       type: "llm_judge",
@@ -35390,7 +35413,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       continue;
     }
     const conversationId = asString5(evalcase.conversation_id);
-    const outcome = asString5(evalcase.outcome);
+    const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
@@ -35444,6 +35467,33 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       logError(`Skipping eval case '${id}': ${message}`);
       continue;
     }
+    const inlineRubrics = evalcase.rubrics;
+    if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
+      const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
+        if (typeof rubric === "string") {
+          return {
+            id: `rubric-${index + 1}`,
+            description: rubric,
+            weight: 1,
+            required: true
+          };
+        }
+        return {
+          id: asString5(rubric.id) ?? `rubric-${index + 1}`,
+          description: asString5(rubric.description) ?? "",
+          weight: typeof rubric.weight === "number" ? rubric.weight : 1,
+          required: typeof rubric.required === "boolean" ? rubric.required : true
+        };
+      }).filter((r) => r.description.length > 0);
+      if (rubricItems.length > 0) {
+        const rubricEvaluator = {
+          name: "rubric",
+          type: "rubric",
+          rubrics: rubricItems
+        };
+        evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
+      }
+    }
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -35536,6 +35586,9 @@ var AzureProvider = class {
       retryConfig: this.retryConfig
     });
   }
+  asLanguageModel() {
+    return this.model;
+  }
 };
 var AnthropicProvider = class {
   constructor(targetName, config2) {
@@ -35569,6 +35622,9 @@ var AnthropicProvider = class {
       providerOptions
     });
   }
+  asLanguageModel() {
+    return this.model;
+  }
 };
 var GeminiProvider = class {
   constructor(targetName, config2) {
@@ -35599,6 +35655,9 @@ var GeminiProvider = class {
       retryConfig: this.retryConfig
     });
   }
+  asLanguageModel() {
+    return this.model;
+  }
 };
 function buildAzureOptions(config2) {
   const options = {
@@ -37262,6 +37321,144 @@ function createProvider(target) {
     }
   }
 }
+var rubricCheckResultSchema = external_exports.object({
+  id: external_exports.string().describe("The ID of the rubric item being checked"),
+  satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
+  reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
+});
+var rubricEvaluationSchema = external_exports.object({
+  checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
+  overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
+});
+var RubricEvaluator = class {
+  kind = "rubric";
+  config;
+  resolveJudgeProvider;
+  constructor(options) {
+    this.config = options.config;
+    this.resolveJudgeProvider = options.resolveJudgeProvider;
+  }
+  async evaluate(context) {
+    const judgeProvider = await this.resolveJudgeProvider(context);
+    if (!judgeProvider) {
+      throw new Error("No judge provider available for rubric evaluation");
+    }
+    if (!this.config.rubrics || this.config.rubrics.length === 0) {
+      throw new Error(
+        `No rubrics found for evaluator "${this.config.name}". Run "agentv generate rubrics" first.`
+      );
+    }
+    const prompt = this.buildPrompt(context, this.config.rubrics);
+    const model = judgeProvider.asLanguageModel?.();
+    if (!model) {
+      throw new Error("Judge provider does not support language model interface");
+    }
+    const system = `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
+You must return a valid JSON object matching this schema:
+{
+  "checks": [
+    {
+      "id": "string (rubric id)",
+      "satisfied": boolean,
+      "reasoning": "string (brief explanation)"
+    }
+  ],
+  "overall_reasoning": "string (summary)"
+}`;
+    let result;
+    let lastError;
+    for (let attempt = 1; attempt <= 3; attempt++) {
+      try {
+        const { text: text2 } = await generateText({
+          model,
+          system,
+          prompt
+        });
+        const cleaned = text2.replace(/```json\n?|```/g, "").trim();
+        result = rubricEvaluationSchema.parse(JSON.parse(cleaned));
+        break;
+      } catch (e) {
+        lastError = e instanceof Error ? e : new Error(String(e));
+      }
+    }
+    if (!result) {
+      throw new Error(
+        `Failed to parse rubric evaluation result after 3 attempts: ${lastError?.message}`
+      );
+    }
+    const { score, verdict, hits, misses } = this.calculateScore(result, this.config.rubrics);
+    return {
+      score,
+      verdict,
+      hits,
+      misses,
+      expectedAspectCount: this.config.rubrics.length,
+      reasoning: result.overall_reasoning,
+      evaluatorRawRequest: {
+        prompt
+      }
+    };
+  }
+  buildPrompt(context, rubrics) {
+    const parts = [
+      "You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
+      "",
+      "[[ ## question ## ]]",
+      context.evalCase.question,
+      "",
+      "[[ ## expected_outcome ## ]]",
+      context.evalCase.expected_outcome,
+      ""
+    ];
+    if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
+      parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
+    }
+    parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
+    for (const rubric of rubrics) {
+      const requiredLabel = rubric.required ? " (REQUIRED)" : "";
+      const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
+      parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
+    }
+    parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
+    return parts.join("\n");
+  }
+  calculateScore(result, rubrics) {
+    const rubricMap = new Map(rubrics.map((r) => [r.id, r]));
+    const hits = [];
+    const misses = [];
+    let totalWeight = 0;
+    let earnedWeight = 0;
+    let failedRequired = false;
+    for (const check2 of result.checks) {
+      const rubric = rubricMap.get(check2.id);
+      if (!rubric) {
+        continue;
+      }
+      totalWeight += rubric.weight;
+      if (check2.satisfied) {
+        earnedWeight += rubric.weight;
+        hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
+      } else {
+        misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
+        if (rubric.required) {
+          failedRequired = true;
+        }
+      }
+    }
+    const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
+    let verdict;
+    if (failedRequired) {
+      verdict = "fail";
+    } else if (score >= 0.8) {
+      verdict = "pass";
+    } else if (score >= 0.6) {
+      verdict = "borderline";
+    } else {
+      verdict = "fail";
+    }
+    return { score, verdict, hits, misses };
+  }
+};
 var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
 Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -38214,6 +38411,7 @@ async function runEvaluatorList(options) {
           name: evaluator.name,
           type: evaluator.type,
           score: score2.score,
+          verdict: score2.verdict,
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
@@ -38241,6 +38439,40 @@ async function runEvaluatorList(options) {
           name: evaluator.name,
           type: evaluator.type,
           score: score2.score,
+          verdict: score2.verdict,
+          hits: score2.hits,
+          misses: score2.misses,
+          reasoning: score2.reasoning,
+          evaluator_provider_request: score2.evaluatorRawRequest
+        });
+        continue;
+      }
+      if (evaluator.type === "rubric") {
+        const rubricEvaluator = new RubricEvaluator({
+          config: evaluator,
+          resolveJudgeProvider: async (context) => {
+            if (context.judgeProvider) {
+              return context.judgeProvider;
+            }
+            return judgeProvider;
+          }
+        });
+        const score2 = await rubricEvaluator.evaluate({
+          evalCase,
+          candidate,
+          target,
+          provider,
+          attempt,
+          promptInputs,
+          now,
+          judgeProvider
+        });
+        scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
+        evaluatorResults.push({
+          name: evaluator.name,
+          type: evaluator.type,
+          score: score2.score,
+          verdict: score2.verdict,
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
@@ -38470,6 +38702,82 @@ function isTimeoutLike(error40) {
   const value = String(error40).toLowerCase();
   return value.includes("timeout");
 }
+var rubricItemSchema = external_exports.object({
+  id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
+  description: external_exports.string().describe("What this rubric checks for"),
+  weight: external_exports.number().default(1).describe("Relative importance (default 1.0)"),
+  required: external_exports.boolean().default(true).describe("Whether this is a mandatory requirement")
+});
+var rubricGenerationSchema = external_exports.object({
+  rubrics: external_exports.array(rubricItemSchema).describe("List of evaluation rubrics")
+});
+async function generateRubrics(options) {
+  const { expectedOutcome, question, referenceAnswer, provider } = options;
+  const prompt = buildPrompt(expectedOutcome, question, referenceAnswer);
+  const model = provider.asLanguageModel?.();
+  if (!model) {
+    throw new Error("Provider does not support language model interface");
+  }
+  const system = `You are an expert at creating evaluation rubrics.
+You must return a valid JSON object matching this schema:
+{
+  "rubrics": [
+    {
+      "id": "string (short identifier)",
+      "description": "string (what to check)",
+      "weight": number (default 1.0),
+      "required": boolean (default true)
+    }
+  ]
+}`;
+  let result;
+  let lastError;
+  for (let attempt = 1; attempt <= 3; attempt++) {
+    try {
+      const { text: text2 } = await generateText({
+        model,
+        system,
+        prompt
+      });
+      const cleaned = text2.replace(/```json\n?|```/g, "").trim();
+      result = rubricGenerationSchema.parse(JSON.parse(cleaned));
+      break;
+    } catch (e) {
+      lastError = e instanceof Error ? e : new Error(String(e));
+    }
+  }
+  if (!result) {
+    throw new Error(`Failed to parse generated rubrics after 3 attempts: ${lastError?.message}`);
+  }
+  return result.rubrics;
+}
+function buildPrompt(expectedOutcome, question, referenceAnswer) {
+  const parts = [
+    "You are an expert at creating evaluation rubrics.",
+    "Given the expected outcome (and optionally the question and reference answer),",
+    "generate a list of specific, measurable rubric items to evaluate whether an answer meets the expected outcome.",
+    "",
+    "Each rubric should:",
+    "- Be specific and testable",
+    "- Have a short, descriptive ID",
+    "- Include a clear description of what to check",
+    "- Indicate if it is required (mandatory) or optional",
+    "- Have an appropriate weight (default 1.0, use higher values for more important aspects)",
+    "",
+    "Generate 3-7 rubric items that comprehensively cover the expected outcome.",
+    "",
+    "[[ ## expected_outcome ## ]]",
+    expectedOutcome,
+    ""
+  ];
+  if (question && question.trim().length > 0) {
+    parts.push("[[ ## question ## ]]", question, "");
+  }
+  if (referenceAnswer && referenceAnswer.trim().length > 0) {
+    parts.push("[[ ## reference_answer ## ]]", referenceAnswer, "");
+  }
+  return parts.join("\n");
+}
 function createAgentKernel() {
   return { status: "stub" };
 }
@@ -38927,12 +39235,12 @@ var ProgressDisplay = class {
   }
   addLogPaths(paths) {
     const newPaths = [];
-    for (const path25 of paths) {
-      if (this.logPathSet.has(path25)) {
+    for (const path26 of paths) {
+      if (this.logPathSet.has(path26)) {
         continue;
       }
-      this.logPathSet.add(path25);
-      newPaths.push(path25);
+      this.logPathSet.add(path26);
+      newPaths.push(path26);
     }
     if (newPaths.length === 0) {
       return;
@@ -38948,8 +39256,8 @@ var ProgressDisplay = class {
       this.hasPrintedLogHeader = true;
     }
     const startIndex = this.logPaths.length - newPaths.length;
-    newPaths.forEach((path25, offset) => {
-      console.log(`${startIndex + offset + 1}. ${path25}`);
+    newPaths.forEach((path26, offset) => {
+      console.log(`${startIndex + offset + 1}. ${path26}`);
     });
   }
   scheduleRender() {
@@ -38997,8 +39305,8 @@ var ProgressDisplay = class {
     if (this.logPaths.length > 0) {
       lines.push("");
       lines.push("Codex CLI logs:");
-      this.logPaths.forEach((path25, index) => {
-        lines.push(`${index + 1}. ${path25}`);
+      this.logPaths.forEach((path26, index) => {
+        lines.push(`${index + 1}. ${path26}`);
       });
     }
     const rowCount = this.getRenderedRowCount(lines);
@@ -39203,11 +39511,6 @@ function formatEvaluationSummary(summary) {
   return lines.join("\n");
 }
-// src/commands/eval/targets.ts
-import { constants as constants5 } from "node:fs";
-import { access as access5 } from "node:fs/promises";
-import path17 from "node:path";
 // ../../packages/core/dist/evaluation/validation/index.js
 import { readFile as readFile7 } from "node:fs/promises";
 import { parse as parse6 } from "yaml";
@@ -39323,13 +39626,13 @@ async function validateEvalFile(filePath) {
         message: "Missing or invalid 'id' field (must be a non-empty string)"
       });
     }
-    const outcome = evalCase.outcome;
-    if (typeof outcome !== "string" || outcome.trim().length === 0) {
+    const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
+    if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
       errors.push({
         severity: "error",
         filePath: absolutePath,
-        location: `${location}.outcome`,
-        message: "Missing or invalid 'outcome' field (must be a non-empty string)"
+        location: `${location}.expected_outcome`,
+        message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
       });
     }
     const inputMessages = evalCase.input_messages;
@@ -40064,19 +40367,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
   }
 }
-// src/commands/eval/targets.ts
+// src/utils/targets.ts
+import { constants as constants5 } from "node:fs";
+import { access as access5 } from "node:fs/promises";
+import path17 from "node:path";
 var TARGET_FILE_CANDIDATES = [
   "targets.yaml",
   "targets.yml",
   path17.join(".agentv", "targets.yaml"),
   path17.join(".agentv", "targets.yml")
 ];
-var ANSI_YELLOW7 = "\x1B[33m";
-var ANSI_RED2 = "\x1B[31m";
-var ANSI_RESET7 = "\x1B[0m";
-function isTTY() {
-  return process.stdout.isTTY ?? false;
-}
 async function fileExists5(filePath) {
   try {
     await access5(filePath, constants5.F_OK);
@@ -40085,10 +40385,6 @@ async function fileExists5(filePath) {
     return false;
   }
 }
-async function readTestSuiteTarget(testFilePath) {
-  const metadata = await readTestSuiteMetadata(testFilePath);
-  return metadata.target;
-}
 async function discoverTargetsFile(options) {
   const { explicitPath, testFilePath, repoRoot, cwd } = options;
   if (explicitPath) {
@@ -40119,6 +40415,18 @@ async function discoverTargetsFile(options) {
   }
   throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
 }
+// src/commands/eval/targets.ts
+var ANSI_YELLOW7 = "\x1B[33m";
+var ANSI_RED2 = "\x1B[31m";
+var ANSI_RESET7 = "\x1B[0m";
+function isTTY() {
+  return process.stdout.isTTY ?? false;
+}
+async function readTestSuiteTarget(testFilePath) {
+  const metadata = await readTestSuiteMetadata(testFilePath);
+  return metadata.target;
+}
 function pickTargetName(options) {
   const cliName = options.cliTargetName?.trim();
   if (cliName && cliName !== "default") {
@@ -40705,14 +41013,174 @@ async function resolveEvalPaths(evalPaths, cwd) {
   return sorted;
 }
+// src/commands/generate/rubrics.ts
+import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
+import path20 from "node:path";
+import { pathToFileURL as pathToFileURL2 } from "node:url";
+import { isMap, isSeq, parseDocument } from "yaml";
+function isJsonObject3(value) {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+function asString6(value) {
+  return typeof value === "string" ? value : void 0;
+}
+async function loadRubricGenerator() {
+  const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
+  if (customGenerator) {
+    const generatorPath = path20.resolve(customGenerator);
+    const generatorUrl = pathToFileURL2(generatorPath).href;
+    const module = await import(generatorUrl);
+    return module.generateRubrics;
+  }
+  return generateRubrics;
+}
+async function generateRubricsCommand(options) {
+  const { file: file2, target: targetOverride, verbose } = options;
+  console.log(`Generating rubrics for: ${file2}`);
+  const absolutePath = path20.resolve(file2);
+  const content = await readFile8(absolutePath, "utf8");
+  const doc = parseDocument(content);
+  const parsed = doc.toJSON();
+  if (!isJsonObject3(parsed)) {
+    throw new Error(`Invalid YAML file format: ${file2}`);
+  }
+  const suite = parsed;
+  const evalcases = suite.evalcases;
+  if (!Array.isArray(evalcases)) {
+    throw new Error(`No evalcases found in ${file2}`);
+  }
+  const targetSelection = await selectTarget({
+    testFilePath: absolutePath,
+    repoRoot: process.cwd(),
+    cwd: process.cwd(),
+    cliTargetName: targetOverride,
+    dryRun: false,
+    dryRunDelay: 0,
+    dryRunDelayMin: 0,
+    dryRunDelayMax: 0,
+    env: process.env
+  });
+  if (verbose) {
+    console.log(`Using target: ${targetSelection.targetName}`);
+  }
+  const provider = createProvider(targetSelection.resolvedTarget);
+  const generateRubricsFunc = await loadRubricGenerator();
+  let updatedCount = 0;
+  let skippedCount = 0;
+  const evalcasesNode = doc.getIn(["evalcases"]);
+  if (!evalcasesNode || !isSeq(evalcasesNode)) {
+    throw new Error("evalcases must be a sequence");
+  }
+  for (let i = 0; i < evalcases.length; i++) {
+    const rawCase = evalcases[i];
+    if (!isJsonObject3(rawCase)) {
+      continue;
+    }
+    const evalCase = rawCase;
+    const id = asString6(evalCase.id) ?? "unknown";
+    const expectedOutcome = asString6(evalCase.expected_outcome) ?? asString6(evalCase.outcome);
+    if (!expectedOutcome) {
+      if (verbose) {
+        console.log(`  Skipping ${id}: no expected_outcome`);
+      }
+      skippedCount++;
+      continue;
+    }
+    if (evalCase.rubrics !== void 0) {
+      if (verbose) {
+        console.log(`  Skipping ${id}: rubrics already defined`);
+      }
+      skippedCount++;
+      continue;
+    }
+    console.log(`  Generating rubrics for: ${id}`);
+    const question = extractQuestion(evalCase);
+    const referenceAnswer = asString6(evalCase.reference_answer);
+    const rubrics = await generateRubricsFunc({
+      expectedOutcome,
+      question,
+      referenceAnswer,
+      provider
+    });
+    const caseNode = evalcasesNode.items[i];
+    if (caseNode && isMap(caseNode)) {
+      caseNode.set(
+        "rubrics",
+        rubrics.map(
+          (r) => ({
+            id: r.id,
+            description: r.description,
+            weight: r.weight,
+            required: r.required
+          })
+        )
+      );
+    }
+    updatedCount++;
+    if (verbose) {
+      console.log(`    Generated ${rubrics.length} rubric(s)`);
+    }
+  }
+  if (updatedCount > 0) {
+    const output = doc.toString();
+    await writeFile6(absolutePath, output, "utf8");
+    console.log(`
+Updated ${updatedCount} eval case(s) with generated rubrics`);
+    if (skippedCount > 0) {
+      console.log(`Skipped ${skippedCount} eval case(s)`);
+    }
+  } else {
+    console.log("\nNo eval cases updated (all already have rubrics or missing expected_outcome)");
+  }
+}
+function extractQuestion(evalCase) {
+  const explicitQuestion = asString6(evalCase.question);
+  if (explicitQuestion) {
+    return explicitQuestion;
+  }
+  const inputMessages = evalCase.input_messages;
+  if (!Array.isArray(inputMessages)) {
+    return void 0;
+  }
+  for (const msg of inputMessages) {
+    if (!isJsonObject3(msg)) {
+      continue;
+    }
+    if (msg.role === "user" && typeof msg.content === "string") {
+      return msg.content;
+    }
+  }
+  return void 0;
+}
+// src/commands/generate/index.ts
+function registerGenerateCommand(program) {
+  const generate = program.command("generate").description("Generate evaluation artifacts");
+  generate.command("rubrics <file>").description("Generate rubrics from expected_outcome in YAML eval file").option(
+    "-t, --target <target>",
+    "Override target for rubric generation (default: file target or openai:gpt-4o)"
+  ).option("-v, --verbose", "Show detailed progress").action(async (file2, options) => {
+    try {
+      await generateRubricsCommand({
+        file: file2,
+        target: options.target,
+        verbose: options.verbose
+      });
+    } catch (error40) {
+      console.error(`Error: ${error40.message}`);
+      process.exit(1);
+    }
+  });
+}
 // src/commands/init/index.ts
 import { existsSync, mkdirSync, writeFileSync } from "node:fs";
-import path21 from "node:path";
+import path24 from "node:path";
 import * as readline from "node:readline/promises";
 // src/templates/index.ts
 import { readFileSync, readdirSync, statSync } from "node:fs";
-import path20 from "node:path";
+import path21 from "node:path";
 import { fileURLToPath } from "node:url";
 function getGithubTemplates() {
   return getTemplatesFromDir(".github");
@@ -40724,12 +41192,12 @@ function getClaudeTemplates() {
   return getTemplatesFromDir(".claude");
 }
 function getTemplatesFromDir(subdir) {
-  const currentDir = path20.dirname(fileURLToPath(import.meta.url));
+  const currentDir = path21.dirname(fileURLToPath(import.meta.url));
   let templatesDir;
-  if (currentDir.includes(`${path20.sep}dist`)) {
-    templatesDir = path20.join(currentDir, "templates", subdir);
+  if (currentDir.includes(`${path21.sep}dist`)) {
+    templatesDir = path21.join(currentDir, "templates", subdir);
   } else {
-    templatesDir = path20.join(currentDir, subdir);
+    templatesDir = path21.join(currentDir, subdir);
   }
   return readTemplatesRecursively(templatesDir, "");
 }
@@ -40737,15 +41205,15 @@ function readTemplatesRecursively(dir, relativePath) {
   const templates = [];
   const entries = readdirSync(dir);
   for (const entry of entries) {
-    const fullPath = path20.join(dir, entry);
+    const fullPath = path21.join(dir, entry);
     const stat6 = statSync(fullPath);
-    const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
+    const entryRelativePath = relativePath ? path21.join(relativePath, entry) : entry;
     if (stat6.isDirectory()) {
       templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
     } else {
       const content = readFileSync(fullPath, "utf-8");
       templates.push({
-        path: entryRelativePath.split(path20.sep).join("/"),
+        path: entryRelativePath.split(path21.sep).join("/"),
         // Normalize to forward slashes
         content
       });
@@ -40768,10 +41236,10 @@ async function promptYesNo(message) {
   }
 }
 async function initCommand(options = {}) {
-  const targetPath = path21.resolve(options.targetPath ?? ".");
-  const githubDir = path21.join(targetPath, ".github");
-  const agentvDir = path21.join(targetPath, ".agentv");
-  const claudeDir = path21.join(targetPath, ".claude");
+  const targetPath = path24.resolve(options.targetPath ?? ".");
+  const githubDir = path24.join(targetPath, ".github");
+  const agentvDir = path24.join(targetPath, ".agentv");
+  const claudeDir = path24.join(targetPath, ".claude");
   const githubTemplates = getGithubTemplates();
   const agentvTemplates = getAgentvTemplates();
   const claudeTemplates = getClaudeTemplates();
@@ -40779,32 +41247,32 @@ async function initCommand(options = {}) {
   const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
   const existingFiles = [];
   if (envTemplate) {
-    const envFilePath = path21.join(targetPath, ".env.template");
+    const envFilePath = path24.join(targetPath, ".env.template");
     if (existsSync(envFilePath)) {
       existingFiles.push(".env.template");
     }
   }
   if (existsSync(githubDir)) {
     for (const template of githubTemplates) {
-      const targetFilePath = path21.join(githubDir, template.path);
+      const targetFilePath = path24.join(githubDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path21.relative(targetPath, targetFilePath));
+        existingFiles.push(path24.relative(targetPath, targetFilePath));
       }
     }
   }
   if (existsSync(agentvDir)) {
     for (const template of otherAgentvTemplates) {
-      const targetFilePath = path21.join(agentvDir, template.path);
+      const targetFilePath = path24.join(agentvDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path21.relative(targetPath, targetFilePath));
+        existingFiles.push(path24.relative(targetPath, targetFilePath));
       }
     }
   }
   if (existsSync(claudeDir)) {
     for (const template of claudeTemplates) {
-      const targetFilePath = path21.join(claudeDir, template.path);
+      const targetFilePath = path24.join(claudeDir, template.path);
       if (existsSync(targetFilePath)) {
-        existingFiles.push(path21.relative(targetPath, targetFilePath));
+        existingFiles.push(path24.relative(targetPath, targetFilePath));
       }
     }
   }
@@ -40831,36 +41299,36 @@ async function initCommand(options = {}) {
     mkdirSync(claudeDir, { recursive: true });
   }
   if (envTemplate) {
-    const envFilePath = path21.join(targetPath, ".env.template");
+    const envFilePath = path24.join(targetPath, ".env.template");
     writeFileSync(envFilePath, envTemplate.content, "utf-8");
     console.log("Created .env.template");
   }
   for (const template of githubTemplates) {
-    const targetFilePath = path21.join(githubDir, template.path);
-    const targetDirPath = path21.dirname(targetFilePath);
+    const targetFilePath = path24.join(githubDir, template.path);
+    const targetDirPath = path24.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
     writeFileSync(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
+    console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
   }
   for (const template of otherAgentvTemplates) {
-    const targetFilePath = path21.join(agentvDir, template.path);
-    const targetDirPath = path21.dirname(targetFilePath);
+    const targetFilePath = path24.join(agentvDir, template.path);
+    const targetDirPath = path24.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
     writeFileSync(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
+    console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
   }
   for (const template of claudeTemplates) {
-    const targetFilePath = path21.join(claudeDir, template.path);
-    const targetDirPath = path21.dirname(targetFilePath);
+    const targetFilePath = path24.join(claudeDir, template.path);
+    const targetDirPath = path24.dirname(targetFilePath);
     if (!existsSync(targetDirPath)) {
       mkdirSync(targetDirPath, { recursive: true });
     }
     writeFileSync(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
+    console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
   }
   console.log("\nAgentV initialized successfully!");
   console.log("\nFiles installed to root:");
@@ -40868,17 +41336,17 @@ async function initCommand(options = {}) {
     console.log("  - .env.template");
   }
   console.log(`
-Files installed to ${path21.relative(targetPath, githubDir)}:`);
+Files installed to ${path24.relative(targetPath, githubDir)}:`);
   for (const t of githubTemplates) {
     console.log(`  - ${t.path}`);
   }
   console.log(`
-Files installed to ${path21.relative(targetPath, agentvDir)}:`);
+Files installed to ${path24.relative(targetPath, agentvDir)}:`);
   for (const t of otherAgentvTemplates) {
     console.log(`  - ${t.path}`);
   }
   console.log(`
-Files installed to ${path21.relative(targetPath, claudeDir)}:`);
+Files installed to ${path24.relative(targetPath, claudeDir)}:`);
   for (const t of claudeTemplates) {
     console.log(`  - ${t.path}`);
   }
@@ -40980,7 +41448,7 @@ function isTTY2() {
 // src/commands/validate/validate-files.ts
 import { constants as constants7 } from "node:fs";
 import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
-import path24 from "node:path";
+import path25 from "node:path";
 async function validateFiles(paths) {
   const filePaths = await expandPaths(paths);
   const results = [];
@@ -40998,7 +41466,7 @@ async function validateFiles(paths) {
   };
 }
 async function validateSingleFile(filePath) {
-  const absolutePath = path24.resolve(filePath);
+  const absolutePath = path25.resolve(filePath);
   const fileType = await detectFileType(absolutePath);
   if (fileType === "unknown") {
     return {
@@ -41037,7 +41505,7 @@ async function validateSingleFile(filePath) {
 async function expandPaths(paths) {
   const expanded = [];
   for (const inputPath of paths) {
-    const absolutePath = path24.resolve(inputPath);
+    const absolutePath = path25.resolve(inputPath);
     try {
       await access7(absolutePath, constants7.F_OK);
     } catch {
@@ -41061,7 +41529,7 @@ async function findYamlFiles(dirPath) {
   try {
     const entries = await readdir3(dirPath, { withFileTypes: true });
     for (const entry of entries) {
-      const fullPath = path24.join(dirPath, entry.name);
+      const fullPath = path25.join(dirPath, entry.name);
       if (entry.isDirectory()) {
         if (entry.name === "node_modules" || entry.name.startsWith(".")) {
           continue;
@@ -41078,7 +41546,7 @@ async function findYamlFiles(dirPath) {
   return results;
 }
 function isYamlFile(filePath) {
-  const ext = path24.extname(filePath).toLowerCase();
+  const ext = path25.extname(filePath).toLowerCase();
   return ext === ".yaml" || ext === ".yml";
 }
@@ -41115,6 +41583,7 @@ function createProgram() {
   registerStatusCommand(program);
   registerEvalCommand(program);
   registerValidateCommand(program);
+  registerGenerateCommand(program);
   program.command("init [path]").description(
     "Initialize AgentV in your project (installs prompt templates and schema to .github)"
   ).action(async (targetPath) => {
@@ -41137,4 +41606,4 @@ export {
   createProgram,
   runCli
 };
-//# sourceMappingURL=chunk-GDGNKNKP.js.map
+//# sourceMappingURL=chunk-MA3MJNJH.js.map