npm - agentv - Versions diffs - 0.15.0 → 0.16.0 - Mend

agentv 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/{chunk-LVLBPRCV.js → chunk-HWGALLUR.js} +170 -79
package/dist/chunk-HWGALLUR.js.map +1 -0
package/dist/cli.js +1 -1
package/dist/index.js +1 -1
package/package.json +2 -2
package/dist/chunk-LVLBPRCV.js.map +0 -1

package/dist/{chunk-LVLBPRCV.js → chunk-HWGALLUR.js} RENAMED Viewed

@@ -4847,7 +4847,7 @@ function isAgentProvider(provider) {
 }
 // ../../packages/core/dist/index.js
-import { readFile as readFile4 } from "node:fs/promises";
+import { readFile as readFile5 } from "node:fs/promises";
 import path62 from "node:path";
 import { parse as parse22 } from "yaml";
 import micromatch from "micromatch";
@@ -4859,8 +4859,9 @@ import { access as access3 } from "node:fs/promises";
 import path8 from "node:path";
 import path32 from "node:path";
 import { readFile as readFile22 } from "node:fs/promises";
-import path42 from "node:path";
 import { readFile as readFile32 } from "node:fs/promises";
+import path42 from "node:path";
+import { readFile as readFile4 } from "node:fs/promises";
 import path52 from "node:path";
 // ../../node_modules/.pnpm/@ai-sdk+provider@2.0.0/node_modules/@ai-sdk/provider/dist/index.mjs
@@ -34384,7 +34385,7 @@ async function provisionSubagents(options) {
 // ../../packages/core/dist/index.js
 import { constants as constants32 } from "node:fs";
-import { access as access32, readFile as readFile5 } from "node:fs/promises";
+import { access as access32, readFile as readFile6 } from "node:fs/promises";
 import path11 from "node:path";
 import { parse as parse32 } from "yaml";
 import { createHash, randomUUID as randomUUID2 } from "node:crypto";
@@ -34467,7 +34468,7 @@ ${part.content}
   }
   return parts.map((p) => p.content).join(" ");
 }
-function formatSegment(segment) {
+function formatSegment(segment, mode = "lm") {
   const type = asString(segment.type);
   if (type === "text") {
     return asString(segment.value);
@@ -34477,8 +34478,14 @@ function formatSegment(segment) {
     return refPath ? `<Attached: ${refPath}>` : void 0;
   }
   if (type === "file") {
-    const text2 = asString(segment.text);
     const filePath = asString(segment.path);
+    if (!filePath) {
+      return void 0;
+    }
+    if (mode === "agent") {
+      return `<file: path="${filePath}">`;
+    }
+    const text2 = asString(segment.text);
     if (text2 && filePath) {
       return formatFileContents([{ content: text2.trim(), isFile: true, displayPath: filePath }]);
     }
@@ -34666,8 +34673,58 @@ function extractTargetFromSuite(suite) {
 function logWarning(message) {
   console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
 }
+var TEMPLATE_VARIABLES = {
+  CANDIDATE_ANSWER: "candidate_answer",
+  EXPECTED_MESSAGES: "expected_messages",
+  QUESTION: "question",
+  EXPECTED_OUTCOME: "expected_outcome",
+  REFERENCE_ANSWER: "reference_answer",
+  INPUT_MESSAGES: "input_messages"
+};
+var VALID_TEMPLATE_VARIABLES = new Set(
+  Object.values(TEMPLATE_VARIABLES)
+);
+var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
+  TEMPLATE_VARIABLES.CANDIDATE_ANSWER,
+  TEMPLATE_VARIABLES.EXPECTED_MESSAGES
+]);
 var ANSI_YELLOW2 = "\x1B[33m";
 var ANSI_RESET2 = "\x1B[0m";
+async function validateCustomPromptContent(promptPath) {
+  const content = await readFile22(promptPath, "utf8");
+  validateTemplateVariables(content, promptPath);
+}
+function validateTemplateVariables(content, source2) {
+  const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
+  const foundVariables = /* @__PURE__ */ new Set();
+  const invalidVariables = [];
+  let match;
+  while ((match = variablePattern.exec(content)) !== null) {
+    const varName = match[1];
+    foundVariables.add(varName);
+    if (!VALID_TEMPLATE_VARIABLES.has(varName)) {
+      invalidVariables.push(varName);
+    }
+  }
+  const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.CANDIDATE_ANSWER);
+  const hasExpectedMessages = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_MESSAGES);
+  const hasRequiredFields = hasCandidateAnswer || hasExpectedMessages;
+  if (!hasRequiredFields) {
+    throw new Error(
+      `Missing required fields. Must include at least one of:
+  - {{ ${TEMPLATE_VARIABLES.CANDIDATE_ANSWER} }}
+  - {{ ${TEMPLATE_VARIABLES.EXPECTED_MESSAGES} }}`
+    );
+  }
+  if (invalidVariables.length > 0) {
+    const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source2}
+  Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
+  Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET2}`;
+    console.warn(warningMessage);
+  }
+}
+var ANSI_YELLOW3 = "\x1B[33m";
+var ANSI_RESET3 = "\x1B[0m";
 async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
   const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
@@ -34726,6 +34783,12 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
       const resolved = await resolveFileReference2(prompt, searchRoots);
       if (resolved.resolvedPath) {
         promptPath = path32.resolve(resolved.resolvedPath);
+        try {
+          await validateCustomPromptContent(promptPath);
+        } catch (error40) {
+          const message = error40 instanceof Error ? error40.message : String(error40);
+          throw new Error(`Evaluator '${name16}' template (${promptPath}): ${message}`);
+        }
       } else {
         logWarning2(
           `Inline prompt used for evaluator '${name16}' in '${evalId}' (file not found: ${resolved.displayPath})`,
@@ -34762,14 +34825,14 @@ function isJsonObject2(value) {
 function logWarning2(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW2}Warning: ${message}
-${detailBlock}${ANSI_RESET2}`);
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}
+${detailBlock}${ANSI_RESET3}`);
   } else {
-    console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
   }
 }
-var ANSI_YELLOW3 = "\x1B[33m";
-var ANSI_RESET3 = "\x1B[0m";
+var ANSI_YELLOW4 = "\x1B[33m";
+var ANSI_RESET4 = "\x1B[0m";
 async function processMessages(options) {
   const {
     messages,
@@ -34812,7 +34875,7 @@ async function processMessages(options) {
           continue;
         }
         try {
-          const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
           if (messageType === "input" && guidelinePatterns && guidelinePaths) {
             const relativeToRepo = path42.relative(repoRootPath, resolvedPath);
             if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
@@ -34883,7 +34946,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
         continue;
       }
       try {
-        const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
+        const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
         parts.push({ content: fileContent, isFile: true, displayPath });
         if (verbose) {
           console.log(`  [Expected Assistant File] Found: ${displayPath}`);
@@ -34933,15 +34996,15 @@ function cloneJsonValue(value) {
 function logWarning3(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW3}Warning: ${message}
-${detailBlock}${ANSI_RESET3}`);
+    console.warn(`${ANSI_YELLOW4}Warning: ${message}
+${detailBlock}${ANSI_RESET4}`);
   } else {
-    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
+    console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
   }
 }
-var ANSI_YELLOW4 = "\x1B[33m";
-var ANSI_RESET4 = "\x1B[0m";
-async function buildPromptInputs(testCase) {
+var ANSI_YELLOW5 = "\x1B[33m";
+var ANSI_RESET5 = "\x1B[0m";
+async function buildPromptInputs(testCase, mode = "lm") {
   const guidelineParts = [];
   for (const rawPath of testCase.guideline_paths) {
     const absolutePath = path52.resolve(rawPath);
@@ -34950,7 +35013,7 @@ async function buildPromptInputs(testCase) {
       continue;
     }
     try {
-      const content = (await readFile32(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
+      const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
       guidelineParts.push({
         content,
         isFile: true,
@@ -35017,7 +35080,7 @@ async function buildPromptInputs(testCase) {
       const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
       const contentParts = [];
       for (const segment of segments) {
-        const formattedContent = formatSegment(segment);
+        const formattedContent = formatSegment(segment, mode);
         if (formattedContent) {
           contentParts.push(formattedContent);
         }
@@ -35032,7 +35095,11 @@ ${messageContent}`);
   } else {
     const questionParts = [];
     for (const segment of testCase.input_segments) {
-      const formattedContent = formatSegment(segment);
+      if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
+        questionParts.push(`<Attached: ${segment.path}>`);
+        continue;
+      }
+      const formattedContent = formatSegment(segment, mode);
       if (formattedContent) {
         questionParts.push(formattedContent);
       }
@@ -35046,7 +35113,8 @@ ${messageContent}`);
     messages: testCase.input_messages,
     segmentsByMessage,
     guidelinePatterns: testCase.guideline_patterns,
-    guidelineContent: guidelines
+    guidelineContent: guidelines,
+    mode
   }) : void 0;
   return { question, guidelines, chatPrompt };
 }
@@ -35063,7 +35131,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
   return messagesWithContent > 1;
 }
 function buildChatPromptFromSegments(options) {
-  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
+  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt, mode = "lm" } = options;
   if (messages.length === 0) {
     return void 0;
   }
@@ -35081,7 +35149,7 @@ ${guidelineContent.trim()}`);
     const segments = segmentsByMessage[startIndex];
     const contentParts = [];
     for (const segment of segments) {
-      const formatted = formatSegment(segment);
+      const formatted = formatSegment(segment, mode);
       if (formatted) {
         contentParts.push(formatted);
       }
@@ -35114,7 +35182,7 @@ ${guidelineContent.trim()}`);
       if (segment.type === "guideline_ref") {
         continue;
       }
-      const formatted = formatSegment(segment);
+      const formatted = formatSegment(segment, mode);
       if (formatted) {
         const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
         if (isGuidelineRef) {
@@ -35138,15 +35206,16 @@ function asString4(value) {
   return typeof value === "string" ? value : void 0;
 }
 function logWarning4(message) {
-  console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
+  console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
 }
-var ANSI_YELLOW5 = "\x1B[33m";
-var ANSI_RESET5 = "\x1B[0m";
+var ANSI_YELLOW6 = "\x1B[33m";
+var ANSI_RED = "\x1B[31m";
+var ANSI_RESET6 = "\x1B[0m";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
 async function readTestSuiteMetadata(testFilePath) {
   try {
     const absolutePath = path62.resolve(testFilePath);
-    const content = await readFile4(absolutePath, "utf8");
+    const content = await readFile5(absolutePath, "utf8");
     const parsed = parse22(content);
     if (!isJsonObject(parsed)) {
       return {};
@@ -35164,7 +35233,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const config2 = await loadConfig(absoluteTestPath, repoRootPath);
   const guidelinePatterns = config2?.guideline_patterns;
-  const rawFile = await readFile4(absoluteTestPath, "utf8");
+  const rawFile = await readFile5(absoluteTestPath, "utf8");
   const parsed = parse22(rawFile);
   if (!isJsonObject(parsed)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
@@ -35202,14 +35271,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
+      logError(`Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`);
       continue;
     }
     const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
     const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
     const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
     if (hasExpectedMessages && expectedMessages.length === 0) {
-      logWarning5(`No valid expected message found for eval case: ${id}`);
+      logError(`No valid expected message found for eval case: ${id}`);
       continue;
     }
     if (expectedMessages.length > 1) {
@@ -35240,7 +35309,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    let evaluators;
+    try {
+      evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    } catch (error40) {
+      const message = error40 instanceof Error ? error40.message : String(error40);
+      logError(`Skipping eval case '${id}': ${message}`);
+      continue;
+    }
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -35258,7 +35334,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       question,
       input_messages: inputMessages,
       input_segments: inputSegments,
-      output_segments: outputSegments,
+      expected_segments: outputSegments,
       reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
@@ -35290,10 +35366,19 @@ function asString5(value) {
 function logWarning5(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW5}Warning: ${message}
-${detailBlock}${ANSI_RESET5}`);
+    console.warn(`${ANSI_YELLOW6}Warning: ${message}
+${detailBlock}${ANSI_RESET6}`);
   } else {
-    console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+    console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
+  }
+}
+function logError(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.error(`${ANSI_RED}Error: ${message}
+${detailBlock}${ANSI_RESET6}`);
+  } else {
+    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
   }
 }
 var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
@@ -36991,7 +37076,7 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await readFile5(absolutePath, "utf8");
+  const raw = await readFile6(absolutePath, "utf8");
   const parsed = parse32(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -37034,16 +37119,16 @@ Use the reference_answer as a gold standard for a high-quality response (if prov
 Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
 [[ ## expected_outcome ## ]]
-{{expected_outcome}}
+{{${TEMPLATE_VARIABLES.EXPECTED_OUTCOME}}}
 [[ ## question ## ]]
-{{question}}
+{{${TEMPLATE_VARIABLES.QUESTION}}}
 [[ ## reference_answer ## ]]
-{{reference_answer}}
+{{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
 [[ ## candidate_answer ## ]]
-{{candidate_answer}}`;
+{{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -37066,12 +37151,12 @@ var LlmJudgeEvaluator = class {
   async evaluateWithPrompt(context, judgeProvider) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const variables = {
-      input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
-      output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
-      candidate_answer: context.candidate.trim(),
-      reference_answer: (context.evalCase.reference_answer ?? "").trim(),
-      expected_outcome: context.evalCase.expected_outcome.trim(),
-      question: formattedQuestion.trim()
+      [TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
+      [TEMPLATE_VARIABLES.EXPECTED_MESSAGES]: JSON.stringify(context.evalCase.expected_segments, null, 2),
+      [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
+      [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
+      [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
+      [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim()
     };
     const systemPrompt = buildOutputSchema();
     const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -37303,7 +37388,7 @@ function parseJsonSafe(payload) {
   }
 }
 function substituteVariables(template, variables) {
-  return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
+  return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (match, varName) => {
     return variables[varName] ?? match;
   });
 }
@@ -37456,11 +37541,11 @@ async function runEvaluation(options) {
     now,
     evalId,
     verbose,
+    evalCases: preloadedEvalCases,
     onResult,
     onProgress
   } = options;
-  const load = loadEvalCases;
-  const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
+  const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
   const filteredEvalCases = filterEvalCases(evalCases, evalId);
   if (filteredEvalCases.length === 0) {
     if (evalId) {
@@ -37644,8 +37729,9 @@ async function runBatchEvaluation(options) {
     agentTimeoutMs
   } = options;
   const promptInputsList = [];
+  const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
   for (const evalCase of evalCases) {
-    const promptInputs = await buildPromptInputs(evalCase);
+    const promptInputs = await buildPromptInputs(evalCase, formattingMode);
     if (promptDumpDir) {
       await dumpPrompt(promptDumpDir, evalCase, promptInputs);
     }
@@ -37751,7 +37837,8 @@ async function runEvalCase(options) {
     signal,
     judgeProvider
   } = options;
-  const promptInputs = await buildPromptInputs(evalCase);
+  const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
+  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
   if (promptDumpDir) {
     await dumpPrompt(promptDumpDir, evalCase, promptInputs);
   }
@@ -38040,7 +38127,8 @@ async function runLlmJudgeEvaluator(options) {
 async function resolveCustomPrompt(config2) {
   if (config2.promptPath) {
     try {
-      return await readTextFile(config2.promptPath);
+      const content = await readTextFile(config2.promptPath);
+      return content;
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Could not read custom prompt at ${config2.promptPath}: ${message}`);
@@ -38927,7 +39015,7 @@ function formatEvaluationSummary(summary) {
 }
 // ../../packages/core/dist/evaluation/validation/index.js
-import { readFile as readFile6 } from "node:fs/promises";
+import { readFile as readFile7 } from "node:fs/promises";
 import { parse as parse6 } from "yaml";
 import { readFile as readFile23 } from "node:fs/promises";
 import path16 from "node:path";
@@ -38945,7 +39033,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
 var SCHEMA_CONFIG_V22 = "agentv-config-v2";
 async function detectFileType(filePath) {
   try {
-    const content = await readFile6(filePath, "utf8");
+    const content = await readFile7(filePath, "utf8");
     const parsed = parse6(content);
     if (typeof parsed !== "object" || parsed === null) {
       return "unknown";
@@ -39794,9 +39882,9 @@ var TARGET_FILE_CANDIDATES = [
   path17.join(".agentv", "targets.yaml"),
   path17.join(".agentv", "targets.yml")
 ];
-var ANSI_YELLOW6 = "\x1B[33m";
-var ANSI_RED = "\x1B[31m";
-var ANSI_RESET6 = "\x1B[0m";
+var ANSI_YELLOW7 = "\x1B[33m";
+var ANSI_RED2 = "\x1B[31m";
+var ANSI_RESET7 = "\x1B[0m";
 function isTTY() {
   return process.stdout.isTTY ?? false;
 }
@@ -39869,8 +39957,8 @@ async function selectTarget(options) {
 Warnings in ${targetsFilePath}:`);
     for (const warning of warnings) {
       const location = warning.location ? ` [${warning.location}]` : "";
-      const prefix = useColors ? `${ANSI_YELLOW6}  \u26A0${ANSI_RESET6}` : "  \u26A0";
-      const message = useColors ? `${ANSI_YELLOW6}${warning.message}${ANSI_RESET6}` : warning.message;
+      const prefix = useColors ? `${ANSI_YELLOW7}  \u26A0${ANSI_RESET7}` : "  \u26A0";
+      const message = useColors ? `${ANSI_YELLOW7}${warning.message}${ANSI_RESET7}` : warning.message;
       console.warn(`${prefix}${location} ${message}`);
     }
     console.warn("");
@@ -39881,8 +39969,8 @@ Warnings in ${targetsFilePath}:`);
 Errors in ${targetsFilePath}:`);
     for (const error40 of errors) {
       const location = error40.location ? ` [${error40.location}]` : "";
-      const prefix = useColors ? `${ANSI_RED}  \u2717${ANSI_RESET6}` : "  \u2717";
-      const message = useColors ? `${ANSI_RED}${error40.message}${ANSI_RESET6}` : error40.message;
+      const prefix = useColors ? `${ANSI_RED2}  \u2717${ANSI_RESET7}` : "  \u2717";
+      const message = useColors ? `${ANSI_RED2}${error40.message}${ANSI_RESET7}` : error40.message;
       console.error(`${prefix}${location} ${message}`);
     }
     throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -40082,7 +40170,7 @@ async function prepareFileMetadata(params) {
   const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
   const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, evalId: options.evalId });
   const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
-  return { evalIds: filteredIds, selection, inlineTargetLabel };
+  return { evalIds: filteredIds, evalCases, selection, inlineTargetLabel };
 }
 async function runWithLimit(items, limit, task) {
   const safeLimit = Math.max(1, limit);
@@ -40110,7 +40198,8 @@ async function runSingleEvalFile(params) {
     seenEvalCases,
     displayIdTracker,
     selection,
-    inlineTargetLabel
+    inlineTargetLabel,
+    evalCases
   } = params;
   await ensureFileExists(testFilePath, "Test file");
   const resolvedTargetSelection = selection;
@@ -40162,6 +40251,7 @@ async function runSingleEvalFile(params) {
     cache,
     useCache: options.cache,
     evalId: options.evalId,
+    evalCases,
     verbose: options.verbose,
     maxConcurrency: resolvedWorkers,
     onResult: async (result) => {
@@ -40267,7 +40357,8 @@ async function runEvalCommand(input) {
         seenEvalCases,
         displayIdTracker,
         selection: targetPrep.selection,
-        inlineTargetLabel: targetPrep.inlineTargetLabel
+        inlineTargetLabel: targetPrep.inlineTargetLabel,
+        evalCases: targetPrep.evalCases
       });
       allResults.push(...result.results);
       if (result.promptDumpDir) {
@@ -40590,12 +40681,12 @@ function registerStatusCommand(program) {
 }
 // src/commands/validate/format-output.ts
-var ANSI_RED2 = "\x1B[31m";
-var ANSI_YELLOW7 = "\x1B[33m";
+var ANSI_RED3 = "\x1B[31m";
+var ANSI_YELLOW8 = "\x1B[33m";
 var ANSI_GREEN = "\x1B[32m";
 var ANSI_CYAN = "\x1B[36m";
 var ANSI_BOLD = "\x1B[1m";
-var ANSI_RESET7 = "\x1B[0m";
+var ANSI_RESET8 = "\x1B[0m";
 function formatSummary(summary, useColors) {
   const lines = [];
   lines.push("");
@@ -40611,15 +40702,15 @@ function formatSummary(summary, useColors) {
 }
 function formatHeader(text2, useColors) {
   if (useColors) {
-    return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET7}`;
+    return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET8}`;
   }
   return text2;
 }
 function formatFileResult(result, useColors) {
   const lines = [];
   const status = result.valid ? "\u2713" : "\u2717";
-  const statusColor = result.valid ? ANSI_GREEN : ANSI_RED2;
-  const statusText = useColors ? `${statusColor}${status}${ANSI_RESET7}` : status;
+  const statusColor = result.valid ? ANSI_GREEN : ANSI_RED3;
+  const statusText = useColors ? `${statusColor}${status}${ANSI_RESET8}` : status;
   const fileName = result.filePath;
   lines.push(`${statusText} ${fileName}`);
   if (result.errors.length > 0) {
@@ -40631,8 +40722,8 @@ function formatFileResult(result, useColors) {
 }
 function formatError2(error40, useColors) {
   const prefix = error40.severity === "error" ? "  \u2717" : "  \u26A0";
-  const color = error40.severity === "error" ? ANSI_RED2 : ANSI_YELLOW7;
-  const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET7}` : prefix;
+  const color = error40.severity === "error" ? ANSI_RED3 : ANSI_YELLOW8;
+  const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET8}` : prefix;
   const location = error40.location ? ` [${error40.location}]` : "";
   return `${coloredPrefix}${location} ${error40.message}`;
 }
@@ -40645,15 +40736,15 @@ function formatStats(summary, useColors) {
     (r) => r.errors.some((e) => e.severity === "warning")
   ).length;
   if (useColors) {
-    lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET7}`);
-    lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET7}`);
+    lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET8}`);
+    lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET8}`);
     if (summary.invalidFiles > 0) {
-      lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET7}`);
+      lines.push(`${ANSI_RED3}${invalidText}${ANSI_RESET8}`);
     } else {
       lines.push(invalidText);
     }
     if (filesWithWarnings > 0) {
-      lines.push(`${ANSI_YELLOW7}Files with warnings: ${filesWithWarnings}${ANSI_RESET7}`);
+      lines.push(`${ANSI_YELLOW8}Files with warnings: ${filesWithWarnings}${ANSI_RESET8}`);
     }
   } else {
     lines.push(totalText);
@@ -40827,4 +40918,4 @@ export {
   createProgram,
   runCli
 };
-//# sourceMappingURL=chunk-LVLBPRCV.js.map
+//# sourceMappingURL=chunk-HWGALLUR.js.map