npm - @agentv/core - Versions diffs - 0.15.0 → 0.17.0 - Mend

@agentv/core 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/{chunk-IOCVST3R.js → chunk-YCIZ33BO.js} +28 -11
package/dist/chunk-YCIZ33BO.js.map +1 -0
package/dist/evaluation/validation/index.cjs +68 -64
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +64 -67
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +297 -149
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +18 -5
package/dist/index.d.ts +18 -5
package/dist/index.js +251 -115
package/dist/index.js.map +1 -1
package/package.json +15 -16
package/LICENSE +0 -21
package/dist/chunk-IOCVST3R.js.map +0 -1

package/dist/index.cjs CHANGED

@@ -116,7 +116,7 @@ function getHitCount(result) {
 }
 // src/evaluation/yaml-parser.ts
-var import_promises5 = require("fs/promises");
+var import_promises6 = require("fs/promises");
 var import_node_path6 = __toESM(require("path"), 1);
 var import_yaml2 = require("yaml");
@@ -125,11 +125,11 @@ function extractCodeBlocks(segments) {
   const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
   const codeBlocks = [];
   for (const segment of segments) {
-    const typeValue = segment["type"];
+    const typeValue = segment.type;
     if (typeof typeValue !== "string" || typeValue !== "text") {
       continue;
     }
-    const textValue = segment["value"];
+    const textValue = segment.value;
     if (typeof textValue !== "string") {
       continue;
     }
@@ -154,7 +154,7 @@ ${part.content}
   }
   return parts.map((p) => p.content).join(" ");
 }
-function formatSegment(segment) {
+function formatSegment(segment, mode = "lm") {
   const type = asString(segment.type);
   if (type === "text") {
     return asString(segment.value);
@@ -164,8 +164,14 @@ function formatSegment(segment) {
     return refPath ? `<Attached: ${refPath}>` : void 0;
   }
   if (type === "file") {
-    const text = asString(segment.text);
     const filePath = asString(segment.path);
+    if (!filePath) {
+      return void 0;
+    }
+    if (mode === "agent") {
+      return `<file: path="${filePath}">`;
+    }
+    const text = asString(segment.text);
     if (text && filePath) {
       return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
     }
@@ -194,9 +200,9 @@ function asString(value) {
 }
 // src/evaluation/loaders/config-loader.ts
-var import_micromatch = __toESM(require("micromatch"), 1);
 var import_promises2 = require("fs/promises");
 var import_node_path2 = __toESM(require("path"), 1);
+var import_micromatch = __toESM(require("micromatch"), 1);
 var import_yaml = require("yaml");
 // src/evaluation/loaders/file-resolver.ts
@@ -338,8 +344,9 @@ Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
         guideline_patterns: guidelinePatterns
       };
     } catch (error) {
-      logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
-      continue;
+      logWarning(
+        `Could not read .agentv/config.yaml at ${configPath}: ${error.message}`
+      );
     }
   }
   return null;
@@ -369,8 +376,66 @@ function logWarning(message) {
 // src/evaluation/loaders/evaluator-parser.ts
 var import_node_path3 = __toESM(require("path"), 1);
+// src/evaluation/validation/prompt-validator.ts
+var import_promises3 = require("fs/promises");
+// src/evaluation/template-variables.ts
+var TEMPLATE_VARIABLES = {
+  CANDIDATE_ANSWER: "candidate_answer",
+  EXPECTED_MESSAGES: "expected_messages",
+  QUESTION: "question",
+  EXPECTED_OUTCOME: "expected_outcome",
+  REFERENCE_ANSWER: "reference_answer",
+  INPUT_MESSAGES: "input_messages"
+};
+var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
+var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
+  TEMPLATE_VARIABLES.CANDIDATE_ANSWER,
+  TEMPLATE_VARIABLES.EXPECTED_MESSAGES
+]);
+// src/evaluation/validation/prompt-validator.ts
 var ANSI_YELLOW2 = "\x1B[33m";
 var ANSI_RESET2 = "\x1B[0m";
+async function validateCustomPromptContent(promptPath) {
+  const content = await (0, import_promises3.readFile)(promptPath, "utf8");
+  validateTemplateVariables(content, promptPath);
+}
+function validateTemplateVariables(content, source) {
+  const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
+  const foundVariables = /* @__PURE__ */ new Set();
+  const invalidVariables = [];
+  let match = variablePattern.exec(content);
+  while (match !== null) {
+    const varName = match[1];
+    foundVariables.add(varName);
+    if (!VALID_TEMPLATE_VARIABLES.has(varName)) {
+      invalidVariables.push(varName);
+    }
+    match = variablePattern.exec(content);
+  }
+  const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.CANDIDATE_ANSWER);
+  const hasExpectedMessages = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_MESSAGES);
+  const hasRequiredFields = hasCandidateAnswer || hasExpectedMessages;
+  if (!hasRequiredFields) {
+    throw new Error(
+      `Missing required fields. Must include at least one of:
+  - {{ ${TEMPLATE_VARIABLES.CANDIDATE_ANSWER} }}
+  - {{ ${TEMPLATE_VARIABLES.EXPECTED_MESSAGES} }}`
+    );
+  }
+  if (invalidVariables.length > 0) {
+    const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
+  Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
+  Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET2}`;
+    console.warn(warningMessage);
+  }
+}
+// src/evaluation/loaders/evaluator-parser.ts
+var ANSI_YELLOW3 = "\x1B[33m";
+var ANSI_RESET3 = "\x1B[0m";
 async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
   const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
@@ -429,6 +494,12 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
       const resolved = await resolveFileReference(prompt, searchRoots);
       if (resolved.resolvedPath) {
         promptPath = import_node_path3.default.resolve(resolved.resolvedPath);
+        try {
+          await validateCustomPromptContent(promptPath);
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
+        }
       } else {
         logWarning2(
           `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
@@ -465,18 +536,18 @@ function isJsonObject2(value) {
 function logWarning2(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW2}Warning: ${message}
-${detailBlock}${ANSI_RESET2}`);
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}
+${detailBlock}${ANSI_RESET3}`);
   } else {
-    console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
   }
 }
 // src/evaluation/loaders/message-processor.ts
-var import_promises3 = require("fs/promises");
+var import_promises4 = require("fs/promises");
 var import_node_path4 = __toESM(require("path"), 1);
-var ANSI_YELLOW3 = "\x1B[33m";
-var ANSI_RESET3 = "\x1B[0m";
+var ANSI_YELLOW4 = "\x1B[33m";
+var ANSI_RESET4 = "\x1B[0m";
 async function processMessages(options) {
   const {
     messages,
@@ -519,7 +590,7 @@ async function processMessages(options) {
           continue;
         }
         try {
-          const fileContent = (await (0, import_promises3.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          const fileContent = (await (0, import_promises4.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
           if (messageType === "input" && guidelinePatterns && guidelinePaths) {
             const relativeToRepo = import_node_path4.default.relative(repoRootPath, resolvedPath);
             if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
@@ -590,7 +661,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
         continue;
       }
       try {
-        const fileContent = (await (0, import_promises3.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
+        const fileContent = (await (0, import_promises4.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
         parts.push({ content: fileContent, isFile: true, displayPath });
         if (verbose) {
           console.log(`  [Expected Assistant File] Found: ${displayPath}`);
@@ -640,19 +711,19 @@ function cloneJsonValue(value) {
 function logWarning3(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW3}Warning: ${message}
-${detailBlock}${ANSI_RESET3}`);
+    console.warn(`${ANSI_YELLOW4}Warning: ${message}
+${detailBlock}${ANSI_RESET4}`);
   } else {
-    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
+    console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
   }
 }
 // src/evaluation/formatting/prompt-builder.ts
-var import_promises4 = require("fs/promises");
+var import_promises5 = require("fs/promises");
 var import_node_path5 = __toESM(require("path"), 1);
-var ANSI_YELLOW4 = "\x1B[33m";
-var ANSI_RESET4 = "\x1B[0m";
-async function buildPromptInputs(testCase) {
+var ANSI_YELLOW5 = "\x1B[33m";
+var ANSI_RESET5 = "\x1B[0m";
+async function buildPromptInputs(testCase, mode = "lm") {
   const guidelineParts = [];
   for (const rawPath of testCase.guideline_paths) {
     const absolutePath = import_node_path5.default.resolve(rawPath);
@@ -661,7 +732,7 @@ async function buildPromptInputs(testCase) {
       continue;
     }
     try {
-      const content = (await (0, import_promises4.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
+      const content = (await (0, import_promises5.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
       guidelineParts.push({
         content,
         isFile: true,
@@ -728,7 +799,7 @@ async function buildPromptInputs(testCase) {
       const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
       const contentParts = [];
       for (const segment of segments) {
-        const formattedContent = formatSegment(segment);
+        const formattedContent = formatSegment(segment, mode);
         if (formattedContent) {
           contentParts.push(formattedContent);
         }
@@ -743,7 +814,11 @@ ${messageContent}`);
   } else {
     const questionParts = [];
     for (const segment of testCase.input_segments) {
-      const formattedContent = formatSegment(segment);
+      if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
+        questionParts.push(`<Attached: ${segment.path}>`);
+        continue;
+      }
+      const formattedContent = formatSegment(segment, mode);
       if (formattedContent) {
         questionParts.push(formattedContent);
       }
@@ -757,7 +832,8 @@ ${messageContent}`);
     messages: testCase.input_messages,
     segmentsByMessage,
     guidelinePatterns: testCase.guideline_patterns,
-    guidelineContent: guidelines
+    guidelineContent: guidelines,
+    mode
   }) : void 0;
   return { question, guidelines, chatPrompt };
 }
@@ -774,7 +850,14 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
   return messagesWithContent > 1;
 }
 function buildChatPromptFromSegments(options) {
-  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
+  const {
+    messages,
+    segmentsByMessage,
+    guidelinePatterns,
+    guidelineContent,
+    systemPrompt,
+    mode = "lm"
+  } = options;
   if (messages.length === 0) {
     return void 0;
   }
@@ -792,7 +875,7 @@ ${guidelineContent.trim()}`);
     const segments = segmentsByMessage[startIndex];
     const contentParts = [];
     for (const segment of segments) {
-      const formatted = formatSegment(segment);
+      const formatted = formatSegment(segment, mode);
       if (formatted) {
         contentParts.push(formatted);
       }
@@ -825,7 +908,7 @@ ${guidelineContent.trim()}`);
       if (segment.type === "guideline_ref") {
         continue;
       }
-      const formatted = formatSegment(segment);
+      const formatted = formatSegment(segment, mode);
       if (formatted) {
         const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
         if (isGuidelineRef) {
@@ -849,17 +932,17 @@ function asString4(value) {
   return typeof value === "string" ? value : void 0;
 }
 function logWarning4(message) {
-  console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
+  console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
 }
 // src/evaluation/yaml-parser.ts
-var ANSI_YELLOW5 = "\x1B[33m";
-var ANSI_RESET5 = "\x1B[0m";
-var SCHEMA_EVAL_V2 = "agentv-eval-v2";
+var ANSI_YELLOW6 = "\x1B[33m";
+var ANSI_RED = "\x1B[31m";
+var ANSI_RESET6 = "\x1B[0m";
 async function readTestSuiteMetadata(testFilePath) {
   try {
     const absolutePath = import_node_path6.default.resolve(testFilePath);
-    const content = await (0, import_promises5.readFile)(absolutePath, "utf8");
+    const content = await (0, import_promises6.readFile)(absolutePath, "utf8");
     const parsed = (0, import_yaml2.parse)(content);
     if (!isJsonObject(parsed)) {
       return {};
@@ -877,7 +960,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
   const config = await loadConfig(absoluteTestPath, repoRootPath);
   const guidelinePatterns = config?.guideline_patterns;
-  const rawFile = await (0, import_promises5.readFile)(absoluteTestPath, "utf8");
+  const rawFile = await (0, import_promises6.readFile)(absoluteTestPath, "utf8");
   const parsed = (0, import_yaml2.parse)(rawFile);
   if (!isJsonObject(parsed)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
@@ -886,12 +969,6 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   const datasetNameFromSuite = asString5(suite.dataset)?.trim();
   const fallbackDataset = import_node_path6.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
-  const schema = suite.$schema;
-  if (schema !== SCHEMA_EVAL_V2) {
-    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
-Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
-    throw new Error(message);
-  }
   const rawTestcases = suite.evalcases;
   if (!Array.isArray(rawTestcases)) {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
@@ -915,14 +992,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
+      logError(
+        `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
+      );
       continue;
     }
     const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
-    const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
+    const inputMessages = inputMessagesValue.filter(
+      (msg) => isTestMessage(msg)
+    );
     const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
     if (hasExpectedMessages && expectedMessages.length === 0) {
-      logWarning5(`No valid expected message found for eval case: ${id}`);
+      logError(`No valid expected message found for eval case: ${id}`);
       continue;
     }
     if (expectedMessages.length > 1) {
@@ -953,7 +1034,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    let evaluators;
+    try {
+      evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logError(`Skipping eval case '${id}': ${message}`);
+      continue;
+    }
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -971,7 +1059,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       question,
       input_messages: inputMessages,
       input_segments: inputSegments,
-      output_segments: outputSegments,
+      expected_segments: outputSegments,
       reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
@@ -1003,20 +1091,29 @@ function asString5(value) {
 function logWarning5(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW5}Warning: ${message}
-${detailBlock}${ANSI_RESET5}`);
+    console.warn(`${ANSI_YELLOW6}Warning: ${message}
+${detailBlock}${ANSI_RESET6}`);
+  } else {
+    console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
+  }
+}
+function logError(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.error(`${ANSI_RED}Error: ${message}
+${detailBlock}${ANSI_RESET6}`);
   } else {
-    console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
   }
 }
 // src/evaluation/file-utils.ts
 var import_node_fs2 = require("fs");
-var import_promises6 = require("fs/promises");
+var import_promises7 = require("fs/promises");
 var import_node_path7 = __toESM(require("path"), 1);
 async function fileExists2(filePath) {
   try {
-    await (0, import_promises6.access)(filePath, import_node_fs2.constants.F_OK);
+    await (0, import_promises7.access)(filePath, import_node_fs2.constants.F_OK);
     return true;
   } catch {
     return false;
@@ -1026,7 +1123,7 @@ function normalizeLineEndings(content) {
   return content.replace(/\r\n/g, "\n");
 }
 async function readTextFile(filePath) {
-  const content = await (0, import_promises6.readFile)(filePath, "utf8");
+  const content = await (0, import_promises7.readFile)(filePath, "utf8");
   return normalizeLineEndings(content);
 }
 async function findGitRoot(startPath) {
@@ -1447,7 +1544,7 @@ async function withRetry(fn, retryConfig, signal) {
 // src/evaluation/providers/cli.ts
 var import_node_child_process = require("child_process");
-var import_promises7 = __toESM(require("fs/promises"), 1);
+var import_promises8 = __toESM(require("fs/promises"), 1);
 var import_node_os = __toESM(require("os"), 1);
 var import_node_path8 = __toESM(require("path"), 1);
 var import_node_util = require("util");
@@ -1548,7 +1645,7 @@ var CliProvider = class {
       const errorMsg = error instanceof Error ? error.message : String(error);
       throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
     } finally {
-      await import_promises7.default.unlink(filePath).catch(() => {
+      await import_promises8.default.unlink(filePath).catch(() => {
       });
     }
   }
@@ -1687,7 +1784,7 @@ function formatTimeoutSuffix(timeoutMs) {
 var import_node_child_process2 = require("child_process");
 var import_node_crypto = require("crypto");
 var import_node_fs3 = require("fs");
-var import_promises8 = require("fs/promises");
+var import_promises9 = require("fs/promises");
 var import_node_os2 = require("os");
 var import_node_path10 = __toESM(require("path"), 1);
 var import_node_util2 = require("util");
@@ -1755,9 +1852,7 @@ function buildPromptDocument(request, inputFiles, options) {
     options?.guidelineOverrides
   );
   const inputFilesList = collectInputFiles(inputFiles);
-  const nonGuidelineInputFiles = inputFilesList.filter(
-    (file) => !guidelineFiles.includes(file)
-  );
+  const nonGuidelineInputFiles = inputFilesList.filter((file) => !guidelineFiles.includes(file));
   const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
@@ -1877,7 +1972,7 @@ var CodexProvider = class {
     try {
       const promptContent = buildPromptDocument(request, inputFiles);
       const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
-      await (0, import_promises8.writeFile)(promptFile, promptContent, "utf8");
+      await (0, import_promises9.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
       const cwd = this.resolveCwd(workspaceRoot);
       const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -1929,7 +2024,15 @@ var CodexProvider = class {
     return import_node_path10.default.resolve(this.config.cwd);
   }
   buildCodexArgs() {
-    const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
+    const args = [
+      "--ask-for-approval",
+      "never",
+      "exec",
+      "--json",
+      "--color",
+      "never",
+      "--skip-git-repo-check"
+    ];
     if (this.config.args && this.config.args.length > 0) {
       args.push(...this.config.args);
     }
@@ -1960,11 +2063,11 @@ var CodexProvider = class {
     }
   }
   async createWorkspace() {
-    return await (0, import_promises8.mkdtemp)(import_node_path10.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
+    return await (0, import_promises9.mkdtemp)(import_node_path10.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
-      await (0, import_promises8.rm)(workspaceRoot, { recursive: true, force: true });
+      await (0, import_promises9.rm)(workspaceRoot, { recursive: true, force: true });
     } catch {
     }
   }
@@ -1984,7 +2087,7 @@ var CodexProvider = class {
       return void 0;
     }
     try {
-      await (0, import_promises8.mkdir)(logDir, { recursive: true });
+      await (0, import_promises9.mkdir)(logDir, { recursive: true });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
@@ -2207,7 +2310,7 @@ async function locateExecutable(candidate) {
   if (includesPathSeparator) {
     const resolved = import_node_path10.default.isAbsolute(candidate) ? candidate : import_node_path10.default.resolve(candidate);
     const executablePath = await ensureWindowsExecutableVariant(resolved);
-    await (0, import_promises8.access)(executablePath, import_node_fs3.constants.F_OK);
+    await (0, import_promises9.access)(executablePath, import_node_fs3.constants.F_OK);
     return executablePath;
   }
   const locator = process.platform === "win32" ? "where" : "which";
@@ -2217,7 +2320,7 @@ async function locateExecutable(candidate) {
     const preferred = selectExecutableCandidate(lines);
     if (preferred) {
       const executablePath = await ensureWindowsExecutableVariant(preferred);
-      await (0, import_promises8.access)(executablePath, import_node_fs3.constants.F_OK);
+      await (0, import_promises9.access)(executablePath, import_node_fs3.constants.F_OK);
       return executablePath;
     }
   } catch {
@@ -2251,7 +2354,7 @@ async function ensureWindowsExecutableVariant(candidate) {
   for (const ext of extensions) {
     const withExtension = `${candidate}${ext}`;
     try {
-      await (0, import_promises8.access)(withExtension, import_node_fs3.constants.F_OK);
+      await (0, import_promises9.access)(withExtension, import_node_fs3.constants.F_OK);
       return withExtension;
     } catch {
     }
@@ -2553,7 +2656,14 @@ var MockProvider = class {
 // src/evaluation/providers/targets.ts
 var import_zod = require("zod");
-var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES", "OUTPUT_FILE"]);
+var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
+  "PROMPT",
+  "GUIDELINES",
+  "EVAL_ID",
+  "ATTEMPT",
+  "FILES",
+  "OUTPUT_FILE"
+]);
 var BASE_TARGET_SCHEMA = import_zod.z.object({
   name: import_zod.z.string().min(1, "target name is required"),
   provider: import_zod.z.string().min(1, "provider is required"),
@@ -2798,11 +2908,18 @@ function resolveMockConfig(target) {
   return { response };
 }
 function resolveVSCodeConfig(target, env, insiders) {
-  const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
-  const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
-    allowLiteral: false,
-    optionalEnv: true
-  }) : void 0;
+  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
+    target.workspace_template ?? target.workspaceTemplate
+  );
+  const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
+    workspaceTemplateEnvVar,
+    env,
+    `${target.name} workspace template path`,
+    {
+      allowLiteral: false,
+      optionalEnv: true
+    }
+  ) : void 0;
   const commandSource = target.vscode_cmd ?? target.command;
   const waitSource = target.wait;
   const dryRunSource = target.dry_run ?? target.dryRun;
@@ -2829,7 +2946,10 @@ function resolveCliConfig(target, env) {
     allowLiteral: true,
     optionalEnv: true
   });
-  const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
+  const timeoutMs = resolveTimeoutMs(
+    target.timeout_seconds ?? target.timeoutSeconds,
+    `${target.name} timeout`
+  );
   const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
   const commandTemplate = resolveString(
     commandTemplateSource,
@@ -2957,7 +3077,9 @@ function resolveOptionalString(source, env, description, options) {
   }
   const allowLiteral = options?.allowLiteral ?? false;
   if (!allowLiteral) {
-    throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
+    throw new Error(
+      `${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`
+    );
   }
   return trimmed;
 }
@@ -3181,9 +3303,7 @@ function buildPromptDocument2(request, attachments, guidelinePatterns) {
   }
   const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
   const attachmentFiles = collectAttachmentFiles(attachments);
-  const nonGuidelineAttachments = attachmentFiles.filter(
-    (file) => !guidelineFiles.includes(file)
-  );
+  const nonGuidelineAttachments = attachmentFiles.filter((file) => !guidelineFiles.includes(file));
   const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
@@ -3292,8 +3412,10 @@ async function ensureVSCodeSubagents(options) {
       if (result.skippedExisting.length > 0) {
         console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
       }
-      console.log(`
-total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
+      console.log(
+        `
+total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`
+      );
     }
     return {
       provisioned: true,
@@ -3313,46 +3435,12 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 var import_node_fs4 = require("fs");
-var import_promises9 = require("fs/promises");
+var import_promises10 = require("fs/promises");
 var import_node_path12 = __toESM(require("path"), 1);
 var import_yaml3 = require("yaml");
-// src/evaluation/providers/types.ts
-var AGENT_PROVIDER_KINDS = [
-  "codex",
-  "vscode",
-  "vscode-insiders"
-];
-var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
-function isAgentProvider(provider) {
-  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
-}
-// src/evaluation/providers/targets-file.ts
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
-function checkSchema(parsed, absolutePath) {
-  const schema = parsed.$schema;
-  if (schema === void 0) {
-    throw new Error(
-      `Missing $schema field in targets.yaml at ${absolutePath}.
-Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
-    );
-  }
-  if (typeof schema !== "string") {
-    throw new Error(
-      `Invalid $schema field in targets.yaml at ${absolutePath}.
-Expected a string value '${TARGETS_SCHEMA_V2}'.`
-    );
-  }
-  if (schema !== TARGETS_SCHEMA_V2) {
-    throw new Error(
-      `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
-Expected '${TARGETS_SCHEMA_V2}'.`
-    );
-  }
-}
 function extractTargetsArray(parsed, absolutePath) {
   const targets = parsed.targets;
   if (!Array.isArray(targets)) {
@@ -3367,7 +3455,9 @@ function assertTargetDefinition(value, index, filePath) {
   const name = value.name;
   const provider = value.provider;
   if (typeof name !== "string" || name.trim().length === 0) {
-    throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
+    throw new Error(
+      `targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
+    );
   }
   if (typeof provider !== "string" || provider.trim().length === 0) {
     throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
@@ -3376,7 +3466,7 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises9.access)(filePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises10.access)(filePath, import_node_fs4.constants.F_OK);
     return true;
   } catch {
     return false;
@@ -3387,14 +3477,15 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises9.readFile)(absolutePath, "utf8");
+  const raw = await (0, import_promises10.readFile)(absolutePath, "utf8");
   const parsed = (0, import_yaml3.parse)(raw);
   if (!isRecord(parsed)) {
-    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
+    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
   }
-  checkSchema(parsed, absolutePath);
   const targets = extractTargetsArray(parsed, absolutePath);
-  const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
+  const definitions = targets.map(
+    (entry, index) => assertTargetDefinition(entry, index, absolutePath)
+  );
   return definitions;
 }
 function listTargetNames(definitions) {
@@ -3438,16 +3529,16 @@ Use the reference_answer as a gold standard for a high-quality response (if prov
 Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
 [[ ## expected_outcome ## ]]
-{{expected_outcome}}
+{{${TEMPLATE_VARIABLES.EXPECTED_OUTCOME}}}
 [[ ## question ## ]]
-{{question}}
+{{${TEMPLATE_VARIABLES.QUESTION}}}
 [[ ## reference_answer ## ]]
-{{reference_answer}}
+{{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
 [[ ## candidate_answer ## ]]
-{{candidate_answer}}`;
+{{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -3470,12 +3561,16 @@ var LlmJudgeEvaluator = class {
   async evaluateWithPrompt(context, judgeProvider) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const variables = {
-      input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
-      output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
-      candidate_answer: context.candidate.trim(),
-      reference_answer: (context.evalCase.reference_answer ?? "").trim(),
-      expected_outcome: context.evalCase.expected_outcome.trim(),
-      question: formattedQuestion.trim()
+      [TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
+      [TEMPLATE_VARIABLES.EXPECTED_MESSAGES]: JSON.stringify(
+        context.evalCase.expected_segments,
+        null,
+        2
+      ),
+      [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
+      [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
+      [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
+      [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim()
     };
     const systemPrompt = buildOutputSchema();
     const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -3707,17 +3802,17 @@ function parseJsonSafe(payload) {
   }
 }
 function substituteVariables(template, variables) {
-  return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
+  return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (match, varName) => {
     return variables[varName] ?? match;
   });
 }
 // src/evaluation/orchestrator.ts
 var import_node_crypto2 = require("crypto");
-var import_promises10 = require("fs/promises");
+var import_promises11 = require("fs/promises");
 var import_node_path13 = __toESM(require("path"), 1);
-// ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
+// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
 var Node = class {
   value;
   next;
@@ -3750,6 +3845,9 @@ var Queue = class {
     }
     this.#head = this.#head.next;
     this.#size--;
+    if (!this.#head) {
+      this.#tail = void 0;
+    }
     return current.value;
   }
   peek() {
@@ -3780,7 +3878,7 @@ var Queue = class {
   }
 };
-// ../../node_modules/.pnpm/p-limit@6.2.0/node_modules/p-limit/index.js
+// ../../node_modules/.bun/p-limit@6.2.0/node_modules/p-limit/index.js
 function pLimit(concurrency) {
   validateConcurrency(concurrency);
   const queue = new Queue();
@@ -3853,6 +3951,16 @@ function validateConcurrency(concurrency) {
   }
 }
+// src/evaluation/providers/types.ts
+var AGENT_PROVIDER_KINDS = [
+  "codex",
+  "vscode",
+  "vscode-insiders"
+];
+function isAgentProvider(provider) {
+  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
+}
 // src/evaluation/orchestrator.ts
 async function runEvaluation(options) {
   const {
@@ -3871,11 +3979,11 @@ async function runEvaluation(options) {
     now,
     evalId,
     verbose,
+    evalCases: preloadedEvalCases,
     onResult,
     onProgress
   } = options;
-  const load = loadEvalCases;
-  const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
+  const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
   const filteredEvalCases = filterEvalCases(evalCases, evalId);
   if (filteredEvalCases.length === 0) {
     if (evalId) {
@@ -3956,7 +4064,9 @@ async function runEvaluation(options) {
     } catch (error) {
       if (verbose) {
         const message = error instanceof Error ? error.message : String(error);
-        console.warn(`Provider batch execution failed, falling back to per-case dispatch: ${message}`);
+        console.warn(
+          `Provider batch execution failed, falling back to per-case dispatch: ${message}`
+        );
       }
     }
   }
@@ -4059,8 +4169,9 @@ async function runBatchEvaluation(options) {
     agentTimeoutMs
   } = options;
   const promptInputsList = [];
+  const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
   for (const evalCase of evalCases) {
-    const promptInputs = await buildPromptInputs(evalCase);
+    const promptInputs = await buildPromptInputs(evalCase, formattingMode);
     if (promptDumpDir) {
       await dumpPrompt(promptDumpDir, evalCase, promptInputs);
     }
@@ -4119,7 +4230,14 @@ async function runBatchEvaluation(options) {
         agentTimeoutMs
       });
     } catch (error) {
-      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
+      const errorResult = buildErrorResult(
+        evalCase,
+        target.name,
+        nowFn(),
+        error,
+        promptInputs,
+        provider
+      );
       results.push(errorResult);
       if (onResult) {
         await onResult(errorResult);
@@ -4166,7 +4284,8 @@ async function runEvalCase(options) {
     signal,
     judgeProvider
   } = options;
-  const promptInputs = await buildPromptInputs(evalCase);
+  const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
+  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
   if (promptDumpDir) {
     await dumpPrompt(promptDumpDir, evalCase, promptInputs);
   }
@@ -4296,7 +4415,18 @@ async function evaluateCandidate(options) {
   };
 }
 async function runEvaluatorsForCase(options) {
-  const { evalCase, candidate, target, provider, evaluators, attempt, promptInputs, now, judgeProvider, agentTimeoutMs } = options;
+  const {
+    evalCase,
+    candidate,
+    target,
+    provider,
+    evaluators,
+    attempt,
+    promptInputs,
+    now,
+    judgeProvider,
+    agentTimeoutMs
+  } = options;
   if (evalCase.evaluators && evalCase.evaluators.length > 0) {
     return runEvaluatorList({
       evalCase,
@@ -4397,7 +4527,6 @@ async function runEvaluatorList(options) {
           reasoning: score2.reasoning,
           evaluator_provider_request: score2.evaluatorRawRequest
         });
-        continue;
       }
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
@@ -4408,7 +4537,11 @@ async function runEvaluatorList(options) {
         expectedAspectCount: 1,
         reasoning: message
       };
-      scored.push({ score: fallbackScore, name: evaluator.name ?? "unknown", type: evaluator.type ?? "unknown" });
+      scored.push({
+        score: fallbackScore,
+        name: evaluator.name ?? "unknown",
+        type: evaluator.type ?? "unknown"
+      });
       evaluatorResults.push({
         name: evaluator.name ?? "unknown",
         type: evaluator.type ?? "unknown",
@@ -4422,7 +4555,10 @@ async function runEvaluatorList(options) {
   const aggregateScore = scored.length > 0 ? scored.reduce((total, entry) => total + entry.score.score, 0) / scored.length : 0;
   const hits = scored.flatMap((entry) => entry.score.hits);
   const misses = scored.flatMap((entry) => entry.score.misses);
-  const expectedAspectCount = scored.reduce((total, entry) => total + (entry.score.expectedAspectCount ?? 0), 0);
+  const expectedAspectCount = scored.reduce(
+    (total, entry) => total + (entry.score.expectedAspectCount ?? 0),
+    0
+  );
   const rawAspects = scored.flatMap((entry) => entry.score.rawAspects ?? []);
   const reasoningParts = scored.map((entry) => entry.score.reasoning ? `${entry.name}: ${entry.score.reasoning}` : void 0).filter(isNonEmptyString2);
   const reasoning = reasoningParts.length > 0 ? reasoningParts.join(" | ") : void 0;
@@ -4437,7 +4573,18 @@ async function runEvaluatorList(options) {
   return { score, evaluatorResults };
 }
 async function runLlmJudgeEvaluator(options) {
-  const { config, evalCase, candidate, target, provider, evaluatorRegistry, attempt, promptInputs, now, judgeProvider } = options;
+  const {
+    config,
+    evalCase,
+    candidate,
+    target,
+    provider,
+    evaluatorRegistry,
+    attempt,
+    promptInputs,
+    now,
+    judgeProvider
+  } = options;
   const customPrompt = await resolveCustomPrompt(config);
   return evaluatorRegistry.llm_judge.evaluate({
     evalCase,
@@ -4455,7 +4602,8 @@ async function runLlmJudgeEvaluator(options) {
 async function resolveCustomPrompt(config) {
   if (config.promptPath) {
     try {
-      return await readTextFile(config.promptPath);
+      const content = await readTextFile(config.promptPath);
+      return content;
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
@@ -4490,14 +4638,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
   const filePath = import_node_path13.default.resolve(directory, filename);
-  await (0, import_promises10.mkdir)(import_node_path13.default.dirname(filePath), { recursive: true });
+  await (0, import_promises11.mkdir)(import_node_path13.default.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
     question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await (0, import_promises10.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await (0, import_promises11.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {