npm - @agentv/core - Versions diffs - 0.15.0 → 0.17.0 - Mend

@agentv/core 0.15.0 → 0.17.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/{chunk-IOCVST3R.js → chunk-YCIZ33BO.js} +28 -11
package/dist/chunk-YCIZ33BO.js.map +1 -0
package/dist/evaluation/validation/index.cjs +68 -64
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +64 -67
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +297 -149
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +18 -5
package/dist/index.d.ts +18 -5
package/dist/index.js +251 -115
package/dist/index.js.map +1 -1
package/package.json +15 -16
package/LICENSE +0 -21
package/dist/chunk-IOCVST3R.js.map +0 -1

package/dist/index.js CHANGED

@@ -1,5 +1,4 @@
 import {
-  TARGETS_SCHEMA_V2,
   buildDirectoryChain,
   buildSearchRoots,
   fileExists,
@@ -9,7 +8,7 @@ import {
   readTextFile,
   resolveFileReference,
   resolveTargetDefinition
-} from "./chunk-IOCVST3R.js";
+} from "./chunk-YCIZ33BO.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -62,7 +61,7 @@ function getHitCount(result) {
 }
 // src/evaluation/yaml-parser.ts
-import { readFile as readFile4 } from "node:fs/promises";
+import { readFile as readFile5 } from "node:fs/promises";
 import path6 from "node:path";
 import { parse as parse2 } from "yaml";
@@ -71,11 +70,11 @@ function extractCodeBlocks(segments) {
   const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
   const codeBlocks = [];
   for (const segment of segments) {
-    const typeValue = segment["type"];
+    const typeValue = segment.type;
     if (typeof typeValue !== "string" || typeValue !== "text") {
       continue;
     }
-    const textValue = segment["value"];
+    const textValue = segment.value;
     if (typeof textValue !== "string") {
       continue;
     }
@@ -100,7 +99,7 @@ ${part.content}
   }
   return parts.map((p) => p.content).join(" ");
 }
-function formatSegment(segment) {
+function formatSegment(segment, mode = "lm") {
   const type = asString(segment.type);
   if (type === "text") {
     return asString(segment.value);
@@ -110,8 +109,14 @@ function formatSegment(segment) {
     return refPath ? `<Attached: ${refPath}>` : void 0;
   }
   if (type === "file") {
-    const text = asString(segment.text);
     const filePath = asString(segment.path);
+    if (!filePath) {
+      return void 0;
+    }
+    if (mode === "agent") {
+      return `<file: path="${filePath}">`;
+    }
+    const text = asString(segment.text);
     if (text && filePath) {
       return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
     }
@@ -140,9 +145,9 @@ function asString(value) {
 }
 // src/evaluation/loaders/config-loader.ts
-import micromatch from "micromatch";
 import { readFile } from "node:fs/promises";
 import path2 from "node:path";
+import micromatch from "micromatch";
 import { parse } from "yaml";
 // src/evaluation/loaders/file-resolver.ts
@@ -284,8 +289,9 @@ Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
         guideline_patterns: guidelinePatterns
       };
     } catch (error) {
-      logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
-      continue;
+      logWarning(
+        `Could not read .agentv/config.yaml at ${configPath}: ${error.message}`
+      );
     }
   }
   return null;
@@ -315,8 +321,66 @@ function logWarning(message) {
 // src/evaluation/loaders/evaluator-parser.ts
 import path3 from "node:path";
+// src/evaluation/validation/prompt-validator.ts
+import { readFile as readFile2 } from "node:fs/promises";
+// src/evaluation/template-variables.ts
+var TEMPLATE_VARIABLES = {
+  CANDIDATE_ANSWER: "candidate_answer",
+  EXPECTED_MESSAGES: "expected_messages",
+  QUESTION: "question",
+  EXPECTED_OUTCOME: "expected_outcome",
+  REFERENCE_ANSWER: "reference_answer",
+  INPUT_MESSAGES: "input_messages"
+};
+var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
+var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
+  TEMPLATE_VARIABLES.CANDIDATE_ANSWER,
+  TEMPLATE_VARIABLES.EXPECTED_MESSAGES
+]);
+// src/evaluation/validation/prompt-validator.ts
 var ANSI_YELLOW2 = "\x1B[33m";
 var ANSI_RESET2 = "\x1B[0m";
+async function validateCustomPromptContent(promptPath) {
+  const content = await readFile2(promptPath, "utf8");
+  validateTemplateVariables(content, promptPath);
+}
+function validateTemplateVariables(content, source) {
+  const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
+  const foundVariables = /* @__PURE__ */ new Set();
+  const invalidVariables = [];
+  let match = variablePattern.exec(content);
+  while (match !== null) {
+    const varName = match[1];
+    foundVariables.add(varName);
+    if (!VALID_TEMPLATE_VARIABLES.has(varName)) {
+      invalidVariables.push(varName);
+    }
+    match = variablePattern.exec(content);
+  }
+  const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.CANDIDATE_ANSWER);
+  const hasExpectedMessages = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_MESSAGES);
+  const hasRequiredFields = hasCandidateAnswer || hasExpectedMessages;
+  if (!hasRequiredFields) {
+    throw new Error(
+      `Missing required fields. Must include at least one of:
+  - {{ ${TEMPLATE_VARIABLES.CANDIDATE_ANSWER} }}
+  - {{ ${TEMPLATE_VARIABLES.EXPECTED_MESSAGES} }}`
+    );
+  }
+  if (invalidVariables.length > 0) {
+    const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
+  Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
+  Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET2}`;
+    console.warn(warningMessage);
+  }
+}
+// src/evaluation/loaders/evaluator-parser.ts
+var ANSI_YELLOW3 = "\x1B[33m";
+var ANSI_RESET3 = "\x1B[0m";
 async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
   const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
@@ -375,6 +439,12 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
       const resolved = await resolveFileReference2(prompt, searchRoots);
       if (resolved.resolvedPath) {
         promptPath = path3.resolve(resolved.resolvedPath);
+        try {
+          await validateCustomPromptContent(promptPath);
+        } catch (error) {
+          const message = error instanceof Error ? error.message : String(error);
+          throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
+        }
       } else {
         logWarning2(
           `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
@@ -411,18 +481,18 @@ function isJsonObject2(value) {
 function logWarning2(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW2}Warning: ${message}
-${detailBlock}${ANSI_RESET2}`);
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}
+${detailBlock}${ANSI_RESET3}`);
   } else {
-    console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
   }
 }
 // src/evaluation/loaders/message-processor.ts
-import { readFile as readFile2 } from "node:fs/promises";
+import { readFile as readFile3 } from "node:fs/promises";
 import path4 from "node:path";
-var ANSI_YELLOW3 = "\x1B[33m";
-var ANSI_RESET3 = "\x1B[0m";
+var ANSI_YELLOW4 = "\x1B[33m";
+var ANSI_RESET4 = "\x1B[0m";
 async function processMessages(options) {
   const {
     messages,
@@ -465,7 +535,7 @@ async function processMessages(options) {
           continue;
         }
         try {
-          const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
           if (messageType === "input" && guidelinePatterns && guidelinePaths) {
             const relativeToRepo = path4.relative(repoRootPath, resolvedPath);
             if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
@@ -536,7 +606,7 @@ async function resolveAssistantContent(content, searchRoots, verbose) {
         continue;
       }
       try {
-        const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
+        const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
         parts.push({ content: fileContent, isFile: true, displayPath });
         if (verbose) {
           console.log(`  [Expected Assistant File] Found: ${displayPath}`);
@@ -586,19 +656,19 @@ function cloneJsonValue(value) {
 function logWarning3(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW3}Warning: ${message}
-${detailBlock}${ANSI_RESET3}`);
+    console.warn(`${ANSI_YELLOW4}Warning: ${message}
+${detailBlock}${ANSI_RESET4}`);
   } else {
-    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
+    console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
   }
 }
 // src/evaluation/formatting/prompt-builder.ts
-import { readFile as readFile3 } from "node:fs/promises";
+import { readFile as readFile4 } from "node:fs/promises";
 import path5 from "node:path";
-var ANSI_YELLOW4 = "\x1B[33m";
-var ANSI_RESET4 = "\x1B[0m";
-async function buildPromptInputs(testCase) {
+var ANSI_YELLOW5 = "\x1B[33m";
+var ANSI_RESET5 = "\x1B[0m";
+async function buildPromptInputs(testCase, mode = "lm") {
   const guidelineParts = [];
   for (const rawPath of testCase.guideline_paths) {
     const absolutePath = path5.resolve(rawPath);
@@ -607,7 +677,7 @@ async function buildPromptInputs(testCase) {
       continue;
     }
     try {
-      const content = (await readFile3(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
+      const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
       guidelineParts.push({
         content,
         isFile: true,
@@ -674,7 +744,7 @@ async function buildPromptInputs(testCase) {
       const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
       const contentParts = [];
       for (const segment of segments) {
-        const formattedContent = formatSegment(segment);
+        const formattedContent = formatSegment(segment, mode);
         if (formattedContent) {
           contentParts.push(formattedContent);
         }
@@ -689,7 +759,11 @@ ${messageContent}`);
   } else {
     const questionParts = [];
     for (const segment of testCase.input_segments) {
-      const formattedContent = formatSegment(segment);
+      if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
+        questionParts.push(`<Attached: ${segment.path}>`);
+        continue;
+      }
+      const formattedContent = formatSegment(segment, mode);
       if (formattedContent) {
         questionParts.push(formattedContent);
       }
@@ -703,7 +777,8 @@ ${messageContent}`);
     messages: testCase.input_messages,
     segmentsByMessage,
     guidelinePatterns: testCase.guideline_patterns,
-    guidelineContent: guidelines
+    guidelineContent: guidelines,
+    mode
   }) : void 0;
   return { question, guidelines, chatPrompt };
 }
@@ -720,7 +795,14 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
   return messagesWithContent > 1;
 }
 function buildChatPromptFromSegments(options) {
-  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
+  const {
+    messages,
+    segmentsByMessage,
+    guidelinePatterns,
+    guidelineContent,
+    systemPrompt,
+    mode = "lm"
+  } = options;
   if (messages.length === 0) {
     return void 0;
   }
@@ -738,7 +820,7 @@ ${guidelineContent.trim()}`);
     const segments = segmentsByMessage[startIndex];
     const contentParts = [];
     for (const segment of segments) {
-      const formatted = formatSegment(segment);
+      const formatted = formatSegment(segment, mode);
       if (formatted) {
         contentParts.push(formatted);
       }
@@ -771,7 +853,7 @@ ${guidelineContent.trim()}`);
       if (segment.type === "guideline_ref") {
         continue;
       }
-      const formatted = formatSegment(segment);
+      const formatted = formatSegment(segment, mode);
       if (formatted) {
         const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
         if (isGuidelineRef) {
@@ -795,17 +877,17 @@ function asString4(value) {
   return typeof value === "string" ? value : void 0;
 }
 function logWarning4(message) {
-  console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
+  console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
 }
 // src/evaluation/yaml-parser.ts
-var ANSI_YELLOW5 = "\x1B[33m";
-var ANSI_RESET5 = "\x1B[0m";
-var SCHEMA_EVAL_V2 = "agentv-eval-v2";
+var ANSI_YELLOW6 = "\x1B[33m";
+var ANSI_RED = "\x1B[31m";
+var ANSI_RESET6 = "\x1B[0m";
 async function readTestSuiteMetadata(testFilePath) {
   try {
     const absolutePath = path6.resolve(testFilePath);
-    const content = await readFile4(absolutePath, "utf8");
+    const content = await readFile5(absolutePath, "utf8");
     const parsed = parse2(content);
     if (!isJsonObject(parsed)) {
       return {};
@@ -823,7 +905,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const config = await loadConfig(absoluteTestPath, repoRootPath);
   const guidelinePatterns = config?.guideline_patterns;
-  const rawFile = await readFile4(absoluteTestPath, "utf8");
+  const rawFile = await readFile5(absoluteTestPath, "utf8");
   const parsed = parse2(rawFile);
   if (!isJsonObject(parsed)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
@@ -832,12 +914,6 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   const datasetNameFromSuite = asString5(suite.dataset)?.trim();
   const fallbackDataset = path6.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
-  const schema = suite.$schema;
-  if (schema !== SCHEMA_EVAL_V2) {
-    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
-Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
-    throw new Error(message);
-  }
   const rawTestcases = suite.evalcases;
   if (!Array.isArray(rawTestcases)) {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
@@ -861,14 +937,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
+      logError(
+        `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
+      );
       continue;
     }
     const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
-    const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
+    const inputMessages = inputMessagesValue.filter(
+      (msg) => isTestMessage(msg)
+    );
     const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
     if (hasExpectedMessages && expectedMessages.length === 0) {
-      logWarning5(`No valid expected message found for eval case: ${id}`);
+      logError(`No valid expected message found for eval case: ${id}`);
       continue;
     }
     if (expectedMessages.length > 1) {
@@ -899,7 +979,14 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    let evaluators;
+    try {
+      evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logError(`Skipping eval case '${id}': ${message}`);
+      continue;
+    }
     const userFilePaths = [];
     for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -917,7 +1004,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       question,
       input_messages: inputMessages,
       input_segments: inputSegments,
-      output_segments: outputSegments,
+      expected_segments: outputSegments,
       reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
@@ -949,10 +1036,19 @@ function asString5(value) {
 function logWarning5(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW5}Warning: ${message}
-${detailBlock}${ANSI_RESET5}`);
+    console.warn(`${ANSI_YELLOW6}Warning: ${message}
+${detailBlock}${ANSI_RESET6}`);
   } else {
-    console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+    console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
+  }
+}
+function logError(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.error(`${ANSI_RED}Error: ${message}
+${detailBlock}${ANSI_RESET6}`);
+  } else {
+    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
   }
 }
@@ -1522,7 +1618,7 @@ function formatTimeoutSuffix(timeoutMs) {
 import { exec as execCallback, spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
 import { constants as constants2, createWriteStream } from "node:fs";
-import { access as access2, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
+import { access as access2, mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import path9 from "node:path";
 import { promisify as promisify2 } from "node:util";
@@ -1590,9 +1686,7 @@ function buildPromptDocument(request, inputFiles, options) {
     options?.guidelineOverrides
   );
   const inputFilesList = collectInputFiles(inputFiles);
-  const nonGuidelineInputFiles = inputFilesList.filter(
-    (file) => !guidelineFiles.includes(file)
-  );
+  const nonGuidelineInputFiles = inputFilesList.filter((file) => !guidelineFiles.includes(file));
   const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
@@ -1764,7 +1858,15 @@ var CodexProvider = class {
     return path9.resolve(this.config.cwd);
   }
   buildCodexArgs() {
-    const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
+    const args = [
+      "--ask-for-approval",
+      "never",
+      "exec",
+      "--json",
+      "--color",
+      "never",
+      "--skip-git-repo-check"
+    ];
     if (this.config.args && this.config.args.length > 0) {
       args.push(...this.config.args);
     }
@@ -2388,7 +2490,12 @@ var MockProvider = class {
 // src/evaluation/providers/vscode.ts
 import path10 from "node:path";
-import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
+import {
+  dispatchAgentSession,
+  dispatchBatchAgent,
+  getSubagentRoot,
+  provisionSubagents
+} from "subagent";
 var VSCodeProvider = class {
   id;
   kind;
@@ -2505,9 +2612,7 @@ function buildPromptDocument2(request, attachments, guidelinePatterns) {
   }
   const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
   const attachmentFiles = collectAttachmentFiles(attachments);
-  const nonGuidelineAttachments = attachmentFiles.filter(
-    (file) => !guidelineFiles.includes(file)
-  );
+  const nonGuidelineAttachments = attachmentFiles.filter((file) => !guidelineFiles.includes(file));
   const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
@@ -2616,8 +2721,10 @@ async function ensureVSCodeSubagents(options) {
       if (result.skippedExisting.length > 0) {
         console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
       }
-      console.log(`
-total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
+      console.log(
+        `
+total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`
+      );
     }
     return {
       provisioned: true,
@@ -2637,33 +2744,12 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 import { constants as constants3 } from "node:fs";
-import { access as access3, readFile as readFile5 } from "node:fs/promises";
+import { access as access3, readFile as readFile6 } from "node:fs/promises";
 import path11 from "node:path";
 import { parse as parse3 } from "yaml";
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
-function checkSchema(parsed, absolutePath) {
-  const schema = parsed.$schema;
-  if (schema === void 0) {
-    throw new Error(
-      `Missing $schema field in targets.yaml at ${absolutePath}.
-Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
-    );
-  }
-  if (typeof schema !== "string") {
-    throw new Error(
-      `Invalid $schema field in targets.yaml at ${absolutePath}.
-Expected a string value '${TARGETS_SCHEMA_V2}'.`
-    );
-  }
-  if (schema !== TARGETS_SCHEMA_V2) {
-    throw new Error(
-      `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
-Expected '${TARGETS_SCHEMA_V2}'.`
-    );
-  }
-}
 function extractTargetsArray(parsed, absolutePath) {
   const targets = parsed.targets;
   if (!Array.isArray(targets)) {
@@ -2678,7 +2764,9 @@ function assertTargetDefinition(value, index, filePath) {
   const name = value.name;
   const provider = value.provider;
   if (typeof name !== "string" || name.trim().length === 0) {
-    throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
+    throw new Error(
+      `targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
+    );
   }
   if (typeof provider !== "string" || provider.trim().length === 0) {
     throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
@@ -2698,14 +2786,15 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await readFile5(absolutePath, "utf8");
+  const raw = await readFile6(absolutePath, "utf8");
   const parsed = parse3(raw);
   if (!isRecord(parsed)) {
-    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
+    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
   }
-  checkSchema(parsed, absolutePath);
   const targets = extractTargetsArray(parsed, absolutePath);
-  const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
+  const definitions = targets.map(
+    (entry, index) => assertTargetDefinition(entry, index, absolutePath)
+  );
   return definitions;
 }
 function listTargetNames(definitions) {
@@ -2749,16 +2838,16 @@ Use the reference_answer as a gold standard for a high-quality response (if prov
 Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
 [[ ## expected_outcome ## ]]
-{{expected_outcome}}
+{{${TEMPLATE_VARIABLES.EXPECTED_OUTCOME}}}
 [[ ## question ## ]]
-{{question}}
+{{${TEMPLATE_VARIABLES.QUESTION}}}
 [[ ## reference_answer ## ]]
-{{reference_answer}}
+{{${TEMPLATE_VARIABLES.REFERENCE_ANSWER}}}
 [[ ## candidate_answer ## ]]
-{{candidate_answer}}`;
+{{${TEMPLATE_VARIABLES.CANDIDATE_ANSWER}}}`;
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -2781,12 +2870,16 @@ var LlmJudgeEvaluator = class {
   async evaluateWithPrompt(context, judgeProvider) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const variables = {
-      input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
-      output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
-      candidate_answer: context.candidate.trim(),
-      reference_answer: (context.evalCase.reference_answer ?? "").trim(),
-      expected_outcome: context.evalCase.expected_outcome.trim(),
-      question: formattedQuestion.trim()
+      [TEMPLATE_VARIABLES.INPUT_MESSAGES]: JSON.stringify(context.evalCase.input_segments, null, 2),
+      [TEMPLATE_VARIABLES.EXPECTED_MESSAGES]: JSON.stringify(
+        context.evalCase.expected_segments,
+        null,
+        2
+      ),
+      [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
+      [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
+      [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
+      [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim()
     };
     const systemPrompt = buildOutputSchema();
     const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -3018,7 +3111,7 @@ function parseJsonSafe(payload) {
   }
 }
 function substituteVariables(template, variables) {
-  return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
+  return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (match, varName) => {
     return variables[varName] ?? match;
   });
 }
@@ -3028,7 +3121,7 @@ import { createHash, randomUUID as randomUUID2 } from "node:crypto";
 import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
 import path12 from "node:path";
-// ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
+// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
 var Node = class {
   value;
   next;
@@ -3061,6 +3154,9 @@ var Queue = class {
     }
     this.#head = this.#head.next;
     this.#size--;
+    if (!this.#head) {
+      this.#tail = void 0;
+    }
     return current.value;
   }
   peek() {
@@ -3091,7 +3187,7 @@ var Queue = class {
   }
 };
-// ../../node_modules/.pnpm/p-limit@6.2.0/node_modules/p-limit/index.js
+// ../../node_modules/.bun/p-limit@6.2.0/node_modules/p-limit/index.js
 function pLimit(concurrency) {
   validateConcurrency(concurrency);
   const queue = new Queue();
@@ -3182,11 +3278,11 @@ async function runEvaluation(options) {
     now,
     evalId,
     verbose,
+    evalCases: preloadedEvalCases,
     onResult,
     onProgress
   } = options;
-  const load = loadEvalCases;
-  const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
+  const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
   const filteredEvalCases = filterEvalCases(evalCases, evalId);
   if (filteredEvalCases.length === 0) {
     if (evalId) {
@@ -3267,7 +3363,9 @@ async function runEvaluation(options) {
     } catch (error) {
       if (verbose) {
         const message = error instanceof Error ? error.message : String(error);
-        console.warn(`Provider batch execution failed, falling back to per-case dispatch: ${message}`);
+        console.warn(
+          `Provider batch execution failed, falling back to per-case dispatch: ${message}`
+        );
       }
     }
   }
@@ -3370,8 +3468,9 @@ async function runBatchEvaluation(options) {
     agentTimeoutMs
   } = options;
   const promptInputsList = [];
+  const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
   for (const evalCase of evalCases) {
-    const promptInputs = await buildPromptInputs(evalCase);
+    const promptInputs = await buildPromptInputs(evalCase, formattingMode);
     if (promptDumpDir) {
       await dumpPrompt(promptDumpDir, evalCase, promptInputs);
     }
@@ -3430,7 +3529,14 @@ async function runBatchEvaluation(options) {
         agentTimeoutMs
       });
     } catch (error) {
-      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
+      const errorResult = buildErrorResult(
+        evalCase,
+        target.name,
+        nowFn(),
+        error,
+        promptInputs,
+        provider
+      );
       results.push(errorResult);
       if (onResult) {
         await onResult(errorResult);
@@ -3477,7 +3583,8 @@ async function runEvalCase(options) {
     signal,
     judgeProvider
   } = options;
-  const promptInputs = await buildPromptInputs(evalCase);
+  const formattingMode = isAgentProvider(provider) ? "agent" : "lm";
+  const promptInputs = await buildPromptInputs(evalCase, formattingMode);
   if (promptDumpDir) {
     await dumpPrompt(promptDumpDir, evalCase, promptInputs);
   }
@@ -3607,7 +3714,18 @@ async function evaluateCandidate(options) {
   };
 }
 async function runEvaluatorsForCase(options) {
-  const { evalCase, candidate, target, provider, evaluators, attempt, promptInputs, now, judgeProvider, agentTimeoutMs } = options;
+  const {
+    evalCase,
+    candidate,
+    target,
+    provider,
+    evaluators,
+    attempt,
+    promptInputs,
+    now,
+    judgeProvider,
+    agentTimeoutMs
+  } = options;
   if (evalCase.evaluators && evalCase.evaluators.length > 0) {
     return runEvaluatorList({
       evalCase,
@@ -3708,7 +3826,6 @@ async function runEvaluatorList(options) {
           reasoning: score2.reasoning,
           evaluator_provider_request: score2.evaluatorRawRequest
         });
-        continue;
       }
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
@@ -3719,7 +3836,11 @@ async function runEvaluatorList(options) {
         expectedAspectCount: 1,
         reasoning: message
       };
-      scored.push({ score: fallbackScore, name: evaluator.name ?? "unknown", type: evaluator.type ?? "unknown" });
+      scored.push({
+        score: fallbackScore,
+        name: evaluator.name ?? "unknown",
+        type: evaluator.type ?? "unknown"
+      });
       evaluatorResults.push({
         name: evaluator.name ?? "unknown",
         type: evaluator.type ?? "unknown",
@@ -3733,7 +3854,10 @@ async function runEvaluatorList(options) {
   const aggregateScore = scored.length > 0 ? scored.reduce((total, entry) => total + entry.score.score, 0) / scored.length : 0;
   const hits = scored.flatMap((entry) => entry.score.hits);
   const misses = scored.flatMap((entry) => entry.score.misses);
-  const expectedAspectCount = scored.reduce((total, entry) => total + (entry.score.expectedAspectCount ?? 0), 0);
+  const expectedAspectCount = scored.reduce(
+    (total, entry) => total + (entry.score.expectedAspectCount ?? 0),
+    0
+  );
   const rawAspects = scored.flatMap((entry) => entry.score.rawAspects ?? []);
   const reasoningParts = scored.map((entry) => entry.score.reasoning ? `${entry.name}: ${entry.score.reasoning}` : void 0).filter(isNonEmptyString2);
   const reasoning = reasoningParts.length > 0 ? reasoningParts.join(" | ") : void 0;
@@ -3748,7 +3872,18 @@ async function runEvaluatorList(options) {
   return { score, evaluatorResults };
 }
 async function runLlmJudgeEvaluator(options) {
-  const { config, evalCase, candidate, target, provider, evaluatorRegistry, attempt, promptInputs, now, judgeProvider } = options;
+  const {
+    config,
+    evalCase,
+    candidate,
+    target,
+    provider,
+    evaluatorRegistry,
+    attempt,
+    promptInputs,
+    now,
+    judgeProvider
+  } = options;
   const customPrompt = await resolveCustomPrompt(config);
   return evaluatorRegistry.llm_judge.evaluate({
     evalCase,
@@ -3766,7 +3901,8 @@ async function runLlmJudgeEvaluator(options) {
 async function resolveCustomPrompt(config) {
   if (config.promptPath) {
     try {
-      return await readTextFile(config.promptPath);
+      const content = await readTextFile(config.promptPath);
+      return content;
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);