npm - @agentv/core - Versions diffs - 0.10.1 → 0.13.0 - Mend

@agentv/core 0.10.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/{chunk-YQBJAT5I.js → chunk-U3GEJ3K7.js} +1 -1
package/dist/{chunk-YQBJAT5I.js.map → chunk-U3GEJ3K7.js.map} +1 -1
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +691 -562
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +29 -26
package/dist/index.d.ts +29 -26
package/dist/index.js +638 -507
package/dist/index.js.map +1 -1
package/package.json +1 -1

package/dist/index.cjs CHANGED

@@ -33,15 +33,15 @@ __export(index_exports, {
   CodeEvaluator: () => CodeEvaluator,
   LlmJudgeEvaluator: () => LlmJudgeEvaluator,
   TEST_MESSAGE_ROLES: () => TEST_MESSAGE_ROLES,
-  buildDirectoryChain: () => buildDirectoryChain,
+  buildDirectoryChain: () => buildDirectoryChain2,
   buildPromptInputs: () => buildPromptInputs,
-  buildSearchRoots: () => buildSearchRoots,
+  buildSearchRoots: () => buildSearchRoots2,
   consumeCodexLogEntries: () => consumeCodexLogEntries,
   createAgentKernel: () => createAgentKernel,
   createProvider: () => createProvider,
   ensureVSCodeSubagents: () => ensureVSCodeSubagents,
   extractCodeBlocks: () => extractCodeBlocks,
-  fileExists: () => fileExists,
+  fileExists: () => fileExists2,
   findGitRoot: () => findGitRoot,
   getHitCount: () => getHitCount,
   isEvaluatorKind: () => isEvaluatorKind,
@@ -57,7 +57,7 @@ __export(index_exports, {
   readTestSuiteMetadata: () => readTestSuiteMetadata,
   readTextFile: () => readTextFile,
   resolveAndCreateProvider: () => resolveAndCreateProvider,
-  resolveFileReference: () => resolveFileReference,
+  resolveFileReference: () => resolveFileReference2,
   resolveTargetDefinition: () => resolveTargetDefinition,
   runEvalCase: () => runEvalCase,
   runEvaluation: () => runEvaluation,
@@ -116,47 +116,112 @@ function getHitCount(result) {
 }
 // src/evaluation/yaml-parser.ts
+var import_promises5 = require("fs/promises");
+var import_node_path6 = __toESM(require("path"), 1);
+var import_yaml2 = require("yaml");
+// src/evaluation/formatting/segment-formatter.ts
+function extractCodeBlocks(segments) {
+  const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
+  const codeBlocks = [];
+  for (const segment of segments) {
+    const typeValue = segment["type"];
+    if (typeof typeValue !== "string" || typeValue !== "text") {
+      continue;
+    }
+    const textValue = segment["value"];
+    if (typeof textValue !== "string") {
+      continue;
+    }
+    const matches = textValue.match(CODE_BLOCK_PATTERN);
+    if (matches) {
+      codeBlocks.push(...matches);
+    }
+  }
+  return codeBlocks;
+}
+function formatFileContents(parts) {
+  const fileCount = parts.filter((p) => p.isFile).length;
+  if (fileCount > 0) {
+    return parts.map((part) => {
+      if (part.isFile && part.displayPath) {
+        return `<file path="${part.displayPath}">
+${part.content}
+</file>`;
+      }
+      return part.content;
+    }).join("\n\n");
+  }
+  return parts.map((p) => p.content).join(" ");
+}
+function formatSegment(segment) {
+  const type = asString(segment.type);
+  if (type === "text") {
+    return asString(segment.value);
+  }
+  if (type === "guideline_ref") {
+    const refPath = asString(segment.path);
+    return refPath ? `<Attached: ${refPath}>` : void 0;
+  }
+  if (type === "file") {
+    const text = asString(segment.text);
+    const filePath = asString(segment.path);
+    if (text && filePath) {
+      return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
+    }
+  }
+  return void 0;
+}
+function hasVisibleContent(segments) {
+  return segments.some((segment) => {
+    const type = asString(segment.type);
+    if (type === "text") {
+      const value = asString(segment.value);
+      return value !== void 0 && value.trim().length > 0;
+    }
+    if (type === "guideline_ref") {
+      return false;
+    }
+    if (type === "file") {
+      const text = asString(segment.text);
+      return text !== void 0 && text.trim().length > 0;
+    }
+    return false;
+  });
+}
+function asString(value) {
+  return typeof value === "string" ? value : void 0;
+}
+// src/evaluation/loaders/config-loader.ts
 var import_micromatch = __toESM(require("micromatch"), 1);
-var import_node_fs2 = require("fs");
 var import_promises2 = require("fs/promises");
 var import_node_path2 = __toESM(require("path"), 1);
-var import_node_url = require("url");
 var import_yaml = require("yaml");
-// src/evaluation/file-utils.ts
+// src/evaluation/loaders/file-resolver.ts
 var import_node_fs = require("fs");
 var import_promises = require("fs/promises");
 var import_node_path = __toESM(require("path"), 1);
-async function fileExists(filePath) {
+async function fileExists(absolutePath) {
   try {
-    await (0, import_promises.access)(filePath, import_node_fs.constants.F_OK);
+    await (0, import_promises.access)(absolutePath, import_node_fs.constants.F_OK);
     return true;
   } catch {
     return false;
   }
 }
-function normalizeLineEndings(content) {
-  return content.replace(/\r\n/g, "\n");
-}
-async function readTextFile(filePath) {
-  const content = await (0, import_promises.readFile)(filePath, "utf8");
-  return normalizeLineEndings(content);
-}
-async function findGitRoot(startPath) {
-  let currentDir = import_node_path.default.dirname(import_node_path.default.resolve(startPath));
-  const root = import_node_path.default.parse(currentDir).root;
-  while (currentDir !== root) {
-    const gitPath = import_node_path.default.join(currentDir, ".git");
-    if (await fileExists(gitPath)) {
-      return currentDir;
-    }
-    const parentDir = import_node_path.default.dirname(currentDir);
-    if (parentDir === currentDir) {
-      break;
+function resolveToAbsolutePath(candidate) {
+  if (candidate instanceof URL) {
+    return new URL(candidate).pathname;
+  }
+  if (typeof candidate === "string") {
+    if (candidate.startsWith("file://")) {
+      return new URL(candidate).pathname;
     }
-    currentDir = parentDir;
+    return import_node_path.default.resolve(candidate);
   }
-  return null;
+  throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
 }
 function buildDirectoryChain(filePath, repoRoot) {
   const directories = [];
@@ -234,44 +299,15 @@ async function resolveFileReference(rawValue, searchRoots) {
   return { displayPath, attempted };
 }
-// src/evaluation/yaml-parser.ts
-var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
+// src/evaluation/loaders/config-loader.ts
+var SCHEMA_CONFIG_V2 = "agentv-config-v2";
 var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
-var SCHEMA_EVAL_V2 = "agentv-eval-v2";
-var SCHEMA_CONFIG_V2 = "agentv-config-v2";
-async function readTestSuiteMetadata(testFilePath) {
-  try {
-    const absolutePath = import_node_path2.default.resolve(testFilePath);
-    const content = await (0, import_promises2.readFile)(absolutePath, "utf8");
-    const parsed = (0, import_yaml.parse)(content);
-    if (!isJsonObject(parsed)) {
-      return {};
-    }
-    return { target: extractTargetFromSuite(parsed) };
-  } catch {
-    return {};
-  }
-}
-function extractTargetFromSuite(suite) {
-  const execution = suite.execution;
-  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
-    const executionTarget = execution.target;
-    if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
-      return executionTarget.trim();
-    }
-  }
-  const targetValue = suite.target;
-  if (typeof targetValue === "string" && targetValue.trim().length > 0) {
-    return targetValue.trim();
-  }
-  return void 0;
-}
 async function loadConfig(evalFilePath, repoRoot) {
   const directories = buildDirectoryChain(evalFilePath, repoRoot);
   for (const directory of directories) {
     const configPath = import_node_path2.default.join(directory, ".agentv", "config.yaml");
-    if (!await fileExists2(configPath)) {
+    if (!await fileExists(configPath)) {
       continue;
     }
     try {
@@ -313,24 +349,134 @@ function isGuidelineFile(filePath, patterns) {
   const patternsToUse = patterns ?? [];
   return import_micromatch.default.isMatch(normalized, patternsToUse);
 }
-function extractCodeBlocks(segments) {
-  const codeBlocks = [];
-  for (const segment of segments) {
-    const typeValue = segment["type"];
-    if (typeof typeValue !== "string" || typeValue !== "text") {
+function extractTargetFromSuite(suite) {
+  const execution = suite.execution;
+  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
+    const executionTarget = execution.target;
+    if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
+      return executionTarget.trim();
+    }
+  }
+  const targetValue = suite.target;
+  if (typeof targetValue === "string" && targetValue.trim().length > 0) {
+    return targetValue.trim();
+  }
+  return void 0;
+}
+function logWarning(message) {
+  console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
+}
+// src/evaluation/loaders/evaluator-parser.ts
+var import_node_path3 = __toESM(require("path"), 1);
+var ANSI_YELLOW2 = "\x1B[33m";
+var ANSI_RESET2 = "\x1B[0m";
+async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
+  const execution = rawEvalCase.execution;
+  const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
+  if (candidateEvaluators === void 0) {
+    return void 0;
+  }
+  if (!Array.isArray(candidateEvaluators)) {
+    logWarning2(`Skipping evaluators for '${evalId}': expected array`);
+    return void 0;
+  }
+  const evaluators = [];
+  for (const rawEvaluator of candidateEvaluators) {
+    if (!isJsonObject2(rawEvaluator)) {
+      logWarning2(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
       continue;
     }
-    const textValue = segment["value"];
-    if (typeof textValue !== "string") {
+    const name = asString2(rawEvaluator.name);
+    const typeValue = rawEvaluator.type;
+    if (!name || !isEvaluatorKind(typeValue)) {
+      logWarning2(`Skipping evaluator with invalid name/type in '${evalId}'`);
       continue;
     }
-    const matches = textValue.match(CODE_BLOCK_PATTERN);
-    if (matches) {
-      codeBlocks.push(...matches);
+    if (typeValue === "code") {
+      const script = asString2(rawEvaluator.script);
+      if (!script) {
+        logWarning2(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
+        continue;
+      }
+      const cwd = asString2(rawEvaluator.cwd);
+      let resolvedCwd;
+      if (cwd) {
+        const resolved = await resolveFileReference(cwd, searchRoots);
+        if (resolved.resolvedPath) {
+          resolvedCwd = import_node_path3.default.resolve(resolved.resolvedPath);
+        } else {
+          logWarning2(
+            `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
+            resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => `  Tried: ${attempt}`) : void 0
+          );
+        }
+      } else {
+        resolvedCwd = searchRoots[0];
+      }
+      evaluators.push({
+        name,
+        type: "code",
+        script,
+        cwd,
+        resolvedCwd
+      });
+      continue;
+    }
+    const prompt = asString2(rawEvaluator.prompt);
+    let promptPath;
+    if (prompt) {
+      const resolved = await resolveFileReference(prompt, searchRoots);
+      if (resolved.resolvedPath) {
+        promptPath = import_node_path3.default.resolve(resolved.resolvedPath);
+      } else {
+        logWarning2(
+          `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
+          resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => `  Tried: ${attempt}`) : void 0
+        );
+      }
     }
+    const _model = asString2(rawEvaluator.model);
+    evaluators.push({
+      name,
+      type: "llm_judge",
+      prompt,
+      promptPath
+    });
   }
-  return codeBlocks;
+  return evaluators.length > 0 ? evaluators : void 0;
+}
+function coerceEvaluator(candidate, contextId) {
+  if (typeof candidate !== "string") {
+    return void 0;
+  }
+  if (isEvaluatorKind(candidate)) {
+    return candidate;
+  }
+  logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
+  return void 0;
+}
+function asString2(value) {
+  return typeof value === "string" ? value : void 0;
+}
+function isJsonObject2(value) {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
 }
+function logWarning2(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.warn(`${ANSI_YELLOW2}Warning: ${message}
+${detailBlock}${ANSI_RESET2}`);
+  } else {
+    console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
+  }
+}
+// src/evaluation/loaders/message-processor.ts
+var import_promises3 = require("fs/promises");
+var import_node_path4 = __toESM(require("path"), 1);
+var ANSI_YELLOW3 = "\x1B[33m";
+var ANSI_RESET3 = "\x1B[0m";
 async function processMessages(options) {
   const {
     messages,
@@ -356,9 +502,9 @@ async function processMessages(options) {
       if (!isJsonObject(rawSegment)) {
         continue;
       }
-      const segmentType = asString(rawSegment.type);
+      const segmentType = asString3(rawSegment.type);
       if (segmentType === "file") {
-        const rawValue = asString(rawSegment.value);
+        const rawValue = asString3(rawSegment.value);
         if (!rawValue) {
           continue;
         }
@@ -369,15 +515,15 @@ async function processMessages(options) {
         if (!resolvedPath) {
           const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
           const context = messageType === "input" ? "" : " in expected_messages";
-          logWarning(`File not found${context}: ${displayPath}`, attempts);
+          logWarning3(`File not found${context}: ${displayPath}`, attempts);
           continue;
         }
         try {
-          const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          const fileContent = (await (0, import_promises3.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
           if (messageType === "input" && guidelinePatterns && guidelinePaths) {
-            const relativeToRepo = import_node_path2.default.relative(repoRootPath, resolvedPath);
+            const relativeToRepo = import_node_path4.default.relative(repoRootPath, resolvedPath);
             if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(import_node_path2.default.resolve(resolvedPath));
+              guidelinePaths.push(import_node_path4.default.resolve(resolvedPath));
               if (verbose) {
                 console.log(`  [Guideline] Found: ${displayPath}`);
                 console.log(`    Resolved to: ${resolvedPath}`);
@@ -389,7 +535,7 @@ async function processMessages(options) {
             type: "file",
             path: displayPath,
             text: fileContent,
-            resolvedPath: import_node_path2.default.resolve(resolvedPath)
+            resolvedPath: import_node_path4.default.resolve(resolvedPath)
           });
           if (verbose) {
             const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -398,7 +544,7 @@ async function processMessages(options) {
           }
         } catch (error) {
           const context = messageType === "input" ? "" : " expected output";
-          logWarning(`Could not read${context} file ${resolvedPath}: ${error.message}`);
+          logWarning3(`Could not read${context} file ${resolvedPath}: ${error.message}`);
         }
         continue;
       }
@@ -412,202 +558,120 @@ async function processMessages(options) {
   }
   return segments;
 }
-async function loadEvalCases(evalFilePath, repoRoot, options) {
-  const verbose = options?.verbose ?? false;
-  const evalIdFilter = options?.evalId;
-  const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
-  if (!await fileExists2(absoluteTestPath)) {
-    throw new Error(`Test file not found: ${evalFilePath}`);
-  }
-  const repoRootPath = resolveToAbsolutePath(repoRoot);
-  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
-  const config = await loadConfig(absoluteTestPath, repoRootPath);
-  const guidelinePatterns = config?.guideline_patterns;
-  const rawFile = await (0, import_promises2.readFile)(absoluteTestPath, "utf8");
-  const parsed = (0, import_yaml.parse)(rawFile);
-  if (!isJsonObject(parsed)) {
-    throw new Error(`Invalid test file format: ${evalFilePath}`);
-  }
-  const suite = parsed;
-  const datasetNameFromSuite = asString(suite.dataset)?.trim();
-  const fallbackDataset = import_node_path2.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
-  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
-  const schema = suite.$schema;
-  if (schema !== SCHEMA_EVAL_V2) {
-    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
-Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
-    throw new Error(message);
+async function resolveAssistantContent(content, searchRoots, verbose) {
+  if (typeof content === "string") {
+    return content;
   }
-  const rawTestcases = suite.evalcases;
-  if (!Array.isArray(rawTestcases)) {
-    throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
+  if (!content) {
+    return "";
   }
-  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
-  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
-  const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
-  const results = [];
-  for (const rawEvalcase of rawTestcases) {
-    if (!isJsonObject(rawEvalcase)) {
-      logWarning("Skipping invalid eval case entry (expected object)");
+  const parts = [];
+  for (const entry of content) {
+    if (typeof entry === "string") {
+      parts.push({ content: entry, isFile: false });
       continue;
     }
-    const evalcase = rawEvalcase;
-    const id = asString(evalcase.id);
-    if (evalIdFilter && id !== evalIdFilter) {
+    if (!isJsonObject(entry)) {
       continue;
     }
-    const conversationId = asString(evalcase.conversation_id);
-    const outcome = asString(evalcase.outcome);
-    const inputMessagesValue = evalcase.input_messages;
-    const expectedMessagesValue = evalcase.expected_messages;
-    if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
-      continue;
-    }
-    const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
-    const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
-    const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
-    if (hasExpectedMessages && expectedMessages.length === 0) {
-      logWarning(`No valid expected message found for eval case: ${id}`);
-      continue;
-    }
-    if (expectedMessages.length > 1) {
-      logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
-    }
-    const guidelinePaths = [];
-    const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
-      searchRoots,
-      repoRootPath,
-      guidelinePatterns,
-      guidelinePaths,
-      textParts: inputTextParts,
-      messageType: "input",
-      verbose
-    });
-    const outputSegments = hasExpectedMessages ? await processMessages({
-      messages: expectedMessages,
-      searchRoots,
-      repoRootPath,
-      guidelinePatterns,
-      messageType: "output",
-      verbose
-    }) : [];
-    const codeSnippets = extractCodeBlocks(inputSegments);
-    const expectedContent = expectedMessages[0]?.content;
-    const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
-    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
-    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
-    const userFilePaths = [];
-    for (const segment of inputSegments) {
-      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
-        userFilePaths.push(segment.resolvedPath);
+    const segmentType = asString3(entry.type);
+    if (segmentType === "file") {
+      const rawValue = asString3(entry.value);
+      if (!rawValue) {
+        continue;
       }
-    }
-    const allFilePaths = [
-      ...guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
-      ...userFilePaths
-    ];
-    const testCase = {
-      id,
-      dataset: datasetName,
-      conversation_id: conversationId,
-      question,
-      input_messages: inputMessages,
-      input_segments: inputSegments,
-      output_segments: outputSegments,
-      reference_answer: referenceAnswer,
-      guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
-      guideline_patterns: guidelinePatterns,
-      file_paths: allFilePaths,
-      code_snippets: codeSnippets,
-      expected_outcome: outcome,
-      evaluator: evalCaseEvaluatorKind,
-      evaluators
-    };
-    if (verbose) {
-      console.log(`
-[Eval Case: ${id}]`);
-      if (testCase.guideline_paths.length > 0) {
-        console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
-        for (const guidelinePath of testCase.guideline_paths) {
-          console.log(`    - ${guidelinePath}`);
+      const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+        rawValue,
+        searchRoots
+      );
+      if (!resolvedPath) {
+        const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
+        logWarning3(`File not found in expected_messages: ${displayPath}`, attempts);
+        continue;
+      }
+      try {
+        const fileContent = (await (0, import_promises3.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
+        parts.push({ content: fileContent, isFile: true, displayPath });
+        if (verbose) {
+          console.log(`  [Expected Assistant File] Found: ${displayPath}`);
+          console.log(`    Resolved to: ${resolvedPath}`);
         }
-      } else {
-        console.log("  No guidelines found");
+      } catch (error) {
+        logWarning3(`Could not read file ${resolvedPath}: ${error.message}`);
       }
+      continue;
     }
-    results.push(testCase);
-  }
-  return results;
-}
-function needsRoleMarkers(messages, processedSegmentsByMessage) {
-  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
-    return true;
-  }
-  let messagesWithContent = 0;
-  for (const segments of processedSegmentsByMessage) {
-    if (hasVisibleContent(segments)) {
-      messagesWithContent++;
+    const textValue = asString3(entry.text);
+    if (typeof textValue === "string") {
+      parts.push({ content: textValue, isFile: false });
+      continue;
+    }
+    const valueValue = asString3(entry.value);
+    if (typeof valueValue === "string") {
+      parts.push({ content: valueValue, isFile: false });
+      continue;
     }
+    parts.push({ content: JSON.stringify(entry), isFile: false });
   }
-  return messagesWithContent > 1;
+  return formatFileContents(parts);
 }
-function hasVisibleContent(segments) {
-  return segments.some((segment) => {
-    const type = asString(segment.type);
-    if (type === "text") {
-      const value = asString(segment.value);
-      return value !== void 0 && value.trim().length > 0;
-    }
-    if (type === "guideline_ref") {
-      return false;
-    }
-    if (type === "file") {
-      const text = asString(segment.text);
-      return text !== void 0 && text.trim().length > 0;
-    }
-    return false;
-  });
+function asString3(value) {
+  return typeof value === "string" ? value : void 0;
 }
-function formatSegment(segment) {
-  const type = asString(segment.type);
-  if (type === "text") {
-    return asString(segment.value);
+function cloneJsonObject(source) {
+  const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
+  return Object.fromEntries(entries);
+}
+function cloneJsonValue(value) {
+  if (value === null) {
+    return null;
   }
-  if (type === "guideline_ref") {
-    const refPath = asString(segment.path);
-    return refPath ? `<Attached: ${refPath}>` : void 0;
+  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
+    return value;
   }
-  if (type === "file") {
-    const text = asString(segment.text);
-    const filePath = asString(segment.path);
-    if (text && filePath) {
-      return `=== ${filePath} ===
-${text}`;
-    }
+  if (Array.isArray(value)) {
+    return value.map((item) => cloneJsonValue(item));
+  }
+  if (typeof value === "object") {
+    return cloneJsonObject(value);
+  }
+  return value;
+}
+function logWarning3(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}
+${detailBlock}${ANSI_RESET3}`);
+  } else {
+    console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
   }
-  return void 0;
 }
+// src/evaluation/formatting/prompt-builder.ts
+var import_promises4 = require("fs/promises");
+var import_node_path5 = __toESM(require("path"), 1);
+var ANSI_YELLOW4 = "\x1B[33m";
+var ANSI_RESET4 = "\x1B[0m";
 async function buildPromptInputs(testCase) {
-  const guidelineContents = [];
+  const guidelineParts = [];
   for (const rawPath of testCase.guideline_paths) {
-    const absolutePath = import_node_path2.default.resolve(rawPath);
-    if (!await fileExists2(absolutePath)) {
-      logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
+    const absolutePath = import_node_path5.default.resolve(rawPath);
+    if (!await fileExists(absolutePath)) {
+      logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
       continue;
     }
     try {
-      const content = (await (0, import_promises2.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n");
-      guidelineContents.push(`=== ${import_node_path2.default.basename(absolutePath)} ===
-${content}`);
+      const content = (await (0, import_promises4.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
+      guidelineParts.push({
+        content,
+        isFile: true,
+        displayPath: import_node_path5.default.basename(absolutePath)
+      });
     } catch (error) {
-      logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
+      logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
     }
   }
-  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
+  const guidelines = formatFileContents(guidelineParts);
   const segmentsByMessage = [];
   const fileContentsByPath = /* @__PURE__ */ new Map();
   for (const segment of testCase.input_segments) {
@@ -628,9 +692,9 @@ ${content}`);
             messageSegments.push({ type: "text", value: segment });
           }
         } else if (isJsonObject(segment)) {
-          const type = asString(segment.type);
+          const type = asString4(segment.type);
           if (type === "file") {
-            const value = asString(segment.value);
+            const value = asString4(segment.value);
             if (!value) continue;
             if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
               messageSegments.push({ type: "guideline_ref", path: value });
@@ -641,7 +705,7 @@ ${content}`);
               messageSegments.push({ type: "file", text: fileText, path: value });
             }
           } else if (type === "text") {
-            const textValue = asString(segment.value);
+            const textValue = asString4(segment.value);
             if (textValue && textValue.trim().length > 0) {
               messageSegments.push({ type: "text", value: textValue });
             }
@@ -697,6 +761,18 @@ ${messageContent}`);
   }) : void 0;
   return { question, guidelines, chatPrompt };
 }
+function needsRoleMarkers(messages, processedSegmentsByMessage) {
+  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
+    return true;
+  }
+  let messagesWithContent = 0;
+  for (const segments of processedSegmentsByMessage) {
+    if (hasVisibleContent(segments)) {
+      messagesWithContent++;
+    }
+  }
+  return messagesWithContent > 1;
+}
 function buildChatPromptFromSegments(options) {
   const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
   if (messages.length === 0) {
@@ -756,209 +832,294 @@ ${guidelineContent.trim()}`);
         if (isGuidelineRef) {
           continue;
         }
-        contentParts.push(formatted);
+        contentParts.push(formatted);
+      }
+    }
+    if (contentParts.length === 0) {
+      continue;
+    }
+    chatPrompt.push({
+      role,
+      content: contentParts.join("\n"),
+      ...name ? { name } : {}
+    });
+  }
+  return chatPrompt.length > 0 ? chatPrompt : void 0;
+}
+function asString4(value) {
+  return typeof value === "string" ? value : void 0;
+}
+function logWarning4(message) {
+  console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
+}
+// src/evaluation/yaml-parser.ts
+var ANSI_YELLOW5 = "\x1B[33m";
+var ANSI_RESET5 = "\x1B[0m";
+var SCHEMA_EVAL_V2 = "agentv-eval-v2";
+async function readTestSuiteMetadata(testFilePath) {
+  try {
+    const absolutePath = import_node_path6.default.resolve(testFilePath);
+    const content = await (0, import_promises5.readFile)(absolutePath, "utf8");
+    const parsed = (0, import_yaml2.parse)(content);
+    if (!isJsonObject(parsed)) {
+      return {};
+    }
+    return { target: extractTargetFromSuite(parsed) };
+  } catch {
+    return {};
+  }
+}
+async function loadEvalCases(evalFilePath, repoRoot, options) {
+  const verbose = options?.verbose ?? false;
+  const evalIdFilter = options?.evalId;
+  const absoluteTestPath = import_node_path6.default.resolve(evalFilePath);
+  const repoRootPath = resolveToAbsolutePath(repoRoot);
+  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
+  const config = await loadConfig(absoluteTestPath, repoRootPath);
+  const guidelinePatterns = config?.guideline_patterns;
+  const rawFile = await (0, import_promises5.readFile)(absoluteTestPath, "utf8");
+  const parsed = (0, import_yaml2.parse)(rawFile);
+  if (!isJsonObject(parsed)) {
+    throw new Error(`Invalid test file format: ${evalFilePath}`);
+  }
+  const suite = parsed;
+  const datasetNameFromSuite = asString5(suite.dataset)?.trim();
+  const fallbackDataset = import_node_path6.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
+  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
+  const schema = suite.$schema;
+  if (schema !== SCHEMA_EVAL_V2) {
+    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
+Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
+    throw new Error(message);
+  }
+  const rawTestcases = suite.evalcases;
+  if (!Array.isArray(rawTestcases)) {
+    throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
+  }
+  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
+  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
+  const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
+  const results = [];
+  for (const rawEvalcase of rawTestcases) {
+    if (!isJsonObject(rawEvalcase)) {
+      logWarning5("Skipping invalid eval case entry (expected object)");
+      continue;
+    }
+    const evalcase = rawEvalcase;
+    const id = asString5(evalcase.id);
+    if (evalIdFilter && id !== evalIdFilter) {
+      continue;
+    }
+    const conversationId = asString5(evalcase.conversation_id);
+    const outcome = asString5(evalcase.outcome);
+    const inputMessagesValue = evalcase.input_messages;
+    const expectedMessagesValue = evalcase.expected_messages;
+    if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
+      logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
+      continue;
+    }
+    const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
+    const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
+    const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
+    if (hasExpectedMessages && expectedMessages.length === 0) {
+      logWarning5(`No valid expected message found for eval case: ${id}`);
+      continue;
+    }
+    if (expectedMessages.length > 1) {
+      logWarning5(`Multiple expected messages found for eval case: ${id}, using first`);
+    }
+    const guidelinePaths = [];
+    const inputTextParts = [];
+    const inputSegments = await processMessages({
+      messages: inputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    });
+    const outputSegments = hasExpectedMessages ? await processMessages({
+      messages: expectedMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      messageType: "output",
+      verbose
+    }) : [];
+    const codeSnippets = extractCodeBlocks(inputSegments);
+    const expectedContent = expectedMessages[0]?.content;
+    const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
+    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
+    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+    const userFilePaths = [];
+    for (const segment of inputSegments) {
+      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
+        userFilePaths.push(segment.resolvedPath);
+      }
+    }
+    const allFilePaths = [
+      ...guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
+      ...userFilePaths
+    ];
+    const testCase = {
+      id,
+      dataset: datasetName,
+      conversation_id: conversationId,
+      question,
+      input_messages: inputMessages,
+      input_segments: inputSegments,
+      output_segments: outputSegments,
+      reference_answer: referenceAnswer,
+      guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
+      guideline_patterns: guidelinePatterns,
+      file_paths: allFilePaths,
+      code_snippets: codeSnippets,
+      expected_outcome: outcome,
+      evaluator: evalCaseEvaluatorKind,
+      evaluators
+    };
+    if (verbose) {
+      console.log(`
+[Eval Case: ${id}]`);
+      if (testCase.guideline_paths.length > 0) {
+        console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
+        for (const guidelinePath of testCase.guideline_paths) {
+          console.log(`    - ${guidelinePath}`);
+        }
+      } else {
+        console.log("  No guidelines found");
       }
     }
-    if (contentParts.length === 0) {
-      continue;
-    }
-    chatPrompt.push({
-      role,
-      content: contentParts.join("\n"),
-      ...name ? { name } : {}
-    });
+    results.push(testCase);
   }
-  return chatPrompt.length > 0 ? chatPrompt : void 0;
+  return results;
+}
+function asString5(value) {
+  return typeof value === "string" ? value : void 0;
 }
-async function fileExists2(absolutePath) {
+function logWarning5(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.warn(`${ANSI_YELLOW5}Warning: ${message}
+${detailBlock}${ANSI_RESET5}`);
+  } else {
+    console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+  }
+}
+// src/evaluation/file-utils.ts
+var import_node_fs2 = require("fs");
+var import_promises6 = require("fs/promises");
+var import_node_path7 = __toESM(require("path"), 1);
+async function fileExists2(filePath) {
   try {
-    await (0, import_promises2.access)(absolutePath, import_node_fs2.constants.F_OK);
+    await (0, import_promises6.access)(filePath, import_node_fs2.constants.F_OK);
     return true;
   } catch {
     return false;
   }
 }
-function resolveToAbsolutePath(candidate) {
-  if (candidate instanceof URL) {
-    return (0, import_node_url.fileURLToPath)(candidate);
-  }
-  if (typeof candidate === "string") {
-    if (candidate.startsWith("file://")) {
-      return (0, import_node_url.fileURLToPath)(new URL(candidate));
-    }
-    return import_node_path2.default.resolve(candidate);
-  }
-  throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
-}
-function asString(value) {
-  return typeof value === "string" ? value : void 0;
+function normalizeLineEndings(content) {
+  return content.replace(/\r\n/g, "\n");
 }
-function cloneJsonObject(source) {
-  const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
-  return Object.fromEntries(entries);
+async function readTextFile(filePath) {
+  const content = await (0, import_promises6.readFile)(filePath, "utf8");
+  return normalizeLineEndings(content);
 }
-function cloneJsonValue(value) {
-  if (value === null) {
-    return null;
-  }
-  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
-    return value;
-  }
-  if (Array.isArray(value)) {
-    return value.map((item) => cloneJsonValue(item));
+async function findGitRoot(startPath) {
+  let currentDir = import_node_path7.default.dirname(import_node_path7.default.resolve(startPath));
+  const root = import_node_path7.default.parse(currentDir).root;
+  while (currentDir !== root) {
+    const gitPath = import_node_path7.default.join(currentDir, ".git");
+    if (await fileExists2(gitPath)) {
+      return currentDir;
+    }
+    const parentDir = import_node_path7.default.dirname(currentDir);
+    if (parentDir === currentDir) {
+      break;
+    }
+    currentDir = parentDir;
   }
-  return cloneJsonObject(value);
+  return null;
 }
-async function resolveAssistantContent(content, searchRoots, verbose) {
-  if (typeof content === "string") {
-    return content;
-  }
-  if (!content) {
-    return "";
-  }
-  const parts = [];
-  for (const entry of content) {
-    if (typeof entry === "string") {
-      parts.push(entry);
-      continue;
+function buildDirectoryChain2(filePath, repoRoot) {
+  const directories = [];
+  const seen = /* @__PURE__ */ new Set();
+  const boundary = import_node_path7.default.resolve(repoRoot);
+  let current = import_node_path7.default.resolve(import_node_path7.default.dirname(filePath));
+  while (current !== void 0) {
+    if (!seen.has(current)) {
+      directories.push(current);
+      seen.add(current);
     }
-    if (!isJsonObject(entry)) {
-      continue;
+    if (current === boundary) {
+      break;
     }
-    const segmentType = asString(entry.type);
-    if (segmentType === "file") {
-      const rawValue = asString(entry.value);
-      if (!rawValue) {
-        continue;
-      }
-      const { displayPath, resolvedPath, attempted } = await resolveFileReference(
-        rawValue,
-        searchRoots
-      );
-      if (!resolvedPath) {
-        const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
-        logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
-        continue;
-      }
-      try {
-        const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-        parts.push(fileContent);
-        if (verbose) {
-          console.log(`  [Expected Assistant File] Found: ${displayPath}`);
-          console.log(`    Resolved to: ${resolvedPath}`);
-        }
-      } catch (error) {
-        logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
-      }
-      continue;
+    const parent = import_node_path7.default.dirname(current);
+    if (parent === current) {
+      break;
     }
-    const textValue = asString(entry.text);
-    if (typeof textValue === "string") {
-      parts.push(textValue);
-      continue;
+    current = parent;
+  }
+  if (!seen.has(boundary)) {
+    directories.push(boundary);
+  }
+  return directories;
+}
+function buildSearchRoots2(evalPath, repoRoot) {
+  const uniqueRoots = [];
+  const addRoot = (root) => {
+    const normalized = import_node_path7.default.resolve(root);
+    if (!uniqueRoots.includes(normalized)) {
+      uniqueRoots.push(normalized);
     }
-    const valueValue = asString(entry.value);
-    if (typeof valueValue === "string") {
-      parts.push(valueValue);
-      continue;
+  };
+  let currentDir = import_node_path7.default.dirname(evalPath);
+  let reachedBoundary = false;
+  while (!reachedBoundary) {
+    addRoot(currentDir);
+    const parentDir = import_node_path7.default.dirname(currentDir);
+    if (currentDir === repoRoot || parentDir === currentDir) {
+      reachedBoundary = true;
+    } else {
+      currentDir = parentDir;
     }
-    parts.push(JSON.stringify(entry));
   }
-  return parts.join(" ");
+  addRoot(repoRoot);
+  addRoot(process.cwd());
+  return uniqueRoots;
 }
-async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
-  const execution = rawEvalCase.execution;
-  const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
-  if (candidateEvaluators === void 0) {
-    return void 0;
+function trimLeadingSeparators2(value) {
+  const trimmed = value.replace(/^[/\\]+/, "");
+  return trimmed.length > 0 ? trimmed : value;
+}
+async function resolveFileReference2(rawValue, searchRoots) {
+  const displayPath = trimLeadingSeparators2(rawValue);
+  const potentialPaths = [];
+  if (import_node_path7.default.isAbsolute(rawValue)) {
+    potentialPaths.push(import_node_path7.default.normalize(rawValue));
   }
-  if (!Array.isArray(candidateEvaluators)) {
-    logWarning(`Skipping evaluators for '${evalId}': expected array`);
-    return void 0;
+  for (const base of searchRoots) {
+    potentialPaths.push(import_node_path7.default.resolve(base, displayPath));
   }
-  const evaluators = [];
-  for (const rawEvaluator of candidateEvaluators) {
-    if (!isJsonObject(rawEvaluator)) {
-      logWarning(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
-      continue;
-    }
-    const name = asString(rawEvaluator.name);
-    const typeValue = rawEvaluator.type;
-    if (!name || !isEvaluatorKind(typeValue)) {
-      logWarning(`Skipping evaluator with invalid name/type in '${evalId}'`);
-      continue;
-    }
-    if (typeValue === "code") {
-      const script = asString(rawEvaluator.script);
-      if (!script) {
-        logWarning(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
-        continue;
-      }
-      const cwd = asString(rawEvaluator.cwd);
-      let resolvedCwd;
-      if (cwd) {
-        const resolved = await resolveFileReference(cwd, searchRoots);
-        if (resolved.resolvedPath) {
-          resolvedCwd = import_node_path2.default.resolve(resolved.resolvedPath);
-        } else {
-          logWarning(
-            `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
-            resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => `  Tried: ${attempt}`) : void 0
-          );
-        }
-      } else {
-        resolvedCwd = searchRoots[0];
-      }
-      evaluators.push({
-        name,
-        type: "code",
-        script,
-        cwd,
-        resolvedCwd
-      });
+  const attempted = [];
+  const seen = /* @__PURE__ */ new Set();
+  for (const candidate of potentialPaths) {
+    const absoluteCandidate = import_node_path7.default.resolve(candidate);
+    if (seen.has(absoluteCandidate)) {
       continue;
     }
-    const prompt = asString(rawEvaluator.prompt);
-    let promptPath;
-    if (prompt) {
-      const resolved = await resolveFileReference(prompt, searchRoots);
-      if (resolved.resolvedPath) {
-        promptPath = import_node_path2.default.resolve(resolved.resolvedPath);
-      } else {
-        logWarning(
-          `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
-          resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => `  Tried: ${attempt}`) : void 0
-        );
-      }
+    seen.add(absoluteCandidate);
+    attempted.push(absoluteCandidate);
+    if (await fileExists2(absoluteCandidate)) {
+      return { displayPath, resolvedPath: absoluteCandidate, attempted };
     }
-    const model = asString(rawEvaluator.model);
-    evaluators.push({
-      name,
-      type: "llm_judge",
-      prompt,
-      promptPath
-    });
-  }
-  return evaluators.length > 0 ? evaluators : void 0;
-}
-function coerceEvaluator(candidate, contextId) {
-  if (typeof candidate !== "string") {
-    return void 0;
-  }
-  if (isEvaluatorKind(candidate)) {
-    return candidate;
-  }
-  logWarning(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
-  return void 0;
-}
-function logWarning(message, details) {
-  if (details && details.length > 0) {
-    const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW}Warning: ${message}
-${detailBlock}${ANSI_RESET}`);
-  } else {
-    console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
   }
+  return { displayPath, attempted };
 }
 // src/evaluation/providers/ax.ts
@@ -989,9 +1150,8 @@ function buildChatPrompt(request) {
 }
 function resolveSystemContent(request) {
   const systemSegments = [];
-  const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
-  if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
-    systemSegments.push(metadataSystemPrompt.trim());
+  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
+    systemSegments.push(request.systemPrompt.trim());
   } else {
     systemSegments.push(DEFAULT_SYSTEM_PROMPT);
   }
@@ -1242,9 +1402,9 @@ var GeminiProvider = class {
 // src/evaluation/providers/cli.ts
 var import_node_child_process = require("child_process");
-var import_promises3 = __toESM(require("fs/promises"), 1);
+var import_promises7 = __toESM(require("fs/promises"), 1);
 var import_node_os = __toESM(require("os"), 1);
-var import_node_path3 = __toESM(require("path"), 1);
+var import_node_path8 = __toESM(require("path"), 1);
 var import_node_util = require("util");
 var execAsync = (0, import_node_util.promisify)(import_node_child_process.exec);
 var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
@@ -1341,7 +1501,7 @@ var CliProvider = class {
       const errorMsg = error instanceof Error ? error.message : String(error);
       throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
     } finally {
-      await import_promises3.default.unlink(filePath).catch(() => {
+      await import_promises7.default.unlink(filePath).catch(() => {
       });
     }
   }
@@ -1423,7 +1583,7 @@ function normalizeInputFiles(inputFiles) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path3.default.resolve(inputFile);
+    const absolutePath = import_node_path8.default.resolve(inputFile);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -1437,7 +1597,7 @@ function formatFileList(files, template) {
   const formatter = template ?? "{path}";
   return files.map((filePath) => {
     const escapedPath = shellEscape(filePath);
-    const escapedName = shellEscape(import_node_path3.default.basename(filePath));
+    const escapedName = shellEscape(import_node_path8.default.basename(filePath));
     return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
   }).join(" ");
 }
@@ -1461,7 +1621,7 @@ function generateOutputFilePath(evalCaseId) {
   const safeEvalId = evalCaseId || "unknown";
   const timestamp = Date.now();
   const random = Math.random().toString(36).substring(2, 9);
-  return import_node_path3.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
+  return import_node_path8.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
 }
 function formatTimeoutSuffix(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) {
@@ -1475,9 +1635,9 @@ function formatTimeoutSuffix(timeoutMs) {
 var import_node_child_process2 = require("child_process");
 var import_node_crypto = require("crypto");
 var import_node_fs3 = require("fs");
-var import_promises4 = require("fs/promises");
+var import_promises8 = require("fs/promises");
 var import_node_os2 = require("os");
-var import_node_path5 = __toESM(require("path"), 1);
+var import_node_path10 = __toESM(require("path"), 1);
 var import_node_util2 = require("util");
 // src/evaluation/providers/codex-log-tracker.ts
@@ -1534,7 +1694,7 @@ function subscribeToCodexLogEntries(listener) {
 }
 // src/evaluation/providers/preread.ts
-var import_node_path4 = __toESM(require("path"), 1);
+var import_node_path9 = __toESM(require("path"), 1);
 function buildPromptDocument(request, inputFiles, options) {
   const parts = [];
   const guidelineFiles = collectGuidelineFiles(
@@ -1559,7 +1719,7 @@ function normalizeInputFiles2(inputFiles) {
   }
   const deduped = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path4.default.resolve(inputFile);
+    const absolutePath = import_node_path9.default.resolve(inputFile);
     if (!deduped.has(absolutePath)) {
       deduped.set(absolutePath, absolutePath);
     }
@@ -1572,14 +1732,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path4.default.resolve(inputFile);
+    const absolutePath = import_node_path9.default.resolve(inputFile);
     if (overrides?.has(absolutePath)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
       }
       continue;
     }
-    const normalized = absolutePath.split(import_node_path4.default.sep).join("/");
+    const normalized = absolutePath.split(import_node_path9.default.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -1594,7 +1754,7 @@ function collectInputFiles(inputFiles) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path4.default.resolve(inputFile);
+    const absolutePath = import_node_path9.default.resolve(inputFile);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -1606,7 +1766,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = import_node_path4.default.basename(absolutePath);
+    const fileName = import_node_path9.default.basename(absolutePath);
     const fileUri = pathToFileUri(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -1626,7 +1786,7 @@ ${buildList(inputFiles).join("\n")}.`);
   return sections.join("\n");
 }
 function pathToFileUri(filePath) {
-  const absolutePath = import_node_path4.default.isAbsolute(filePath) ? filePath : import_node_path4.default.resolve(filePath);
+  const absolutePath = import_node_path9.default.isAbsolute(filePath) ? filePath : import_node_path9.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -1664,8 +1824,8 @@ var CodexProvider = class {
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
       const promptContent = buildPromptDocument(request, inputFiles);
-      const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
-      await (0, import_promises4.writeFile)(promptFile, promptContent, "utf8");
+      const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
+      await (0, import_promises8.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
       const cwd = this.resolveCwd(workspaceRoot);
       const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -1714,7 +1874,7 @@ var CodexProvider = class {
     if (!this.config.cwd) {
       return workspaceRoot;
     }
-    return import_node_path5.default.resolve(this.config.cwd);
+    return import_node_path10.default.resolve(this.config.cwd);
   }
   buildCodexArgs() {
     const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
@@ -1748,11 +1908,11 @@ var CodexProvider = class {
     }
   }
   async createWorkspace() {
-    return await (0, import_promises4.mkdtemp)(import_node_path5.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
+    return await (0, import_promises8.mkdtemp)(import_node_path10.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
-      await (0, import_promises4.rm)(workspaceRoot, { recursive: true, force: true });
+      await (0, import_promises8.rm)(workspaceRoot, { recursive: true, force: true });
     } catch {
     }
   }
@@ -1762,9 +1922,9 @@ var CodexProvider = class {
       return void 0;
     }
     if (this.config.logDir) {
-      return import_node_path5.default.resolve(this.config.logDir);
+      return import_node_path10.default.resolve(this.config.logDir);
     }
-    return import_node_path5.default.join(process.cwd(), ".agentv", "logs", "codex");
+    return import_node_path10.default.join(process.cwd(), ".agentv", "logs", "codex");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -1772,13 +1932,13 @@ var CodexProvider = class {
       return void 0;
     }
     try {
-      await (0, import_promises4.mkdir)(logDir, { recursive: true });
+      await (0, import_promises8.mkdir)(logDir, { recursive: true });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = import_node_path5.default.join(logDir, buildLogFilename(request, this.targetName));
+    const filePath = import_node_path10.default.join(logDir, buildLogFilename(request, this.targetName));
     try {
       const logger = await CodexStreamLogger.create({
         filePath,
@@ -1993,9 +2153,9 @@ function tryParseJsonValue(rawLine) {
 async function locateExecutable(candidate) {
   const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
   if (includesPathSeparator) {
-    const resolved = import_node_path5.default.isAbsolute(candidate) ? candidate : import_node_path5.default.resolve(candidate);
+    const resolved = import_node_path10.default.isAbsolute(candidate) ? candidate : import_node_path10.default.resolve(candidate);
     const executablePath = await ensureWindowsExecutableVariant(resolved);
-    await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
+    await (0, import_promises8.access)(executablePath, import_node_fs3.constants.F_OK);
     return executablePath;
   }
   const locator = process.platform === "win32" ? "where" : "which";
@@ -2005,7 +2165,7 @@ async function locateExecutable(candidate) {
     const preferred = selectExecutableCandidate(lines);
     if (preferred) {
       const executablePath = await ensureWindowsExecutableVariant(preferred);
-      await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
+      await (0, import_promises8.access)(executablePath, import_node_fs3.constants.F_OK);
       return executablePath;
     }
   } catch {
@@ -2039,7 +2199,7 @@ async function ensureWindowsExecutableVariant(candidate) {
   for (const ext of extensions) {
     const withExtension = `${candidate}${ext}`;
     try {
-      await (0, import_promises4.access)(withExtension, import_node_fs3.constants.F_OK);
+      await (0, import_promises8.access)(withExtension, import_node_fs3.constants.F_OK);
       return withExtension;
     } catch {
     }
@@ -2851,7 +3011,7 @@ function resolveOptionalNumberArray(source, description) {
 }
 // src/evaluation/providers/vscode.ts
-var import_node_path6 = __toESM(require("path"), 1);
+var import_node_path11 = __toESM(require("path"), 1);
 var import_subagent = require("subagent");
 var VSCodeProvider = class {
   id;
@@ -2964,6 +3124,9 @@ var VSCodeProvider = class {
 };
 function buildPromptDocument2(request, attachments, guidelinePatterns) {
   const parts = [];
+  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
+    parts.push(request.systemPrompt.trim());
+  }
   const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
   const attachmentFiles = collectAttachmentFiles(attachments);
   const nonGuidelineAttachments = attachmentFiles.filter(
@@ -2981,7 +3144,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = import_node_path6.default.basename(absolutePath);
+    const fileName = import_node_path11.default.basename(absolutePath);
     const fileUri = pathToFileUri2(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -3006,8 +3169,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path6.default.resolve(attachment);
-    const normalized = absolutePath.split(import_node_path6.default.sep).join("/");
+    const absolutePath = import_node_path11.default.resolve(attachment);
+    const normalized = absolutePath.split(import_node_path11.default.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -3022,7 +3185,7 @@ function collectAttachmentFiles(attachments) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path6.default.resolve(attachment);
+    const absolutePath = import_node_path11.default.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -3030,7 +3193,7 @@ function collectAttachmentFiles(attachments) {
   return Array.from(unique.values());
 }
 function pathToFileUri2(filePath) {
-  const absolutePath = import_node_path6.default.isAbsolute(filePath) ? filePath : import_node_path6.default.resolve(filePath);
+  const absolutePath = import_node_path11.default.isAbsolute(filePath) ? filePath : import_node_path11.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -3043,7 +3206,7 @@ function normalizeAttachments(attachments) {
   }
   const deduped = /* @__PURE__ */ new Set();
   for (const attachment of attachments) {
-    deduped.add(import_node_path6.default.resolve(attachment));
+    deduped.add(import_node_path11.default.resolve(attachment));
   }
   return Array.from(deduped);
 }
@@ -3052,7 +3215,7 @@ function mergeAttachments(all) {
   for (const list of all) {
     if (!list) continue;
     for (const inputFile of list) {
-      deduped.add(import_node_path6.default.resolve(inputFile));
+      deduped.add(import_node_path11.default.resolve(inputFile));
     }
   }
   return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -3098,9 +3261,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 var import_node_fs4 = require("fs");
-var import_promises5 = require("fs/promises");
-var import_node_path7 = __toESM(require("path"), 1);
-var import_yaml2 = require("yaml");
+var import_promises9 = require("fs/promises");
+var import_node_path12 = __toESM(require("path"), 1);
+var import_yaml3 = require("yaml");
 // src/evaluation/providers/types.ts
 var AGENT_PROVIDER_KINDS = [
@@ -3161,19 +3324,19 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises5.access)(filePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises9.access)(filePath, import_node_fs4.constants.F_OK);
     return true;
   } catch {
     return false;
   }
 }
 async function readTargetDefinitions(filePath) {
-  const absolutePath = import_node_path7.default.resolve(filePath);
+  const absolutePath = import_node_path12.default.resolve(filePath);
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises5.readFile)(absolutePath, "utf8");
-  const parsed = (0, import_yaml2.parse)(raw);
+  const raw = await (0, import_promises9.readFile)(absolutePath, "utf8");
+  const parsed = (0, import_yaml3.parse)(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
   }
@@ -3216,18 +3379,34 @@ function resolveAndCreateProvider(definition, env = process.env) {
 }
 // src/evaluation/evaluators.ts
-var import_node_crypto2 = require("crypto");
+var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
+Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
+Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
+[[ ## expected_outcome ## ]]
+{{expected_outcome}}
+[[ ## question ## ]]
+{{question}}
+[[ ## reference_answer ## ]]
+{{reference_answer}}
+[[ ## candidate_answer ## ]]
+{{candidate_answer}}`;
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
   maxOutputTokens;
   temperature;
-  customPrompt;
+  evaluatorTemplate;
   constructor(options) {
     this.resolveJudgeProvider = options.resolveJudgeProvider;
     this.maxOutputTokens = options.maxOutputTokens;
     this.temperature = options.temperature;
-    this.customPrompt = options.customPrompt;
+    this.evaluatorTemplate = options.evaluatorTemplate;
   }
   async evaluate(context) {
     const judgeProvider = await this.resolveJudgeProvider(context);
@@ -3237,26 +3416,21 @@ var LlmJudgeEvaluator = class {
     return this.evaluateWithPrompt(context, judgeProvider);
   }
   async evaluateWithPrompt(context, judgeProvider) {
-    const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context.evalCase);
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
-    let prompt = buildQualityPrompt(context.evalCase, context.candidate, formattedQuestion);
-    let systemPrompt = context.systemPrompt ?? this.customPrompt ?? buildSystemPrompt(hasReferenceAnswer);
-    if (systemPrompt && hasTemplateVariables(systemPrompt)) {
-      const variables = {
-        input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
-        output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
-        candidate_answer: context.candidate,
-        reference_answer: context.evalCase.reference_answer ?? "",
-        expected_outcome: context.evalCase.expected_outcome,
-        question: formattedQuestion
-      };
-      prompt = substituteVariables(systemPrompt, variables);
-      systemPrompt = buildSystemPrompt(hasReferenceAnswer);
-    }
-    const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
+    const variables = {
+      input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
+      output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
+      candidate_answer: context.candidate.trim(),
+      reference_answer: (context.evalCase.reference_answer ?? "").trim(),
+      expected_outcome: context.evalCase.expected_outcome.trim(),
+      question: formattedQuestion.trim()
+    };
+    const systemPrompt = buildOutputSchema();
+    const evaluatorTemplate = context.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
+    const userPrompt = substituteVariables(evaluatorTemplate, variables);
     const response = await judgeProvider.invoke({
-      question: prompt,
-      metadata,
+      question: userPrompt,
+      systemPrompt,
       evalCaseId: context.evalCase.id,
       attempt: context.attempt,
       maxOutputTokens: this.maxOutputTokens,
@@ -3269,11 +3443,9 @@ var LlmJudgeEvaluator = class {
     const reasoning = parsed.reasoning ?? response.reasoning;
     const expectedAspectCount = Math.max(hits.length + misses.length, 1);
     const evaluatorRawRequest = {
-      id: (0, import_node_crypto2.randomUUID)(),
-      provider: judgeProvider.id,
-      prompt,
-      target: context.target.name,
-      ...systemPrompt !== void 0 && { systemPrompt }
+      userPrompt,
+      systemPrompt,
+      target: judgeProvider.targetName
     };
     return {
       score,
@@ -3285,20 +3457,8 @@ var LlmJudgeEvaluator = class {
     };
   }
 };
-function buildSystemPrompt(hasReferenceAnswer) {
-  const basePrompt = [
-    "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
-    ""
-  ];
-  if (hasReferenceAnswer) {
-    basePrompt.push(
-      "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
-      ""
-    );
-  }
-  basePrompt.push(
-    "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
-    "",
+function buildOutputSchema() {
+  return [
     "You must respond with a single JSON object matching this schema:",
     "",
     "{",
@@ -3307,30 +3467,7 @@ function buildSystemPrompt(hasReferenceAnswer) {
     '  "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
     '  "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
     "}"
-  );
-  return basePrompt.join("\n");
-}
-function buildQualityPrompt(evalCase, candidate, question) {
-  const parts = [
-    "[[ ## expected_outcome ## ]]",
-    evalCase.expected_outcome.trim(),
-    "",
-    "[[ ## question ## ]]",
-    question.trim(),
-    ""
-  ];
-  if (hasNonEmptyReferenceAnswer(evalCase)) {
-    parts.push(
-      "[[ ## reference_answer ## ]]",
-      evalCase.reference_answer.trim(),
-      ""
-    );
-  }
-  parts.push(
-    "[[ ## candidate_answer ## ]]",
-    candidate.trim()
-  );
-  return parts.join("\n");
+  ].join("\n");
 }
 function clampScore(value) {
   if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -3412,9 +3549,6 @@ function extractJsonBlob(text) {
 function isNonEmptyString(value) {
   return typeof value === "string" && value.trim().length > 0;
 }
-function hasNonEmptyReferenceAnswer(evalCase) {
-  return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
-}
 var CodeEvaluator = class {
   kind = "code";
   script;
@@ -3520,19 +3654,16 @@ function parseJsonSafe(payload) {
     return void 0;
   }
 }
-function hasTemplateVariables(text) {
-  return /\$\{[a-zA-Z0-9_]+\}/.test(text);
-}
 function substituteVariables(template, variables) {
-  return template.replace(/\$\{([a-zA-Z0-9_]+)\}/g, (match, varName) => {
+  return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
     return variables[varName] ?? match;
   });
 }
 // src/evaluation/orchestrator.ts
-var import_node_crypto3 = require("crypto");
-var import_promises6 = require("fs/promises");
-var import_node_path8 = __toESM(require("path"), 1);
+var import_node_crypto2 = require("crypto");
+var import_promises10 = require("fs/promises");
+var import_node_path13 = __toESM(require("path"), 1);
 // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
 var Node = class {
@@ -4095,6 +4226,7 @@ async function evaluateCandidate(options) {
     }
   }
   return {
+    timestamp: completedAt.toISOString(),
     eval_id: evalCase.id,
     dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
@@ -4102,14 +4234,12 @@ async function evaluateCandidate(options) {
     hits: score.hits,
     misses: score.misses,
     candidate_answer: candidate,
-    expected_aspect_count: score.expectedAspectCount,
     target: target.name,
-    timestamp: completedAt.toISOString(),
     reasoning: score.reasoning,
     raw_aspects: score.rawAspects,
     agent_provider_request: agentProviderRequest,
     lm_provider_request: lmProviderRequest,
-    evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
+    evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
     evaluator_results: evaluatorResults
   };
 }
@@ -4186,7 +4316,7 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_raw_request: score2.evaluatorRawRequest
+          evaluator_provider_request: score2.evaluatorRawRequest
         });
         continue;
       }
@@ -4213,7 +4343,7 @@ async function runEvaluatorList(options) {
           hits: score2.hits,
           misses: score2.misses,
           reasoning: score2.reasoning,
-          evaluator_raw_request: score2.evaluatorRawRequest
+          evaluator_provider_request: score2.evaluatorRawRequest
         });
         continue;
       }
@@ -4266,7 +4396,7 @@ async function runLlmJudgeEvaluator(options) {
     promptInputs,
     now,
     judgeProvider,
-    systemPrompt: customPrompt,
+    evaluatorTemplateOverride: customPrompt,
     evaluator: config
   });
 }
@@ -4307,22 +4437,22 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
 async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
-  const filePath = import_node_path8.default.resolve(directory, filename);
-  await (0, import_promises6.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
+  const filePath = import_node_path13.default.resolve(directory, filename);
+  await (0, import_promises10.mkdir)(import_node_path13.default.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
     question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await (0, import_promises6.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await (0, import_promises10.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {
     return "prompt";
   }
   const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
-  return sanitized.length > 0 ? sanitized : (0, import_node_crypto3.randomUUID)();
+  return sanitized.length > 0 ? sanitized : (0, import_node_crypto2.randomUUID)();
 }
 async function invokeProvider(provider, options) {
   const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -4378,6 +4508,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
     }
   }
   return {
+    timestamp: timestamp.toISOString(),
     eval_id: evalCase.id,
     dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
@@ -4385,9 +4516,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
     hits: [],
     misses: [`Error: ${message}`],
     candidate_answer: `Error occurred: ${message}`,
-    expected_aspect_count: 0,
     target: targetName,
-    timestamp: timestamp.toISOString(),
     raw_aspects: [],
     agent_provider_request: agentProviderRequest,
     lm_provider_request: lmProviderRequest,
@@ -4395,7 +4524,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
   };
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
-  const hash = (0, import_node_crypto3.createHash)("sha256");
+  const hash = (0, import_node_crypto2.createHash)("sha256");
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);