npm - @agentv/core - Versions diffs - 2.1.0 → 2.2.0 - Mend

@agentv/core 2.1.0 → 2.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/evaluation/validation/index.cjs +0 -11
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +0 -11
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +458 -211
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +8 -2
package/dist/index.d.ts +8 -2
package/dist/index.js +405 -159
package/dist/index.js.map +1 -1
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -146,8 +146,8 @@ function mergeExecutionMetrics(summary, metrics) {
 }
 // src/evaluation/yaml-parser.ts
-import { readFile as readFile5 } from "node:fs/promises";
-import path6 from "node:path";
+import { readFile as readFile6 } from "node:fs/promises";
+import path7 from "node:path";
 import { parse as parse2 } from "yaml";
 // src/evaluation/loaders/config-loader.ts
@@ -257,7 +257,6 @@ async function resolveFileReference2(rawValue, searchRoots) {
 }
 // src/evaluation/loaders/config-loader.ts
-var SCHEMA_CONFIG_V2 = "agentv-config-v2";
 var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 async function loadConfig(evalFilePath, repoRoot) {
@@ -275,13 +274,6 @@ async function loadConfig(evalFilePath, repoRoot) {
         continue;
       }
       const config = parsed;
-      const schema = config.$schema;
-      if (schema !== SCHEMA_CONFIG_V2) {
-        const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
-Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
-        logWarning(message);
-        continue;
-      }
       const guidelinePatterns = config.guideline_patterns;
       if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
         logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -390,7 +382,8 @@ var ANSI_YELLOW3 = "\x1B[33m";
 var ANSI_RESET3 = "\x1B[0m";
 async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
-  const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
+  const executionObject = isJsonObject2(execution) ? execution : void 0;
+  const candidateEvaluators = (executionObject ? executionObject.evaluators : void 0) ?? rawEvalCase.evaluators ?? globalExecution?.evaluators;
   if (candidateEvaluators === void 0) {
     return void 0;
   }
@@ -933,6 +926,11 @@ function isValidFieldAggregationType(value) {
   return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
 }
+// src/evaluation/loaders/jsonl-parser.ts
+import { readFile as readFile4 } from "node:fs/promises";
+import path5 from "node:path";
+import { parse as parseYaml } from "yaml";
 // src/evaluation/loaders/message-processor.ts
 import { readFile as readFile3 } from "node:fs/promises";
 import path4 from "node:path";
@@ -1193,28 +1191,271 @@ async function processExpectedMessages(options) {
   return segments;
 }
-// src/evaluation/formatting/prompt-builder.ts
-import { readFile as readFile4 } from "node:fs/promises";
-import path5 from "node:path";
+// src/evaluation/loaders/jsonl-parser.ts
 var ANSI_YELLOW5 = "\x1B[33m";
+var ANSI_RED = "\x1B[31m";
 var ANSI_RESET5 = "\x1B[0m";
+function detectFormat(filePath) {
+  const ext = path5.extname(filePath).toLowerCase();
+  if (ext === ".jsonl") return "jsonl";
+  if (ext === ".yaml" || ext === ".yml") return "yaml";
+  throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
+}
+async function loadSidecarMetadata(jsonlPath, verbose) {
+  const dir = path5.dirname(jsonlPath);
+  const base = path5.basename(jsonlPath, ".jsonl");
+  const sidecarPath = path5.join(dir, `${base}.yaml`);
+  if (!await fileExists2(sidecarPath)) {
+    if (verbose) {
+      logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
+    }
+    return {};
+  }
+  try {
+    const content = await readFile4(sidecarPath, "utf8");
+    const parsed = parseYaml(content);
+    if (!isJsonObject(parsed)) {
+      logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
+      return {};
+    }
+    return {
+      description: asString4(parsed.description),
+      dataset: asString4(parsed.dataset),
+      execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
+      evaluator: parsed.evaluator
+    };
+  } catch (error) {
+    logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error.message}`);
+    return {};
+  }
+}
+function parseJsonlContent(content, filePath) {
+  const lines = content.split("\n");
+  const cases = [];
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i].trim();
+    if (line === "") continue;
+    try {
+      const parsed = JSON.parse(line);
+      if (!isJsonObject(parsed)) {
+        throw new Error("Expected JSON object");
+      }
+      cases.push(parsed);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
+  File: ${filePath}`);
+    }
+  }
+  return cases;
+}
+async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
+  const verbose = options?.verbose ?? false;
+  const evalIdFilter = options?.evalId;
+  const absoluteTestPath = path5.resolve(evalFilePath);
+  const repoRootPath = resolveToAbsolutePath(repoRoot);
+  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
+  const config = await loadConfig(absoluteTestPath, repoRootPath);
+  const guidelinePatterns = config?.guideline_patterns;
+  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
+  const rawFile = await readFile4(absoluteTestPath, "utf8");
+  const rawCases = parseJsonlContent(rawFile, evalFilePath);
+  const fallbackDataset = path5.basename(absoluteTestPath, ".jsonl") || "eval";
+  const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
+  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
+  const globalExecution = sidecar.execution;
+  if (verbose) {
+    console.log(`
+[JSONL Dataset: ${evalFilePath}]`);
+    console.log(`  Cases: ${rawCases.length}`);
+    console.log(`  Dataset name: ${datasetName}`);
+    if (sidecar.description) {
+      console.log(`  Description: ${sidecar.description}`);
+    }
+  }
+  const results = [];
+  for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
+    const evalcase = rawCases[lineIndex];
+    const lineNumber = lineIndex + 1;
+    const id = asString4(evalcase.id);
+    if (evalIdFilter && id !== evalIdFilter) {
+      continue;
+    }
+    const conversationId = asString4(evalcase.conversation_id);
+    const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
+    const inputMessagesValue = evalcase.input_messages;
+    const expectedMessagesValue = evalcase.expected_messages;
+    if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
+      logError(
+        `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
+      );
+      continue;
+    }
+    const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
+    const inputMessages = inputMessagesValue.filter(
+      (msg) => isTestMessage(msg)
+    );
+    const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
+    if (hasExpectedMessages && expectedMessages.length === 0) {
+      logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
+      continue;
+    }
+    const guidelinePaths = [];
+    const inputTextParts = [];
+    const inputSegments = await processMessages({
+      messages: inputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    });
+    const outputSegments = hasExpectedMessages ? await processExpectedMessages({
+      messages: expectedMessages,
+      searchRoots,
+      repoRootPath,
+      verbose
+    }) : [];
+    let referenceAnswer = "";
+    if (outputSegments.length > 0) {
+      const lastMessage = outputSegments[outputSegments.length - 1];
+      const content = lastMessage.content;
+      const toolCalls = lastMessage.tool_calls;
+      if (typeof content === "string") {
+        referenceAnswer = content;
+      } else if (content !== void 0 && content !== null) {
+        referenceAnswer = JSON.stringify(content, null, 2);
+      } else if (toolCalls !== void 0 && toolCalls !== null) {
+        referenceAnswer = JSON.stringify(toolCalls, null, 2);
+      }
+    }
+    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
+    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
+    const mergedExecution = caseExecution ?? globalExecution;
+    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    let evaluators;
+    try {
+      evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
+      continue;
+    }
+    const inlineRubrics = evalcase.rubrics;
+    if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
+      const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
+        if (typeof rubric === "string") {
+          return {
+            id: `rubric-${index + 1}`,
+            description: rubric,
+            weight: 1,
+            required: true
+          };
+        }
+        return {
+          id: asString4(rubric.id) ?? `rubric-${index + 1}`,
+          description: asString4(rubric.description) ?? "",
+          weight: typeof rubric.weight === "number" ? rubric.weight : 1,
+          required: typeof rubric.required === "boolean" ? rubric.required : true
+        };
+      }).filter((r) => r.description.length > 0);
+      if (rubricItems.length > 0) {
+        const rubricEvaluator = {
+          name: "rubric",
+          type: "llm_judge",
+          rubrics: rubricItems
+        };
+        evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
+      }
+    }
+    const userFilePaths = [];
+    for (const segment of inputSegments) {
+      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
+        userFilePaths.push(segment.resolvedPath);
+      }
+    }
+    const allFilePaths = [
+      ...guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath)),
+      ...userFilePaths
+    ];
+    const testCase = {
+      id,
+      dataset: datasetName,
+      conversation_id: conversationId,
+      question,
+      input_messages: inputMessages,
+      input_segments: inputSegments,
+      expected_messages: outputSegments,
+      reference_answer: referenceAnswer,
+      guideline_paths: guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath)),
+      guideline_patterns: guidelinePatterns,
+      file_paths: allFilePaths,
+      expected_outcome: outcome,
+      evaluator: evalCaseEvaluatorKind,
+      evaluators
+    };
+    if (verbose) {
+      console.log(`
+[Eval Case: ${id}]`);
+      if (testCase.guideline_paths.length > 0) {
+        console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
+        for (const guidelinePath of testCase.guideline_paths) {
+          console.log(`    - ${guidelinePath}`);
+        }
+      } else {
+        console.log("  No guidelines found");
+      }
+    }
+    results.push(testCase);
+  }
+  return results;
+}
+function asString4(value) {
+  return typeof value === "string" ? value : void 0;
+}
+function logWarning4(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.warn(`${ANSI_YELLOW5}Warning: ${message}
+${detailBlock}${ANSI_RESET5}`);
+  } else {
+    console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+  }
+}
+function logError(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.error(`${ANSI_RED}Error: ${message}
+${detailBlock}${ANSI_RESET5}`);
+  } else {
+    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
+  }
+}
+// src/evaluation/formatting/prompt-builder.ts
+import { readFile as readFile5 } from "node:fs/promises";
+import path6 from "node:path";
+var ANSI_YELLOW6 = "\x1B[33m";
+var ANSI_RESET6 = "\x1B[0m";
 async function buildPromptInputs(testCase, mode = "lm") {
   const guidelineParts = [];
   for (const rawPath of testCase.guideline_paths) {
-    const absolutePath = path5.resolve(rawPath);
+    const absolutePath = path6.resolve(rawPath);
     if (!await fileExists2(absolutePath)) {
-      logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
+      logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
       continue;
     }
     try {
-      const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
+      const content = (await readFile5(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
       guidelineParts.push({
         content,
         isFile: true,
-        displayPath: path5.basename(absolutePath)
+        displayPath: path6.basename(absolutePath)
       });
     } catch (error) {
-      logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
+      logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
     }
   }
   const guidelines = formatFileContents(guidelineParts);
@@ -1238,9 +1479,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
             messageSegments.push({ type: "text", value: segment });
           }
         } else if (isJsonObject(segment)) {
-          const type = asString4(segment.type);
+          const type = asString5(segment.type);
           if (type === "file") {
-            const value = asString4(segment.value);
+            const value = asString5(segment.value);
             if (!value) continue;
             if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
               messageSegments.push({ type: "guideline_ref", path: value });
@@ -1251,7 +1492,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
               messageSegments.push({ type: "file", text: fileText, path: value });
             }
           } else if (type === "text") {
-            const textValue = asString4(segment.value);
+            const textValue = asString5(segment.value);
             if (textValue && textValue.trim().length > 0) {
               messageSegments.push({ type: "text", value: textValue });
             }
@@ -1405,21 +1646,21 @@ ${guidelineContent.trim()}`);
   }
   return chatPrompt.length > 0 ? chatPrompt : void 0;
 }
-function asString4(value) {
+function asString5(value) {
   return typeof value === "string" ? value : void 0;
 }
-function logWarning4(message) {
-  console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+function logWarning5(message) {
+  console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
 }
 // src/evaluation/yaml-parser.ts
-var ANSI_YELLOW6 = "\x1B[33m";
-var ANSI_RED = "\x1B[31m";
-var ANSI_RESET6 = "\x1B[0m";
+var ANSI_YELLOW7 = "\x1B[33m";
+var ANSI_RED2 = "\x1B[31m";
+var ANSI_RESET7 = "\x1B[0m";
 async function readTestSuiteMetadata(testFilePath) {
   try {
-    const absolutePath = path6.resolve(testFilePath);
-    const content = await readFile5(absolutePath, "utf8");
+    const absolutePath = path7.resolve(testFilePath);
+    const content = await readFile6(absolutePath, "utf8");
     const parsed = parse2(content);
     if (!isJsonObject(parsed)) {
       return {};
@@ -1430,21 +1671,25 @@ async function readTestSuiteMetadata(testFilePath) {
   }
 }
 async function loadEvalCases(evalFilePath, repoRoot, options) {
+  const format = detectFormat(evalFilePath);
+  if (format === "jsonl") {
+    return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
+  }
   const verbose = options?.verbose ?? false;
   const evalIdFilter = options?.evalId;
-  const absoluteTestPath = path6.resolve(evalFilePath);
+  const absoluteTestPath = path7.resolve(evalFilePath);
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const config = await loadConfig(absoluteTestPath, repoRootPath);
   const guidelinePatterns = config?.guideline_patterns;
-  const rawFile = await readFile5(absoluteTestPath, "utf8");
+  const rawFile = await readFile6(absoluteTestPath, "utf8");
   const parsed = parse2(rawFile);
   if (!isJsonObject(parsed)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
-  const datasetNameFromSuite = asString5(suite.dataset)?.trim();
-  const fallbackDataset = path6.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
+  const datasetNameFromSuite = asString6(suite.dataset)?.trim();
+  const fallbackDataset = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
   const rawTestcases = suite.evalcases;
   if (!Array.isArray(rawTestcases)) {
@@ -1452,24 +1697,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   }
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
   const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
-  const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
+  const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
-      logWarning5("Skipping invalid eval case entry (expected object)");
+      logWarning6("Skipping invalid eval case entry (expected object)");
       continue;
     }
     const evalcase = rawEvalcase;
-    const id = asString5(evalcase.id);
+    const id = asString6(evalcase.id);
     if (evalIdFilter && id !== evalIdFilter) {
       continue;
     }
-    const conversationId = asString5(evalcase.conversation_id);
-    const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
+    const conversationId = asString6(evalcase.conversation_id);
+    const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logError(
+      logError2(
         `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
       );
       continue;
@@ -1480,7 +1725,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     );
     const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
     if (hasExpectedMessages && expectedMessages.length === 0) {
-      logError(`No valid expected message found for eval case: ${id}`);
+      logError2(`No valid expected message found for eval case: ${id}`);
       continue;
     }
     const guidelinePaths = [];
@@ -1521,7 +1766,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
-      logError(`Skipping eval case '${id}': ${message}`);
+      logError2(`Skipping eval case '${id}': ${message}`);
       continue;
     }
     const inlineRubrics = evalcase.rubrics;
@@ -1536,8 +1781,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
           };
         }
         return {
-          id: asString5(rubric.id) ?? `rubric-${index + 1}`,
-          description: asString5(rubric.description) ?? "",
+          id: asString6(rubric.id) ?? `rubric-${index + 1}`,
+          description: asString6(rubric.description) ?? "",
           weight: typeof rubric.weight === "number" ? rubric.weight : 1,
           required: typeof rubric.required === "boolean" ? rubric.required : true
         };
@@ -1558,7 +1803,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       }
     }
     const allFilePaths = [
-      ...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
+      ...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
       ...userFilePaths
     ];
     const testCase = {
@@ -1570,7 +1815,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       input_segments: inputSegments,
       expected_messages: outputSegments,
       reference_answer: referenceAnswer,
-      guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
+      guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       expected_outcome: outcome,
@@ -1593,25 +1838,25 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   }
   return results;
 }
-function asString5(value) {
+function asString6(value) {
   return typeof value === "string" ? value : void 0;
 }
-function logWarning5(message, details) {
+function logWarning6(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW6}Warning: ${message}
-${detailBlock}${ANSI_RESET6}`);
+    console.warn(`${ANSI_YELLOW7}Warning: ${message}
+${detailBlock}${ANSI_RESET7}`);
   } else {
-    console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
+    console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
   }
 }
-function logError(message, details) {
+function logError2(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.error(`${ANSI_RED}Error: ${message}
-${detailBlock}${ANSI_RESET6}`);
+    console.error(`${ANSI_RED2}Error: ${message}
+${detailBlock}${ANSI_RESET7}`);
   } else {
-    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
+    console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
   }
 }
@@ -1954,7 +2199,7 @@ import { randomUUID } from "node:crypto";
 import { createWriteStream } from "node:fs";
 import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
-import path8 from "node:path";
+import path9 from "node:path";
 // src/evaluation/providers/claude-code-log-tracker.ts
 var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
@@ -2010,7 +2255,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
 }
 // src/evaluation/providers/preread.ts
-import path7 from "node:path";
+import path8 from "node:path";
 function buildPromptDocument(request, inputFiles, options) {
   const parts = [];
   const guidelineFiles = collectGuidelineFiles(
@@ -2033,7 +2278,7 @@ function normalizeInputFiles(inputFiles) {
   }
   const deduped = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = path7.resolve(inputFile);
+    const absolutePath = path8.resolve(inputFile);
     if (!deduped.has(absolutePath)) {
       deduped.set(absolutePath, absolutePath);
     }
@@ -2046,14 +2291,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = path7.resolve(inputFile);
+    const absolutePath = path8.resolve(inputFile);
     if (overrides?.has(absolutePath)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
       }
       continue;
     }
-    const normalized = absolutePath.split(path7.sep).join("/");
+    const normalized = absolutePath.split(path8.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -2068,7 +2313,7 @@ function collectInputFiles(inputFiles) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = path7.resolve(inputFile);
+    const absolutePath = path8.resolve(inputFile);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -2080,7 +2325,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = path7.basename(absolutePath);
+    const fileName = path8.basename(absolutePath);
     const fileUri = pathToFileUri(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -2100,7 +2345,7 @@ ${buildList(inputFiles).join("\n")}.`);
   return sections.join("\n");
 }
 function pathToFileUri(filePath) {
-  const absolutePath = path7.isAbsolute(filePath) ? filePath : path7.resolve(filePath);
+  const absolutePath = path8.isAbsolute(filePath) ? filePath : path8.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -2137,7 +2382,7 @@ var ClaudeCodeProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptFile = path8.join(workspaceRoot, PROMPT_FILENAME);
+      const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
       await writeFile(promptFile, request.question, "utf8");
       const args = this.buildClaudeCodeArgs(request.question, inputFiles);
       const cwd = this.resolveCwd();
@@ -2185,7 +2430,7 @@ var ClaudeCodeProvider = class {
     if (!this.config.cwd) {
       return process.cwd();
     }
-    return path8.resolve(this.config.cwd);
+    return path9.resolve(this.config.cwd);
   }
   buildClaudeCodeArgs(prompt, inputFiles) {
     const args = [];
@@ -2242,7 +2487,7 @@ ${filesContext}`;
     }
   }
   async createWorkspace() {
-    return await mkdtemp(path8.join(tmpdir(), WORKSPACE_PREFIX));
+    return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
@@ -2256,9 +2501,9 @@ ${filesContext}`;
       return void 0;
     }
     if (this.config.logDir) {
-      return path8.resolve(this.config.logDir);
+      return path9.resolve(this.config.logDir);
     }
-    return path8.join(process.cwd(), ".agentv", "logs", "claude-code");
+    return path9.join(process.cwd(), ".agentv", "logs", "claude-code");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -2272,7 +2517,7 @@ ${filesContext}`;
       console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = path8.join(logDir, buildLogFilename(request, this.targetName));
+    const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
     try {
       const logger = await ClaudeCodeStreamLogger.create({
         filePath,
@@ -2677,10 +2922,10 @@ function escapeShellArg(arg) {
 }
 async function defaultClaudeCodeRunner(options) {
   const tempId = randomUUID();
-  const stdoutFile = path8.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
-  const stderrFile = path8.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
-  const exitFile = path8.join(tmpdir(), `agentv-cc-${tempId}-exit`);
-  const pidFile = path8.join(tmpdir(), `agentv-cc-${tempId}-pid`);
+  const stdoutFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
+  const stderrFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
+  const exitFile = path9.join(tmpdir(), `agentv-cc-${tempId}-exit`);
+  const pidFile = path9.join(tmpdir(), `agentv-cc-${tempId}-pid`);
   try {
     return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
   } finally {
@@ -2720,8 +2965,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
   let lastStdoutSize = 0;
   const readFileIfExists = async (filePath) => {
     try {
-      const { readFile: readFile7 } = await import("node:fs/promises");
-      return await readFile7(filePath, "utf8");
+      const { readFile: readFile8 } = await import("node:fs/promises");
+      return await readFile8(filePath, "utf8");
     } catch {
       return "";
     }
@@ -2796,7 +3041,7 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
 import { exec as execWithCallback } from "node:child_process";
 import fs from "node:fs/promises";
 import os from "node:os";
-import path9 from "node:path";
+import path10 from "node:path";
 import { promisify } from "node:util";
 import { z } from "zod";
 var ToolCallSchema = z.object({
@@ -3253,7 +3498,7 @@ function normalizeInputFiles2(inputFiles) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = path9.resolve(inputFile);
+    const absolutePath = path10.resolve(inputFile);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -3267,7 +3512,7 @@ function formatFileList(files, template) {
   const formatter = template ?? "{path}";
   return files.map((filePath) => {
     const escapedPath = shellEscape(filePath);
-    const escapedName = shellEscape(path9.basename(filePath));
+    const escapedName = shellEscape(path10.basename(filePath));
     return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
   }).join(" ");
 }
@@ -3291,7 +3536,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
   const safeEvalId = evalCaseId || "unknown";
   const timestamp = Date.now();
   const random = Math.random().toString(36).substring(2, 9);
-  return path9.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
+  return path10.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
 }
 function formatTimeoutSuffix2(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) {
@@ -3307,7 +3552,7 @@ import { randomUUID as randomUUID2 } from "node:crypto";
 import { constants as constants2, createWriteStream as createWriteStream2 } from "node:fs";
 import { access as access2, mkdir as mkdir2, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile2 } from "node:fs/promises";
 import { tmpdir as tmpdir2 } from "node:os";
-import path10 from "node:path";
+import path11 from "node:path";
 import { promisify as promisify2 } from "node:util";
 // src/evaluation/providers/codex-log-tracker.ts
@@ -3402,7 +3647,7 @@ var CodexProvider = class {
       const promptContent = `${systemPrompt}
 ${basePrompt}`;
-      const promptFile = path10.join(workspaceRoot, PROMPT_FILENAME2);
+      const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME2);
       await writeFile2(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
       const cwd = this.resolveCwd(workspaceRoot);
@@ -3452,7 +3697,7 @@ ${basePrompt}`;
     if (!this.config.cwd) {
       return workspaceRoot;
     }
-    return path10.resolve(this.config.cwd);
+    return path11.resolve(this.config.cwd);
   }
   buildCodexArgs() {
     const args = [
@@ -3494,7 +3739,7 @@ ${basePrompt}`;
     }
   }
   async createWorkspace() {
-    return await mkdtemp2(path10.join(tmpdir2(), WORKSPACE_PREFIX2));
+    return await mkdtemp2(path11.join(tmpdir2(), WORKSPACE_PREFIX2));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
@@ -3508,9 +3753,9 @@ ${basePrompt}`;
       return void 0;
     }
     if (this.config.logDir) {
-      return path10.resolve(this.config.logDir);
+      return path11.resolve(this.config.logDir);
     }
-    return path10.join(process.cwd(), ".agentv", "logs", "codex");
+    return path11.join(process.cwd(), ".agentv", "logs", "codex");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -3524,7 +3769,7 @@ ${basePrompt}`;
       console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = path10.join(logDir, buildLogFilename2(request, this.targetName));
+    const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
     try {
       const logger = await CodexStreamLogger.create({
         filePath,
@@ -3739,7 +3984,7 @@ function tryParseJsonValue2(rawLine) {
 async function locateExecutable(candidate) {
   const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
   if (includesPathSeparator) {
-    const resolved = path10.isAbsolute(candidate) ? candidate : path10.resolve(candidate);
+    const resolved = path11.isAbsolute(candidate) ? candidate : path11.resolve(candidate);
     const executablePath = await ensureWindowsExecutableVariant(resolved);
     await access2(executablePath, constants2.F_OK);
     return executablePath;
@@ -4252,7 +4497,7 @@ import { randomUUID as randomUUID3 } from "node:crypto";
 import { createWriteStream as createWriteStream3 } from "node:fs";
 import { mkdir as mkdir3, mkdtemp as mkdtemp3, rm as rm3, writeFile as writeFile3 } from "node:fs/promises";
 import { tmpdir as tmpdir3 } from "node:os";
-import path11 from "node:path";
+import path12 from "node:path";
 // src/evaluation/providers/pi-log-tracker.ts
 var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
@@ -4336,7 +4581,7 @@ var PiCodingAgentProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME3);
+      const promptFile = path12.join(workspaceRoot, PROMPT_FILENAME3);
       await writeFile3(promptFile, request.question, "utf8");
       const args = this.buildPiArgs(request.question, inputFiles);
       const cwd = this.resolveCwd(workspaceRoot);
@@ -4378,7 +4623,7 @@ var PiCodingAgentProvider = class {
     if (!this.config.cwd) {
       return workspaceRoot;
     }
-    return path11.resolve(this.config.cwd);
+    return path12.resolve(this.config.cwd);
   }
   buildPiArgs(prompt, inputFiles) {
     const args = [];
@@ -4467,7 +4712,7 @@ ${prompt}`;
     return env;
   }
   async createWorkspace() {
-    return await mkdtemp3(path11.join(tmpdir3(), WORKSPACE_PREFIX3));
+    return await mkdtemp3(path12.join(tmpdir3(), WORKSPACE_PREFIX3));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
@@ -4477,9 +4722,9 @@ ${prompt}`;
   }
   resolveLogDirectory() {
     if (this.config.logDir) {
-      return path11.resolve(this.config.logDir);
+      return path12.resolve(this.config.logDir);
     }
-    return path11.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+    return path12.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -4493,7 +4738,7 @@ ${prompt}`;
       console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = path11.join(logDir, buildLogFilename3(request, this.targetName));
+    const filePath = path12.join(logDir, buildLogFilename3(request, this.targetName));
     try {
       const logger = await PiStreamLogger.create({
         filePath,
@@ -4926,7 +5171,7 @@ async function defaultPiRunner(options) {
 }
 // src/evaluation/providers/vscode.ts
-import path12 from "node:path";
+import path13 from "node:path";
 import {
   dispatchAgentSession,
   dispatchBatchAgent,
@@ -5101,7 +5346,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = path12.basename(absolutePath);
+    const fileName = path13.basename(absolutePath);
     const fileUri = pathToFileUri2(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -5126,8 +5371,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = path12.resolve(attachment);
-    const normalized = absolutePath.split(path12.sep).join("/");
+    const absolutePath = path13.resolve(attachment);
+    const normalized = absolutePath.split(path13.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -5142,7 +5387,7 @@ function collectAttachmentFiles(attachments) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = path12.resolve(attachment);
+    const absolutePath = path13.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -5150,7 +5395,7 @@ function collectAttachmentFiles(attachments) {
   return Array.from(unique.values());
 }
 function pathToFileUri2(filePath) {
-  const absolutePath = path12.isAbsolute(filePath) ? filePath : path12.resolve(filePath);
+  const absolutePath = path13.isAbsolute(filePath) ? filePath : path13.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -5163,7 +5408,7 @@ function normalizeAttachments(attachments) {
   }
   const deduped = /* @__PURE__ */ new Set();
   for (const attachment of attachments) {
-    deduped.add(path12.resolve(attachment));
+    deduped.add(path13.resolve(attachment));
   }
   return Array.from(deduped);
 }
@@ -5172,7 +5417,7 @@ function mergeAttachments(all) {
   for (const list of all) {
     if (!list) continue;
     for (const inputFile of list) {
-      deduped.add(path12.resolve(inputFile));
+      deduped.add(path13.resolve(inputFile));
     }
   }
   return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -5220,8 +5465,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 import { constants as constants3 } from "node:fs";
-import { access as access3, readFile as readFile6 } from "node:fs/promises";
-import path13 from "node:path";
+import { access as access3, readFile as readFile7 } from "node:fs/promises";
+import path14 from "node:path";
 import { parse as parse3 } from "yaml";
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -5258,11 +5503,11 @@ async function fileExists3(filePath) {
   }
 }
 async function readTargetDefinitions(filePath) {
-  const absolutePath = path13.resolve(filePath);
+  const absolutePath = path14.resolve(filePath);
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await readFile6(absolutePath, "utf8");
+  const raw = await readFile7(absolutePath, "utf8");
   const parsed = parse3(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -5469,15 +5714,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
   });
 }
 async function execShellWithStdin(command, stdinPayload, options = {}) {
-  const { mkdir: mkdir4, readFile: readFile7, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
+  const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
   const { tmpdir: tmpdir4 } = await import("node:os");
-  const path15 = await import("node:path");
+  const path16 = await import("node:path");
   const { randomUUID: randomUUID4 } = await import("node:crypto");
-  const dir = path15.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
+  const dir = path16.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
   await mkdir4(dir, { recursive: true });
-  const stdinPath = path15.join(dir, "stdin.txt");
-  const stdoutPath = path15.join(dir, "stdout.txt");
-  const stderrPath = path15.join(dir, "stderr.txt");
+  const stdinPath = path16.join(dir, "stdin.txt");
+  const stdoutPath = path16.join(dir, "stdout.txt");
+  const stderrPath = path16.join(dir, "stderr.txt");
   await writeFile4(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn4 } = await import("node:child_process");
@@ -5507,8 +5752,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
         resolve(code ?? 0);
       });
     });
-    const stdout = (await readFile7(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
-    const stderr = (await readFile7(stderrPath, "utf8")).replace(/\r\n/g, "\n");
+    const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
+    const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
     return { stdout, stderr, exitCode };
   } finally {
     await rm4(dir, { recursive: true, force: true });
@@ -5780,7 +6025,7 @@ var CodeEvaluator = class {
       outputMessages: context.outputMessages ?? null,
       guidelineFiles: context.evalCase.guideline_paths,
       inputFiles: context.evalCase.file_paths.filter(
-        (path15) => !context.evalCase.guideline_paths.includes(path15)
+        (path16) => !context.evalCase.guideline_paths.includes(path16)
       ),
       inputMessages: context.evalCase.input_messages,
       traceSummary: context.traceSummary ?? null,
@@ -6539,115 +6784,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path15, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path15);
-    const expectedValue = resolvePath(expectedData, path15);
+    const { path: path16, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path16);
+    const expectedValue = resolvePath(expectedData, path16);
     if (expectedValue === void 0) {
       return {
-        path: path15,
+        path: path16,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path15}: no expected value`
+        message: `${path16}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path15,
+          path: path16,
           score: 0,
           weight,
           hit: false,
-          message: `${path15} (required, missing)`
+          message: `${path16} (required, missing)`
         };
       }
       return {
-        path: path15,
+        path: path16,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path15}: optional field missing`
+        message: `${path16}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path15, candidateValue, expectedValue, weight);
+        return this.compareExact(path16, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path15,
+          path16,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path15, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path16, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path15,
+          path: path16,
           score: 0,
           weight,
           hit: false,
-          message: `${path15}: unknown match type "${match}"`
+          message: `${path16}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path15, candidateValue, expectedValue, weight) {
+  compareExact(path16, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path15,
+        path: path16,
         score: 1,
         weight,
         hit: true,
-        message: path15
+        message: path16
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path15,
+        path: path16,
         score: 0,
         weight,
         hit: false,
-        message: `${path15} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path16} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path15,
+      path: path16,
       score: 0,
       weight,
       hit: false,
-      message: `${path15} (value mismatch)`
+      message: `${path16} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path15, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path16, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber(candidateValue);
     const expectedNum = toNumber(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path15,
+        path: path16,
         score: 0,
         weight,
         hit: false,
-        message: `${path15} (non-numeric value)`
+        message: `${path16} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path15,
+        path: path16,
         score: 0,
         weight,
         hit: false,
-        message: `${path15} (invalid numeric value)`
+        message: `${path16} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -6660,61 +6905,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path15,
+        path: path16,
         score: 1,
         weight,
         hit: true,
-        message: `${path15} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path16} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path15,
+      path: path16,
       score: 0,
       weight,
       hit: false,
-      message: `${path15} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path16} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path15, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path16, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path15,
+        path: path16,
         score: 0,
         weight,
         hit: false,
-        message: `${path15} (unparseable candidate date)`
+        message: `${path16} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path15,
+        path: path16,
         score: 0,
         weight,
         hit: false,
-        message: `${path15} (unparseable expected date)`
+        message: `${path16} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path15,
+        path: path16,
         score: 1,
         weight,
         hit: true,
-        message: path15
+        message: path16
       };
     }
     return {
-      path: path15,
+      path: path16,
       score: 0,
       weight,
       hit: false,
-      message: `${path15} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path16} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -6754,11 +6999,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path15) {
-  if (!path15 || !obj) {
+function resolvePath(obj, path16) {
+  if (!path16 || !obj) {
     return void 0;
   }
-  const parts = path15.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path16.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -7194,7 +7439,7 @@ var ToolTrajectoryEvaluator = class {
 // src/evaluation/orchestrator.ts
 import { createHash } from "node:crypto";
-import path14 from "node:path";
+import path15 from "node:path";
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
 var Node = class {
@@ -7993,7 +8238,7 @@ async function runEvaluatorList(options) {
         });
       }
       if (evaluator.type === "composite") {
-        const evalFileDir = evalCase.guideline_paths[0] ? path14.dirname(evalCase.guideline_paths[0]) : process.cwd();
+        const evalFileDir = evalCase.guideline_paths[0] ? path15.dirname(evalCase.guideline_paths[0]) : process.cwd();
         const createEvaluator = (memberConfig) => {
           switch (memberConfig.type) {
             case "llm_judge":
@@ -8567,6 +8812,7 @@ export {
   createAgentKernel,
   createProvider,
   deepEqual,
+  detectFormat,
   ensureVSCodeSubagents,
   executeScript,
   explorationRatio,