npm - @agentv/core - Versions diffs - 2.1.0 → 2.2.0 - Mend

@agentv/core 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/evaluation/validation/index.cjs +0 -11
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +0 -11
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +458 -211
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +8 -2
package/dist/index.d.ts +8 -2
package/dist/index.js +405 -159
package/dist/index.js.map +1 -1
package/package.json +1 -1

package/dist/index.cjs CHANGED Viewed

@@ -53,6 +53,7 @@ __export(index_exports, {
   createAgentKernel: () => createAgentKernel,
   createProvider: () => createProvider,
   deepEqual: () => deepEqual,
+  detectFormat: () => detectFormat,
   ensureVSCodeSubagents: () => ensureVSCodeSubagents,
   executeScript: () => executeScript,
   explorationRatio: () => explorationRatio,
@@ -226,9 +227,9 @@ function mergeExecutionMetrics(summary, metrics) {
 }
 // src/evaluation/yaml-parser.ts
-var import_promises6 = require("fs/promises");
-var import_node_path6 = __toESM(require("path"), 1);
-var import_yaml2 = require("yaml");
+var import_promises7 = require("fs/promises");
+var import_node_path7 = __toESM(require("path"), 1);
+var import_yaml3 = require("yaml");
 // src/evaluation/loaders/config-loader.ts
 var import_promises2 = require("fs/promises");
@@ -337,7 +338,6 @@ async function resolveFileReference(rawValue, searchRoots) {
 }
 // src/evaluation/loaders/config-loader.ts
-var SCHEMA_CONFIG_V2 = "agentv-config-v2";
 var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 async function loadConfig(evalFilePath, repoRoot) {
@@ -355,13 +355,6 @@ async function loadConfig(evalFilePath, repoRoot) {
         continue;
       }
       const config = parsed;
-      const schema = config.$schema;
-      if (schema !== SCHEMA_CONFIG_V2) {
-        const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
-Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
-        logWarning(message);
-        continue;
-      }
       const guidelinePatterns = config.guideline_patterns;
       if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
         logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -470,7 +463,8 @@ var ANSI_YELLOW3 = "\x1B[33m";
 var ANSI_RESET3 = "\x1B[0m";
 async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
   const execution = rawEvalCase.execution;
-  const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
+  const executionObject = isJsonObject2(execution) ? execution : void 0;
+  const candidateEvaluators = (executionObject ? executionObject.evaluators : void 0) ?? rawEvalCase.evaluators ?? globalExecution?.evaluators;
   if (candidateEvaluators === void 0) {
     return void 0;
   }
@@ -1013,6 +1007,11 @@ function isValidFieldAggregationType(value) {
   return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
 }
+// src/evaluation/loaders/jsonl-parser.ts
+var import_promises5 = require("fs/promises");
+var import_node_path5 = __toESM(require("path"), 1);
+var import_yaml2 = require("yaml");
 // src/evaluation/loaders/message-processor.ts
 var import_promises4 = require("fs/promises");
 var import_node_path4 = __toESM(require("path"), 1);
@@ -1273,28 +1272,271 @@ async function processExpectedMessages(options) {
   return segments;
 }
-// src/evaluation/formatting/prompt-builder.ts
-var import_promises5 = require("fs/promises");
-var import_node_path5 = __toESM(require("path"), 1);
+// src/evaluation/loaders/jsonl-parser.ts
 var ANSI_YELLOW5 = "\x1B[33m";
+var ANSI_RED = "\x1B[31m";
 var ANSI_RESET5 = "\x1B[0m";
+function detectFormat(filePath) {
+  const ext = import_node_path5.default.extname(filePath).toLowerCase();
+  if (ext === ".jsonl") return "jsonl";
+  if (ext === ".yaml" || ext === ".yml") return "yaml";
+  throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
+}
+async function loadSidecarMetadata(jsonlPath, verbose) {
+  const dir = import_node_path5.default.dirname(jsonlPath);
+  const base = import_node_path5.default.basename(jsonlPath, ".jsonl");
+  const sidecarPath = import_node_path5.default.join(dir, `${base}.yaml`);
+  if (!await fileExists(sidecarPath)) {
+    if (verbose) {
+      logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
+    }
+    return {};
+  }
+  try {
+    const content = await (0, import_promises5.readFile)(sidecarPath, "utf8");
+    const parsed = (0, import_yaml2.parse)(content);
+    if (!isJsonObject(parsed)) {
+      logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
+      return {};
+    }
+    return {
+      description: asString4(parsed.description),
+      dataset: asString4(parsed.dataset),
+      execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
+      evaluator: parsed.evaluator
+    };
+  } catch (error) {
+    logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error.message}`);
+    return {};
+  }
+}
+function parseJsonlContent(content, filePath) {
+  const lines = content.split("\n");
+  const cases = [];
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i].trim();
+    if (line === "") continue;
+    try {
+      const parsed = JSON.parse(line);
+      if (!isJsonObject(parsed)) {
+        throw new Error("Expected JSON object");
+      }
+      cases.push(parsed);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
+  File: ${filePath}`);
+    }
+  }
+  return cases;
+}
+async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
+  const verbose = options?.verbose ?? false;
+  const evalIdFilter = options?.evalId;
+  const absoluteTestPath = import_node_path5.default.resolve(evalFilePath);
+  const repoRootPath = resolveToAbsolutePath(repoRoot);
+  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
+  const config = await loadConfig(absoluteTestPath, repoRootPath);
+  const guidelinePatterns = config?.guideline_patterns;
+  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
+  const rawFile = await (0, import_promises5.readFile)(absoluteTestPath, "utf8");
+  const rawCases = parseJsonlContent(rawFile, evalFilePath);
+  const fallbackDataset = import_node_path5.default.basename(absoluteTestPath, ".jsonl") || "eval";
+  const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
+  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
+  const globalExecution = sidecar.execution;
+  if (verbose) {
+    console.log(`
+[JSONL Dataset: ${evalFilePath}]`);
+    console.log(`  Cases: ${rawCases.length}`);
+    console.log(`  Dataset name: ${datasetName}`);
+    if (sidecar.description) {
+      console.log(`  Description: ${sidecar.description}`);
+    }
+  }
+  const results = [];
+  for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
+    const evalcase = rawCases[lineIndex];
+    const lineNumber = lineIndex + 1;
+    const id = asString4(evalcase.id);
+    if (evalIdFilter && id !== evalIdFilter) {
+      continue;
+    }
+    const conversationId = asString4(evalcase.conversation_id);
+    const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
+    const inputMessagesValue = evalcase.input_messages;
+    const expectedMessagesValue = evalcase.expected_messages;
+    if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
+      logError(
+        `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
+      );
+      continue;
+    }
+    const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
+    const inputMessages = inputMessagesValue.filter(
+      (msg) => isTestMessage(msg)
+    );
+    const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
+    if (hasExpectedMessages && expectedMessages.length === 0) {
+      logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
+      continue;
+    }
+    const guidelinePaths = [];
+    const inputTextParts = [];
+    const inputSegments = await processMessages({
+      messages: inputMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    });
+    const outputSegments = hasExpectedMessages ? await processExpectedMessages({
+      messages: expectedMessages,
+      searchRoots,
+      repoRootPath,
+      verbose
+    }) : [];
+    let referenceAnswer = "";
+    if (outputSegments.length > 0) {
+      const lastMessage = outputSegments[outputSegments.length - 1];
+      const content = lastMessage.content;
+      const toolCalls = lastMessage.tool_calls;
+      if (typeof content === "string") {
+        referenceAnswer = content;
+      } else if (content !== void 0 && content !== null) {
+        referenceAnswer = JSON.stringify(content, null, 2);
+      } else if (toolCalls !== void 0 && toolCalls !== null) {
+        referenceAnswer = JSON.stringify(toolCalls, null, 2);
+      }
+    }
+    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
+    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
+    const mergedExecution = caseExecution ?? globalExecution;
+    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    let evaluators;
+    try {
+      evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
+      continue;
+    }
+    const inlineRubrics = evalcase.rubrics;
+    if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
+      const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
+        if (typeof rubric === "string") {
+          return {
+            id: `rubric-${index + 1}`,
+            description: rubric,
+            weight: 1,
+            required: true
+          };
+        }
+        return {
+          id: asString4(rubric.id) ?? `rubric-${index + 1}`,
+          description: asString4(rubric.description) ?? "",
+          weight: typeof rubric.weight === "number" ? rubric.weight : 1,
+          required: typeof rubric.required === "boolean" ? rubric.required : true
+        };
+      }).filter((r) => r.description.length > 0);
+      if (rubricItems.length > 0) {
+        const rubricEvaluator = {
+          name: "rubric",
+          type: "llm_judge",
+          rubrics: rubricItems
+        };
+        evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
+      }
+    }
+    const userFilePaths = [];
+    for (const segment of inputSegments) {
+      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
+        userFilePaths.push(segment.resolvedPath);
+      }
+    }
+    const allFilePaths = [
+      ...guidelinePaths.map((guidelinePath) => import_node_path5.default.resolve(guidelinePath)),
+      ...userFilePaths
+    ];
+    const testCase = {
+      id,
+      dataset: datasetName,
+      conversation_id: conversationId,
+      question,
+      input_messages: inputMessages,
+      input_segments: inputSegments,
+      expected_messages: outputSegments,
+      reference_answer: referenceAnswer,
+      guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path5.default.resolve(guidelinePath)),
+      guideline_patterns: guidelinePatterns,
+      file_paths: allFilePaths,
+      expected_outcome: outcome,
+      evaluator: evalCaseEvaluatorKind,
+      evaluators
+    };
+    if (verbose) {
+      console.log(`
+[Eval Case: ${id}]`);
+      if (testCase.guideline_paths.length > 0) {
+        console.log(`  Guidelines used: ${testCase.guideline_paths.length}`);
+        for (const guidelinePath of testCase.guideline_paths) {
+          console.log(`    - ${guidelinePath}`);
+        }
+      } else {
+        console.log("  No guidelines found");
+      }
+    }
+    results.push(testCase);
+  }
+  return results;
+}
+function asString4(value) {
+  return typeof value === "string" ? value : void 0;
+}
+function logWarning4(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.warn(`${ANSI_YELLOW5}Warning: ${message}
+${detailBlock}${ANSI_RESET5}`);
+  } else {
+    console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+  }
+}
+function logError(message, details) {
+  if (details && details.length > 0) {
+    const detailBlock = details.join("\n");
+    console.error(`${ANSI_RED}Error: ${message}
+${detailBlock}${ANSI_RESET5}`);
+  } else {
+    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
+  }
+}
+// src/evaluation/formatting/prompt-builder.ts
+var import_promises6 = require("fs/promises");
+var import_node_path6 = __toESM(require("path"), 1);
+var ANSI_YELLOW6 = "\x1B[33m";
+var ANSI_RESET6 = "\x1B[0m";
 async function buildPromptInputs(testCase, mode = "lm") {
   const guidelineParts = [];
   for (const rawPath of testCase.guideline_paths) {
-    const absolutePath = import_node_path5.default.resolve(rawPath);
+    const absolutePath = import_node_path6.default.resolve(rawPath);
     if (!await fileExists(absolutePath)) {
-      logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
+      logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
       continue;
     }
     try {
-      const content = (await (0, import_promises5.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
+      const content = (await (0, import_promises6.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
       guidelineParts.push({
         content,
         isFile: true,
-        displayPath: import_node_path5.default.basename(absolutePath)
+        displayPath: import_node_path6.default.basename(absolutePath)
       });
     } catch (error) {
-      logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
+      logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
     }
   }
   const guidelines = formatFileContents(guidelineParts);
@@ -1318,9 +1560,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
             messageSegments.push({ type: "text", value: segment });
           }
         } else if (isJsonObject(segment)) {
-          const type = asString4(segment.type);
+          const type = asString5(segment.type);
           if (type === "file") {
-            const value = asString4(segment.value);
+            const value = asString5(segment.value);
             if (!value) continue;
             if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
               messageSegments.push({ type: "guideline_ref", path: value });
@@ -1331,7 +1573,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
               messageSegments.push({ type: "file", text: fileText, path: value });
             }
           } else if (type === "text") {
-            const textValue = asString4(segment.value);
+            const textValue = asString5(segment.value);
             if (textValue && textValue.trim().length > 0) {
               messageSegments.push({ type: "text", value: textValue });
             }
@@ -1485,22 +1727,22 @@ ${guidelineContent.trim()}`);
   }
   return chatPrompt.length > 0 ? chatPrompt : void 0;
 }
-function asString4(value) {
+function asString5(value) {
   return typeof value === "string" ? value : void 0;
 }
-function logWarning4(message) {
-  console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
+function logWarning5(message) {
+  console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
 }
 // src/evaluation/yaml-parser.ts
-var ANSI_YELLOW6 = "\x1B[33m";
-var ANSI_RED = "\x1B[31m";
-var ANSI_RESET6 = "\x1B[0m";
+var ANSI_YELLOW7 = "\x1B[33m";
+var ANSI_RED2 = "\x1B[31m";
+var ANSI_RESET7 = "\x1B[0m";
 async function readTestSuiteMetadata(testFilePath) {
   try {
-    const absolutePath = import_node_path6.default.resolve(testFilePath);
-    const content = await (0, import_promises6.readFile)(absolutePath, "utf8");
-    const parsed = (0, import_yaml2.parse)(content);
+    const absolutePath = import_node_path7.default.resolve(testFilePath);
+    const content = await (0, import_promises7.readFile)(absolutePath, "utf8");
+    const parsed = (0, import_yaml3.parse)(content);
     if (!isJsonObject(parsed)) {
       return {};
     }
@@ -1510,21 +1752,25 @@ async function readTestSuiteMetadata(testFilePath) {
   }
 }
 async function loadEvalCases(evalFilePath, repoRoot, options) {
+  const format = detectFormat(evalFilePath);
+  if (format === "jsonl") {
+    return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
+  }
   const verbose = options?.verbose ?? false;
   const evalIdFilter = options?.evalId;
-  const absoluteTestPath = import_node_path6.default.resolve(evalFilePath);
+  const absoluteTestPath = import_node_path7.default.resolve(evalFilePath);
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
   const config = await loadConfig(absoluteTestPath, repoRootPath);
   const guidelinePatterns = config?.guideline_patterns;
-  const rawFile = await (0, import_promises6.readFile)(absoluteTestPath, "utf8");
-  const parsed = (0, import_yaml2.parse)(rawFile);
+  const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
+  const parsed = (0, import_yaml3.parse)(rawFile);
   if (!isJsonObject(parsed)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
-  const datasetNameFromSuite = asString5(suite.dataset)?.trim();
-  const fallbackDataset = import_node_path6.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
+  const datasetNameFromSuite = asString6(suite.dataset)?.trim();
+  const fallbackDataset = import_node_path7.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
   const rawTestcases = suite.evalcases;
   if (!Array.isArray(rawTestcases)) {
@@ -1532,24 +1778,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   }
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
   const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
-  const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
+  const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
   const results = [];
   for (const rawEvalcase of rawTestcases) {
     if (!isJsonObject(rawEvalcase)) {
-      logWarning5("Skipping invalid eval case entry (expected object)");
+      logWarning6("Skipping invalid eval case entry (expected object)");
       continue;
     }
     const evalcase = rawEvalcase;
-    const id = asString5(evalcase.id);
+    const id = asString6(evalcase.id);
     if (evalIdFilter && id !== evalIdFilter) {
       continue;
     }
-    const conversationId = asString5(evalcase.conversation_id);
-    const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
+    const conversationId = asString6(evalcase.conversation_id);
+    const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
     const inputMessagesValue = evalcase.input_messages;
     const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
-      logError(
+      logError2(
         `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
       );
       continue;
@@ -1560,7 +1806,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     );
     const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
     if (hasExpectedMessages && expectedMessages.length === 0) {
-      logError(`No valid expected message found for eval case: ${id}`);
+      logError2(`No valid expected message found for eval case: ${id}`);
       continue;
     }
     const guidelinePaths = [];
@@ -1601,7 +1847,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
-      logError(`Skipping eval case '${id}': ${message}`);
+      logError2(`Skipping eval case '${id}': ${message}`);
       continue;
     }
     const inlineRubrics = evalcase.rubrics;
@@ -1616,8 +1862,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
           };
         }
         return {
-          id: asString5(rubric.id) ?? `rubric-${index + 1}`,
-          description: asString5(rubric.description) ?? "",
+          id: asString6(rubric.id) ?? `rubric-${index + 1}`,
+          description: asString6(rubric.description) ?? "",
           weight: typeof rubric.weight === "number" ? rubric.weight : 1,
           required: typeof rubric.required === "boolean" ? rubric.required : true
         };
@@ -1638,7 +1884,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       }
     }
     const allFilePaths = [
-      ...guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
+      ...guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
       ...userFilePaths
     ];
     const testCase = {
@@ -1650,7 +1896,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
       input_segments: inputSegments,
       expected_messages: outputSegments,
       reference_answer: referenceAnswer,
-      guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
+      guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       expected_outcome: outcome,
@@ -1673,35 +1919,35 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
   }
   return results;
 }
-function asString5(value) {
+function asString6(value) {
   return typeof value === "string" ? value : void 0;
 }
-function logWarning5(message, details) {
+function logWarning6(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.warn(`${ANSI_YELLOW6}Warning: ${message}
-${detailBlock}${ANSI_RESET6}`);
+    console.warn(`${ANSI_YELLOW7}Warning: ${message}
+${detailBlock}${ANSI_RESET7}`);
   } else {
-    console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
+    console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
   }
 }
-function logError(message, details) {
+function logError2(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
-    console.error(`${ANSI_RED}Error: ${message}
-${detailBlock}${ANSI_RESET6}`);
+    console.error(`${ANSI_RED2}Error: ${message}
+${detailBlock}${ANSI_RESET7}`);
   } else {
-    console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
+    console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
   }
 }
 // src/evaluation/file-utils.ts
 var import_node_fs2 = require("fs");
-var import_promises7 = require("fs/promises");
-var import_node_path7 = __toESM(require("path"), 1);
+var import_promises8 = require("fs/promises");
+var import_node_path8 = __toESM(require("path"), 1);
 async function fileExists2(filePath) {
   try {
-    await (0, import_promises7.access)(filePath, import_node_fs2.constants.F_OK);
+    await (0, import_promises8.access)(filePath, import_node_fs2.constants.F_OK);
     return true;
   } catch {
     return false;
@@ -1711,22 +1957,22 @@ function normalizeLineEndings(content) {
   return content.replace(/\r\n/g, "\n");
 }
 async function readTextFile(filePath) {
-  const content = await (0, import_promises7.readFile)(filePath, "utf8");
+  const content = await (0, import_promises8.readFile)(filePath, "utf8");
   return normalizeLineEndings(content);
 }
 async function readJsonFile(filePath) {
-  const content = await (0, import_promises7.readFile)(filePath, "utf8");
+  const content = await (0, import_promises8.readFile)(filePath, "utf8");
   return JSON.parse(content);
 }
 async function findGitRoot(startPath) {
-  let currentDir = import_node_path7.default.dirname(import_node_path7.default.resolve(startPath));
-  const root = import_node_path7.default.parse(currentDir).root;
+  let currentDir = import_node_path8.default.dirname(import_node_path8.default.resolve(startPath));
+  const root = import_node_path8.default.parse(currentDir).root;
   while (currentDir !== root) {
-    const gitPath = import_node_path7.default.join(currentDir, ".git");
+    const gitPath = import_node_path8.default.join(currentDir, ".git");
     if (await fileExists2(gitPath)) {
       return currentDir;
     }
-    const parentDir = import_node_path7.default.dirname(currentDir);
+    const parentDir = import_node_path8.default.dirname(currentDir);
     if (parentDir === currentDir) {
       break;
     }
@@ -1737,8 +1983,8 @@ async function findGitRoot(startPath) {
 function buildDirectoryChain2(filePath, repoRoot) {
   const directories = [];
   const seen = /* @__PURE__ */ new Set();
-  const boundary = import_node_path7.default.resolve(repoRoot);
-  let current = import_node_path7.default.resolve(import_node_path7.default.dirname(filePath));
+  const boundary = import_node_path8.default.resolve(repoRoot);
+  let current = import_node_path8.default.resolve(import_node_path8.default.dirname(filePath));
   while (current !== void 0) {
     if (!seen.has(current)) {
       directories.push(current);
@@ -1747,7 +1993,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
     if (current === boundary) {
       break;
     }
-    const parent = import_node_path7.default.dirname(current);
+    const parent = import_node_path8.default.dirname(current);
     if (parent === current) {
       break;
     }
@@ -1761,16 +2007,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
 function buildSearchRoots2(evalPath, repoRoot) {
   const uniqueRoots = [];
   const addRoot = (root) => {
-    const normalized = import_node_path7.default.resolve(root);
+    const normalized = import_node_path8.default.resolve(root);
     if (!uniqueRoots.includes(normalized)) {
       uniqueRoots.push(normalized);
     }
   };
-  let currentDir = import_node_path7.default.dirname(evalPath);
+  let currentDir = import_node_path8.default.dirname(evalPath);
   let reachedBoundary = false;
   while (!reachedBoundary) {
     addRoot(currentDir);
-    const parentDir = import_node_path7.default.dirname(currentDir);
+    const parentDir = import_node_path8.default.dirname(currentDir);
     if (currentDir === repoRoot || parentDir === currentDir) {
       reachedBoundary = true;
     } else {
@@ -1788,16 +2034,16 @@ function trimLeadingSeparators2(value) {
 async function resolveFileReference2(rawValue, searchRoots) {
   const displayPath = trimLeadingSeparators2(rawValue);
   const potentialPaths = [];
-  if (import_node_path7.default.isAbsolute(rawValue)) {
-    potentialPaths.push(import_node_path7.default.normalize(rawValue));
+  if (import_node_path8.default.isAbsolute(rawValue)) {
+    potentialPaths.push(import_node_path8.default.normalize(rawValue));
   }
   for (const base of searchRoots) {
-    potentialPaths.push(import_node_path7.default.resolve(base, displayPath));
+    potentialPaths.push(import_node_path8.default.resolve(base, displayPath));
   }
   const attempted = [];
   const seen = /* @__PURE__ */ new Set();
   for (const candidate of potentialPaths) {
-    const absoluteCandidate = import_node_path7.default.resolve(candidate);
+    const absoluteCandidate = import_node_path8.default.resolve(candidate);
     if (seen.has(absoluteCandidate)) {
       continue;
     }
@@ -2147,9 +2393,9 @@ async function withRetry(fn, retryConfig, signal) {
 var import_node_child_process = require("child_process");
 var import_node_crypto = require("crypto");
 var import_node_fs3 = require("fs");
-var import_promises8 = require("fs/promises");
+var import_promises9 = require("fs/promises");
 var import_node_os = require("os");
-var import_node_path9 = __toESM(require("path"), 1);
+var import_node_path10 = __toESM(require("path"), 1);
 // src/evaluation/providers/claude-code-log-tracker.ts
 var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
@@ -2205,7 +2451,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
 }
 // src/evaluation/providers/preread.ts
-var import_node_path8 = __toESM(require("path"), 1);
+var import_node_path9 = __toESM(require("path"), 1);
 function buildPromptDocument(request, inputFiles, options) {
   const parts = [];
   const guidelineFiles = collectGuidelineFiles(
@@ -2228,7 +2474,7 @@ function normalizeInputFiles(inputFiles) {
   }
   const deduped = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path8.default.resolve(inputFile);
+    const absolutePath = import_node_path9.default.resolve(inputFile);
     if (!deduped.has(absolutePath)) {
       deduped.set(absolutePath, absolutePath);
     }
@@ -2241,14 +2487,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path8.default.resolve(inputFile);
+    const absolutePath = import_node_path9.default.resolve(inputFile);
     if (overrides?.has(absolutePath)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
       }
       continue;
     }
-    const normalized = absolutePath.split(import_node_path8.default.sep).join("/");
+    const normalized = absolutePath.split(import_node_path9.default.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -2263,7 +2509,7 @@ function collectInputFiles(inputFiles) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path8.default.resolve(inputFile);
+    const absolutePath = import_node_path9.default.resolve(inputFile);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -2275,7 +2521,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = import_node_path8.default.basename(absolutePath);
+    const fileName = import_node_path9.default.basename(absolutePath);
     const fileUri = pathToFileUri(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -2295,7 +2541,7 @@ ${buildList(inputFiles).join("\n")}.`);
   return sections.join("\n");
 }
 function pathToFileUri(filePath) {
-  const absolutePath = import_node_path8.default.isAbsolute(filePath) ? filePath : import_node_path8.default.resolve(filePath);
+  const absolutePath = import_node_path9.default.isAbsolute(filePath) ? filePath : import_node_path9.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -2332,8 +2578,8 @@ var ClaudeCodeProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptFile = import_node_path9.default.join(workspaceRoot, PROMPT_FILENAME);
-      await (0, import_promises8.writeFile)(promptFile, request.question, "utf8");
+      const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
+      await (0, import_promises9.writeFile)(promptFile, request.question, "utf8");
       const args = this.buildClaudeCodeArgs(request.question, inputFiles);
       const cwd = this.resolveCwd();
       const result = await this.executeClaudeCode(args, cwd, request.signal, logger);
@@ -2380,7 +2626,7 @@ var ClaudeCodeProvider = class {
     if (!this.config.cwd) {
       return process.cwd();
     }
-    return import_node_path9.default.resolve(this.config.cwd);
+    return import_node_path10.default.resolve(this.config.cwd);
   }
   buildClaudeCodeArgs(prompt, inputFiles) {
     const args = [];
@@ -2437,11 +2683,11 @@ ${filesContext}`;
     }
   }
   async createWorkspace() {
-    return await (0, import_promises8.mkdtemp)(import_node_path9.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
+    return await (0, import_promises9.mkdtemp)(import_node_path10.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
-      await (0, import_promises8.rm)(workspaceRoot, { recursive: true, force: true });
+      await (0, import_promises9.rm)(workspaceRoot, { recursive: true, force: true });
     } catch {
     }
   }
@@ -2451,9 +2697,9 @@ ${filesContext}`;
       return void 0;
     }
     if (this.config.logDir) {
-      return import_node_path9.default.resolve(this.config.logDir);
+      return import_node_path10.default.resolve(this.config.logDir);
     }
-    return import_node_path9.default.join(process.cwd(), ".agentv", "logs", "claude-code");
+    return import_node_path10.default.join(process.cwd(), ".agentv", "logs", "claude-code");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -2461,13 +2707,13 @@ ${filesContext}`;
       return void 0;
     }
     try {
-      await (0, import_promises8.mkdir)(logDir, { recursive: true });
+      await (0, import_promises9.mkdir)(logDir, { recursive: true });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = import_node_path9.default.join(logDir, buildLogFilename(request, this.targetName));
+    const filePath = import_node_path10.default.join(logDir, buildLogFilename(request, this.targetName));
     try {
       const logger = await ClaudeCodeStreamLogger.create({
         filePath,
@@ -2872,16 +3118,16 @@ function escapeShellArg(arg) {
 }
 async function defaultClaudeCodeRunner(options) {
   const tempId = (0, import_node_crypto.randomUUID)();
-  const stdoutFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stdout`);
-  const stderrFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stderr`);
-  const exitFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-exit`);
-  const pidFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-pid`);
+  const stdoutFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stdout`);
+  const stderrFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stderr`);
+  const exitFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-exit`);
+  const pidFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-pid`);
   try {
     return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
   } finally {
     for (const file of [stdoutFile, stderrFile, exitFile, pidFile]) {
       try {
-        await (0, import_promises8.rm)(file, { force: true });
+        await (0, import_promises9.rm)(file, { force: true });
       } catch {
       }
     }
@@ -2915,8 +3161,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
   let lastStdoutSize = 0;
   const readFileIfExists = async (filePath) => {
     try {
-      const { readFile: readFile8 } = await import("fs/promises");
-      return await readFile8(filePath, "utf8");
+      const { readFile: readFile9 } = await import("fs/promises");
+      return await readFile9(filePath, "utf8");
     } catch {
       return "";
     }
@@ -2989,9 +3235,9 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
 // src/evaluation/providers/cli.ts
 var import_node_child_process2 = require("child_process");
-var import_promises9 = __toESM(require("fs/promises"), 1);
+var import_promises10 = __toESM(require("fs/promises"), 1);
 var import_node_os2 = __toESM(require("os"), 1);
-var import_node_path10 = __toESM(require("path"), 1);
+var import_node_path11 = __toESM(require("path"), 1);
 var import_node_util = require("util");
 var import_zod = require("zod");
 var ToolCallSchema = import_zod.z.object({
@@ -3360,7 +3606,7 @@ var CliProvider = class {
       throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
     } finally {
       if (!this.keepTempFiles) {
-        await import_promises9.default.unlink(filePath).catch(() => {
+        await import_promises10.default.unlink(filePath).catch(() => {
         });
       }
     }
@@ -3448,7 +3694,7 @@ function normalizeInputFiles2(inputFiles) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const inputFile of inputFiles) {
-    const absolutePath = import_node_path10.default.resolve(inputFile);
+    const absolutePath = import_node_path11.default.resolve(inputFile);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -3462,7 +3708,7 @@ function formatFileList(files, template) {
   const formatter = template ?? "{path}";
   return files.map((filePath) => {
     const escapedPath = shellEscape(filePath);
-    const escapedName = shellEscape(import_node_path10.default.basename(filePath));
+    const escapedName = shellEscape(import_node_path11.default.basename(filePath));
     return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
   }).join(" ");
 }
@@ -3486,7 +3732,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
   const safeEvalId = evalCaseId || "unknown";
   const timestamp = Date.now();
   const random = Math.random().toString(36).substring(2, 9);
-  return import_node_path10.default.join(import_node_os2.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
+  return import_node_path11.default.join(import_node_os2.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
 }
 function formatTimeoutSuffix2(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) {
@@ -3500,9 +3746,9 @@ function formatTimeoutSuffix2(timeoutMs) {
 var import_node_child_process3 = require("child_process");
 var import_node_crypto2 = require("crypto");
 var import_node_fs4 = require("fs");
-var import_promises10 = require("fs/promises");
+var import_promises11 = require("fs/promises");
 var import_node_os3 = require("os");
-var import_node_path11 = __toESM(require("path"), 1);
+var import_node_path12 = __toESM(require("path"), 1);
 var import_node_util2 = require("util");
 // src/evaluation/providers/codex-log-tracker.ts
@@ -3597,8 +3843,8 @@ var CodexProvider = class {
       const promptContent = `${systemPrompt}
 ${basePrompt}`;
-      const promptFile = import_node_path11.default.join(workspaceRoot, PROMPT_FILENAME2);
-      await (0, import_promises10.writeFile)(promptFile, promptContent, "utf8");
+      const promptFile = import_node_path12.default.join(workspaceRoot, PROMPT_FILENAME2);
+      await (0, import_promises11.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
       const cwd = this.resolveCwd(workspaceRoot);
       const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -3647,7 +3893,7 @@ ${basePrompt}`;
     if (!this.config.cwd) {
       return workspaceRoot;
     }
-    return import_node_path11.default.resolve(this.config.cwd);
+    return import_node_path12.default.resolve(this.config.cwd);
   }
   buildCodexArgs() {
     const args = [
@@ -3689,11 +3935,11 @@ ${basePrompt}`;
     }
   }
   async createWorkspace() {
-    return await (0, import_promises10.mkdtemp)(import_node_path11.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
+    return await (0, import_promises11.mkdtemp)(import_node_path12.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
-      await (0, import_promises10.rm)(workspaceRoot, { recursive: true, force: true });
+      await (0, import_promises11.rm)(workspaceRoot, { recursive: true, force: true });
     } catch {
     }
   }
@@ -3703,9 +3949,9 @@ ${basePrompt}`;
       return void 0;
     }
     if (this.config.logDir) {
-      return import_node_path11.default.resolve(this.config.logDir);
+      return import_node_path12.default.resolve(this.config.logDir);
     }
-    return import_node_path11.default.join(process.cwd(), ".agentv", "logs", "codex");
+    return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "codex");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -3713,13 +3959,13 @@ ${basePrompt}`;
       return void 0;
     }
     try {
-      await (0, import_promises10.mkdir)(logDir, { recursive: true });
+      await (0, import_promises11.mkdir)(logDir, { recursive: true });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = import_node_path11.default.join(logDir, buildLogFilename2(request, this.targetName));
+    const filePath = import_node_path12.default.join(logDir, buildLogFilename2(request, this.targetName));
     try {
       const logger = await CodexStreamLogger.create({
         filePath,
@@ -3934,9 +4180,9 @@ function tryParseJsonValue2(rawLine) {
 async function locateExecutable(candidate) {
   const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
   if (includesPathSeparator) {
-    const resolved = import_node_path11.default.isAbsolute(candidate) ? candidate : import_node_path11.default.resolve(candidate);
+    const resolved = import_node_path12.default.isAbsolute(candidate) ? candidate : import_node_path12.default.resolve(candidate);
     const executablePath = await ensureWindowsExecutableVariant(resolved);
-    await (0, import_promises10.access)(executablePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises11.access)(executablePath, import_node_fs4.constants.F_OK);
     return executablePath;
   }
   const locator = process.platform === "win32" ? "where" : "which";
@@ -3946,7 +4192,7 @@ async function locateExecutable(candidate) {
     const preferred = selectExecutableCandidate(lines);
     if (preferred) {
       const executablePath = await ensureWindowsExecutableVariant(preferred);
-      await (0, import_promises10.access)(executablePath, import_node_fs4.constants.F_OK);
+      await (0, import_promises11.access)(executablePath, import_node_fs4.constants.F_OK);
       return executablePath;
     }
   } catch {
@@ -3980,7 +4226,7 @@ async function ensureWindowsExecutableVariant(candidate) {
   for (const ext of extensions) {
     const withExtension = `${candidate}${ext}`;
     try {
-      await (0, import_promises10.access)(withExtension, import_node_fs4.constants.F_OK);
+      await (0, import_promises11.access)(withExtension, import_node_fs4.constants.F_OK);
       return withExtension;
     } catch {
     }
@@ -4445,9 +4691,9 @@ function extractToolCalls2(content) {
 var import_node_child_process4 = require("child_process");
 var import_node_crypto3 = require("crypto");
 var import_node_fs5 = require("fs");
-var import_promises11 = require("fs/promises");
+var import_promises12 = require("fs/promises");
 var import_node_os4 = require("os");
-var import_node_path12 = __toESM(require("path"), 1);
+var import_node_path13 = __toESM(require("path"), 1);
 // src/evaluation/providers/pi-log-tracker.ts
 var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
@@ -4531,8 +4777,8 @@ var PiCodingAgentProvider = class {
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const promptFile = import_node_path12.default.join(workspaceRoot, PROMPT_FILENAME3);
-      await (0, import_promises11.writeFile)(promptFile, request.question, "utf8");
+      const promptFile = import_node_path13.default.join(workspaceRoot, PROMPT_FILENAME3);
+      await (0, import_promises12.writeFile)(promptFile, request.question, "utf8");
       const args = this.buildPiArgs(request.question, inputFiles);
       const cwd = this.resolveCwd(workspaceRoot);
       const result = await this.executePi(args, cwd, request.signal, logger);
@@ -4573,7 +4819,7 @@ var PiCodingAgentProvider = class {
     if (!this.config.cwd) {
       return workspaceRoot;
     }
-    return import_node_path12.default.resolve(this.config.cwd);
+    return import_node_path13.default.resolve(this.config.cwd);
   }
   buildPiArgs(prompt, inputFiles) {
     const args = [];
@@ -4662,19 +4908,19 @@ ${prompt}`;
     return env;
   }
   async createWorkspace() {
-    return await (0, import_promises11.mkdtemp)(import_node_path12.default.join((0, import_node_os4.tmpdir)(), WORKSPACE_PREFIX3));
+    return await (0, import_promises12.mkdtemp)(import_node_path13.default.join((0, import_node_os4.tmpdir)(), WORKSPACE_PREFIX3));
   }
   async cleanupWorkspace(workspaceRoot) {
     try {
-      await (0, import_promises11.rm)(workspaceRoot, { recursive: true, force: true });
+      await (0, import_promises12.rm)(workspaceRoot, { recursive: true, force: true });
     } catch {
     }
   }
   resolveLogDirectory() {
     if (this.config.logDir) {
-      return import_node_path12.default.resolve(this.config.logDir);
+      return import_node_path13.default.resolve(this.config.logDir);
     }
-    return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+    return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -4682,13 +4928,13 @@ ${prompt}`;
       return void 0;
     }
     try {
-      await (0, import_promises11.mkdir)(logDir, { recursive: true });
+      await (0, import_promises12.mkdir)(logDir, { recursive: true });
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = import_node_path12.default.join(logDir, buildLogFilename3(request, this.targetName));
+    const filePath = import_node_path13.default.join(logDir, buildLogFilename3(request, this.targetName));
     try {
       const logger = await PiStreamLogger.create({
         filePath,
@@ -5121,7 +5367,7 @@ async function defaultPiRunner(options) {
 }
 // src/evaluation/providers/targets.ts
-var import_node_path13 = __toESM(require("path"), 1);
+var import_node_path14 = __toESM(require("path"), 1);
 var import_zod2 = require("zod");
 var CliHealthcheckHttpInputSchema = import_zod2.z.object({
   type: import_zod2.z.literal("http"),
@@ -5227,11 +5473,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !import_node_path13.default.isAbsolute(cwd)) {
-    cwd = import_node_path13.default.resolve(import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !import_node_path14.default.isAbsolute(cwd)) {
+    cwd = import_node_path14.default.resolve(import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath));
+    cwd = import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath));
   }
   return {
     type: "command",
@@ -5258,11 +5504,11 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !import_node_path13.default.isAbsolute(cwd)) {
-    cwd = import_node_path13.default.resolve(import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !import_node_path14.default.isAbsolute(cwd)) {
+    cwd = import_node_path14.default.resolve(import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath));
+    cwd = import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath));
   }
   const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
@@ -5767,8 +6013,8 @@ function resolveCliConfig(target, env, evalFilePath) {
   const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
   if (!parseResult.success) {
     const firstError = parseResult.error.errors[0];
-    const path17 = firstError?.path.join(".") || "";
-    const prefix = path17 ? `${target.name} ${path17}: ` : `${target.name}: `;
+    const path18 = firstError?.path.join(".") || "";
+    const prefix = path18 ? `${target.name} ${path18}: ` : `${target.name}: `;
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -5956,7 +6202,7 @@ function resolveOptionalNumberArray(source, description) {
 }
 // src/evaluation/providers/vscode.ts
-var import_node_path14 = __toESM(require("path"), 1);
+var import_node_path15 = __toESM(require("path"), 1);
 var import_subagent = require("subagent");
 // src/evaluation/providers/vscode-templates.ts
@@ -6126,7 +6372,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
     return "";
   }
   const buildList = (files) => files.map((absolutePath) => {
-    const fileName = import_node_path14.default.basename(absolutePath);
+    const fileName = import_node_path15.default.basename(absolutePath);
     const fileUri = pathToFileUri2(absolutePath);
     return `* [${fileName}](${fileUri})`;
   });
@@ -6151,8 +6397,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path14.default.resolve(attachment);
-    const normalized = absolutePath.split(import_node_path14.default.sep).join("/");
+    const absolutePath = import_node_path15.default.resolve(attachment);
+    const normalized = absolutePath.split(import_node_path15.default.sep).join("/");
     if (isGuidelineFile(normalized, guidelinePatterns)) {
       if (!unique.has(absolutePath)) {
         unique.set(absolutePath, absolutePath);
@@ -6167,7 +6413,7 @@ function collectAttachmentFiles(attachments) {
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    const absolutePath = import_node_path14.default.resolve(attachment);
+    const absolutePath = import_node_path15.default.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
     }
@@ -6175,7 +6421,7 @@ function collectAttachmentFiles(attachments) {
   return Array.from(unique.values());
 }
 function pathToFileUri2(filePath) {
-  const absolutePath = import_node_path14.default.isAbsolute(filePath) ? filePath : import_node_path14.default.resolve(filePath);
+  const absolutePath = import_node_path15.default.isAbsolute(filePath) ? filePath : import_node_path15.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -6188,7 +6434,7 @@ function normalizeAttachments(attachments) {
   }
   const deduped = /* @__PURE__ */ new Set();
   for (const attachment of attachments) {
-    deduped.add(import_node_path14.default.resolve(attachment));
+    deduped.add(import_node_path15.default.resolve(attachment));
   }
   return Array.from(deduped);
 }
@@ -6197,7 +6443,7 @@ function mergeAttachments(all) {
   for (const list of all) {
     if (!list) continue;
     for (const inputFile of list) {
-      deduped.add(import_node_path14.default.resolve(inputFile));
+      deduped.add(import_node_path15.default.resolve(inputFile));
     }
   }
   return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -6245,9 +6491,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 var import_node_fs6 = require("fs");
-var import_promises12 = require("fs/promises");
-var import_node_path15 = __toESM(require("path"), 1);
-var import_yaml3 = require("yaml");
+var import_promises13 = require("fs/promises");
+var import_node_path16 = __toESM(require("path"), 1);
+var import_yaml4 = require("yaml");
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
@@ -6276,19 +6522,19 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises12.access)(filePath, import_node_fs6.constants.F_OK);
+    await (0, import_promises13.access)(filePath, import_node_fs6.constants.F_OK);
     return true;
   } catch {
     return false;
   }
 }
 async function readTargetDefinitions(filePath) {
-  const absolutePath = import_node_path15.default.resolve(filePath);
+  const absolutePath = import_node_path16.default.resolve(filePath);
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises12.readFile)(absolutePath, "utf8");
-  const parsed = (0, import_yaml3.parse)(raw);
+  const raw = await (0, import_promises13.readFile)(absolutePath, "utf8");
+  const parsed = (0, import_yaml4.parse)(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
   }
@@ -6494,15 +6740,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
   });
 }
 async function execShellWithStdin(command, stdinPayload, options = {}) {
-  const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("fs/promises");
+  const { mkdir: mkdir4, readFile: readFile9, rm: rm4, writeFile: writeFile4 } = await import("fs/promises");
   const { tmpdir: tmpdir4 } = await import("os");
-  const path17 = await import("path");
+  const path18 = await import("path");
   const { randomUUID: randomUUID4 } = await import("crypto");
-  const dir = path17.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
+  const dir = path18.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
   await mkdir4(dir, { recursive: true });
-  const stdinPath = path17.join(dir, "stdin.txt");
-  const stdoutPath = path17.join(dir, "stdout.txt");
-  const stderrPath = path17.join(dir, "stderr.txt");
+  const stdinPath = path18.join(dir, "stdin.txt");
+  const stdoutPath = path18.join(dir, "stdout.txt");
+  const stderrPath = path18.join(dir, "stderr.txt");
   await writeFile4(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn4 } = await import("child_process");
@@ -6532,8 +6778,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
         resolve(code ?? 0);
       });
     });
-    const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
-    const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
+    const stdout = (await readFile9(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
+    const stderr = (await readFile9(stderrPath, "utf8")).replace(/\r\n/g, "\n");
     return { stdout, stderr, exitCode };
   } finally {
     await rm4(dir, { recursive: true, force: true });
@@ -6805,7 +7051,7 @@ var CodeEvaluator = class {
       outputMessages: context.outputMessages ?? null,
       guidelineFiles: context.evalCase.guideline_paths,
       inputFiles: context.evalCase.file_paths.filter(
-        (path17) => !context.evalCase.guideline_paths.includes(path17)
+        (path18) => !context.evalCase.guideline_paths.includes(path18)
       ),
       inputMessages: context.evalCase.input_messages,
       traceSummary: context.traceSummary ?? null,
@@ -7591,115 +7837,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path17, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path17);
-    const expectedValue = resolvePath(expectedData, path17);
+    const { path: path18, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path18);
+    const expectedValue = resolvePath(expectedData, path18);
     if (expectedValue === void 0) {
       return {
-        path: path17,
+        path: path18,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path17}: no expected value`
+        message: `${path18}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path17,
+          path: path18,
           score: 0,
           weight,
           hit: false,
-          message: `${path17} (required, missing)`
+          message: `${path18} (required, missing)`
         };
       }
       return {
-        path: path17,
+        path: path18,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path17}: optional field missing`
+        message: `${path18}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path17, candidateValue, expectedValue, weight);
+        return this.compareExact(path18, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path17,
+          path18,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path17, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path18, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path17,
+          path: path18,
           score: 0,
           weight,
           hit: false,
-          message: `${path17}: unknown match type "${match}"`
+          message: `${path18}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path17, candidateValue, expectedValue, weight) {
+  compareExact(path18, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path17,
+        path: path18,
         score: 1,
         weight,
         hit: true,
-        message: path17
+        message: path18
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path17,
+        path: path18,
         score: 0,
         weight,
         hit: false,
-        message: `${path17} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path18} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path17,
+      path: path18,
       score: 0,
       weight,
       hit: false,
-      message: `${path17} (value mismatch)`
+      message: `${path18} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path17, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path18, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber(candidateValue);
     const expectedNum = toNumber(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path17,
+        path: path18,
         score: 0,
         weight,
         hit: false,
-        message: `${path17} (non-numeric value)`
+        message: `${path18} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path17,
+        path: path18,
         score: 0,
         weight,
         hit: false,
-        message: `${path17} (invalid numeric value)`
+        message: `${path18} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -7712,61 +7958,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path17,
+        path: path18,
         score: 1,
         weight,
         hit: true,
-        message: `${path17} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path18} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path17,
+      path: path18,
       score: 0,
       weight,
       hit: false,
-      message: `${path17} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path18} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path17, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path18, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path17,
+        path: path18,
         score: 0,
         weight,
         hit: false,
-        message: `${path17} (unparseable candidate date)`
+        message: `${path18} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path17,
+        path: path18,
         score: 0,
         weight,
         hit: false,
-        message: `${path17} (unparseable expected date)`
+        message: `${path18} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path17,
+        path: path18,
         score: 1,
         weight,
         hit: true,
-        message: path17
+        message: path18
       };
     }
     return {
-      path: path17,
+      path: path18,
       score: 0,
       weight,
       hit: false,
-      message: `${path17} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path18} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -7806,11 +8052,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path17) {
-  if (!path17 || !obj) {
+function resolvePath(obj, path18) {
+  if (!path18 || !obj) {
     return void 0;
   }
-  const parts = path17.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path18.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -8246,7 +8492,7 @@ var ToolTrajectoryEvaluator = class {
 // src/evaluation/orchestrator.ts
 var import_node_crypto5 = require("crypto");
-var import_node_path16 = __toESM(require("path"), 1);
+var import_node_path17 = __toESM(require("path"), 1);
 // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
 var Node = class {
@@ -9045,7 +9291,7 @@ async function runEvaluatorList(options) {
         });
       }
       if (evaluator.type === "composite") {
-        const evalFileDir = evalCase.guideline_paths[0] ? import_node_path16.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
+        const evalFileDir = evalCase.guideline_paths[0] ? import_node_path17.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
         const createEvaluator = (memberConfig) => {
           switch (memberConfig.type) {
             case "llm_judge":
@@ -9620,6 +9866,7 @@ function createAgentKernel() {
   createAgentKernel,
   createProvider,
   deepEqual,
+  detectFormat,
   ensureVSCodeSubagents,
   executeScript,
   explorationRatio,