npm - @agentv/core - Versions diffs - 0.2.6 → 0.2.11 - Mend

@agentv/core 0.2.6 → 0.2.11

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/{chunk-QVS4OL44.js → chunk-P4GOYWYH.js} +27 -1
package/dist/chunk-P4GOYWYH.js.map +1 -0
package/dist/chunk-XXNQA4EW.js +140 -0
package/dist/chunk-XXNQA4EW.js.map +1 -0
package/dist/evaluation/validation/index.cjs +93 -8
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.d.cts +7 -2
package/dist/evaluation/validation/index.d.ts +7 -2
package/dist/evaluation/validation/index.js +91 -7
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +533 -187
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +53 -10
package/dist/index.d.ts +53 -10
package/dist/index.js +502 -193
package/dist/index.js.map +1 -1
package/package.json +6 -2
package/dist/chunk-QVS4OL44.js.map +0 -1

package/dist/index.js CHANGED

@@ -1,8 +1,11 @@
 import {
   TARGETS_SCHEMA_V2,
+  buildDirectoryChain,
   buildSearchRoots,
+  fileExists,
+  findGitRoot,
   resolveFileReference
-} from "./chunk-QVS4OL44.js";
+} from "./chunk-P4GOYWYH.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -56,6 +59,7 @@ function getHitCount(result) {
 }
 // src/evaluation/yaml-parser.ts
+import micromatch from "micromatch";
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -65,9 +69,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
 var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
-function isGuidelineFile(filePath) {
+var SCHEMA_CONFIG_V2 = "agentv-config-v2";
+async function loadConfig(evalFilePath, repoRoot) {
+  const directories = buildDirectoryChain(evalFilePath, repoRoot);
+  for (const directory of directories) {
+    const configPath = path.join(directory, ".agentv", "config.yaml");
+    if (!await fileExists2(configPath)) {
+      continue;
+    }
+    try {
+      const rawConfig = await readFile(configPath, "utf8");
+      const parsed = parse(rawConfig);
+      if (!isJsonObject(parsed)) {
+        logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
+        continue;
+      }
+      const config = parsed;
+      const schema = config.$schema;
+      if (schema !== SCHEMA_CONFIG_V2) {
+        const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
+Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
+        logWarning(message);
+        continue;
+      }
+      const guidelinePatterns = config.guideline_patterns;
+      if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
+        logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
+        continue;
+      }
+      if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
+        logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
+        continue;
+      }
+      return {
+        guideline_patterns: guidelinePatterns
+      };
+    } catch (error) {
+      logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
+      continue;
+    }
+  }
+  return null;
+}
+function isGuidelineFile(filePath, patterns) {
   const normalized = filePath.split("\\").join("/");
-  return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
+  const patternsToUse = patterns ?? [];
+  return micromatch.isMatch(normalized, patternsToUse);
 }
 function extractCodeBlocks(segments) {
   const codeBlocks = [];
@@ -87,43 +134,45 @@ function extractCodeBlocks(segments) {
   }
   return codeBlocks;
 }
-async function loadTestCases(testFilePath, repoRoot, options) {
+async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
-  const absoluteTestPath = path.resolve(testFilePath);
-  if (!await fileExists(absoluteTestPath)) {
-    throw new Error(`Test file not found: ${testFilePath}`);
+  const absoluteTestPath = path.resolve(evalFilePath);
+  if (!await fileExists2(absoluteTestPath)) {
+    throw new Error(`Test file not found: ${evalFilePath}`);
   }
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
+  const config = await loadConfig(absoluteTestPath, repoRootPath);
+  const guidelinePatterns = config?.guideline_patterns;
   const rawFile = await readFile(absoluteTestPath, "utf8");
   const parsed = parse(rawFile);
   if (!isJsonObject(parsed)) {
-    throw new Error(`Invalid test file format: ${testFilePath}`);
+    throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
   const schema = suite.$schema;
   if (schema !== SCHEMA_EVAL_V2) {
-    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${testFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${testFilePath}.
+    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
 Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     throw new Error(message);
   }
   const rawTestcases = suite.evalcases;
   if (!Array.isArray(rawTestcases)) {
-    throw new Error(`Invalid test file format: ${testFilePath} - missing 'evalcases' field`);
+    throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
   }
   const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
   const results = [];
-  for (const rawTestcase of rawTestcases) {
-    if (!isJsonObject(rawTestcase)) {
+  for (const rawEvalcase of rawTestcases) {
+    if (!isJsonObject(rawEvalcase)) {
       logWarning("Skipping invalid test case entry (expected object)");
       continue;
     }
-    const testcase = rawTestcase;
-    const id = asString(testcase.id);
-    const conversationId = asString(testcase.conversation_id);
-    const outcome = asString(testcase.outcome);
-    const inputMessagesValue = testcase.input_messages;
-    const expectedMessagesValue = testcase.expected_messages;
+    const evalcase = rawEvalcase;
+    const id = asString(evalcase.id);
+    const conversationId = asString(evalcase.conversation_id);
+    const outcome = asString(evalcase.outcome);
+    const inputMessagesValue = evalcase.input_messages;
+    const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
       logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
       continue;
@@ -136,6 +185,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
     const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
     const userMessages = inputMessages.filter((message) => message.role === "user");
+    const systemMessages = inputMessages.filter((message) => message.role === "system");
     if (assistantMessages.length === 0) {
       logWarning(`No assistant message found for test case: ${id}`);
       continue;
@@ -143,6 +193,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     if (assistantMessages.length > 1) {
       logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
     }
+    if (systemMessages.length > 1) {
+      logWarning(`Multiple system messages found for test case: ${id}, using first`);
+    }
+    let systemMessageContent;
+    if (systemMessages.length > 0) {
+      const content = systemMessages[0]?.content;
+      if (typeof content === "string") {
+        systemMessageContent = content;
+      } else if (Array.isArray(content)) {
+        const textParts = [];
+        for (const segment of content) {
+          if (isJsonObject(segment)) {
+            const value = segment.value;
+            if (typeof value === "string") {
+              textParts.push(value);
+            }
+          }
+        }
+        if (textParts.length > 0) {
+          systemMessageContent = textParts.join("\n\n");
+        }
+      }
+    }
     const userSegments = [];
     const guidelinePaths = [];
     const userTextParts = [];
@@ -174,7 +247,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
           }
           try {
             const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-            if (isGuidelineFile(displayPath)) {
+            const relativeToRepo = path.relative(repoRootPath, resolvedPath);
+            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
               guidelinePaths.push(path.resolve(resolvedPath));
               if (verbose) {
                 console.log(`  [Guideline] Found: ${displayPath}`);
@@ -184,7 +258,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
               userSegments.push({
                 type: "file",
                 path: displayPath,
-                text: fileContent
+                text: fileContent,
+                resolvedPath: path.resolve(resolvedPath)
               });
               if (verbose) {
                 console.log(`  [File] Found: ${displayPath}`);
@@ -208,14 +283,27 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const assistantContent = assistantMessages[0]?.content;
     const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
     const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
+    const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
+    const userFilePaths = [];
+    for (const segment of userSegments) {
+      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
+        userFilePaths.push(segment.resolvedPath);
+      }
+    }
+    const allFilePaths = [
+      ...guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
+      ...userFilePaths
+    ];
     const testCase = {
       id,
       conversation_id: conversationId,
       task: userTextPrompt,
       user_segments: userSegments,
+      system_message: systemMessageContent,
       expected_assistant_raw: expectedAssistantRaw,
       guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
+      guideline_patterns: guidelinePatterns,
+      file_paths: allFilePaths,
       code_snippets: codeSnippets,
       outcome,
       grader: testCaseGrader
@@ -240,7 +328,7 @@ async function buildPromptInputs(testCase) {
   const guidelineContents = [];
   for (const rawPath of testCase.guideline_paths) {
     const absolutePath = path.resolve(rawPath);
-    if (!await fileExists(absolutePath)) {
+    if (!await fileExists2(absolutePath)) {
       logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
       continue;
     }
@@ -281,9 +369,9 @@ ${body}`);
   }
   const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { request, guidelines };
+  return { request, guidelines, systemMessage: testCase.system_message };
 }
-async function fileExists(absolutePath) {
+async function fileExists2(absolutePath) {
   try {
     await access(absolutePath, constants.F_OK);
     return true;
@@ -407,15 +495,18 @@ function buildChatPrompt(request) {
     return request.chatPrompt;
   }
   const systemSegments = [];
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    systemSegments.push(`Guidelines:
-${request.guidelines.trim()}`);
-  }
   const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
   if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
     systemSegments.push(metadataSystemPrompt.trim());
+  } else {
+    systemSegments.push(DEFAULT_SYSTEM_PROMPT);
   }
-  const systemContent = systemSegments.length > 0 ? systemSegments.join("\n\n") : DEFAULT_SYSTEM_PROMPT;
+  if (request.guidelines && request.guidelines.trim().length > 0) {
+    systemSegments.push(`[[ ## Guidelines ## ]]
+${request.guidelines.trim()}`);
+  }
+  const systemContent = systemSegments.join("\n\n");
   const userContent = request.prompt.trim();
   const prompt = [
     {
@@ -644,6 +735,9 @@ function normalizeAzureApiVersion(value) {
 function resolveTargetDefinition(definition, env = process.env) {
   const parsed = BASE_TARGET_SCHEMA.parse(definition);
   const provider = parsed.provider.toLowerCase();
+  const providerBatching = resolveOptionalBoolean(
+    parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
+  );
   switch (provider) {
     case "azure":
     case "azure-openai":
@@ -652,6 +746,7 @@ function resolveTargetDefinition(definition, env = process.env) {
         name: parsed.name,
         judgeTarget: parsed.judge_target,
         workers: parsed.workers,
+        providerBatching,
         config: resolveAzureConfig(parsed, env)
       };
     case "anthropic":
@@ -660,6 +755,7 @@ function resolveTargetDefinition(definition, env = process.env) {
         name: parsed.name,
         judgeTarget: parsed.judge_target,
         workers: parsed.workers,
+        providerBatching,
         config: resolveAnthropicConfig(parsed, env)
       };
     case "gemini":
@@ -670,6 +766,7 @@ function resolveTargetDefinition(definition, env = process.env) {
         name: parsed.name,
         judgeTarget: parsed.judge_target,
         workers: parsed.workers,
+        providerBatching,
         config: resolveGeminiConfig(parsed, env)
       };
     case "mock":
@@ -678,6 +775,7 @@ function resolveTargetDefinition(definition, env = process.env) {
         name: parsed.name,
         judgeTarget: parsed.judge_target,
         workers: parsed.workers,
+        providerBatching,
         config: resolveMockConfig(parsed)
       };
     case "vscode":
@@ -687,6 +785,7 @@ function resolveTargetDefinition(definition, env = process.env) {
         name: parsed.name,
         judgeTarget: parsed.judge_target,
         workers: parsed.workers,
+        providerBatching,
         config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
       };
     default:
@@ -871,15 +970,19 @@ function isLikelyEnvReference(value) {
 }
 // src/evaluation/providers/vscode.ts
-import { mkdtemp, readFile as readFile2, rm, writeFile } from "node:fs/promises";
-import { tmpdir } from "node:os";
+import { readFile as readFile2 } from "node:fs/promises";
 import path2 from "node:path";
-import { dispatchAgentSession, getSubagentRoot, provisionSubagents } from "subagent";
-var PROMPT_FILE_PREFIX = "agentv-vscode-";
+import {
+  dispatchAgentSession,
+  dispatchBatchAgent,
+  getSubagentRoot,
+  provisionSubagents
+} from "subagent";
 var VSCodeProvider = class {
   id;
   kind;
   targetName;
+  supportsBatch = true;
   config;
   constructor(targetName, config, kind) {
     this.id = `${kind}:${targetName}`;
@@ -892,117 +995,159 @@ var VSCodeProvider = class {
       throw new Error("VS Code provider request was aborted before dispatch");
     }
     const attachments = normalizeAttachments(request.attachments);
-    const promptContent = buildPromptDocument(request, attachments);
-    const directory = await mkdtemp(path2.join(tmpdir(), PROMPT_FILE_PREFIX));
-    const promptPath = path2.join(directory, `${request.testCaseId ?? "request"}.prompt.md`);
-    try {
-      await writeFile(promptPath, promptContent, "utf8");
-      const session = await dispatchAgentSession({
-        userQuery: composeUserQuery(request),
-        promptFile: promptPath,
-        extraAttachments: attachments,
-        wait: this.config.waitForResponse,
-        dryRun: this.config.dryRun,
-        vscodeCmd: this.config.command,
-        subagentRoot: this.config.subagentRoot,
-        workspaceTemplate: this.config.workspaceTemplate,
-        silent: true
-      });
-      if (session.exitCode !== 0 || !session.responseFile) {
-        const failure = session.error ?? "VS Code subagent did not produce a response";
-        throw new Error(failure);
-      }
-      if (this.config.dryRun) {
-        return {
-          text: "",
-          raw: {
-            session,
-            promptFile: promptPath,
-            attachments
-          }
-        };
-      }
-      const responseText = await readFile2(session.responseFile, "utf8");
+    const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
+    const session = await dispatchAgentSession({
+      userQuery: promptContent,
+      // Use full prompt content instead of just request.prompt
+      extraAttachments: attachments,
+      wait: this.config.waitForResponse,
+      dryRun: this.config.dryRun,
+      vscodeCmd: this.config.command,
+      subagentRoot: this.config.subagentRoot,
+      workspaceTemplate: this.config.workspaceTemplate,
+      silent: true
+    });
+    if (session.exitCode !== 0 || !session.responseFile) {
+      const failure = session.error ?? "VS Code subagent did not produce a response";
+      throw new Error(failure);
+    }
+    if (this.config.dryRun) {
       return {
-        text: responseText,
+        text: "",
         raw: {
           session,
-          promptFile: promptPath,
           attachments
         }
       };
-    } finally {
-      await rm(directory, { recursive: true, force: true });
     }
+    const responseText = await readFile2(session.responseFile, "utf8");
+    return {
+      text: responseText,
+      raw: {
+        session,
+        attachments
+      }
+    };
+  }
+  async invokeBatch(requests) {
+    if (requests.length === 0) {
+      return [];
+    }
+    const normalizedRequests = requests.map((req) => ({
+      request: req,
+      attachments: normalizeAttachments(req.attachments)
+    }));
+    const combinedAttachments = mergeAttachments(
+      normalizedRequests.map(({ attachments }) => attachments)
+    );
+    const userQueries = normalizedRequests.map(
+      ({ request, attachments }) => buildPromptDocument(request, attachments, request.guideline_patterns)
+    );
+    const session = await dispatchBatchAgent({
+      userQueries,
+      extraAttachments: combinedAttachments,
+      wait: this.config.waitForResponse,
+      dryRun: this.config.dryRun,
+      vscodeCmd: this.config.command,
+      subagentRoot: this.config.subagentRoot,
+      workspaceTemplate: this.config.workspaceTemplate,
+      silent: true
+    });
+    if (session.exitCode !== 0 || !session.responseFiles) {
+      const failure = session.error ?? "VS Code subagent did not produce batch responses";
+      throw new Error(failure);
+    }
+    if (this.config.dryRun) {
+      return normalizedRequests.map(({ attachments }) => ({
+        text: "",
+        raw: {
+          session,
+          attachments,
+          allAttachments: combinedAttachments
+        }
+      }));
+    }
+    if (session.responseFiles.length !== requests.length) {
+      throw new Error(
+        `VS Code batch returned ${session.responseFiles.length} responses for ${requests.length} requests`
+      );
+    }
+    const responses = [];
+    for (const [index, responseFile] of session.responseFiles.entries()) {
+      const responseText = await readFile2(responseFile, "utf8");
+      responses.push({
+        text: responseText,
+        raw: {
+          session,
+          attachments: normalizedRequests[index]?.attachments,
+          allAttachments: combinedAttachments,
+          responseFile
+        }
+      });
+    }
+    return responses;
   }
 };
-function buildPromptDocument(request, attachments) {
+function buildPromptDocument(request, attachments, guidelinePatterns) {
   const parts = [];
-  const instructionFiles = collectInstructionFiles(attachments);
-  if (instructionFiles.length > 0) {
-    parts.push(buildMandatoryPrereadBlock(instructionFiles));
-  }
-  parts.push(`# AgentV Request`);
-  if (request.testCaseId) {
-    parts.push(`- Test Case: ${request.testCaseId}`);
-  }
-  if (request.metadata?.target) {
-    parts.push(`- Target: ${String(request.metadata.target)}`);
-  }
-  parts.push("\n## Task\n", request.prompt.trim());
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    parts.push("\n## Guidelines\n", request.guidelines.trim());
-  }
-  if (attachments && attachments.length > 0) {
-    const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
-    parts.push("\n## Attachments\n", attachmentList);
+  const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
+  const attachmentFiles = collectAttachmentFiles(attachments);
+  const nonGuidelineAttachments = attachmentFiles.filter(
+    (file) => !guidelineFiles.includes(file)
+  );
+  const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineAttachments);
+  if (prereadBlock.length > 0) {
+    parts.push("\n", prereadBlock);
   }
+  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
   return parts.join("\n").trim();
 }
-function buildMandatoryPrereadBlock(instructionFiles) {
-  if (instructionFiles.length === 0) {
+function buildMandatoryPrereadBlock(guidelineFiles, attachmentFiles) {
+  if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
     return "";
   }
-  const fileList = [];
-  const tokenList = [];
-  let counter = 0;
-  for (const absolutePath of instructionFiles) {
-    counter += 1;
+  const buildList = (files) => files.map((absolutePath) => {
     const fileName = path2.basename(absolutePath);
     const fileUri = pathToFileUri(absolutePath);
-    fileList.push(`[${fileName}](${fileUri})`);
-    tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
-  }
-  const filesText = fileList.join(", ");
-  const tokensText = tokenList.join("\n");
-  const instruction = [
-    `Read all instruction files: ${filesText}.`,
-    `After reading each file, compute its SHA256 hash using this PowerShell command:`,
-    "`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
-    `Then include, at the top of your reply, these exact tokens on separate lines:
-`,
-    tokensText,
-    `
-Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
-    `If any file is missing, fail with ERROR: missing-file <filename> and stop.
-`,
-    `Then fetch all documentation required by the instructions before proceeding with your task.`
-  ].join(" ");
-  return `[[ ## mandatory_pre_read ## ]]
-${instruction}
-`;
+    return `* [${fileName}](${fileUri})`;
+  });
+  const sections = [];
+  if (guidelineFiles.length > 0) {
+    sections.push(`Read all guideline files:
+${buildList(guidelineFiles).join("\n")}.`);
+  }
+  if (attachmentFiles.length > 0) {
+    sections.push(`Read all attachment files:
+${buildList(attachmentFiles).join("\n")}.`);
+  }
+  sections.push(
+    "If any file is missing, fail with ERROR: missing-file <filename> and stop.",
+    "Then apply system_instructions on the user query below."
+  );
+  return sections.join("\n");
 }
-function collectInstructionFiles(attachments) {
+function collectGuidelineFiles(attachments, guidelinePatterns) {
   if (!attachments || attachments.length === 0) {
     return [];
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    if (!isInstructionPath(attachment)) {
-      continue;
+    const absolutePath = path2.resolve(attachment);
+    const normalized = absolutePath.split(path2.sep).join("/");
+    if (isGuidelineFile(normalized, guidelinePatterns)) {
+      if (!unique.has(absolutePath)) {
+        unique.set(absolutePath, absolutePath);
+      }
     }
+  }
+  return Array.from(unique.values());
+}
+function collectAttachmentFiles(attachments) {
+  if (!attachments || attachments.length === 0) {
+    return [];
+  }
+  const unique = /* @__PURE__ */ new Map();
+  for (const attachment of attachments) {
     const absolutePath = path2.resolve(attachment);
     if (!unique.has(absolutePath)) {
       unique.set(absolutePath, absolutePath);
@@ -1010,10 +1155,6 @@ function collectInstructionFiles(attachments) {
   }
   return Array.from(unique.values());
 }
-function isInstructionPath(filePath) {
-  const normalized = filePath.split(path2.sep).join("/");
-  return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
-}
 function pathToFileUri(filePath) {
   const absolutePath = path2.isAbsolute(filePath) ? filePath : path2.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
@@ -1022,14 +1163,6 @@ function pathToFileUri(filePath) {
   }
   return `file://${normalizedPath}`;
 }
-function composeUserQuery(request) {
-  const segments = [];
-  segments.push(request.prompt.trim());
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    segments.push("\nGuidelines:\n", request.guidelines.trim());
-  }
-  return segments.join("\n").trim();
-}
 function normalizeAttachments(attachments) {
   if (!attachments || attachments.length === 0) {
     return void 0;
@@ -1040,6 +1173,16 @@ function normalizeAttachments(attachments) {
   }
   return Array.from(deduped);
 }
+function mergeAttachments(all) {
+  const deduped = /* @__PURE__ */ new Set();
+  for (const list of all) {
+    if (!list) continue;
+    for (const attachment of list) {
+      deduped.add(path2.resolve(attachment));
+    }
+  }
+  return deduped.size > 0 ? Array.from(deduped) : void 0;
+}
 async function ensureVSCodeSubagents(options) {
   const { kind, count, verbose = false } = options;
   const vscodeCmd = kind === "vscode-insiders" ? "code-insiders" : "code";
@@ -1136,7 +1279,7 @@ function assertTargetDefinition(value, index, filePath) {
     judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
   };
 }
-async function fileExists2(filePath) {
+async function fileExists3(filePath) {
   try {
     await access2(filePath, constants2.F_OK);
     return true;
@@ -1146,7 +1289,7 @@ async function fileExists2(filePath) {
 }
 async function readTargetDefinitions(filePath) {
   const absolutePath = path3.resolve(filePath);
-  if (!await fileExists2(absolutePath)) {
+  if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
   const raw = await readFile3(absolutePath, "utf8");
@@ -1376,7 +1519,7 @@ import { randomUUID } from "node:crypto";
 var HeuristicGrader = class {
   kind = "heuristic";
   grade(context) {
-    const expectedAspects = extractAspects(context.testCase.expected_assistant_raw);
+    const expectedAspects = extractAspects(context.evalCase.expected_assistant_raw);
     const result = scoreCandidateResponse(context.candidate, expectedAspects);
     const misses = [...result.misses];
     if (expectedAspects.length === 0 && isErrorLike(context.candidate)) {
@@ -1409,14 +1552,14 @@ var QualityGrader = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
-    const prompt = buildQualityPrompt(context.testCase, context.candidate);
+    const prompt = buildQualityPrompt(context.evalCase, context.candidate);
     const metadata = {
       systemPrompt: QUALITY_SYSTEM_PROMPT
     };
     const response = await judgeProvider.invoke({
       prompt,
       metadata,
-      testCaseId: context.testCase.id,
+      evalCaseId: context.evalCase.id,
       attempt: context.attempt,
       maxOutputTokens: this.maxOutputTokens,
       temperature: this.temperature
@@ -1462,16 +1605,16 @@ var QUALITY_SYSTEM_PROMPT = [
 function buildQualityPrompt(testCase, candidate) {
   const parts = [
     "[[ ## expected_outcome ## ]]",
-    testCase.outcome,
+    testCase.outcome.trim(),
     "",
     "[[ ## request ## ]]",
-    testCase.task,
+    testCase.task.trim(),
     "",
     "[[ ## reference_answer ## ]]",
-    testCase.expected_assistant_raw,
+    testCase.expected_assistant_raw.trim(),
     "",
     "[[ ## generated_answer ## ]]",
-    candidate,
+    candidate.trim(),
     "",
     "Respond with a single JSON object matching the schema described in the system prompt."
   ];
@@ -1720,10 +1863,10 @@ async function runEvaluation(options) {
     onResult,
     onProgress
   } = options;
-  const load = loadTestCases;
-  const testCases = await load(testFilePath, repoRoot, { verbose });
-  const filteredTestCases = filterTestCases(testCases, evalId);
-  if (filteredTestCases.length === 0) {
+  const load = loadEvalCases;
+  const evalCases = await load(testFilePath, repoRoot, { verbose });
+  const filteredEvalCases = filterEvalCases(evalCases, evalId);
+  if (filteredEvalCases.length === 0) {
     if (evalId) {
       throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
     }
@@ -1769,35 +1912,62 @@ async function runEvaluation(options) {
   };
   const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
   const primaryProvider = getOrCreateProvider(target);
-  if (onProgress && filteredTestCases.length > 0) {
-    for (let i = 0; i < filteredTestCases.length; i++) {
+  const providerSupportsBatch = target.providerBatching === true && primaryProvider.supportsBatch === true && typeof primaryProvider.invokeBatch === "function";
+  if (target.providerBatching && !providerSupportsBatch && verbose) {
+    console.warn(
+      `Provider batching requested for target '${target.name}', but provider does not advertise batch support. Using per-case dispatch.`
+    );
+  }
+  if (onProgress && filteredEvalCases.length > 0) {
+    for (let i = 0; i < filteredEvalCases.length; i++) {
       await onProgress({
         workerId: i + 1,
-        evalId: filteredTestCases[i].id,
+        evalId: filteredEvalCases[i].id,
         status: "pending"
       });
     }
   }
+  if (providerSupportsBatch) {
+    try {
+      return await runBatchEvaluation({
+        evalCases: filteredEvalCases,
+        provider: primaryProvider,
+        target,
+        graderRegistry,
+        promptDumpDir,
+        nowFn: now ?? (() => /* @__PURE__ */ new Date()),
+        onProgress,
+        onResult,
+        verbose,
+        resolveJudgeProvider
+      });
+    } catch (error) {
+      if (verbose) {
+        const message = error instanceof Error ? error.message : String(error);
+        console.warn(`Provider batch execution failed, falling back to per-case dispatch: ${message}`);
+      }
+    }
+  }
   const workers = options.maxConcurrency ?? target.workers ?? 1;
   const limit = pLimit(workers);
   let nextWorkerId = 1;
   const workerIdByEvalId = /* @__PURE__ */ new Map();
-  const promises = filteredTestCases.map(
-    (testCase) => limit(async () => {
+  const promises = filteredEvalCases.map(
+    (evalCase) => limit(async () => {
       const workerId = nextWorkerId++;
-      workerIdByEvalId.set(testCase.id, workerId);
+      workerIdByEvalId.set(evalCase.id, workerId);
       if (onProgress) {
         await onProgress({
           workerId,
-          evalId: testCase.id,
+          evalId: evalCase.id,
           status: "running",
           startedAt: Date.now()
         });
       }
       try {
         const judgeProvider = await resolveJudgeProvider(target);
-        const result = await runTestCase({
-          testCase,
+        const result = await runEvalCase({
+          evalCase,
           provider: primaryProvider,
           target,
           graders: graderRegistry,
@@ -1812,7 +1982,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            evalId: testCase.id,
+            evalId: evalCase.id,
             status: "completed",
             startedAt: 0,
             // Not used for completed status
@@ -1827,7 +1997,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            evalId: testCase.id,
+            evalId: evalCase.id,
             status: "failed",
             completedAt: Date.now(),
             error: error instanceof Error ? error.message : String(error)
@@ -1844,10 +2014,10 @@ async function runEvaluation(options) {
     if (outcome.status === "fulfilled") {
       results.push(outcome.value);
     } else {
-      const testCase = filteredTestCases[i];
-      const promptInputs = await buildPromptInputs(testCase);
+      const evalCase = filteredEvalCases[i];
+      const promptInputs = await buildPromptInputs(evalCase);
       const errorResult = buildErrorResult(
-        testCase,
+        evalCase,
         target.name,
         (now ?? (() => /* @__PURE__ */ new Date()))(),
         outcome.reason,
@@ -1861,9 +2031,140 @@ async function runEvaluation(options) {
   }
   return results;
 }
-async function runTestCase(options) {
+async function runBatchEvaluation(options) {
+  const {
+    evalCases,
+    provider,
+    target,
+    graderRegistry,
+    promptDumpDir,
+    nowFn,
+    onProgress,
+    onResult,
+    resolveJudgeProvider
+  } = options;
+  const promptInputsList = [];
+  for (const evalCase of evalCases) {
+    const promptInputs = await buildPromptInputs(evalCase);
+    if (promptDumpDir) {
+      await dumpPrompt(promptDumpDir, evalCase, promptInputs);
+    }
+    promptInputsList.push(promptInputs);
+  }
+  const batchRequests = evalCases.map((evalCase, index) => {
+    const promptInputs = promptInputsList[index];
+    return {
+      prompt: promptInputs.request,
+      guidelines: promptInputs.guidelines,
+      guideline_patterns: evalCase.guideline_patterns,
+      attachments: evalCase.file_paths,
+      evalCaseId: evalCase.id,
+      metadata: {
+        systemPrompt: promptInputs.systemMessage ?? ""
+      }
+    };
+  });
+  const batchResponse = await provider.invokeBatch?.(batchRequests);
+  if (!Array.isArray(batchResponse)) {
+    throw new Error("Provider batching failed: invokeBatch did not return an array");
+  }
+  if (batchResponse.length !== evalCases.length) {
+    throw new Error(
+      `Provider batching failed: expected ${evalCases.length} responses, received ${batchResponse.length}`
+    );
+  }
+  if (onProgress) {
+    const startedAt = Date.now();
+    for (let i = 0; i < evalCases.length; i++) {
+      await onProgress({
+        workerId: 1,
+        evalId: evalCases[i].id,
+        status: "running",
+        startedAt
+      });
+    }
+  }
+  const results = [];
+  for (let i = 0; i < evalCases.length; i++) {
+    const evalCase = evalCases[i];
+    const promptInputs = promptInputsList[i];
+    const providerResponse = batchResponse[i];
+    const now = nowFn();
+    const graderKind = evalCase.grader ?? "heuristic";
+    const activeGrader = graderRegistry[graderKind] ?? graderRegistry.heuristic;
+    if (!activeGrader) {
+      throw new Error(`No grader registered for kind '${graderKind}'`);
+    }
+    let grade;
+    try {
+      grade = await activeGrader.grade({
+        evalCase,
+        candidate: providerResponse.text ?? "",
+        target,
+        provider,
+        attempt: 0,
+        promptInputs,
+        now,
+        judgeProvider: await resolveJudgeProvider(target)
+      });
+    } catch (error) {
+      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+      results.push(errorResult);
+      if (onResult) {
+        await onResult(errorResult);
+      }
+      if (onProgress) {
+        await onProgress({
+          workerId: 1,
+          evalId: evalCase.id,
+          status: "failed",
+          completedAt: Date.now(),
+          error: error instanceof Error ? error.message : String(error)
+        });
+      }
+      continue;
+    }
+    const completedAt = nowFn();
+    const rawRequest = {
+      request: promptInputs.request,
+      guidelines: promptInputs.guidelines,
+      guideline_paths: evalCase.guideline_paths,
+      system_message: promptInputs.systemMessage ?? ""
+    };
+    const result = {
+      eval_id: evalCase.id,
+      conversation_id: evalCase.conversation_id,
+      score: grade.score,
+      hits: grade.hits,
+      misses: grade.misses,
+      model_answer: providerResponse.text ?? "",
+      expected_aspect_count: grade.expectedAspectCount,
+      target: target.name,
+      timestamp: completedAt.toISOString(),
+      reasoning: grade.reasoning,
+      raw_aspects: grade.rawAspects,
+      raw_request: rawRequest,
+      grader_raw_request: grade.graderRawRequest
+    };
+    results.push(result);
+    if (onResult) {
+      await onResult(result);
+    }
+    if (onProgress) {
+      await onProgress({
+        workerId: 1,
+        evalId: evalCase.id,
+        status: "completed",
+        startedAt: 0,
+        completedAt: Date.now()
+      });
+    }
+  }
+  return results;
+}
+async function runEvalCase(options) {
   const {
-    testCase,
+    evalCase,
     provider,
     target,
     graders,
@@ -1876,11 +2177,11 @@ async function runTestCase(options) {
     signal,
     judgeProvider
   } = options;
-  const promptInputs = await buildPromptInputs(testCase);
+  const promptInputs = await buildPromptInputs(evalCase);
   if (promptDumpDir) {
-    await dumpPrompt(promptDumpDir, testCase, promptInputs);
+    await dumpPrompt(promptDumpDir, evalCase, promptInputs);
   }
-  const cacheKey = useCache ? createCacheKey(provider, target, testCase, promptInputs) : void 0;
+  const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
   let cachedResponse;
   if (cacheKey && cache) {
     cachedResponse = await cache.get(cacheKey);
@@ -1893,7 +2194,7 @@ async function runTestCase(options) {
   while (!providerResponse && attempt < attemptBudget) {
     try {
       providerResponse = await invokeProvider(provider, {
-        testCase,
+        evalCase,
         target,
         promptInputs,
         attempt,
@@ -1906,12 +2207,12 @@ async function runTestCase(options) {
         attempt += 1;
         continue;
       }
-      return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
+      return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
     }
   }
   if (!providerResponse) {
     return buildErrorResult(
-      testCase,
+      evalCase,
       target.name,
       nowFn(),
       lastError ?? new Error("Provider did not return a response"),
@@ -1921,7 +2222,7 @@ async function runTestCase(options) {
   if (cacheKey && cache && !cachedResponse) {
     await cache.set(cacheKey, providerResponse);
   }
-  const graderKind = testCase.grader ?? "heuristic";
+  const graderKind = evalCase.grader ?? "heuristic";
   const activeGrader = graders[graderKind] ?? graders.heuristic;
   if (!activeGrader) {
     throw new Error(`No grader registered for kind '${graderKind}'`);
@@ -1930,7 +2231,7 @@ async function runTestCase(options) {
   try {
     const gradeTimestamp = nowFn();
     grade = await activeGrader.grade({
-      testCase,
+      evalCase,
       candidate: providerResponse.text ?? "",
       target,
       provider,
@@ -1940,17 +2241,18 @@ async function runTestCase(options) {
       judgeProvider
     });
   } catch (error) {
-    return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
+    return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
   }
   const completedAt = nowFn();
   const rawRequest = {
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
-    guideline_paths: testCase.guideline_paths
+    guideline_paths: evalCase.guideline_paths,
+    system_message: promptInputs.systemMessage ?? ""
   };
   return {
-    eval_id: testCase.id,
-    conversation_id: testCase.conversation_id,
+    eval_id: evalCase.id,
+    conversation_id: evalCase.conversation_id,
     score: grade.score,
     hits: grade.hits,
     misses: grade.misses,
@@ -1964,11 +2266,11 @@ async function runTestCase(options) {
     grader_raw_request: grade.graderRawRequest
   };
 }
-function filterTestCases(testCases, evalId) {
+function filterEvalCases(evalCases, evalId) {
   if (!evalId) {
-    return testCases;
+    return evalCases;
   }
-  return testCases.filter((testCase) => testCase.id === evalId);
+  return evalCases.filter((evalCase) => evalCase.id === evalId);
 }
 function buildGraderRegistry(overrides, resolveJudgeProvider) {
   const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -1986,16 +2288,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
     llm_judge: llmJudge
   };
 }
-async function dumpPrompt(directory, testCase, promptInputs) {
+async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
-  const filename = `${timestamp}_${sanitizeFilename(testCase.id)}.json`;
+  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
   const filePath = path4.resolve(directory, filename);
   await mkdir(path4.dirname(filePath), { recursive: true });
   const payload = {
-    eval_id: testCase.id,
+    eval_id: evalCase.id,
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
-    guideline_paths: testCase.guideline_paths
+    guideline_paths: evalCase.guideline_paths
   };
   await writeFile2(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
@@ -2007,7 +2309,7 @@ function sanitizeFilename(value) {
   return sanitized.length > 0 ? sanitized : randomUUID2();
 }
 async function invokeProvider(provider, options) {
-  const { testCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
+  const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
   const controller = new AbortController();
   const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
   if (signal) {
@@ -2017,12 +2319,12 @@ async function invokeProvider(provider, options) {
     return await provider.invoke({
       prompt: promptInputs.request,
       guidelines: promptInputs.guidelines,
-      attachments: testCase.guideline_paths,
-      testCaseId: testCase.id,
+      guideline_patterns: evalCase.guideline_patterns,
+      attachments: evalCase.file_paths,
+      evalCaseId: evalCase.id,
       attempt,
       metadata: {
-        target: target.name,
-        grader: testCase.grader
+        systemPrompt: promptInputs.systemMessage ?? ""
       },
       signal: controller.signal
     });
@@ -2032,17 +2334,18 @@ async function invokeProvider(provider, options) {
     }
   }
 }
-function buildErrorResult(testCase, targetName, timestamp, error, promptInputs) {
+function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
   const message = error instanceof Error ? error.message : String(error);
   const rawRequest = {
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
-    guideline_paths: testCase.guideline_paths,
+    guideline_paths: evalCase.guideline_paths,
+    system_message: promptInputs.systemMessage ?? "",
     error: message
   };
   return {
-    eval_id: testCase.id,
-    conversation_id: testCase.conversation_id,
+    eval_id: evalCase.id,
+    conversation_id: evalCase.conversation_id,
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
@@ -2054,13 +2357,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
     raw_request: rawRequest
   };
 }
-function createCacheKey(provider, target, testCase, promptInputs) {
+function createCacheKey(provider, target, evalCase, promptInputs) {
   const hash = createHash("sha256");
   hash.update(provider.id);
   hash.update(target.name);
-  hash.update(testCase.id);
+  hash.update(evalCase.id);
   hash.update(promptInputs.request);
   hash.update(promptInputs.guidelines);
+  hash.update(promptInputs.systemMessage ?? "");
   return hash.digest("hex");
 }
 function isTimeoutLike(error) {
@@ -2088,7 +2392,9 @@ export {
   HeuristicGrader,
   QualityGrader,
   TEST_MESSAGE_ROLES,
+  buildDirectoryChain,
   buildPromptInputs,
+  buildSearchRoots,
   calculateHits,
   calculateMisses,
   createAgentKernel,
@@ -2096,6 +2402,8 @@ export {
   ensureVSCodeSubagents,
   extractAspects,
   extractCodeBlocks,
+  fileExists,
+  findGitRoot,
   getHitCount,
   isErrorLike,
   isGraderKind,
@@ -2105,12 +2413,13 @@ export {
   isTestMessage,
   isTestMessageRole,
   listTargetNames,
-  loadTestCases,
+  loadEvalCases,
   readTargetDefinitions,
   resolveAndCreateProvider,
+  resolveFileReference,
   resolveTargetDefinition,
+  runEvalCase,
   runEvaluation,
-  runTestCase,
   scoreCandidateResponse
 };
 //# sourceMappingURL=index.js.map