npm - @agentv/core - Versions diffs - 0.2.3 → 0.2.8 - Mend

@agentv/core 0.2.3 → 0.2.8

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/{chunk-5REK5RSI.js → chunk-XXNQA4EW.js} +56 -2
package/dist/chunk-XXNQA4EW.js.map +1 -0
package/dist/evaluation/validation/index.cjs +123 -12
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.d.cts +7 -2
package/dist/evaluation/validation/index.d.ts +7 -2
package/dist/evaluation/validation/index.js +97 -11
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +334 -201
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +48 -19
package/dist/index.d.ts +48 -19
package/dist/index.js +293 -206
package/dist/index.js.map +1 -1
package/package.json +6 -2
package/dist/chunk-5REK5RSI.js.map +0 -1

package/dist/index.js CHANGED Viewed

@@ -1,7 +1,11 @@
 import {
+  TARGETS_SCHEMA_V2,
+  buildDirectoryChain,
   buildSearchRoots,
+  fileExists,
+  findGitRoot,
   resolveFileReference
-} from "./chunk-5REK5RSI.js";
+} from "./chunk-XXNQA4EW.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -55,6 +59,7 @@ function getHitCount(result) {
 }
 // src/evaluation/yaml-parser.ts
+import micromatch from "micromatch";
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -64,9 +69,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
 var ANSI_YELLOW = "\x1B[33m";
 var ANSI_RESET = "\x1B[0m";
 var SCHEMA_EVAL_V2 = "agentv-eval-v2";
-function isGuidelineFile(filePath) {
+var SCHEMA_CONFIG_V2 = "agentv-config-v2";
+async function loadConfig(evalFilePath, repoRoot) {
+  const directories = buildDirectoryChain(evalFilePath, repoRoot);
+  for (const directory of directories) {
+    const configPath = path.join(directory, ".agentv", "config.yaml");
+    if (!await fileExists2(configPath)) {
+      continue;
+    }
+    try {
+      const rawConfig = await readFile(configPath, "utf8");
+      const parsed = parse(rawConfig);
+      if (!isJsonObject(parsed)) {
+        logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
+        continue;
+      }
+      const config = parsed;
+      const schema = config.$schema;
+      if (schema !== SCHEMA_CONFIG_V2) {
+        const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
+Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
+        logWarning(message);
+        continue;
+      }
+      const guidelinePatterns = config.guideline_patterns;
+      if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
+        logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
+        continue;
+      }
+      if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
+        logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
+        continue;
+      }
+      return {
+        guideline_patterns: guidelinePatterns
+      };
+    } catch (error) {
+      logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
+      continue;
+    }
+  }
+  return null;
+}
+function isGuidelineFile(filePath, patterns) {
   const normalized = filePath.split("\\").join("/");
-  return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
+  const patternsToUse = patterns ?? [];
+  return micromatch.isMatch(normalized, patternsToUse);
 }
 function extractCodeBlocks(segments) {
   const codeBlocks = [];
@@ -86,43 +134,45 @@ function extractCodeBlocks(segments) {
   }
   return codeBlocks;
 }
-async function loadTestCases(testFilePath, repoRoot, options) {
+async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
-  const absoluteTestPath = path.resolve(testFilePath);
-  if (!await fileExists(absoluteTestPath)) {
-    throw new Error(`Test file not found: ${testFilePath}`);
+  const absoluteTestPath = path.resolve(evalFilePath);
+  if (!await fileExists2(absoluteTestPath)) {
+    throw new Error(`Test file not found: ${evalFilePath}`);
   }
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
+  const config = await loadConfig(absoluteTestPath, repoRootPath);
+  const guidelinePatterns = config?.guideline_patterns;
   const rawFile = await readFile(absoluteTestPath, "utf8");
   const parsed = parse(rawFile);
   if (!isJsonObject(parsed)) {
-    throw new Error(`Invalid test file format: ${testFilePath}`);
+    throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
   const schema = suite.$schema;
   if (schema !== SCHEMA_EVAL_V2) {
-    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${testFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${testFilePath}.
+    const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
 Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     throw new Error(message);
   }
   const rawTestcases = suite.evalcases;
   if (!Array.isArray(rawTestcases)) {
-    throw new Error(`Invalid test file format: ${testFilePath} - missing 'evalcases' field`);
+    throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
   }
   const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
   const results = [];
-  for (const rawTestcase of rawTestcases) {
-    if (!isJsonObject(rawTestcase)) {
+  for (const rawEvalcase of rawTestcases) {
+    if (!isJsonObject(rawEvalcase)) {
       logWarning("Skipping invalid test case entry (expected object)");
       continue;
     }
-    const testcase = rawTestcase;
-    const id = asString(testcase.id);
-    const conversationId = asString(testcase.conversation_id);
-    const outcome = asString(testcase.outcome);
-    const inputMessagesValue = testcase.input_messages;
-    const expectedMessagesValue = testcase.expected_messages;
+    const evalcase = rawEvalcase;
+    const id = asString(evalcase.id);
+    const conversationId = asString(evalcase.conversation_id);
+    const outcome = asString(evalcase.outcome);
+    const inputMessagesValue = evalcase.input_messages;
+    const expectedMessagesValue = evalcase.expected_messages;
     if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
       logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
       continue;
@@ -135,6 +185,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
     const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
     const userMessages = inputMessages.filter((message) => message.role === "user");
+    const systemMessages = inputMessages.filter((message) => message.role === "system");
     if (assistantMessages.length === 0) {
       logWarning(`No assistant message found for test case: ${id}`);
       continue;
@@ -142,6 +193,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     if (assistantMessages.length > 1) {
       logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
     }
+    if (systemMessages.length > 1) {
+      logWarning(`Multiple system messages found for test case: ${id}, using first`);
+    }
+    let systemMessageContent;
+    if (systemMessages.length > 0) {
+      const content = systemMessages[0]?.content;
+      if (typeof content === "string") {
+        systemMessageContent = content;
+      } else if (Array.isArray(content)) {
+        const textParts = [];
+        for (const segment of content) {
+          if (isJsonObject(segment)) {
+            const value = segment.value;
+            if (typeof value === "string") {
+              textParts.push(value);
+            }
+          }
+        }
+        if (textParts.length > 0) {
+          systemMessageContent = textParts.join("\n\n");
+        }
+      }
+    }
     const userSegments = [];
     const guidelinePaths = [];
     const userTextParts = [];
@@ -173,7 +247,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
           }
           try {
             const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-            if (isGuidelineFile(displayPath)) {
+            const relativeToRepo = path.relative(repoRootPath, resolvedPath);
+            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
               guidelinePaths.push(path.resolve(resolvedPath));
               if (verbose) {
                 console.log(`  [Guideline] Found: ${displayPath}`);
@@ -183,7 +258,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
               userSegments.push({
                 type: "file",
                 path: displayPath,
-                text: fileContent
+                text: fileContent,
+                resolvedPath: path.resolve(resolvedPath)
               });
               if (verbose) {
                 console.log(`  [File] Found: ${displayPath}`);
@@ -205,16 +281,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     }
     const codeSnippets = extractCodeBlocks(userSegments);
     const assistantContent = assistantMessages[0]?.content;
-    const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
+    const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
     const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
+    const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
+    const userFilePaths = [];
+    for (const segment of userSegments) {
+      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
+        userFilePaths.push(segment.resolvedPath);
+      }
+    }
+    const allFilePaths = [
+      ...guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
+      ...userFilePaths
+    ];
     const testCase = {
       id,
       conversation_id: conversationId,
       task: userTextPrompt,
       user_segments: userSegments,
+      system_message: systemMessageContent,
       expected_assistant_raw: expectedAssistantRaw,
       guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
+      guideline_patterns: guidelinePatterns,
+      file_paths: allFilePaths,
       code_snippets: codeSnippets,
       outcome,
       grader: testCaseGrader
@@ -239,7 +328,7 @@ async function buildPromptInputs(testCase) {
   const guidelineContents = [];
   for (const rawPath of testCase.guideline_paths) {
     const absolutePath = path.resolve(rawPath);
-    if (!await fileExists(absolutePath)) {
+    if (!await fileExists2(absolutePath)) {
       logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
       continue;
     }
@@ -280,9 +369,9 @@ ${body}`);
   }
   const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { request, guidelines };
+  return { request, guidelines, systemMessage: testCase.system_message };
 }
-async function fileExists(absolutePath) {
+async function fileExists2(absolutePath) {
   try {
     await access(absolutePath, constants.F_OK);
     return true;
@@ -321,7 +410,7 @@ function cloneJsonValue(value) {
   }
   return cloneJsonObject(value);
 }
-function normalizeAssistantContent(content) {
+async function resolveAssistantContent(content, searchRoots, verbose) {
   if (typeof content === "string") {
     return content;
   }
@@ -334,12 +423,42 @@ function normalizeAssistantContent(content) {
       parts.push(entry);
       continue;
     }
-    const textValue = asString(entry["text"]);
+    if (!isJsonObject(entry)) {
+      continue;
+    }
+    const segmentType = asString(entry.type);
+    if (segmentType === "file") {
+      const rawValue = asString(entry.value);
+      if (!rawValue) {
+        continue;
+      }
+      const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+        rawValue,
+        searchRoots
+      );
+      if (!resolvedPath) {
+        const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
+        logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
+        continue;
+      }
+      try {
+        const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+        parts.push(fileContent);
+        if (verbose) {
+          console.log(`  [Expected Assistant File] Found: ${displayPath}`);
+          console.log(`    Resolved to: ${resolvedPath}`);
+        }
+      } catch (error) {
+        logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
+      }
+      continue;
+    }
+    const textValue = asString(entry.text);
     if (typeof textValue === "string") {
       parts.push(textValue);
       continue;
     }
-    const valueValue = asString(entry["value"]);
+    const valueValue = asString(entry.value);
     if (typeof valueValue === "string") {
       parts.push(valueValue);
       continue;
@@ -376,15 +495,18 @@ function buildChatPrompt(request) {
     return request.chatPrompt;
   }
   const systemSegments = [];
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    systemSegments.push(`Guidelines:
-${request.guidelines.trim()}`);
-  }
   const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
   if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
     systemSegments.push(metadataSystemPrompt.trim());
+  } else {
+    systemSegments.push(DEFAULT_SYSTEM_PROMPT);
+  }
+  if (request.guidelines && request.guidelines.trim().length > 0) {
+    systemSegments.push(`[[ ## Guidelines ## ]]
+${request.guidelines.trim()}`);
   }
-  const systemContent = systemSegments.length > 0 ? systemSegments.join("\n\n") : DEFAULT_SYSTEM_PROMPT;
+  const systemContent = systemSegments.join("\n\n");
   const userContent = request.prompt.trim();
   const prompt = [
     {
@@ -840,11 +962,9 @@ function isLikelyEnvReference(value) {
 }
 // src/evaluation/providers/vscode.ts
-import { mkdtemp, readFile as readFile2, rm, writeFile } from "node:fs/promises";
-import { tmpdir } from "node:os";
+import { readFile as readFile2 } from "node:fs/promises";
 import path2 from "node:path";
 import { dispatchAgentSession, getSubagentRoot, provisionSubagents } from "subagent";
-var PROMPT_FILE_PREFIX = "bbeval-vscode-";
 var VSCodeProvider = class {
   id;
   kind;
@@ -861,128 +981,89 @@ var VSCodeProvider = class {
       throw new Error("VS Code provider request was aborted before dispatch");
     }
     const attachments = normalizeAttachments(request.attachments);
-    const promptContent = buildPromptDocument(request, attachments);
-    const directory = await mkdtemp(path2.join(tmpdir(), PROMPT_FILE_PREFIX));
-    const promptPath = path2.join(directory, `${request.testCaseId ?? "request"}.prompt.md`);
-    try {
-      await writeFile(promptPath, promptContent, "utf8");
-      const session = await dispatchAgentSession({
-        userQuery: composeUserQuery(request),
-        promptFile: promptPath,
-        extraAttachments: attachments,
-        wait: this.config.waitForResponse,
-        dryRun: this.config.dryRun,
-        vscodeCmd: this.config.command,
-        subagentRoot: this.config.subagentRoot,
-        workspaceTemplate: this.config.workspaceTemplate,
-        silent: true
-      });
-      if (session.exitCode !== 0 || !session.responseFile) {
-        const failure = session.error ?? "VS Code subagent did not produce a response";
-        throw new Error(failure);
-      }
-      if (this.config.dryRun) {
-        return {
-          text: "",
-          raw: {
-            session,
-            promptFile: promptPath,
-            attachments
-          }
-        };
-      }
-      const responseText = await readFile2(session.responseFile, "utf8");
+    const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
+    const session = await dispatchAgentSession({
+      userQuery: promptContent,
+      // Use full prompt content instead of just request.prompt
+      extraAttachments: attachments,
+      wait: this.config.waitForResponse,
+      dryRun: this.config.dryRun,
+      vscodeCmd: this.config.command,
+      subagentRoot: this.config.subagentRoot,
+      workspaceTemplate: this.config.workspaceTemplate,
+      silent: true
+    });
+    if (session.exitCode !== 0 || !session.responseFile) {
+      const failure = session.error ?? "VS Code subagent did not produce a response";
+      throw new Error(failure);
+    }
+    if (this.config.dryRun) {
       return {
-        text: responseText,
+        text: "",
         raw: {
           session,
-          promptFile: promptPath,
           attachments
         }
       };
-    } finally {
-      await rm(directory, { recursive: true, force: true });
     }
+    const responseText = await readFile2(session.responseFile, "utf8");
+    return {
+      text: responseText,
+      raw: {
+        session,
+        attachments
+      }
+    };
   }
 };
-function buildPromptDocument(request, attachments) {
+function buildPromptDocument(request, attachments, guidelinePatterns) {
   const parts = [];
-  const instructionFiles = collectInstructionFiles(attachments);
-  if (instructionFiles.length > 0) {
-    parts.push(buildMandatoryPrereadBlock(instructionFiles));
-  }
-  parts.push(`# BbEval Request`);
-  if (request.testCaseId) {
-    parts.push(`- Test Case: ${request.testCaseId}`);
-  }
-  if (request.metadata?.target) {
-    parts.push(`- Target: ${String(request.metadata.target)}`);
-  }
-  parts.push("\n## Task\n", request.prompt.trim());
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    parts.push("\n## Guidelines\n", request.guidelines.trim());
-  }
-  if (attachments && attachments.length > 0) {
-    const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
-    parts.push("\n## Attachments\n", attachmentList);
+  const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
+  if (guidelineFiles.length > 0) {
+    parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
   }
+  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
   return parts.join("\n").trim();
 }
-function buildMandatoryPrereadBlock(instructionFiles) {
-  if (instructionFiles.length === 0) {
+function buildMandatoryPrereadBlock(guidelineFiles) {
+  if (guidelineFiles.length === 0) {
     return "";
   }
   const fileList = [];
-  const tokenList = [];
   let counter = 0;
-  for (const absolutePath of instructionFiles) {
+  for (const absolutePath of guidelineFiles) {
     counter += 1;
     const fileName = path2.basename(absolutePath);
     const fileUri = pathToFileUri(absolutePath);
-    fileList.push(`[${fileName}](${fileUri})`);
-    tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
+    fileList.push(`* [${fileName}](${fileUri})`);
   }
-  const filesText = fileList.join(", ");
-  const tokensText = tokenList.join("\n");
+  const filesText = fileList.join("\n");
   const instruction = [
-    `Read all instruction files: ${filesText}.`,
-    `After reading each file, compute its SHA256 hash using this PowerShell command:`,
-    "`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
-    `Then include, at the top of your reply, these exact tokens on separate lines:
+    `Read all guideline files:
+${filesText}.
 `,
-    tokensText,
-    `
-Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
     `If any file is missing, fail with ERROR: missing-file <filename> and stop.
 `,
-    `Then fetch all documentation required by the instructions before proceeding with your task.`
-  ].join(" ");
-  return `[[ ## mandatory_pre_read ## ]]
-${instruction}
-`;
+    `Then apply system_instructions on the user query below.`
+  ].join("");
+  return `${instruction}`;
 }
-function collectInstructionFiles(attachments) {
+function collectGuidelineFiles(attachments, guidelinePatterns) {
   if (!attachments || attachments.length === 0) {
     return [];
   }
   const unique = /* @__PURE__ */ new Map();
   for (const attachment of attachments) {
-    if (!isInstructionPath(attachment)) {
-      continue;
-    }
     const absolutePath = path2.resolve(attachment);
-    if (!unique.has(absolutePath)) {
-      unique.set(absolutePath, absolutePath);
+    const normalized = absolutePath.split(path2.sep).join("/");
+    if (isGuidelineFile(normalized, guidelinePatterns)) {
+      if (!unique.has(absolutePath)) {
+        unique.set(absolutePath, absolutePath);
+      }
     }
   }
   return Array.from(unique.values());
 }
-function isInstructionPath(filePath) {
-  const normalized = filePath.split(path2.sep).join("/");
-  return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
-}
 function pathToFileUri(filePath) {
   const absolutePath = path2.isAbsolute(filePath) ? filePath : path2.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
@@ -991,14 +1072,6 @@ function pathToFileUri(filePath) {
   }
   return `file://${normalizedPath}`;
 }
-function composeUserQuery(request) {
-  const segments = [];
-  segments.push(request.prompt.trim());
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    segments.push("\nGuidelines:\n", request.guidelines.trim());
-  }
-  return segments.join("\n").trim();
-}
 function normalizeAttachments(attachments) {
   if (!attachments || attachments.length === 0) {
     return void 0;
@@ -1056,18 +1129,24 @@ import { parse as parse2 } from "yaml";
 function isRecord(value) {
   return typeof value === "object" && value !== null && !Array.isArray(value);
 }
-function checkVersion(parsed, absolutePath) {
-  const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
-  if (version === void 0) {
+function checkSchema(parsed, absolutePath) {
+  const schema = parsed.$schema;
+  if (schema === void 0) {
     throw new Error(
-      `Missing version field in targets.yaml at ${absolutePath}.
-Please add 'version: 2.0' at the top of the file.`
+      `Missing $schema field in targets.yaml at ${absolutePath}.
+Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
     );
   }
-  if (version < 2) {
+  if (typeof schema !== "string") {
     throw new Error(
-      `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
-Please update to version 2.0 format with 'targets' array.`
+      `Invalid $schema field in targets.yaml at ${absolutePath}.
+Expected a string value '${TARGETS_SCHEMA_V2}'.`
+    );
+  }
+  if (schema !== TARGETS_SCHEMA_V2) {
+    throw new Error(
+      `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
+Expected '${TARGETS_SCHEMA_V2}'.`
     );
   }
 }
@@ -1099,7 +1178,7 @@ function assertTargetDefinition(value, index, filePath) {
     judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
   };
 }
-async function fileExists2(filePath) {
+async function fileExists3(filePath) {
   try {
     await access2(filePath, constants2.F_OK);
     return true;
@@ -1109,15 +1188,15 @@ async function fileExists2(filePath) {
 }
 async function readTargetDefinitions(filePath) {
   const absolutePath = path3.resolve(filePath);
-  if (!await fileExists2(absolutePath)) {
+  if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
   const raw = await readFile3(absolutePath, "utf8");
   const parsed = parse2(raw);
   if (!isRecord(parsed)) {
-    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
+    throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
   }
-  checkVersion(parsed, absolutePath);
+  checkSchema(parsed, absolutePath);
   const targets = extractTargetsArray(parsed, absolutePath);
   const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
   return definitions;
@@ -1339,7 +1418,7 @@ import { randomUUID } from "node:crypto";
 var HeuristicGrader = class {
   kind = "heuristic";
   grade(context) {
-    const expectedAspects = extractAspects(context.testCase.expected_assistant_raw);
+    const expectedAspects = extractAspects(context.evalCase.expected_assistant_raw);
     const result = scoreCandidateResponse(context.candidate, expectedAspects);
     const misses = [...result.misses];
     if (expectedAspects.length === 0 && isErrorLike(context.candidate)) {
@@ -1372,14 +1451,14 @@ var QualityGrader = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
-    const prompt = buildQualityPrompt(context.testCase, context.candidate);
+    const prompt = buildQualityPrompt(context.evalCase, context.candidate);
     const metadata = {
       systemPrompt: QUALITY_SYSTEM_PROMPT
     };
     const response = await judgeProvider.invoke({
       prompt,
       metadata,
-      testCaseId: context.testCase.id,
+      evalCaseId: context.evalCase.id,
       attempt: context.attempt,
       maxOutputTokens: this.maxOutputTokens,
       temperature: this.temperature
@@ -1425,16 +1504,16 @@ var QUALITY_SYSTEM_PROMPT = [
 function buildQualityPrompt(testCase, candidate) {
   const parts = [
     "[[ ## expected_outcome ## ]]",
-    testCase.outcome,
+    testCase.outcome.trim(),
     "",
     "[[ ## request ## ]]",
-    testCase.task,
+    testCase.task.trim(),
     "",
     "[[ ## reference_answer ## ]]",
-    testCase.expected_assistant_raw,
+    testCase.expected_assistant_raw.trim(),
     "",
     "[[ ## generated_answer ## ]]",
-    candidate,
+    candidate.trim(),
     "",
     "Respond with a single JSON object matching the schema described in the system prompt."
   ];
@@ -1678,17 +1757,17 @@ async function runEvaluation(options) {
     cache,
     useCache,
     now,
-    testId,
+    evalId,
     verbose,
     onResult,
     onProgress
   } = options;
-  const load = loadTestCases;
-  const testCases = await load(testFilePath, repoRoot, { verbose });
-  const filteredTestCases = filterTestCases(testCases, testId);
-  if (filteredTestCases.length === 0) {
-    if (testId) {
-      throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
+  const load = loadEvalCases;
+  const evalCases = await load(testFilePath, repoRoot, { verbose });
+  const filteredEvalCases = filterEvalCases(evalCases, evalId);
+  if (filteredEvalCases.length === 0) {
+    if (evalId) {
+      throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
     }
     return [];
   }
@@ -1732,11 +1811,11 @@ async function runEvaluation(options) {
   };
   const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
   const primaryProvider = getOrCreateProvider(target);
-  if (onProgress && filteredTestCases.length > 0) {
-    for (let i = 0; i < filteredTestCases.length; i++) {
+  if (onProgress && filteredEvalCases.length > 0) {
+    for (let i = 0; i < filteredEvalCases.length; i++) {
       await onProgress({
         workerId: i + 1,
-        testId: filteredTestCases[i].id,
+        evalId: filteredEvalCases[i].id,
         status: "pending"
       });
     }
@@ -1744,23 +1823,23 @@ async function runEvaluation(options) {
   const workers = options.maxConcurrency ?? target.workers ?? 1;
   const limit = pLimit(workers);
   let nextWorkerId = 1;
-  const workerIdByTestId = /* @__PURE__ */ new Map();
-  const promises = filteredTestCases.map(
-    (testCase) => limit(async () => {
+  const workerIdByEvalId = /* @__PURE__ */ new Map();
+  const promises = filteredEvalCases.map(
+    (evalCase) => limit(async () => {
       const workerId = nextWorkerId++;
-      workerIdByTestId.set(testCase.id, workerId);
+      workerIdByEvalId.set(evalCase.id, workerId);
       if (onProgress) {
         await onProgress({
           workerId,
-          testId: testCase.id,
+          evalId: evalCase.id,
           status: "running",
           startedAt: Date.now()
         });
       }
       try {
         const judgeProvider = await resolveJudgeProvider(target);
-        const result = await runTestCase({
-          testCase,
+        const result = await runEvalCase({
+          evalCase,
           provider: primaryProvider,
           target,
           graders: graderRegistry,
@@ -1775,7 +1854,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            testId: testCase.id,
+            evalId: evalCase.id,
             status: "completed",
             startedAt: 0,
             // Not used for completed status
@@ -1790,7 +1869,7 @@ async function runEvaluation(options) {
         if (onProgress) {
           await onProgress({
             workerId,
-            testId: testCase.id,
+            evalId: evalCase.id,
             status: "failed",
             completedAt: Date.now(),
             error: error instanceof Error ? error.message : String(error)
@@ -1807,10 +1886,10 @@ async function runEvaluation(options) {
     if (outcome.status === "fulfilled") {
       results.push(outcome.value);
     } else {
-      const testCase = filteredTestCases[i];
-      const promptInputs = await buildPromptInputs(testCase);
+      const evalCase = filteredEvalCases[i];
+      const promptInputs = await buildPromptInputs(evalCase);
       const errorResult = buildErrorResult(
-        testCase,
+        evalCase,
         target.name,
         (now ?? (() => /* @__PURE__ */ new Date()))(),
         outcome.reason,
@@ -1824,9 +1903,9 @@ async function runEvaluation(options) {
   }
   return results;
 }
-async function runTestCase(options) {
+async function runEvalCase(options) {
   const {
-    testCase,
+    evalCase,
     provider,
     target,
     graders,
@@ -1839,11 +1918,11 @@ async function runTestCase(options) {
     signal,
     judgeProvider
   } = options;
-  const promptInputs = await buildPromptInputs(testCase);
+  const promptInputs = await buildPromptInputs(evalCase);
   if (promptDumpDir) {
-    await dumpPrompt(promptDumpDir, testCase, promptInputs);
+    await dumpPrompt(promptDumpDir, evalCase, promptInputs);
   }
-  const cacheKey = useCache ? createCacheKey(provider, target, testCase, promptInputs) : void 0;
+  const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
   let cachedResponse;
   if (cacheKey && cache) {
     cachedResponse = await cache.get(cacheKey);
@@ -1856,7 +1935,7 @@ async function runTestCase(options) {
   while (!providerResponse && attempt < attemptBudget) {
     try {
       providerResponse = await invokeProvider(provider, {
-        testCase,
+        evalCase,
         target,
         promptInputs,
         attempt,
@@ -1869,12 +1948,12 @@ async function runTestCase(options) {
         attempt += 1;
         continue;
       }
-      return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
+      return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
     }
   }
   if (!providerResponse) {
     return buildErrorResult(
-      testCase,
+      evalCase,
       target.name,
       nowFn(),
       lastError ?? new Error("Provider did not return a response"),
@@ -1884,7 +1963,7 @@ async function runTestCase(options) {
   if (cacheKey && cache && !cachedResponse) {
     await cache.set(cacheKey, providerResponse);
   }
-  const graderKind = testCase.grader ?? "heuristic";
+  const graderKind = evalCase.grader ?? "heuristic";
   const activeGrader = graders[graderKind] ?? graders.heuristic;
   if (!activeGrader) {
     throw new Error(`No grader registered for kind '${graderKind}'`);
@@ -1893,7 +1972,7 @@ async function runTestCase(options) {
   try {
     const gradeTimestamp = nowFn();
     grade = await activeGrader.grade({
-      testCase,
+      evalCase,
       candidate: providerResponse.text ?? "",
       target,
       provider,
@@ -1903,17 +1982,18 @@ async function runTestCase(options) {
       judgeProvider
     });
   } catch (error) {
-    return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
+    return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
   }
   const completedAt = nowFn();
   const rawRequest = {
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
-    guideline_paths: testCase.guideline_paths
+    guideline_paths: evalCase.guideline_paths,
+    system_message: promptInputs.systemMessage ?? ""
   };
   return {
-    test_id: testCase.id,
-    conversation_id: testCase.conversation_id,
+    eval_id: evalCase.id,
+    conversation_id: evalCase.conversation_id,
     score: grade.score,
     hits: grade.hits,
     misses: grade.misses,
@@ -1927,11 +2007,11 @@ async function runTestCase(options) {
     grader_raw_request: grade.graderRawRequest
   };
 }
-function filterTestCases(testCases, testId) {
-  if (!testId) {
-    return testCases;
+function filterEvalCases(evalCases, evalId) {
+  if (!evalId) {
+    return evalCases;
   }
-  return testCases.filter((testCase) => testCase.id === testId);
+  return evalCases.filter((evalCase) => evalCase.id === evalId);
 }
 function buildGraderRegistry(overrides, resolveJudgeProvider) {
   const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -1949,16 +2029,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
     llm_judge: llmJudge
   };
 }
-async function dumpPrompt(directory, testCase, promptInputs) {
+async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
-  const filename = `${timestamp}_${sanitizeFilename(testCase.id)}.json`;
+  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
   const filePath = path4.resolve(directory, filename);
   await mkdir(path4.dirname(filePath), { recursive: true });
   const payload = {
-    test_id: testCase.id,
+    eval_id: evalCase.id,
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
-    guideline_paths: testCase.guideline_paths
+    guideline_paths: evalCase.guideline_paths
   };
   await writeFile2(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
@@ -1970,7 +2050,7 @@ function sanitizeFilename(value) {
   return sanitized.length > 0 ? sanitized : randomUUID2();
 }
 async function invokeProvider(provider, options) {
-  const { testCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
+  const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
   const controller = new AbortController();
   const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
   if (signal) {
@@ -1980,12 +2060,12 @@ async function invokeProvider(provider, options) {
     return await provider.invoke({
       prompt: promptInputs.request,
       guidelines: promptInputs.guidelines,
-      attachments: testCase.guideline_paths,
-      testCaseId: testCase.id,
+      guideline_patterns: evalCase.guideline_patterns,
+      attachments: evalCase.file_paths,
+      evalCaseId: evalCase.id,
       attempt,
       metadata: {
-        target: target.name,
-        grader: testCase.grader
+        systemPrompt: promptInputs.systemMessage ?? ""
       },
       signal: controller.signal
     });
@@ -1995,17 +2075,18 @@ async function invokeProvider(provider, options) {
     }
   }
 }
-function buildErrorResult(testCase, targetName, timestamp, error, promptInputs) {
+function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
   const message = error instanceof Error ? error.message : String(error);
   const rawRequest = {
     request: promptInputs.request,
     guidelines: promptInputs.guidelines,
-    guideline_paths: testCase.guideline_paths,
+    guideline_paths: evalCase.guideline_paths,
+    system_message: promptInputs.systemMessage ?? "",
     error: message
   };
   return {
-    test_id: testCase.id,
-    conversation_id: testCase.conversation_id,
+    eval_id: evalCase.id,
+    conversation_id: evalCase.conversation_id,
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
@@ -2017,13 +2098,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
     raw_request: rawRequest
   };
 }
-function createCacheKey(provider, target, testCase, promptInputs) {
+function createCacheKey(provider, target, evalCase, promptInputs) {
   const hash = createHash("sha256");
   hash.update(provider.id);
   hash.update(target.name);
-  hash.update(testCase.id);
+  hash.update(evalCase.id);
   hash.update(promptInputs.request);
   hash.update(promptInputs.guidelines);
+  hash.update(promptInputs.systemMessage ?? "");
   return hash.digest("hex");
 }
 function isTimeoutLike(error) {
@@ -2051,7 +2133,9 @@ export {
   HeuristicGrader,
   QualityGrader,
   TEST_MESSAGE_ROLES,
+  buildDirectoryChain,
   buildPromptInputs,
+  buildSearchRoots,
   calculateHits,
   calculateMisses,
   createAgentKernel,
@@ -2059,6 +2143,8 @@ export {
   ensureVSCodeSubagents,
   extractAspects,
   extractCodeBlocks,
+  fileExists,
+  findGitRoot,
   getHitCount,
   isErrorLike,
   isGraderKind,
@@ -2068,12 +2154,13 @@ export {
   isTestMessage,
   isTestMessageRole,
   listTargetNames,
-  loadTestCases,
+  loadEvalCases,
   readTargetDefinitions,
   resolveAndCreateProvider,
+  resolveFileReference,
   resolveTargetDefinition,
+  runEvalCase,
   runEvaluation,
-  runTestCase,
   scoreCandidateResponse
 };
 //# sourceMappingURL=index.js.map