npm - @agentv/core - Versions diffs - 0.6.1 → 0.7.2 - Mend

@agentv/core 0.6.1 → 0.7.2

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/{chunk-OW3SHBIJ.js → chunk-UQLHF3T7.js} +12 -3
package/dist/chunk-UQLHF3T7.js.map +1 -0
package/dist/evaluation/validation/index.cjs +143 -2
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.d.cts +1 -1
package/dist/evaluation/validation/index.d.ts +1 -1
package/dist/evaluation/validation/index.js +143 -2
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +277 -328
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +11 -13
package/dist/index.d.ts +11 -13
package/dist/index.js +267 -325
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-OW3SHBIJ.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -299,6 +299,87 @@ function extractCodeBlocks(segments) {
   }
   return codeBlocks;
 }
+async function processMessages(options) {
+  const {
+    messages,
+    searchRoots,
+    repoRootPath,
+    guidelinePatterns,
+    guidelinePaths,
+    textParts,
+    messageType,
+    verbose
+  } = options;
+  const segments = [];
+  for (const message of messages) {
+    const content = message.content;
+    if (typeof content === "string") {
+      segments.push({ type: "text", value: content });
+      if (textParts) {
+        textParts.push(content);
+      }
+      continue;
+    }
+    for (const rawSegment of content) {
+      if (!isJsonObject(rawSegment)) {
+        continue;
+      }
+      const segmentType = asString(rawSegment.type);
+      if (segmentType === "file") {
+        const rawValue = asString(rawSegment.value);
+        if (!rawValue) {
+          continue;
+        }
+        const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+          rawValue,
+          searchRoots
+        );
+        if (!resolvedPath) {
+          const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
+          const context = messageType === "input" ? "" : " in expected_messages";
+          logWarning(`File not found${context}: ${displayPath}`, attempts);
+          continue;
+        }
+        try {
+          const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          if (messageType === "input" && guidelinePatterns && guidelinePaths) {
+            const relativeToRepo = import_node_path2.default.relative(repoRootPath, resolvedPath);
+            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
+              guidelinePaths.push(import_node_path2.default.resolve(resolvedPath));
+              if (verbose) {
+                console.log(`  [Guideline] Found: ${displayPath}`);
+                console.log(`    Resolved to: ${resolvedPath}`);
+              }
+              continue;
+            }
+          }
+          segments.push({
+            type: "file",
+            path: displayPath,
+            text: fileContent,
+            resolvedPath: import_node_path2.default.resolve(resolvedPath)
+          });
+          if (verbose) {
+            const label = messageType === "input" ? "[File]" : "[Expected Output File]";
+            console.log(`  ${label} Found: ${displayPath}`);
+            console.log(`    Resolved to: ${resolvedPath}`);
+          }
+        } catch (error) {
+          const context = messageType === "input" ? "" : " expected output";
+          logWarning(`Could not read${context} file ${resolvedPath}: ${error.message}`);
+        }
+        continue;
+      }
+      const clonedSegment = cloneJsonObject(rawSegment);
+      segments.push(clonedSegment);
+      const inlineValue = clonedSegment.value;
+      if (typeof inlineValue === "string" && textParts) {
+        textParts.push(inlineValue);
+      }
+    }
+  }
+  return segments;
+}
 async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
   const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
@@ -384,77 +465,34 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
         }
       }
     }
-    const userSegments = [];
     const guidelinePaths = [];
-    const userTextParts = [];
-    for (const userMessage of userMessages) {
-      const content = userMessage.content;
-      if (typeof content === "string") {
-        userSegments.push({ type: "text", value: content });
-        userTextParts.push(content);
-        continue;
-      }
-      for (const rawSegment of content) {
-        if (!isJsonObject(rawSegment)) {
-          continue;
-        }
-        const segmentType = asString(rawSegment.type);
-        if (segmentType === "file") {
-          const rawValue = asString(rawSegment.value);
-          if (!rawValue) {
-            continue;
-          }
-          const { displayPath, resolvedPath, attempted } = await resolveFileReference(
-            rawValue,
-            searchRoots
-          );
-          if (!resolvedPath) {
-            const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
-            logWarning(`File not found: ${displayPath}`, attempts);
-            continue;
-          }
-          try {
-            const fileContent = (await (0, import_promises2.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-            const relativeToRepo = import_node_path2.default.relative(repoRootPath, resolvedPath);
-            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(import_node_path2.default.resolve(resolvedPath));
-              if (verbose) {
-                console.log(`  [Guideline] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-            } else {
-              userSegments.push({
-                type: "file",
-                path: displayPath,
-                text: fileContent,
-                resolvedPath: import_node_path2.default.resolve(resolvedPath)
-              });
-              if (verbose) {
-                console.log(`  [File] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-            }
-          } catch (error) {
-            logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
-          }
-          continue;
-        }
-        const clonedSegment = cloneJsonObject(rawSegment);
-        userSegments.push(clonedSegment);
-        const inlineValue = clonedSegment.value;
-        if (typeof inlineValue === "string") {
-          userTextParts.push(inlineValue);
-        }
-      }
-    }
-    const codeSnippets = extractCodeBlocks(userSegments);
+    const inputTextParts = [];
+    const inputSegments = await processMessages({
+      messages: userMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    });
+    const outputSegments = await processMessages({
+      messages: assistantMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      messageType: "output",
+      verbose
+    });
+    const codeSnippets = extractCodeBlocks(inputSegments);
     const assistantContent = assistantMessages[0]?.content;
-    const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
-    const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
+    const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
+    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
     const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
     const userFilePaths = [];
-    for (const segment of userSegments) {
+    for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
         userFilePaths.push(segment.resolvedPath);
       }
@@ -467,15 +505,16 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       id,
       dataset: datasetName,
       conversation_id: conversationId,
-      task: userTextPrompt,
-      user_segments: userSegments,
+      question,
+      input_segments: inputSegments,
+      output_segments: outputSegments,
       system_message: systemMessageContent,
-      expected_assistant_raw: expectedAssistantRaw,
+      reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       code_snippets: codeSnippets,
-      outcome,
+      expected_outcome: outcome,
       evaluator: testCaseEvaluatorKind,
       evaluators
     };
@@ -511,36 +550,36 @@ ${content}`);
       logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
     }
   }
-  const requestParts = [];
-  for (const segment of testCase.user_segments) {
+  const questionParts = [];
+  for (const segment of testCase.input_segments) {
     const typeValue = segment.type;
     if (typeof typeValue === "string" && typeValue === "file") {
       const pathValue = segment.path;
       const textValue = segment.text;
       const label = typeof pathValue === "string" ? pathValue : "file";
       const body = typeof textValue === "string" ? textValue : "";
-      requestParts.push(`=== ${label} ===
+      questionParts.push(`=== ${label} ===
 ${body}`);
       continue;
     }
     if (typeof typeValue === "string" && typeValue === "text") {
       const value = segment.value;
       if (typeof value === "string") {
-        requestParts.push(value);
+        questionParts.push(value);
       }
       continue;
     }
     const genericValue = segment.value;
     if (typeof genericValue === "string") {
-      requestParts.push(genericValue);
+      questionParts.push(genericValue);
     }
   }
   if (testCase.code_snippets.length > 0) {
-    requestParts.push(testCase.code_snippets.join("\n"));
+    questionParts.push(testCase.code_snippets.join("\n"));
   }
-  const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
+  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { request, guidelines, systemMessage: testCase.system_message };
+  return { question, guidelines, systemMessage: testCase.system_message };
 }
 async function fileExists2(absolutePath) {
   try {
@@ -752,7 +791,7 @@ function buildChatPrompt(request) {
 ${request.guidelines.trim()}`);
   }
   const systemContent = systemSegments.join("\n\n");
-  const userContent = request.prompt.trim();
+  const userContent = request.question.trim();
   const prompt = [
     {
       role: "system",
@@ -1050,7 +1089,7 @@ var CliProvider = class {
       healthcheck.commandTemplate,
       buildTemplateValues(
         {
-          prompt: "",
+          question: "",
           guidelines: "",
           inputFiles: [],
           evalCaseId: "",
@@ -1077,7 +1116,7 @@ var CliProvider = class {
 function buildTemplateValues(request, config) {
   const inputFiles = normalizeInputFiles(request.inputFiles);
   return {
-    PROMPT: shellEscape(request.prompt ?? ""),
+    PROMPT: shellEscape(request.question ?? ""),
     GUIDELINES: shellEscape(request.guidelines ?? ""),
     EVAL_ID: shellEscape(request.evalCaseId ?? ""),
     ATTEMPT: shellEscape(String(request.attempt ?? 0)),
@@ -1141,6 +1180,59 @@ var import_node_os = require("os");
 var import_node_path5 = __toESM(require("path"), 1);
 var import_node_util2 = require("util");
+// src/evaluation/providers/codex-log-tracker.ts
+var GLOBAL_LOGS_KEY = Symbol.for("agentv.codexLogs");
+var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.codexLogSubscribers");
+function getCodexLogStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_LOGS_KEY];
+  if (existing) {
+    return existing;
+  }
+  const created = [];
+  globalObject[GLOBAL_LOGS_KEY] = created;
+  return created;
+}
+function getSubscriberStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY];
+  if (existing) {
+    return existing;
+  }
+  const created = /* @__PURE__ */ new Set();
+  globalObject[GLOBAL_SUBSCRIBERS_KEY] = created;
+  return created;
+}
+function notifySubscribers(entry) {
+  const subscribers = Array.from(getSubscriberStore());
+  for (const listener of subscribers) {
+    try {
+      listener(entry);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Codex log subscriber failed: ${message}`);
+    }
+  }
+}
+function recordCodexLogEntry(entry) {
+  getCodexLogStore().push(entry);
+  notifySubscribers(entry);
+}
+function consumeCodexLogEntries() {
+  const store = getCodexLogStore();
+  if (store.length === 0) {
+    return [];
+  }
+  return store.splice(0, store.length);
+}
+function subscribeToCodexLogEntries(listener) {
+  const store = getSubscriberStore();
+  store.add(listener);
+  return () => {
+    store.delete(listener);
+  };
+}
 // src/evaluation/providers/preread.ts
 var import_node_path4 = __toESM(require("path"), 1);
 function buildPromptDocument(request, inputFiles, options) {
@@ -1158,7 +1250,7 @@ function buildPromptDocument(request, inputFiles, options) {
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
   }
-  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
+  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
   return parts.join("\n").trim();
 }
 function normalizeInputFiles2(inputFiles) {
@@ -1242,64 +1334,10 @@ function pathToFileUri(filePath) {
   return `file://${normalizedPath}`;
 }
-// src/evaluation/providers/codex-log-tracker.ts
-var GLOBAL_LOGS_KEY = Symbol.for("agentv.codexLogs");
-var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.codexLogSubscribers");
-function getCodexLogStore() {
-  const globalObject = globalThis;
-  const existing = globalObject[GLOBAL_LOGS_KEY];
-  if (existing) {
-    return existing;
-  }
-  const created = [];
-  globalObject[GLOBAL_LOGS_KEY] = created;
-  return created;
-}
-function getSubscriberStore() {
-  const globalObject = globalThis;
-  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY];
-  if (existing) {
-    return existing;
-  }
-  const created = /* @__PURE__ */ new Set();
-  globalObject[GLOBAL_SUBSCRIBERS_KEY] = created;
-  return created;
-}
-function notifySubscribers(entry) {
-  const subscribers = Array.from(getSubscriberStore());
-  for (const listener of subscribers) {
-    try {
-      listener(entry);
-    } catch (error) {
-      const message = error instanceof Error ? error.message : String(error);
-      console.warn(`Codex log subscriber failed: ${message}`);
-    }
-  }
-}
-function recordCodexLogEntry(entry) {
-  getCodexLogStore().push(entry);
-  notifySubscribers(entry);
-}
-function consumeCodexLogEntries() {
-  const store = getCodexLogStore();
-  if (store.length === 0) {
-    return [];
-  }
-  return store.splice(0, store.length);
-}
-function subscribeToCodexLogEntries(listener) {
-  const store = getSubscriberStore();
-  store.add(listener);
-  return () => {
-    store.delete(listener);
-  };
-}
 // src/evaluation/providers/codex.ts
 var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exec);
 var WORKSPACE_PREFIX = "agentv-codex-";
 var PROMPT_FILENAME = "prompt.md";
-var FILES_DIR = "files";
 var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
 var CodexProvider = class {
   id;
@@ -1322,21 +1360,10 @@ var CodexProvider = class {
     }
     await this.ensureEnvironmentReady();
     const inputFiles = normalizeInputFiles2(request.inputFiles);
-    const originalGuidelines = new Set(
-      collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => import_node_path5.default.resolve(file))
-    );
     const workspaceRoot = await this.createWorkspace();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
-      const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
-        inputFiles,
-        workspaceRoot,
-        originalGuidelines
-      );
-      const promptContent = buildPromptDocument(request, mirroredInputFiles, {
-        guidelinePatterns: request.guideline_patterns,
-        guidelineOverrides: guidelineMirrors
-      });
+      const promptContent = buildPromptDocument(request, inputFiles);
       const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
       await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
@@ -1365,7 +1392,7 @@ var CodexProvider = class {
           executable: this.resolvedExecutable ?? this.config.executable,
           promptFile,
           workspace: workspaceRoot,
-          inputFiles: mirroredInputFiles,
+          inputFiles,
           logFile: logger?.filePath
         }
       };
@@ -1420,37 +1447,6 @@ var CodexProvider = class {
       throw error;
     }
   }
-  async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
-    if (!inputFiles || inputFiles.length === 0) {
-      return {
-        mirroredInputFiles: void 0,
-        guidelineMirrors: /* @__PURE__ */ new Set()
-      };
-    }
-    const filesRoot = import_node_path5.default.join(workspaceRoot, FILES_DIR);
-    await (0, import_promises3.mkdir)(filesRoot, { recursive: true });
-    const mirrored = [];
-    const guidelineMirrors = /* @__PURE__ */ new Set();
-    const nameCounts = /* @__PURE__ */ new Map();
-    for (const inputFile of inputFiles) {
-      const absoluteSource = import_node_path5.default.resolve(inputFile);
-      const baseName = import_node_path5.default.basename(absoluteSource);
-      const count = nameCounts.get(baseName) ?? 0;
-      nameCounts.set(baseName, count + 1);
-      const finalName = count === 0 ? baseName : `${baseName}.${count}`;
-      const destination = import_node_path5.default.join(filesRoot, finalName);
-      await (0, import_promises3.copyFile)(absoluteSource, destination);
-      const resolvedDestination = import_node_path5.default.resolve(destination);
-      mirrored.push(resolvedDestination);
-      if (guidelineOriginals.has(absoluteSource)) {
-        guidelineMirrors.add(resolvedDestination);
-      }
-    }
-    return {
-      mirroredInputFiles: mirrored,
-      guidelineMirrors
-    };
-  }
   async createWorkspace() {
     return await (0, import_promises3.mkdtemp)(import_node_path5.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
   }
@@ -2028,7 +2024,7 @@ var MockProvider = class {
     return {
       text: this.cannedResponse,
       raw: {
-        prompt: request.prompt,
+        question: request.question,
         guidelines: request.guidelines
       }
     };
@@ -2421,23 +2417,25 @@ function resolveOptionalString(source, env, description, options) {
   if (trimmed.length === 0) {
     return void 0;
   }
-  const envValue = env[trimmed];
-  if (envValue !== void 0) {
-    if (envValue.trim().length === 0) {
-      throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
+  const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
+  if (envVarMatch) {
+    const varName = envVarMatch[1];
+    const envValue = env[varName];
+    if (envValue !== void 0) {
+      if (envValue.trim().length === 0) {
+        throw new Error(`Environment variable '${varName}' for ${description} is empty`);
+      }
+      return envValue;
     }
-    return envValue;
-  }
-  const allowLiteral = options?.allowLiteral ?? false;
-  const optionalEnv = options?.optionalEnv ?? false;
-  const looksLikeEnv = isLikelyEnvReference(trimmed);
-  if (looksLikeEnv) {
+    const optionalEnv = options?.optionalEnv ?? false;
     if (optionalEnv) {
       return void 0;
     }
-    if (!allowLiteral) {
-      throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
-    }
+    throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
+  }
+  const allowLiteral = options?.allowLiteral ?? false;
+  if (!allowLiteral) {
+    throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
   }
   return trimmed;
 }
@@ -2484,9 +2482,6 @@ function resolveOptionalBoolean(source) {
   }
   throw new Error("expected boolean value");
 }
-function isLikelyEnvReference(value) {
-  return /^[A-Z0-9_]+$/.test(value);
-}
 function resolveOptionalStringArray(source, env, description) {
   if (source === void 0 || source === null) {
     return void 0;
@@ -2507,21 +2502,25 @@ function resolveOptionalStringArray(source, env, description) {
     if (trimmed.length === 0) {
       throw new Error(`${description}[${i}] cannot be empty`);
     }
-    const envValue = env[trimmed];
-    if (envValue !== void 0) {
-      if (envValue.trim().length === 0) {
-        throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
+    const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
+    if (envVarMatch) {
+      const varName = envVarMatch[1];
+      const envValue = env[varName];
+      if (envValue !== void 0) {
+        if (envValue.trim().length === 0) {
+          throw new Error(`Environment variable '${varName}' for ${description}[${i}] is empty`);
+        }
+        resolved.push(envValue);
+        continue;
       }
-      resolved.push(envValue);
-    } else {
-      resolved.push(trimmed);
+      throw new Error(`Environment variable '${varName}' for ${description}[${i}] is not set`);
     }
+    resolved.push(trimmed);
   }
   return resolved.length > 0 ? resolved : void 0;
 }
 // src/evaluation/providers/vscode.ts
-var import_promises4 = require("fs/promises");
 var import_node_path6 = __toESM(require("path"), 1);
 var import_subagent = require("subagent");
 var VSCodeProvider = class {
@@ -2565,7 +2564,7 @@ var VSCodeProvider = class {
         }
       };
     }
-    const responseText = await (0, import_promises4.readFile)(session.responseFile, "utf8");
+    const responseText = await readTextFile(session.responseFile);
     return {
       text: responseText,
       raw: {
@@ -2619,7 +2618,7 @@ var VSCodeProvider = class {
     }
     const responses = [];
     for (const [index, responseFile] of session.responseFiles.entries()) {
-      const responseText = await (0, import_promises4.readFile)(responseFile, "utf8");
+      const responseText = await readTextFile(responseFile);
       responses.push({
         text: responseText,
         raw: {
@@ -2644,7 +2643,7 @@ function buildPromptDocument2(request, attachments, guidelinePatterns) {
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
   }
-  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
+  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
   return parts.join("\n").trim();
 }
 function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
@@ -2769,12 +2768,20 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
 // src/evaluation/providers/targets-file.ts
 var import_node_fs4 = require("fs");
-var import_promises5 = require("fs/promises");
+var import_promises4 = require("fs/promises");
 var import_node_path7 = __toESM(require("path"), 1);
 var import_yaml2 = require("yaml");
 // src/evaluation/providers/types.ts
-var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
+var AGENT_PROVIDER_KINDS = [
+  "codex",
+  "vscode",
+  "vscode-insiders"
+];
+var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
+function isAgentProvider(provider) {
+  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
+}
 // src/evaluation/providers/targets-file.ts
 function isRecord(value) {
@@ -2831,7 +2838,7 @@ function assertTargetDefinition(value, index, filePath) {
 }
 async function fileExists3(filePath) {
   try {
-    await (0, import_promises5.access)(filePath, import_node_fs4.constants.F_OK);
+    await (0, import_promises4.access)(filePath, import_node_fs4.constants.F_OK);
     return true;
   } catch {
     return false;
@@ -2842,7 +2849,7 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists3(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await (0, import_promises5.readFile)(absolutePath, "utf8");
+  const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
   const parsed = (0, import_yaml2.parse)(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -2886,30 +2893,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
 }
 // src/evaluation/evaluators.ts
-var import_ax3 = require("@ax-llm/ax");
 var import_node_crypto2 = require("crypto");
-var LLM_JUDGE_SIGNATURE = (0, import_ax3.f)().input(
-  "evaluationContext",
-  import_ax3.f.object(
-    {
-      expectedOutcome: import_ax3.f.string("The expected outcome for the original task"),
-      request: import_ax3.f.string("The original task request"),
-      referenceAnswer: import_ax3.f.string("The gold standard reference answer"),
-      generatedAnswer: import_ax3.f.string("The answer to evaluate"),
-      guidelines: import_ax3.f.string("Additional evaluation guidelines or instructions").optional()
-    },
-    "Complete evaluation context for the judge"
-  )
-).output(
-  "evaluation",
-  import_ax3.f.object({
-    score: import_ax3.f.number("Score between 0.0 and 1.0").min(0).max(1),
-    hits: import_ax3.f.string("Brief specific achievement").array(),
-    misses: import_ax3.f.string("Brief specific failure or omission").array(),
-    reasoning: import_ax3.f.string("Concise explanation for the score").max(500)
-  })
-).build();
-var LLM_JUDGE = (0, import_ax3.ax)(LLM_JUDGE_SIGNATURE);
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -2927,52 +2911,29 @@ var LlmJudgeEvaluator = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
-    if (providerSupportsAx(judgeProvider)) {
-      return this.evaluateWithAx(context, judgeProvider);
-    }
     return this.evaluateWithPrompt(context, judgeProvider);
   }
-  async evaluateWithAx(context, judgeProvider) {
-    const ai = judgeProvider.getAxAI();
-    const guidelines = context.promptInputs.guidelines?.trim();
-    const evaluationContext = {
-      expectedOutcome: context.evalCase.outcome.trim(),
-      request: context.evalCase.task.trim(),
-      referenceAnswer: context.evalCase.expected_assistant_raw.trim(),
-      generatedAnswer: context.candidate.trim(),
-      ...guidelines ? { guidelines } : {}
-    };
-    const options = this.buildJudgeForwardOptions(context);
-    const result = await LLM_JUDGE.forward(ai, { evaluationContext }, options);
-    const evaluation = result.evaluation;
-    const expectedAspectCount = Math.max(
-      evaluation.hits.length + evaluation.misses.length,
-      1
-    );
-    return {
-      score: evaluation.score,
-      hits: evaluation.hits,
-      misses: evaluation.misses,
-      expectedAspectCount,
-      reasoning: evaluation.reasoning,
-      evaluatorRawRequest: {
-        id: (0, import_node_crypto2.randomUUID)(),
-        provider: judgeProvider.id,
-        target: context.target.name,
-        method: "ax-structured-output",
-        signature: LLM_JUDGE_SIGNATURE.toString()
-      }
-    };
-  }
   async evaluateWithPrompt(context, judgeProvider) {
-    const prompt = buildQualityPrompt(context.evalCase, context.candidate);
-    const systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
+    let prompt = buildQualityPrompt(context.evalCase, context.candidate);
+    let systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
+    if (systemPrompt && hasTemplateVariables(systemPrompt)) {
+      const variables = {
+        input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
+        output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
+        candidate_answer: context.candidate,
+        reference_answer: context.evalCase.reference_answer,
+        expected_outcome: context.evalCase.expected_outcome,
+        question: context.evalCase.question
+      };
+      prompt = substituteVariables(systemPrompt, variables);
+      systemPrompt = QUALITY_SYSTEM_PROMPT;
+    }
     const metadata = {
       ...systemPrompt !== void 0 ? { systemPrompt } : {},
       ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
     };
     const response = await judgeProvider.invoke({
-      prompt,
+      question: prompt,
       metadata,
       evalCaseId: context.evalCase.id,
       attempt: context.attempt,
@@ -3002,33 +2963,11 @@ var LlmJudgeEvaluator = class {
       evaluatorRawRequest
     };
   }
-  buildJudgeForwardOptions(context) {
-    const modelConfig = this.buildJudgeModelConfig();
-    if (modelConfig === void 0 && context.judgeModel === void 0) {
-      return void 0;
-    }
-    return {
-      ...context.judgeModel ? { model: context.judgeModel } : {},
-      ...modelConfig ? { modelConfig } : {}
-    };
-  }
-  buildJudgeModelConfig() {
-    if (this.maxOutputTokens === void 0 && this.temperature === void 0) {
-      return void 0;
-    }
-    return {
-      ...this.maxOutputTokens !== void 0 ? { maxTokens: this.maxOutputTokens } : {},
-      ...this.temperature !== void 0 ? { temperature: this.temperature } : {}
-    };
-  }
 };
-function providerSupportsAx(provider) {
-  return typeof provider.getAxAI === "function";
-}
 var QUALITY_SYSTEM_PROMPT = [
-  "You are an expert evaluator. Your goal is to grade the generated_answer based on how well it achieves the expected_outcome for the original task.",
+  "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
   "",
-  "Use the reference_answer as a gold standard for a high-quality response. The generated_answer does not need to match it verbatim, but it should capture the key points and follow the same spirit.",
+  "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but it should capture the key points and follow the same spirit.",
   "",
   "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
   "",
@@ -3041,18 +2980,18 @@ var QUALITY_SYSTEM_PROMPT = [
   '  "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
   "}"
 ].join("\n");
-function buildQualityPrompt(testCase, candidate) {
+function buildQualityPrompt(evalCase, candidate) {
   const parts = [
     "[[ ## expected_outcome ## ]]",
-    testCase.outcome.trim(),
+    evalCase.expected_outcome.trim(),
     "",
-    "[[ ## request ## ]]",
-    testCase.task.trim(),
+    "[[ ## question ## ]]",
+    evalCase.question.trim(),
     "",
     "[[ ## reference_answer ## ]]",
-    testCase.expected_assistant_raw.trim(),
+    evalCase.reference_answer.trim(),
     "",
-    "[[ ## generated_answer ## ]]",
+    "[[ ## candidate_answer ## ]]",
     candidate.trim(),
     "",
     "Respond with a single JSON object matching the schema described in the system prompt."
@@ -3152,14 +3091,14 @@ var CodeEvaluator = class {
   async evaluate(context) {
     const inputPayload = JSON.stringify(
       {
-        task: context.evalCase.task,
-        outcome: context.evalCase.outcome,
-        expected: context.evalCase.expected_assistant_raw,
-        output: context.candidate,
+        question: context.evalCase.question,
+        expected_outcome: context.evalCase.expected_outcome,
+        reference_answer: context.evalCase.reference_answer,
+        candidate_answer: context.candidate,
         system_message: context.promptInputs.systemMessage ?? "",
         guideline_paths: context.evalCase.guideline_paths,
-        attachments: context.evalCase.file_paths,
-        user_segments: context.evalCase.user_segments
+        input_files: context.evalCase.file_paths,
+        input_segments: context.evalCase.input_segments
       },
       null,
       2
@@ -3245,10 +3184,18 @@ function parseJsonSafe(payload) {
     return void 0;
   }
 }
+function hasTemplateVariables(text) {
+  return /\$\{[a-zA-Z0-9_]+\}/.test(text);
+}
+function substituteVariables(template, variables) {
+  return template.replace(/\$\{([a-zA-Z0-9_]+)\}/g, (match, varName) => {
+    return variables[varName] ?? match;
+  });
+}
 // src/evaluation/orchestrator.ts
 var import_node_crypto3 = require("crypto");
-var import_promises6 = require("fs/promises");
+var import_promises5 = require("fs/promises");
 var import_node_path8 = __toESM(require("path"), 1);
 // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
@@ -3567,7 +3514,8 @@ async function runEvaluation(options) {
         target.name,
         (now ?? (() => /* @__PURE__ */ new Date()))(),
         outcome.reason,
-        promptInputs
+        promptInputs,
+        primaryProvider
       );
       results.push(errorResult);
       if (onResult) {
@@ -3601,7 +3549,7 @@ async function runBatchEvaluation(options) {
   const batchRequests = evalCases.map((evalCase, index) => {
     const promptInputs = promptInputsList[index];
     return {
-      prompt: promptInputs.request,
+      question: promptInputs.question,
       guidelines: promptInputs.guidelines,
       guideline_patterns: evalCase.guideline_patterns,
       inputFiles: evalCase.file_paths,
@@ -3651,7 +3599,7 @@ async function runBatchEvaluation(options) {
         agentTimeoutMs
       });
     } catch (error) {
-      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+      const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
       results.push(errorResult);
       if (onResult) {
         await onResult(errorResult);
@@ -3728,7 +3676,7 @@ async function runEvalCase(options) {
         attempt += 1;
         continue;
       }
-      return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+      return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
     }
   }
   if (!providerResponse) {
@@ -3737,7 +3685,8 @@ async function runEvalCase(options) {
       target.name,
       nowFn(),
       lastError ?? new Error("Provider did not return a response"),
-      promptInputs
+      promptInputs,
+      provider
     );
   }
   if (cacheKey && cache && !cachedResponse) {
@@ -3757,7 +3706,7 @@ async function runEvalCase(options) {
       agentTimeoutMs
     });
   } catch (error) {
-    return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
+    return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
   }
 }
 async function evaluateCandidate(options) {
@@ -3788,8 +3737,8 @@ async function evaluateCandidate(options) {
   });
   const completedAt = nowFn();
   const rawRequest = {
-    request: promptInputs.request,
-    guidelines: promptInputs.guidelines,
+    question: promptInputs.question,
+    ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
     guideline_paths: evalCase.guideline_paths,
     system_message: promptInputs.systemMessage ?? ""
   };
@@ -3800,7 +3749,7 @@ async function evaluateCandidate(options) {
     score: score.score,
     hits: score.hits,
     misses: score.misses,
-    model_answer: candidate,
+    candidate_answer: candidate,
     expected_aspect_count: score.expectedAspectCount,
     target: target.name,
     timestamp: completedAt.toISOString(),
@@ -4007,14 +3956,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
   const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
   const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
   const filePath = import_node_path8.default.resolve(directory, filename);
-  await (0, import_promises6.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
+  await (0, import_promises5.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
-    request: promptInputs.request,
+    question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
-  await (0, import_promises6.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
+  await (0, import_promises5.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
 }
 function sanitizeFilename(value) {
   if (!value) {
@@ -4032,7 +3981,7 @@ async function invokeProvider(provider, options) {
   }
   try {
     return await provider.invoke({
-      prompt: promptInputs.request,
+      question: promptInputs.question,
       guidelines: promptInputs.guidelines,
       guideline_patterns: evalCase.guideline_patterns,
       inputFiles: evalCase.file_paths,
@@ -4049,11 +3998,11 @@ async function invokeProvider(provider, options) {
     }
   }
 }
-function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
+function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
   const message = error instanceof Error ? error.message : String(error);
   const rawRequest = {
-    request: promptInputs.request,
-    guidelines: promptInputs.guidelines,
+    question: promptInputs.question,
+    ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
     guideline_paths: evalCase.guideline_paths,
     system_message: promptInputs.systemMessage ?? "",
     error: message
@@ -4065,7 +4014,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
-    model_answer: `Error occurred: ${message}`,
+    candidate_answer: `Error occurred: ${message}`,
     expected_aspect_count: 0,
     target: targetName,
     timestamp: timestamp.toISOString(),
@@ -4078,7 +4027,7 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);
-  hash.update(promptInputs.request);
+  hash.update(promptInputs.question);
   hash.update(promptInputs.guidelines);
   hash.update(promptInputs.systemMessage ?? "");
   return hash.digest("hex");