npm - @agentv/core - Versions diffs - 0.5.3 → 0.7.0 - Mend

@agentv/core 0.5.3 → 0.7.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (12)

package/dist/{chunk-NL7K4CAK.js → chunk-L7I5UTJU.js} +7 -2
package/dist/chunk-L7I5UTJU.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +260 -114
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +34 -10
package/dist/index.d.ts +34 -10
package/dist/index.js +255 -115
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-NL7K4CAK.js.map +0 -1

package/dist/index.js — CHANGED

@@ -4,8 +4,9 @@ import {
   buildSearchRoots,
   fileExists,
   findGitRoot,
+  readTextFile,
   resolveFileReference
-} from "./chunk-NL7K4CAK.js";
+} from "./chunk-L7I5UTJU.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -133,6 +134,87 @@ function extractCodeBlocks(segments) {
   }
   return codeBlocks;
 }
+async function processMessages(options) {
+  const {
+    messages,
+    searchRoots,
+    repoRootPath,
+    guidelinePatterns,
+    guidelinePaths,
+    textParts,
+    messageType,
+    verbose
+  } = options;
+  const segments = [];
+  for (const message of messages) {
+    const content = message.content;
+    if (typeof content === "string") {
+      segments.push({ type: "text", value: content });
+      if (textParts) {
+        textParts.push(content);
+      }
+      continue;
+    }
+    for (const rawSegment of content) {
+      if (!isJsonObject(rawSegment)) {
+        continue;
+      }
+      const segmentType = asString(rawSegment.type);
+      if (segmentType === "file") {
+        const rawValue = asString(rawSegment.value);
+        if (!rawValue) {
+          continue;
+        }
+        const { displayPath, resolvedPath, attempted } = await resolveFileReference(
+          rawValue,
+          searchRoots
+        );
+        if (!resolvedPath) {
+          const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
+          const context = messageType === "input" ? "" : " in expected_messages";
+          logWarning(`File not found${context}: ${displayPath}`, attempts);
+          continue;
+        }
+        try {
+          const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          if (messageType === "input" && guidelinePatterns && guidelinePaths) {
+            const relativeToRepo = path.relative(repoRootPath, resolvedPath);
+            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
+              guidelinePaths.push(path.resolve(resolvedPath));
+              if (verbose) {
+                console.log(`  [Guideline] Found: ${displayPath}`);
+                console.log(`    Resolved to: ${resolvedPath}`);
+              }
+              continue;
+            }
+          }
+          segments.push({
+            type: "file",
+            path: displayPath,
+            text: fileContent,
+            resolvedPath: path.resolve(resolvedPath)
+          });
+          if (verbose) {
+            const label = messageType === "input" ? "[File]" : "[Expected Output File]";
+            console.log(`  ${label} Found: ${displayPath}`);
+            console.log(`    Resolved to: ${resolvedPath}`);
+          }
+        } catch (error) {
+          const context = messageType === "input" ? "" : " expected output";
+          logWarning(`Could not read${context} file ${resolvedPath}: ${error.message}`);
+        }
+        continue;
+      }
+      const clonedSegment = cloneJsonObject(rawSegment);
+      segments.push(clonedSegment);
+      const inlineValue = clonedSegment.value;
+      if (typeof inlineValue === "string" && textParts) {
+        textParts.push(inlineValue);
+      }
+    }
+  }
+  return segments;
+}
 async function loadEvalCases(evalFilePath, repoRoot, options) {
   const verbose = options?.verbose ?? false;
   const absoluteTestPath = path.resolve(evalFilePath);
@@ -149,6 +231,9 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
+  const datasetNameFromSuite = asString(suite.dataset)?.trim();
+  const fallbackDataset = path.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
+  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
   const schema = suite.$schema;
   if (schema !== SCHEMA_EVAL_V2) {
     const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
@@ -215,77 +300,34 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
         }
       }
     }
-    const userSegments = [];
     const guidelinePaths = [];
-    const userTextParts = [];
-    for (const userMessage of userMessages) {
-      const content = userMessage.content;
-      if (typeof content === "string") {
-        userSegments.push({ type: "text", value: content });
-        userTextParts.push(content);
-        continue;
-      }
-      for (const rawSegment of content) {
-        if (!isJsonObject(rawSegment)) {
-          continue;
-        }
-        const segmentType = asString(rawSegment.type);
-        if (segmentType === "file") {
-          const rawValue = asString(rawSegment.value);
-          if (!rawValue) {
-            continue;
-          }
-          const { displayPath, resolvedPath, attempted } = await resolveFileReference(
-            rawValue,
-            searchRoots
-          );
-          if (!resolvedPath) {
-            const attempts = attempted.length ? ["  Tried:", ...attempted.map((candidate) => `    ${candidate}`)] : void 0;
-            logWarning(`File not found: ${displayPath}`, attempts);
-            continue;
-          }
-          try {
-            const fileContent = (await readFile(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-            const relativeToRepo = path.relative(repoRootPath, resolvedPath);
-            if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
-              guidelinePaths.push(path.resolve(resolvedPath));
-              if (verbose) {
-                console.log(`  [Guideline] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-            } else {
-              userSegments.push({
-                type: "file",
-                path: displayPath,
-                text: fileContent,
-                resolvedPath: path.resolve(resolvedPath)
-              });
-              if (verbose) {
-                console.log(`  [File] Found: ${displayPath}`);
-                console.log(`    Resolved to: ${resolvedPath}`);
-              }
-            }
-          } catch (error) {
-            logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
-          }
-          continue;
-        }
-        const clonedSegment = cloneJsonObject(rawSegment);
-        userSegments.push(clonedSegment);
-        const inlineValue = clonedSegment.value;
-        if (typeof inlineValue === "string") {
-          userTextParts.push(inlineValue);
-        }
-      }
-    }
-    const codeSnippets = extractCodeBlocks(userSegments);
+    const inputTextParts = [];
+    const inputSegments = await processMessages({
+      messages: userMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      guidelinePaths,
+      textParts: inputTextParts,
+      messageType: "input",
+      verbose
+    });
+    const outputSegments = await processMessages({
+      messages: assistantMessages,
+      searchRoots,
+      repoRootPath,
+      guidelinePatterns,
+      messageType: "output",
+      verbose
+    });
+    const codeSnippets = extractCodeBlocks(inputSegments);
     const assistantContent = assistantMessages[0]?.content;
-    const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
-    const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
+    const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
+    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
     const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
     const userFilePaths = [];
-    for (const segment of userSegments) {
+    for (const segment of inputSegments) {
       if (segment.type === "file" && typeof segment.resolvedPath === "string") {
         userFilePaths.push(segment.resolvedPath);
       }
@@ -296,16 +338,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     ];
     const testCase = {
       id,
+      dataset: datasetName,
       conversation_id: conversationId,
-      task: userTextPrompt,
-      user_segments: userSegments,
+      question,
+      input_segments: inputSegments,
+      output_segments: outputSegments,
       system_message: systemMessageContent,
-      expected_assistant_raw: expectedAssistantRaw,
+      reference_answer: referenceAnswer,
       guideline_paths: guidelinePaths.map((guidelinePath) => path.resolve(guidelinePath)),
       guideline_patterns: guidelinePatterns,
       file_paths: allFilePaths,
       code_snippets: codeSnippets,
-      outcome,
+      expected_outcome: outcome,
       evaluator: testCaseEvaluatorKind,
       evaluators
     };
@@ -341,36 +385,36 @@ ${content}`);
       logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
     }
   }
-  const requestParts = [];
-  for (const segment of testCase.user_segments) {
+  const questionParts = [];
+  for (const segment of testCase.input_segments) {
     const typeValue = segment.type;
     if (typeof typeValue === "string" && typeValue === "file") {
       const pathValue = segment.path;
       const textValue = segment.text;
       const label = typeof pathValue === "string" ? pathValue : "file";
       const body = typeof textValue === "string" ? textValue : "";
-      requestParts.push(`=== ${label} ===
+      questionParts.push(`=== ${label} ===
 ${body}`);
       continue;
     }
     if (typeof typeValue === "string" && typeValue === "text") {
       const value = segment.value;
       if (typeof value === "string") {
-        requestParts.push(value);
+        questionParts.push(value);
       }
       continue;
     }
     const genericValue = segment.value;
     if (typeof genericValue === "string") {
-      requestParts.push(genericValue);
+      questionParts.push(genericValue);
     }
   }
   if (testCase.code_snippets.length > 0) {
-    requestParts.push(testCase.code_snippets.join("\n"));
+    questionParts.push(testCase.code_snippets.join("\n"));
   }
-  const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
+  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { request, guidelines, systemMessage: testCase.system_message };
+  return { question, guidelines, systemMessage: testCase.system_message };
 }
 async function fileExists2(absolutePath) {
   try {
@@ -582,7 +626,7 @@ function buildChatPrompt(request) {
 ${request.guidelines.trim()}`);
   }
   const systemContent = systemSegments.join("\n\n");
-  const userContent = request.prompt.trim();
+  const userContent = request.question.trim();
   const prompt = [
     {
       role: "system",
@@ -676,6 +720,9 @@ var AzureProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() {
+    return this.ai;
+  }
 };
 var AnthropicProvider = class {
   constructor(targetName, config) {
@@ -710,6 +757,9 @@ var AnthropicProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() {
+    return this.ai;
+  }
 };
 var GeminiProvider = class {
   constructor(targetName, config) {
@@ -743,6 +793,9 @@ var GeminiProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() {
+    return this.ai;
+  }
 };
 // src/evaluation/providers/cli.ts
@@ -871,7 +924,7 @@ var CliProvider = class {
       healthcheck.commandTemplate,
       buildTemplateValues(
         {
-          prompt: "",
+          question: "",
           guidelines: "",
           inputFiles: [],
           evalCaseId: "",
@@ -898,7 +951,7 @@ var CliProvider = class {
 function buildTemplateValues(request, config) {
   const inputFiles = normalizeInputFiles(request.inputFiles);
   return {
-    PROMPT: shellEscape(request.prompt ?? ""),
+    PROMPT: shellEscape(request.question ?? ""),
     GUIDELINES: shellEscape(request.guidelines ?? ""),
     EVAL_ID: shellEscape(request.evalCaseId ?? ""),
     ATTEMPT: shellEscape(String(request.attempt ?? 0)),
@@ -962,6 +1015,59 @@ import { tmpdir } from "node:os";
 import path4 from "node:path";
 import { promisify as promisify2 } from "node:util";
+// src/evaluation/providers/codex-log-tracker.ts
+var GLOBAL_LOGS_KEY = Symbol.for("agentv.codexLogs");
+var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.codexLogSubscribers");
+function getCodexLogStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_LOGS_KEY];
+  if (existing) {
+    return existing;
+  }
+  const created = [];
+  globalObject[GLOBAL_LOGS_KEY] = created;
+  return created;
+}
+function getSubscriberStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY];
+  if (existing) {
+    return existing;
+  }
+  const created = /* @__PURE__ */ new Set();
+  globalObject[GLOBAL_SUBSCRIBERS_KEY] = created;
+  return created;
+}
+function notifySubscribers(entry) {
+  const subscribers = Array.from(getSubscriberStore());
+  for (const listener of subscribers) {
+    try {
+      listener(entry);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Codex log subscriber failed: ${message}`);
+    }
+  }
+}
+function recordCodexLogEntry(entry) {
+  getCodexLogStore().push(entry);
+  notifySubscribers(entry);
+}
+function consumeCodexLogEntries() {
+  const store = getCodexLogStore();
+  if (store.length === 0) {
+    return [];
+  }
+  return store.splice(0, store.length);
+}
+function subscribeToCodexLogEntries(listener) {
+  const store = getSubscriberStore();
+  store.add(listener);
+  return () => {
+    store.delete(listener);
+  };
+}
 // src/evaluation/providers/preread.ts
 import path3 from "node:path";
 function buildPromptDocument(request, inputFiles, options) {
@@ -979,7 +1085,7 @@ function buildPromptDocument(request, inputFiles, options) {
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
   }
-  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
+  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
   return parts.join("\n").trim();
 }
 function normalizeInputFiles2(inputFiles) {
@@ -1259,7 +1365,12 @@ var CodexProvider = class {
         attempt: request.attempt,
         format: this.config.logFormat ?? "summary"
       });
-      console.log(`Streaming Codex CLI output to ${filePath}`);
+      recordCodexLogEntry({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt
+      });
       return logger;
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
@@ -1791,7 +1902,7 @@ var MockProvider = class {
     return {
       text: this.cannedResponse,
       raw: {
-        prompt: request.prompt,
+        question: request.question,
         guidelines: request.guidelines
       }
     };
@@ -2407,7 +2518,7 @@ function buildPromptDocument2(request, attachments, guidelinePatterns) {
   if (prereadBlock.length > 0) {
     parts.push("\n", prereadBlock);
   }
-  parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
+  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
   return parts.join("\n").trim();
 }
 function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
@@ -2662,14 +2773,29 @@ var LlmJudgeEvaluator = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
-    const prompt = buildQualityPrompt(context.evalCase, context.candidate);
-    const systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
+    return this.evaluateWithPrompt(context, judgeProvider);
+  }
+  async evaluateWithPrompt(context, judgeProvider) {
+    let prompt = buildQualityPrompt(context.evalCase, context.candidate);
+    let systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
+    if (systemPrompt && hasTemplateVariables(systemPrompt)) {
+      const variables = {
+        input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
+        output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
+        candidate_answer: context.candidate,
+        reference_answer: context.evalCase.reference_answer,
+        expected_outcome: context.evalCase.expected_outcome,
+        question: context.evalCase.question
+      };
+      prompt = substituteVariables(systemPrompt, variables);
+      systemPrompt = QUALITY_SYSTEM_PROMPT;
+    }
     const metadata = {
       ...systemPrompt !== void 0 ? { systemPrompt } : {},
       ...context.judgeModel !== void 0 ? { model: context.judgeModel } : {}
     };
     const response = await judgeProvider.invoke({
-      prompt,
+      question: prompt,
       metadata,
       evalCaseId: context.evalCase.id,
       attempt: context.attempt,
@@ -2681,6 +2807,7 @@ var LlmJudgeEvaluator = class {
     const hits = Array.isArray(parsed.hits) ? parsed.hits.filter(isNonEmptyString).slice(0, 4) : [];
     const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
     const reasoning = parsed.reasoning ?? response.reasoning;
+    const expectedAspectCount = Math.max(hits.length + misses.length, 1);
     const evaluatorRawRequest = {
       id: randomUUID2(),
       provider: judgeProvider.id,
@@ -2693,16 +2820,16 @@ var LlmJudgeEvaluator = class {
       score,
       hits,
       misses,
-      expectedAspectCount: hits.length + misses.length || 1,
+      expectedAspectCount,
       reasoning,
       evaluatorRawRequest
     };
   }
 };
 var QUALITY_SYSTEM_PROMPT = [
-  "You are an expert evaluator. Your goal is to grade the generated_answer based on how well it achieves the expected_outcome for the original task.",
+  "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
   "",
-  "Use the reference_answer as a gold standard for a high-quality response. The generated_answer does not need to match it verbatim, but it should capture the key points and follow the same spirit.",
+  "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but it should capture the key points and follow the same spirit.",
   "",
   "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
   "",
@@ -2715,18 +2842,18 @@ var QUALITY_SYSTEM_PROMPT = [
   '  "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
   "}"
 ].join("\n");
-function buildQualityPrompt(testCase, candidate) {
+function buildQualityPrompt(evalCase, candidate) {
   const parts = [
     "[[ ## expected_outcome ## ]]",
-    testCase.outcome.trim(),
+    evalCase.expected_outcome.trim(),
     "",
-    "[[ ## request ## ]]",
-    testCase.task.trim(),
+    "[[ ## question ## ]]",
+    evalCase.question.trim(),
     "",
     "[[ ## reference_answer ## ]]",
-    testCase.expected_assistant_raw.trim(),
+    evalCase.reference_answer.trim(),
     "",
-    "[[ ## generated_answer ## ]]",
+    "[[ ## candidate_answer ## ]]",
     candidate.trim(),
     "",
     "Respond with a single JSON object matching the schema described in the system prompt."
@@ -2826,14 +2953,14 @@ var CodeEvaluator = class {
   async evaluate(context) {
     const inputPayload = JSON.stringify(
       {
-        task: context.evalCase.task,
-        outcome: context.evalCase.outcome,
-        expected: context.evalCase.expected_assistant_raw,
-        output: context.candidate,
+        question: context.evalCase.question,
+        expected_outcome: context.evalCase.expected_outcome,
+        reference_answer: context.evalCase.reference_answer,
+        candidate_answer: context.candidate,
         system_message: context.promptInputs.systemMessage ?? "",
         guideline_paths: context.evalCase.guideline_paths,
-        attachments: context.evalCase.file_paths,
-        user_segments: context.evalCase.user_segments
+        input_files: context.evalCase.file_paths,
+        input_segments: context.evalCase.input_segments
       },
       null,
       2
@@ -2919,10 +3046,18 @@ function parseJsonSafe(payload) {
     return void 0;
   }
 }
+function hasTemplateVariables(text) {
+  return /\$\{[a-zA-Z0-9_]+\}/.test(text);
+}
+function substituteVariables(template, variables) {
+  return template.replace(/\$\{([a-zA-Z0-9_]+)\}/g, (match, varName) => {
+    return variables[varName] ?? match;
+  });
+}
 // src/evaluation/orchestrator.ts
 import { createHash, randomUUID as randomUUID3 } from "node:crypto";
-import { mkdir as mkdir2, readFile as readFile4, writeFile as writeFile2 } from "node:fs/promises";
+import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
 import path7 from "node:path";
 // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
@@ -3275,7 +3410,7 @@ async function runBatchEvaluation(options) {
   const batchRequests = evalCases.map((evalCase, index) => {
     const promptInputs = promptInputsList[index];
     return {
-      prompt: promptInputs.request,
+      question: promptInputs.question,
       guidelines: promptInputs.guidelines,
       guideline_patterns: evalCase.guideline_patterns,
       inputFiles: evalCase.file_paths,
@@ -3462,18 +3597,19 @@ async function evaluateCandidate(options) {
   });
   const completedAt = nowFn();
   const rawRequest = {
-    request: promptInputs.request,
+    question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths,
     system_message: promptInputs.systemMessage ?? ""
   };
   return {
     eval_id: evalCase.id,
+    dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
     score: score.score,
     hits: score.hits,
     misses: score.misses,
-    model_answer: candidate,
+    candidate_answer: candidate,
     expected_aspect_count: score.expectedAspectCount,
     target: target.name,
     timestamp: completedAt.toISOString(),
@@ -3645,7 +3781,7 @@ async function runLlmJudgeEvaluator(options) {
 async function resolveCustomPrompt(config) {
   if (config.promptPath) {
     try {
-      return await readFile4(config.promptPath, "utf8");
+      return await readTextFile(config.promptPath);
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
@@ -3683,7 +3819,7 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
   await mkdir2(path7.dirname(filePath), { recursive: true });
   const payload = {
     eval_id: evalCase.id,
-    request: promptInputs.request,
+    question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths
   };
@@ -3705,7 +3841,7 @@ async function invokeProvider(provider, options) {
   }
   try {
     return await provider.invoke({
-      prompt: promptInputs.request,
+      question: promptInputs.question,
       guidelines: promptInputs.guidelines,
       guideline_patterns: evalCase.guideline_patterns,
       inputFiles: evalCase.file_paths,
@@ -3725,7 +3861,7 @@ async function invokeProvider(provider, options) {
 function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
   const message = error instanceof Error ? error.message : String(error);
   const rawRequest = {
-    request: promptInputs.request,
+    question: promptInputs.question,
     guidelines: promptInputs.guidelines,
     guideline_paths: evalCase.guideline_paths,
     system_message: promptInputs.systemMessage ?? "",
@@ -3733,11 +3869,12 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
   };
   return {
     eval_id: evalCase.id,
+    dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
     score: 0,
     hits: [],
     misses: [`Error: ${message}`],
-    model_answer: `Error occurred: ${message}`,
+    candidate_answer: `Error occurred: ${message}`,
     expected_aspect_count: 0,
     target: targetName,
     timestamp: timestamp.toISOString(),
@@ -3750,7 +3887,7 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
   hash.update(provider.id);
   hash.update(target.name);
   hash.update(evalCase.id);
-  hash.update(promptInputs.request);
+  hash.update(promptInputs.question);
   hash.update(promptInputs.guidelines);
   hash.update(promptInputs.systemMessage ?? "");
   return hash.digest("hex");
@@ -3782,6 +3919,7 @@ export {
   buildDirectoryChain,
   buildPromptInputs,
   buildSearchRoots,
+  consumeCodexLogEntries,
   createAgentKernel,
   createProvider,
   ensureVSCodeSubagents,
@@ -3798,10 +3936,12 @@ export {
   listTargetNames,
   loadEvalCases,
   readTargetDefinitions,
+  readTextFile,
   resolveAndCreateProvider,
   resolveFileReference,
   resolveTargetDefinition,
   runEvalCase,
-  runEvaluation
+  runEvaluation,
+  subscribeToCodexLogEntries
 };
 //# sourceMappingURL=index.js.map