npm - @agentv/core - Versions diffs - 3.10.3 → 3.11.0 - Mend

@agentv/core 3.10.3 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/dist/{chunk-VCFYWLFV.js → chunk-AVTN5AB7.js} +17 -12
package/dist/chunk-AVTN5AB7.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +173 -135
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +11 -5
package/dist/index.d.ts +11 -5
package/dist/index.js +158 -125
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-VCFYWLFV.js.map +0 -1

package/dist/index.js CHANGED Viewed

@@ -19,7 +19,7 @@ import {
   readTextFile,
   resolveFileReference,
   resolveTargetDefinition
-} from "./chunk-VCFYWLFV.js";
+} from "./chunk-AVTN5AB7.js";
 import {
   AgentvProvider
 } from "./chunk-W5YDZWT4.js";
@@ -154,6 +154,64 @@ import path7 from "node:path";
 import micromatch2 from "micromatch";
 import { parse as parse2 } from "yaml";
+// src/evaluation/input-message-utils.ts
+function flattenInputMessages(messages) {
+  return messages.flatMap((message) => extractContentSegments(message.content));
+}
+function collectResolvedInputFilePaths(messages) {
+  const filePaths = [];
+  for (const message of messages) {
+    if (!Array.isArray(message.content)) {
+      continue;
+    }
+    for (const segment of message.content) {
+      if (isJsonObject(segment) && segment.type === "file" && typeof segment.resolvedPath === "string") {
+        filePaths.push(segment.resolvedPath);
+      }
+    }
+  }
+  return filePaths;
+}
+function extractContentSegments(content) {
+  if (typeof content === "string") {
+    return content.trim().length > 0 ? [{ type: "text", value: content }] : [];
+  }
+  if (isJsonObject(content)) {
+    const rendered = JSON.stringify(content, null, 2);
+    return rendered.trim().length > 0 ? [{ type: "text", value: rendered }] : [];
+  }
+  if (!Array.isArray(content)) {
+    return [];
+  }
+  const segments = [];
+  for (const segment of content) {
+    if (!isJsonObject(segment)) {
+      continue;
+    }
+    segments.push(cloneJsonObject(segment));
+  }
+  return segments;
+}
+function cloneJsonObject(source) {
+  const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
+  return Object.fromEntries(entries);
+}
+function cloneJsonValue(value) {
+  if (value === null) {
+    return null;
+  }
+  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => cloneJsonValue(item));
+  }
+  if (typeof value === "object") {
+    return cloneJsonObject(value);
+  }
+  return value;
+}
 // src/evaluation/loaders/agent-skills-parser.ts
 import { readFile } from "node:fs/promises";
 import path from "node:path";
@@ -222,7 +280,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
       id: String(id),
       question: prompt,
       input: [{ role: "user", content: prompt }],
-      input_segments: [{ type: "text", value: prompt }],
       expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
       reference_answer: evalCase.expected_output,
       file_paths: filePaths,
@@ -357,7 +414,7 @@ async function loadConfig(evalFilePath, repoRoot) {
     }
     try {
       const rawConfig = await readFile2(configPath, "utf8");
-      const parsed = parse(rawConfig);
+      const parsed = interpolateEnv(parse(rawConfig), process.env);
       if (!isJsonObject(parsed)) {
         logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
         continue;
@@ -575,6 +632,27 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (otelFile !== void 0) {
     logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
   }
+  if (typeof obj.export_otel === "boolean") {
+    result.export_otel = obj.export_otel;
+  } else if (obj.export_otel !== void 0) {
+    logWarning(`Invalid execution.export_otel in ${configPath}, expected boolean`);
+  }
+  const otelBackend = obj.otel_backend;
+  if (typeof otelBackend === "string" && otelBackend.trim().length > 0) {
+    result.otel_backend = otelBackend.trim();
+  } else if (otelBackend !== void 0) {
+    logWarning(`Invalid execution.otel_backend in ${configPath}, expected non-empty string`);
+  }
+  if (typeof obj.otel_capture_content === "boolean") {
+    result.otel_capture_content = obj.otel_capture_content;
+  } else if (obj.otel_capture_content !== void 0) {
+    logWarning(`Invalid execution.otel_capture_content in ${configPath}, expected boolean`);
+  }
+  if (typeof obj.otel_group_turns === "boolean") {
+    result.otel_group_turns = obj.otel_group_turns;
+  } else if (obj.otel_group_turns !== void 0) {
+    logWarning(`Invalid execution.otel_group_turns in ${configPath}, expected boolean`);
+  }
   if (typeof obj.pool_workspaces === "boolean") {
     result.pool_workspaces = obj.pool_workspaces;
   } else if (obj.pool_workspaces !== void 0) {
@@ -2045,27 +2123,28 @@ var ANSI_YELLOW4 = "\x1B[33m";
 var ANSI_RESET5 = "\x1B[0m";
 async function processMessages(options) {
   const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
-  const segments = [];
+  const processedMessages = [];
   for (const message of messages) {
     const content = message.content;
     if (typeof content === "string") {
-      segments.push({ type: "text", value: content });
       if (textParts) {
         textParts.push(content);
       }
+      processedMessages.push({ ...message, content });
       continue;
     }
     if (isJsonObject(content)) {
       const rendered = JSON.stringify(content, null, 2);
-      segments.push({ type: "text", value: rendered });
       if (textParts) {
         textParts.push(rendered);
       }
+      processedMessages.push({ ...message, content: cloneJsonObject(content) });
       continue;
     }
     if (!Array.isArray(content)) {
       continue;
     }
+    const processedContent = [];
     for (const rawSegment of content) {
       if (!isJsonObject(rawSegment)) {
         continue;
@@ -2088,8 +2167,8 @@ async function processMessages(options) {
         }
         try {
           const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-          segments.push({
-            type: "file",
+          processedContent.push({
+            ...cloneJsonObject(rawSegment),
             path: displayPath,
             text: fileContent,
             resolvedPath: path5.resolve(resolvedPath)
@@ -2106,37 +2185,19 @@ async function processMessages(options) {
         continue;
       }
       const clonedSegment = cloneJsonObject(rawSegment);
-      segments.push(clonedSegment);
+      processedContent.push(clonedSegment);
       const inlineValue = clonedSegment.value;
       if (typeof inlineValue === "string" && textParts) {
         textParts.push(inlineValue);
       }
     }
+    processedMessages.push({ ...message, content: processedContent });
   }
-  return segments;
+  return processedMessages;
 }
 function asString3(value) {
   return typeof value === "string" ? value : void 0;
 }
-function cloneJsonObject(source) {
-  const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
-  return Object.fromEntries(entries);
-}
-function cloneJsonValue(value) {
-  if (value === null) {
-    return null;
-  }
-  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
-    return value;
-  }
-  if (Array.isArray(value)) {
-    return value.map((item) => cloneJsonValue(item));
-  }
-  if (typeof value === "object") {
-    return cloneJsonObject(value);
-  }
-  return value;
-}
 function logWarning3(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
@@ -2385,10 +2446,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
         );
       }
     }
-    const inputMessages = resolveInputMessages(evalcase);
+    const rawInputMessages = resolveInputMessages(evalcase);
     const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
     const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
-    if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
+    if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
       logError2(
         `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
       );
@@ -2396,8 +2457,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
     }
     const hasExpectedMessages = expectedMessages.length > 0;
     const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
+    const inputMessages = await processMessages({
+      messages: rawInputMessages,
       searchRoots,
       repoRootPath,
       textParts: inputTextParts,
@@ -2443,19 +2504,13 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       }
     }
     warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
-    const userFilePaths = [];
-    for (const segment of inputSegments) {
-      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
-        userFilePaths.push(segment.resolvedPath);
-      }
-    }
+    const userFilePaths = collectResolvedInputFilePaths(inputMessages);
     const testCase = {
       id,
       eval_set: evalSetName,
       conversation_id: conversationId,
       question,
       input: inputMessages,
-      input_segments: inputSegments,
       expected_output: outputSegments,
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
@@ -2521,50 +2576,9 @@ function parseMetadata(suite) {
 // src/evaluation/formatting/prompt-builder.ts
 async function buildPromptInputs(testCase, mode = "lm") {
-  const segmentsByMessage = [];
-  const fileContentsByPath = /* @__PURE__ */ new Map();
-  for (const segment of testCase.input_segments) {
-    if (segment.type === "file" && typeof segment.path === "string" && typeof segment.text === "string") {
-      fileContentsByPath.set(segment.path, segment.text);
-    }
-  }
-  for (const message of testCase.input) {
-    const messageSegments = [];
-    if (typeof message.content === "string") {
-      if (message.content.trim().length > 0) {
-        messageSegments.push({ type: "text", value: message.content });
-      }
-    } else if (Array.isArray(message.content)) {
-      for (const segment of message.content) {
-        if (typeof segment === "string") {
-          if (segment.trim().length > 0) {
-            messageSegments.push({ type: "text", value: segment });
-          }
-        } else if (isJsonObject(segment)) {
-          const type = asString5(segment.type);
-          if (type === "file") {
-            const value = asString5(segment.value);
-            if (!value) continue;
-            const fileText = fileContentsByPath.get(value);
-            if (fileText !== void 0) {
-              messageSegments.push({ type: "file", text: fileText, path: value });
-            }
-          } else if (type === "text") {
-            const textValue = asString5(segment.value);
-            if (textValue && textValue.trim().length > 0) {
-              messageSegments.push({ type: "text", value: textValue });
-            }
-          }
-        }
-      }
-    } else if (isJsonObject(message.content)) {
-      const rendered = JSON.stringify(message.content, null, 2);
-      if (rendered.trim().length > 0) {
-        messageSegments.push({ type: "text", value: rendered });
-      }
-    }
-    segmentsByMessage.push(messageSegments);
-  }
+  const segmentsByMessage = testCase.input.map(
+    (message) => extractContentSegments(message.content)
+  );
   const useRoleMarkers = needsRoleMarkers(testCase.input, segmentsByMessage);
   let question;
   if (useRoleMarkers) {
@@ -2592,7 +2606,7 @@ ${messageContent}`);
     question = messageParts.join("\n\n");
   } else {
     const questionParts = [];
-    for (const segment of testCase.input_segments) {
+    for (const segment of flattenInputMessages(testCase.input)) {
       const formattedContent = formatSegment(segment, mode);
       if (formattedContent) {
         questionParts.push(formattedContent);
@@ -2679,9 +2693,6 @@ function buildChatPromptFromSegments(options) {
   }
   return chatPrompt.length > 0 ? chatPrompt : void 0;
 }
-function asString5(value) {
-  return typeof value === "string" ? value : void 0;
-}
 // src/evaluation/yaml-parser.ts
 var ANSI_YELLOW6 = "\x1B[33m";
@@ -2764,7 +2775,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = interpolated;
-  const evalSetNameFromSuite = asString6(suite.name)?.trim();
+  const evalSetNameFromSuite = asString5(suite.name)?.trim();
   const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
   const rawTestcases = resolveTests(suite);
@@ -2783,7 +2794,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   const suiteInputMessages = expandInputShorthand(suite.input);
   const suiteInputFiles = suite.input_files;
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
-  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
+  const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
   const suiteAssertions = suite.assertions ?? suite.assert;
   if (suite.assert !== void 0 && suite.assertions === void 0) {
     logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
@@ -2796,17 +2807,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       continue;
     }
     const evalcase = rawEvalcase;
-    const id = asString6(evalcase.id);
+    const id = asString5(evalcase.id);
     if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
       continue;
     }
-    const conversationId = asString6(evalcase.conversation_id);
-    let outcome = asString6(evalcase.criteria);
+    const conversationId = asString5(evalcase.conversation_id);
+    let outcome = asString5(evalcase.criteria);
     if (!outcome && evalcase.expected_outcome !== void 0) {
-      outcome = asString6(evalcase.expected_outcome);
+      outcome = asString5(evalcase.expected_outcome);
       if (outcome) {
         logWarning5(
-          `Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
+          `Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
         );
       }
     }
@@ -2823,10 +2834,9 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       continue;
     }
     const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
-    const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
     const hasExpectedMessages = expectedMessages.length > 0;
     const inputTextParts = [];
-    const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
+    const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
       messages: effectiveSuiteInputMessages,
       searchRoots,
       repoRootPath,
@@ -2834,7 +2844,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     }) : [];
-    const testInputSegments = await processMessages({
+    const testResolvedInputMessages = await processMessages({
       messages: testInputMessages,
       searchRoots,
       repoRootPath,
@@ -2842,7 +2852,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     });
-    const inputSegments = [...suiteInputSegments, ...testInputSegments];
+    const inputMessages = [...suiteResolvedInputMessages, ...testResolvedInputMessages];
     const outputSegments = hasExpectedMessages ? await processExpectedMessages({
       messages: expectedMessages,
       searchRoots,
@@ -2880,12 +2890,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       }
     }
     warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
-    const userFilePaths = [];
-    for (const segment of inputSegments) {
-      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
-        userFilePaths.push(segment.resolvedPath);
-      }
-    }
+    const userFilePaths = collectResolvedInputFilePaths(inputMessages);
     const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
@@ -2896,7 +2901,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       conversation_id: conversationId,
       question,
       input: inputMessages,
-      input_segments: inputSegments,
       expected_output: outputSegments,
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
@@ -3105,7 +3109,7 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
     path: caseLevel.path ?? suiteLevel.path
   };
 }
-function asString6(value) {
+function asString5(value) {
   return typeof value === "string" ? value : void 0;
 }
 function logWarning5(message, details) {
@@ -6857,7 +6861,7 @@ var PiAgentSdkProvider = class {
     const { Agent, getModel, getEnvApiKey } = await loadPiModules();
     const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
     const startMs = Date.now();
-    const providerName = this.config.provider ?? "anthropic";
+    const providerName = this.config.subprovider ?? "anthropic";
     const modelId = this.config.model ?? "claude-sonnet-4-20250514";
     const model = getModel(providerName, modelId);
     const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
@@ -6969,7 +6973,7 @@ var PiAgentSdkProvider = class {
           messages: agentMessages,
           systemPrompt,
           model: this.config.model,
-          provider: this.config.provider
+          subprovider: this.config.subprovider
         },
         output,
         tokenUsage,
@@ -7205,8 +7209,8 @@ var PiCodingAgentProvider = class {
   }
   buildPiArgs(prompt, inputFiles, _captureFileChanges) {
     const args = [];
-    if (this.config.provider) {
-      args.push("--provider", this.config.provider);
+    if (this.config.subprovider) {
+      args.push("--provider", this.config.subprovider);
     }
     if (this.config.model) {
       args.push("--model", this.config.model);
@@ -7264,7 +7268,7 @@ ${prompt}` : prompt;
   buildEnv() {
     const env = { ...process.env };
     if (this.config.apiKey) {
-      const provider = this.config.provider?.toLowerCase() ?? "google";
+      const provider = this.config.subprovider?.toLowerCase() ?? "google";
       switch (provider) {
         case "google":
         case "gemini":
@@ -10110,7 +10114,8 @@ var freeformEvaluationSchema = z3.object({
       passed: z3.boolean().describe("Whether this aspect was satisfied"),
       evidence: z3.string().describe("Concise evidence (1-2 sentences)").optional()
     })
-  ).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional()
+  ).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional(),
+  details: z3.record(z3.unknown()).describe("Optional structured metadata for domain-specific metrics").optional()
 });
 var rubricCheckResultSchema = z3.object({
   id: z3.string().describe("The ID of the rubric item being checked"),
@@ -10172,7 +10177,7 @@ var LlmGraderEvaluator = class {
   async evaluateFreeform(context, graderProvider) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const variables = {
-      [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context.evalCase.input_segments, null, 2),
+      [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context.evalCase.input, null, 2),
       [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(
         context.evalCase.expected_output,
         null,
@@ -10215,6 +10220,7 @@ ${context.fileChanges}`;
         expectedAspectCount: Math.max(assertions.length, 1),
         evaluatorRawRequest,
         graderTarget: graderProvider.targetName,
+        details: data.details,
         tokenUsage
       };
     } catch (e) {
@@ -10634,7 +10640,7 @@ ${outputSchema}`;
         expectedAspectCount: Math.max(assertions.length, 1),
         evaluatorRawRequest,
         graderTarget,
-        details
+        details: data.details && Object.keys(data.details).length > 0 ? { ...details, ...data.details } : details
       };
     } catch {
       return {
@@ -10781,7 +10787,8 @@ function buildOutputSchema() {
     '      "passed": <boolean>,',
     '      "evidence": "<concise evidence, 1-2 sentences, optional>"',
     "    }",
-    "  ]",
+    "  ],",
+    '  "details": {<optional object with domain-specific structured metrics>}',
     "}"
   ].join("\n");
 }
@@ -12145,7 +12152,7 @@ function assembleLlmGraderPrompt(input) {
 function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
   const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
   const variables = {
-    [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input_segments, null, 2),
+    [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input, null, 2),
     [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
     [TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
     [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
@@ -14426,6 +14433,18 @@ var QUALITY_PASS_THRESHOLD = 0.8;
 function classifyQualityStatus(score) {
   return score >= QUALITY_PASS_THRESHOLD ? "ok" : "quality_failure";
 }
+function buildSkippedEvaluatorError(scores) {
+  const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
+  if (skippedScores.length === 0) {
+    return void 0;
+  }
+  const messages = skippedScores.map((score) => {
+    const label = score.name || score.type;
+    const assertionMessage = score.assertions.find((assertion) => !assertion.passed)?.text ?? "Evaluator skipped";
+    return `${label}: ${assertionMessage}`;
+  });
+  return messages.length === 1 ? messages[0] : `Evaluators skipped: ${messages.join(" | ")}`;
+}
 function usesFileReferencePrompt(provider) {
   return isAgentProvider(provider) || provider.kind === "cli";
 }
@@ -15690,7 +15709,8 @@ async function runEvalCase(options) {
       durationMs: totalDurationMs,
       ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
     };
-    const executionStatus = providerError ? "execution_error" : classifyQualityStatus(result.score);
+    const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
+    const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score);
     const finalResult = providerError ? {
       ...result,
       evalRun,
@@ -15702,7 +15722,26 @@ async function runEvalCase(options) {
       beforeAllOutput,
       beforeEachOutput,
       afterEachOutput
-    } : { ...result, evalRun, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
+    } : skippedEvaluatorError ? {
+      ...result,
+      score: 0,
+      evalRun,
+      error: skippedEvaluatorError,
+      executionStatus,
+      failureStage: "evaluator",
+      failureReasonCode: "evaluator_error",
+      executionError: { message: skippedEvaluatorError, stage: "evaluator" },
+      beforeAllOutput,
+      beforeEachOutput,
+      afterEachOutput
+    } : {
+      ...result,
+      evalRun,
+      executionStatus,
+      beforeAllOutput,
+      beforeEachOutput,
+      afterEachOutput
+    };
     const isFailure = !!finalResult.error || finalResult.score < 0.5;
     if (workspacePath && !isSharedWorkspace) {
       if (forceCleanup) {
@@ -16447,11 +16486,6 @@ async function evaluate(config) {
     evalCases = (config.tests ?? []).map((test) => {
       const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
       const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
-      const inputSegments = input.map((m) => ({
-        type: "text",
-        value: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
-        messageIndex: 0
-      }));
       const expectedOutputValue = test.expectedOutput ?? test.expected_output;
       const expectedOutput = expectedOutputValue ? [
         { role: "assistant", content: expectedOutputValue }
@@ -16480,7 +16514,6 @@ async function evaluate(config) {
         criteria: test.criteria ?? "",
         question: String(question),
         input,
-        input_segments: inputSegments,
         expected_output: expectedOutput,
         reference_answer: expectedOutputValue,
         file_paths: [],