npm - agentv - Versions diffs - 3.10.2 → 3.11.0 - Mend

agentv 3.10.2 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/dist/{chunk-KGK5NUFG.js → chunk-EZGWZVVK.js} RENAMED

@@ -301,7 +301,7 @@ var require_dist = __commonJS({
   }
 });
-// ../../packages/core/dist/chunk-VCFYWLFV.js
+// ../../packages/core/dist/chunk-AVTN5AB7.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
   void: () => voidType
 });
-// ../../packages/core/dist/chunk-VCFYWLFV.js
+// ../../packages/core/dist/chunk-AVTN5AB7.js
 import { readFile as readFile2 } from "node:fs/promises";
 import path3 from "node:path";
 import fg from "fast-glob";
@@ -1363,7 +1363,7 @@ function normalizeCopilotLogFormat(value) {
 }
 function resolvePiCodingAgentConfig(target, env, evalFilePath) {
   const executableSource = target.executable ?? target.command ?? target.binary;
-  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
+  const subproviderSource = target.subprovider;
   const modelSource = target.model ?? target.pi_model ?? target.piModel;
   const apiKeySource = target.api_key ?? target.apiKey;
   const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
@@ -1379,10 +1379,15 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   }) ?? "pi";
-  const provider = resolveOptionalString(providerSource, env, `${target.name} pi provider`, {
-    allowLiteral: true,
-    optionalEnv: true
-  });
+  const subprovider = resolveOptionalString(
+    subproviderSource,
+    env,
+    `${target.name} pi subprovider`,
+    {
+      allowLiteral: true,
+      optionalEnv: true
+    }
+  );
   const model = resolveOptionalString(modelSource, env, `${target.name} pi model`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1430,7 +1435,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
   const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
     executable,
-    provider,
+    subprovider,
     model,
     apiKey,
     tools,
@@ -1445,15 +1450,15 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
   };
 }
 function resolvePiAgentSdkConfig(target, env) {
-  const providerSource = target.pi_provider ?? target.piProvider ?? target.llm_provider;
+  const subproviderSource = target.subprovider;
   const modelSource = target.model ?? target.pi_model ?? target.piModel;
   const apiKeySource = target.api_key ?? target.apiKey;
   const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
   const systemPromptSource = target.system_prompt ?? target.systemPrompt;
-  const provider = resolveOptionalString(
-    providerSource,
+  const subprovider = resolveOptionalString(
+    subproviderSource,
     env,
-    `${target.name} pi-agent-sdk provider`,
+    `${target.name} pi-agent-sdk subprovider`,
     {
       allowLiteral: true,
       optionalEnv: true
@@ -1470,7 +1475,7 @@ function resolvePiAgentSdkConfig(target, env) {
   const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-agent-sdk timeout`);
   const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
-    provider,
+    subprovider,
     model,
     apiKey,
     timeoutMs,
@@ -2039,7 +2044,7 @@ import path8 from "node:path";
 import { parse as parse3 } from "yaml";
 import { createOpenAI } from "@ai-sdk/openai";
-// ../../node_modules/.bun/@openrouter+ai-sdk-provider@2.3.1+3ab978b6804fd9e7/node_modules/@openrouter/ai-sdk-provider/dist/index.mjs
+// ../../node_modules/.bun/@openrouter+ai-sdk-provider@2.3.3+3ab978b6804fd9e7/node_modules/@openrouter/ai-sdk-provider/dist/index.mjs
 var __defProp = Object.defineProperty;
 var __defProps = Object.defineProperties;
 var __getOwnPropDescs = Object.getOwnPropertyDescriptors;
@@ -4202,11 +4207,13 @@ function isDefinedOrNotNull(value) {
 var ReasoningFormat = /* @__PURE__ */ ((ReasoningFormat2) => {
   ReasoningFormat2["Unknown"] = "unknown";
   ReasoningFormat2["OpenAIResponsesV1"] = "openai-responses-v1";
+  ReasoningFormat2["AzureOpenAIResponsesV1"] = "azure-openai-responses-v1";
   ReasoningFormat2["XAIResponsesV1"] = "xai-responses-v1";
   ReasoningFormat2["AnthropicClaudeV1"] = "anthropic-claude-v1";
   ReasoningFormat2["GoogleGeminiV1"] = "google-gemini-v1";
   return ReasoningFormat2;
 })(ReasoningFormat || {});
+var DEFAULT_REASONING_FORMAT = "anthropic-claude-v1";
 var CommonReasoningDetailSchema = external_exports.object({
   id: external_exports.string().nullish(),
   format: external_exports.enum(ReasoningFormat).nullish(),
@@ -4360,7 +4367,11 @@ var OpenRouterProviderMetadataSchema = external_exports.object({
 }).catchall(external_exports.any());
 var OpenRouterProviderOptionsSchema = external_exports.object({
   openrouter: external_exports.object({
-    reasoning_details: external_exports.array(ReasoningDetailUnionSchema).optional(),
+    // Use ReasoningDetailArraySchema (with unknown fallback) instead of
+    // z.array(ReasoningDetailUnionSchema) so that a single malformed entry
+    // (e.g., a future format not yet in the enum) is individually dropped
+    // rather than causing the entire array to fail parsing.
+    reasoning_details: ReasoningDetailArraySchema.optional(),
     annotations: external_exports.array(FileAnnotationSchema).optional()
   }).optional()
 }).optional();
@@ -4758,8 +4769,24 @@ function convertToOpenRouterChatMessages(prompt) {
         const candidateReasoningDetails = messageReasoningDetails && Array.isArray(messageReasoningDetails) && messageReasoningDetails.length > 0 ? messageReasoningDetails : findFirstReasoningDetails(content);
         let finalReasoningDetails;
         if (candidateReasoningDetails && candidateReasoningDetails.length > 0) {
+          const validDetails = candidateReasoningDetails.filter((detail) => {
+            var _a173;
+            if (detail.type !== "reasoning.text") {
+              return true;
+            }
+            const format = (_a173 = detail.format) != null ? _a173 : DEFAULT_REASONING_FORMAT;
+            if (format !== "anthropic-claude-v1") {
+              return true;
+            }
+            return !!detail.signature;
+          });
+          if (validDetails.length < candidateReasoningDetails.length) {
+            console.warn(
+              "[openrouter] Some reasoning_details entries were removed because they were missing signatures. See https://github.com/OpenRouterTeam/ai-sdk-provider/issues/423 for more details."
+            );
+          }
           const uniqueDetails = [];
-          for (const detail of candidateReasoningDetails) {
+          for (const detail of validDetails) {
             if (reasoningDetailsTracker.upsert(detail)) {
               uniqueDetails.push(detail);
             }
@@ -4808,20 +4835,135 @@ function getToolResultContent(input) {
       return input.output.value;
     case "json":
     case "error-json":
-    case "content":
       return JSON.stringify(input.output.value);
+    case "content":
+      return mapToolResultContentParts(input.output.value);
     case "execution-denied":
       return (_a163 = input.output.reason) != null ? _a163 : "Tool execution denied";
   }
 }
+function mapToolResultContentParts(parts) {
+  return parts.map((part) => {
+    var _a163, _b162, _c;
+    switch (part.type) {
+      case "text":
+        return { type: "text", text: part.text };
+      case "image-data":
+        return {
+          type: "image_url",
+          image_url: {
+            url: buildFileDataUrl({
+              data: part.data,
+              mediaType: part.mediaType,
+              defaultMediaType: "image/jpeg"
+            })
+          }
+        };
+      case "image-url":
+        return {
+          type: "image_url",
+          image_url: { url: part.url }
+        };
+      case "file-data": {
+        const dataUrl = buildFileDataUrl({
+          data: part.data,
+          mediaType: part.mediaType,
+          defaultMediaType: "application/octet-stream"
+        });
+        if ((_a163 = part.mediaType) == null ? void 0 : _a163.startsWith("image/")) {
+          return {
+            type: "image_url",
+            image_url: { url: dataUrl }
+          };
+        }
+        if ((_b162 = part.mediaType) == null ? void 0 : _b162.startsWith("audio/")) {
+          const rawFormat = part.mediaType.replace("audio/", "");
+          const format = MIME_TO_FORMAT[rawFormat];
+          if (format !== void 0) {
+            return {
+              type: "input_audio",
+              input_audio: {
+                data: getBase64FromDataUrl(dataUrl),
+                format
+              }
+            };
+          }
+        }
+        return {
+          type: "file",
+          file: {
+            filename: (_c = part.filename) != null ? _c : "",
+            file_data: dataUrl
+          }
+        };
+      }
+      case "file-url": {
+        if (looksLikeImageUrl(part.url)) {
+          return {
+            type: "image_url",
+            image_url: { url: part.url }
+          };
+        }
+        return {
+          type: "file",
+          file: {
+            filename: filenameFromUrl(part.url),
+            file_data: part.url
+          }
+        };
+      }
+      case "file-id":
+      case "image-file-id":
+      case "custom":
+        return { type: "text", text: JSON.stringify(part) };
+      default: {
+        const _exhaustiveCheck = part;
+        return { type: "text", text: JSON.stringify(_exhaustiveCheck) };
+      }
+    }
+  });
+}
+var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([
+  "jpg",
+  "jpeg",
+  "png",
+  "gif",
+  "webp",
+  "svg",
+  "bmp",
+  "ico",
+  "tif",
+  "tiff",
+  "avif"
+]);
+function looksLikeImageUrl(url) {
+  var _a163;
+  try {
+    const pathname = new URL(url).pathname;
+    const ext = (_a163 = pathname.split(".").pop()) == null ? void 0 : _a163.toLowerCase();
+    return ext !== void 0 && IMAGE_EXTENSIONS.has(ext);
+  } catch (e) {
+    return false;
+  }
+}
+function filenameFromUrl(url) {
+  try {
+    const pathname = new URL(url).pathname;
+    const last = pathname.split("/").pop();
+    return (last == null ? void 0 : last.includes(".")) ? last : "";
+  } catch (e) {
+    return "";
+  }
+}
 function findFirstReasoningDetails(content) {
-  var _a163, _b162, _c;
+  var _a163, _b162, _c, _d;
   for (const part of content) {
     if (part.type === "tool-call") {
-      const openrouter2 = (_a163 = part.providerOptions) == null ? void 0 : _a163.openrouter;
-      const details = openrouter2 == null ? void 0 : openrouter2.reasoning_details;
-      if (Array.isArray(details) && details.length > 0) {
-        return details;
+      const parsed = OpenRouterProviderOptionsSchema.safeParse(
+        part.providerOptions
+      );
+      if (parsed.success && ((_b162 = (_a163 = parsed.data) == null ? void 0 : _a163.openrouter) == null ? void 0 : _b162.reasoning_details) && parsed.data.openrouter.reasoning_details.length > 0) {
+        return parsed.data.openrouter.reasoning_details;
       }
     }
   }
@@ -4830,7 +4972,7 @@ function findFirstReasoningDetails(content) {
       const parsed = OpenRouterProviderOptionsSchema.safeParse(
         part.providerOptions
       );
-      if (parsed.success && ((_c = (_b162 = parsed.data) == null ? void 0 : _b162.openrouter) == null ? void 0 : _c.reasoning_details) && parsed.data.openrouter.reasoning_details.length > 0) {
+      if (parsed.success && ((_d = (_c = parsed.data) == null ? void 0 : _c.openrouter) == null ? void 0 : _d.reasoning_details) && parsed.data.openrouter.reasoning_details.length > 0) {
         return parsed.data.openrouter.reasoning_details;
       }
     }
@@ -6490,7 +6632,7 @@ function withUserAgentSuffix22(headers, ...userAgentSuffixParts) {
     "user-agent": userAgent
   });
 }
-var VERSION2 = false ? "0.0.0-test" : "2.3.1";
+var VERSION2 = false ? "0.0.0-test" : "2.3.3";
 function createOpenRouter(options = {}) {
   var _a163, _b162, _c;
   const baseURL = (_b162 = withoutTrailingSlash2((_a163 = options.baseURL) != null ? _a163 : options.baseUrl)) != null ? _b162 : "https://openrouter.ai/api/v1";
@@ -14227,6 +14369,62 @@ function mergeExecutionMetrics(computed, metrics) {
     endTime: metrics.endTime ?? computed.endTime
   };
 }
+function flattenInputMessages(messages) {
+  return messages.flatMap((message) => extractContentSegments(message.content));
+}
+function collectResolvedInputFilePaths(messages) {
+  const filePaths = [];
+  for (const message of messages) {
+    if (!Array.isArray(message.content)) {
+      continue;
+    }
+    for (const segment of message.content) {
+      if (isJsonObject(segment) && segment.type === "file" && typeof segment.resolvedPath === "string") {
+        filePaths.push(segment.resolvedPath);
+      }
+    }
+  }
+  return filePaths;
+}
+function extractContentSegments(content) {
+  if (typeof content === "string") {
+    return content.trim().length > 0 ? [{ type: "text", value: content }] : [];
+  }
+  if (isJsonObject(content)) {
+    const rendered = JSON.stringify(content, null, 2);
+    return rendered.trim().length > 0 ? [{ type: "text", value: rendered }] : [];
+  }
+  if (!Array.isArray(content)) {
+    return [];
+  }
+  const segments = [];
+  for (const segment of content) {
+    if (!isJsonObject(segment)) {
+      continue;
+    }
+    segments.push(cloneJsonObject(segment));
+  }
+  return segments;
+}
+function cloneJsonObject(source) {
+  const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
+  return Object.fromEntries(entries);
+}
+function cloneJsonValue(value) {
+  if (value === null) {
+    return null;
+  }
+  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => cloneJsonValue(item));
+  }
+  if (typeof value === "object") {
+    return cloneJsonObject(value);
+  }
+  return value;
+}
 var ANSI_RED = "\x1B[31m";
 var ANSI_RESET2 = "\x1B[0m";
 function logError(msg) {
@@ -14292,7 +14490,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
       id: String(id),
       question: prompt,
       input: [{ role: "user", content: prompt }],
-      input_segments: [{ type: "text", value: prompt }],
       expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
       reference_answer: evalCase.expected_output,
       file_paths: filePaths,
@@ -14414,7 +14611,7 @@ async function loadConfig(evalFilePath, repoRoot) {
     }
     try {
       const rawConfig = await readFile22(configPath, "utf8");
-      const parsed = parse(rawConfig);
+      const parsed = interpolateEnv(parse(rawConfig), process.env);
       if (!isJsonObject(parsed)) {
         logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
         continue;
@@ -14632,6 +14829,27 @@ function parseExecutionDefaults(raw, configPath) {
   } else if (otelFile !== void 0) {
     logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
   }
+  if (typeof obj.export_otel === "boolean") {
+    result.export_otel = obj.export_otel;
+  } else if (obj.export_otel !== void 0) {
+    logWarning(`Invalid execution.export_otel in ${configPath}, expected boolean`);
+  }
+  const otelBackend = obj.otel_backend;
+  if (typeof otelBackend === "string" && otelBackend.trim().length > 0) {
+    result.otel_backend = otelBackend.trim();
+  } else if (otelBackend !== void 0) {
+    logWarning(`Invalid execution.otel_backend in ${configPath}, expected non-empty string`);
+  }
+  if (typeof obj.otel_capture_content === "boolean") {
+    result.otel_capture_content = obj.otel_capture_content;
+  } else if (obj.otel_capture_content !== void 0) {
+    logWarning(`Invalid execution.otel_capture_content in ${configPath}, expected boolean`);
+  }
+  if (typeof obj.otel_group_turns === "boolean") {
+    result.otel_group_turns = obj.otel_group_turns;
+  } else if (obj.otel_group_turns !== void 0) {
+    logWarning(`Invalid execution.otel_group_turns in ${configPath}, expected boolean`);
+  }
   if (typeof obj.pool_workspaces === "boolean") {
     result.pool_workspaces = obj.pool_workspaces;
   } else if (obj.pool_workspaces !== void 0) {
@@ -16076,27 +16294,28 @@ var ANSI_YELLOW4 = "\x1B[33m";
 var ANSI_RESET5 = "\x1B[0m";
 async function processMessages(options) {
   const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
-  const segments = [];
+  const processedMessages = [];
   for (const message of messages) {
     const content = message.content;
     if (typeof content === "string") {
-      segments.push({ type: "text", value: content });
       if (textParts) {
         textParts.push(content);
       }
+      processedMessages.push({ ...message, content });
       continue;
     }
     if (isJsonObject(content)) {
       const rendered = JSON.stringify(content, null, 2);
-      segments.push({ type: "text", value: rendered });
       if (textParts) {
         textParts.push(rendered);
       }
+      processedMessages.push({ ...message, content: cloneJsonObject(content) });
       continue;
     }
     if (!Array.isArray(content)) {
       continue;
     }
+    const processedContent = [];
     for (const rawSegment of content) {
       if (!isJsonObject(rawSegment)) {
         continue;
@@ -16119,8 +16338,8 @@ async function processMessages(options) {
         }
         try {
           const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
-          segments.push({
-            type: "file",
+          processedContent.push({
+            ...cloneJsonObject(rawSegment),
             path: displayPath,
             text: fileContent,
             resolvedPath: path5.resolve(resolvedPath)
@@ -16137,37 +16356,19 @@ async function processMessages(options) {
         continue;
       }
       const clonedSegment = cloneJsonObject(rawSegment);
-      segments.push(clonedSegment);
+      processedContent.push(clonedSegment);
       const inlineValue = clonedSegment.value;
       if (typeof inlineValue === "string" && textParts) {
         textParts.push(inlineValue);
       }
     }
+    processedMessages.push({ ...message, content: processedContent });
   }
-  return segments;
+  return processedMessages;
 }
 function asString3(value) {
   return typeof value === "string" ? value : void 0;
 }
-function cloneJsonObject(source) {
-  const entries = Object.entries(source).map(([key, value]) => [key, cloneJsonValue(value)]);
-  return Object.fromEntries(entries);
-}
-function cloneJsonValue(value) {
-  if (value === null) {
-    return null;
-  }
-  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
-    return value;
-  }
-  if (Array.isArray(value)) {
-    return value.map((item) => cloneJsonValue(item));
-  }
-  if (typeof value === "object") {
-    return cloneJsonObject(value);
-  }
-  return value;
-}
 function logWarning3(message, details) {
   if (details && details.length > 0) {
     const detailBlock = details.join("\n");
@@ -16412,10 +16613,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
         );
       }
     }
-    const inputMessages = resolveInputMessages(evalcase);
+    const rawInputMessages = resolveInputMessages(evalcase);
     const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
     const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
-    if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
+    if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
       logError2(
         `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
       );
@@ -16423,8 +16624,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
     }
     const hasExpectedMessages = expectedMessages.length > 0;
     const inputTextParts = [];
-    const inputSegments = await processMessages({
-      messages: inputMessages,
+    const inputMessages = await processMessages({
+      messages: rawInputMessages,
       searchRoots,
       repoRootPath,
       textParts: inputTextParts,
@@ -16470,19 +16671,13 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       }
     }
     warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
-    const userFilePaths = [];
-    for (const segment of inputSegments) {
-      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
-        userFilePaths.push(segment.resolvedPath);
-      }
-    }
+    const userFilePaths = collectResolvedInputFilePaths(inputMessages);
     const testCase = {
       id,
       eval_set: evalSetName,
       conversation_id: conversationId,
       question,
       input: inputMessages,
-      input_segments: inputSegments,
       expected_output: outputSegments,
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
@@ -16543,50 +16738,9 @@ function parseMetadata(suite) {
   });
 }
 async function buildPromptInputs(testCase, mode = "lm") {
-  const segmentsByMessage = [];
-  const fileContentsByPath = /* @__PURE__ */ new Map();
-  for (const segment of testCase.input_segments) {
-    if (segment.type === "file" && typeof segment.path === "string" && typeof segment.text === "string") {
-      fileContentsByPath.set(segment.path, segment.text);
-    }
-  }
-  for (const message of testCase.input) {
-    const messageSegments = [];
-    if (typeof message.content === "string") {
-      if (message.content.trim().length > 0) {
-        messageSegments.push({ type: "text", value: message.content });
-      }
-    } else if (Array.isArray(message.content)) {
-      for (const segment of message.content) {
-        if (typeof segment === "string") {
-          if (segment.trim().length > 0) {
-            messageSegments.push({ type: "text", value: segment });
-          }
-        } else if (isJsonObject(segment)) {
-          const type = asString5(segment.type);
-          if (type === "file") {
-            const value = asString5(segment.value);
-            if (!value) continue;
-            const fileText = fileContentsByPath.get(value);
-            if (fileText !== void 0) {
-              messageSegments.push({ type: "file", text: fileText, path: value });
-            }
-          } else if (type === "text") {
-            const textValue = asString5(segment.value);
-            if (textValue && textValue.trim().length > 0) {
-              messageSegments.push({ type: "text", value: textValue });
-            }
-          }
-        }
-      }
-    } else if (isJsonObject(message.content)) {
-      const rendered = JSON.stringify(message.content, null, 2);
-      if (rendered.trim().length > 0) {
-        messageSegments.push({ type: "text", value: rendered });
-      }
-    }
-    segmentsByMessage.push(messageSegments);
-  }
+  const segmentsByMessage = testCase.input.map(
+    (message) => extractContentSegments(message.content)
+  );
   const useRoleMarkers = needsRoleMarkers(testCase.input, segmentsByMessage);
   let question;
   if (useRoleMarkers) {
@@ -16614,7 +16768,7 @@ ${messageContent}`);
     question = messageParts.join("\n\n");
   } else {
     const questionParts = [];
-    for (const segment of testCase.input_segments) {
+    for (const segment of flattenInputMessages(testCase.input)) {
       const formattedContent = formatSegment(segment, mode);
       if (formattedContent) {
         questionParts.push(formattedContent);
@@ -16701,9 +16855,6 @@ function buildChatPromptFromSegments(options) {
   }
   return chatPrompt.length > 0 ? chatPrompt : void 0;
 }
-function asString5(value) {
-  return typeof value === "string" ? value : void 0;
-}
 var ANSI_YELLOW6 = "\x1B[33m";
 var ANSI_RED3 = "\x1B[31m";
 var ANSI_RESET7 = "\x1B[0m";
@@ -16784,7 +16935,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = interpolated;
-  const evalSetNameFromSuite = asString6(suite.name)?.trim();
+  const evalSetNameFromSuite = asString5(suite.name)?.trim();
   const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
   const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
   const rawTestcases = resolveTests(suite);
@@ -16803,7 +16954,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   const suiteInputMessages = expandInputShorthand(suite.input);
   const suiteInputFiles = suite.input_files;
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
-  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
+  const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
   const suiteAssertions = suite.assertions ?? suite.assert;
   if (suite.assert !== void 0 && suite.assertions === void 0) {
     logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
@@ -16816,17 +16967,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       continue;
     }
     const evalcase = rawEvalcase;
-    const id = asString6(evalcase.id);
+    const id = asString5(evalcase.id);
     if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
       continue;
     }
-    const conversationId = asString6(evalcase.conversation_id);
-    let outcome = asString6(evalcase.criteria);
+    const conversationId = asString5(evalcase.conversation_id);
+    let outcome = asString5(evalcase.criteria);
     if (!outcome && evalcase.expected_outcome !== void 0) {
-      outcome = asString6(evalcase.expected_outcome);
+      outcome = asString5(evalcase.expected_outcome);
       if (outcome) {
         logWarning5(
-          `Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
+          `Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
         );
       }
     }
@@ -16843,10 +16994,9 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       continue;
     }
     const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
-    const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
     const hasExpectedMessages = expectedMessages.length > 0;
     const inputTextParts = [];
-    const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
+    const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
       messages: effectiveSuiteInputMessages,
       searchRoots,
       repoRootPath,
@@ -16854,7 +17004,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     }) : [];
-    const testInputSegments = await processMessages({
+    const testResolvedInputMessages = await processMessages({
       messages: testInputMessages,
       searchRoots,
       repoRootPath,
@@ -16862,7 +17012,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       messageType: "input",
       verbose
     });
-    const inputSegments = [...suiteInputSegments, ...testInputSegments];
+    const inputMessages = [...suiteResolvedInputMessages, ...testResolvedInputMessages];
     const outputSegments = hasExpectedMessages ? await processExpectedMessages({
       messages: expectedMessages,
       searchRoots,
@@ -16900,12 +17050,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       }
     }
     warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
-    const userFilePaths = [];
-    for (const segment of inputSegments) {
-      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
-        userFilePaths.push(segment.resolvedPath);
-      }
-    }
+    const userFilePaths = collectResolvedInputFilePaths(inputMessages);
     const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
@@ -16916,7 +17061,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       conversation_id: conversationId,
       question,
       input: inputMessages,
-      input_segments: inputSegments,
       expected_output: outputSegments,
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
@@ -17125,7 +17269,7 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
     path: caseLevel.path ?? suiteLevel.path
   };
 }
-function asString6(value) {
+function asString5(value) {
   return typeof value === "string" ? value : void 0;
 }
 function logWarning5(message, details) {
@@ -19552,7 +19696,7 @@ ${basePrompt}` : basePrompt;
     if (itemType === "command_execution") {
       completedToolCalls.push({
         tool: "command_execution",
-        input: item.command,
+        input: { command: item.command },
         output: item.aggregated_output,
         id: item.id
       });
@@ -20383,11 +20527,22 @@ async function loadCopilotSdk() {
     try {
       copilotSdkModule = await import("@github/copilot-sdk");
     } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      if (message.includes("vscode-jsonrpc")) {
+        throw new Error(
+          `Failed to load @github/copilot-sdk due to a known ESM compatibility issue with vscode-jsonrpc (https://github.com/github/copilot-sdk/issues/710).
+Workarounds:
+  - Use the copilot-cli target instead (recommended): set target type to "copilot-cli" in your eval YAML
+  - If running under Node.js 24+: set NODE_OPTIONS="--experimental-specifier-resolution=node"
+  - Wait for vscode-jsonrpc@9.0.0 stable to be released upstream`
+        );
+      }
       throw new Error(
         `Failed to load @github/copilot-sdk. Please install it:
   npm install @github/copilot-sdk
-Original error: ${error instanceof Error ? error.message : String(error)}`
+Original error: ${message}`
       );
     }
   }
@@ -20781,7 +20936,7 @@ var PiAgentSdkProvider = class {
     const { Agent, getModel, getEnvApiKey } = await loadPiModules();
     const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
     const startMs = Date.now();
-    const providerName = this.config.provider ?? "anthropic";
+    const providerName = this.config.subprovider ?? "anthropic";
     const modelId = this.config.model ?? "claude-sonnet-4-20250514";
     const model = getModel(providerName, modelId);
     const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
@@ -20893,7 +21048,7 @@ var PiAgentSdkProvider = class {
           messages: agentMessages,
           systemPrompt,
           model: this.config.model,
-          provider: this.config.provider
+          subprovider: this.config.subprovider
         },
         output,
         tokenUsage,
@@ -21117,8 +21272,8 @@ var PiCodingAgentProvider = class {
   }
   buildPiArgs(prompt, inputFiles, _captureFileChanges) {
     const args = [];
-    if (this.config.provider) {
-      args.push("--provider", this.config.provider);
+    if (this.config.subprovider) {
+      args.push("--provider", this.config.subprovider);
     }
     if (this.config.model) {
       args.push("--model", this.config.model);
@@ -21176,7 +21331,7 @@ ${prompt}` : prompt;
   buildEnv() {
     const env = { ...process.env };
     if (this.config.apiKey) {
-      const provider = this.config.provider?.toLowerCase() ?? "google";
+      const provider = this.config.subprovider?.toLowerCase() ?? "google";
       switch (provider) {
         case "google":
         case "gemini":
@@ -21592,6 +21747,13 @@ function extractToolCalls4(content) {
         id: typeof p.id === "string" ? p.id : void 0
       });
     }
+    if (p.type === "toolCall" && typeof p.name === "string") {
+      toolCalls.push({
+        tool: p.name,
+        input: p.arguments,
+        id: typeof p.id === "string" ? p.id : void 0
+      });
+    }
     if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
       const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
       if (existing) {
@@ -23903,7 +24065,8 @@ var freeformEvaluationSchema = external_exports2.object({
       passed: external_exports2.boolean().describe("Whether this aspect was satisfied"),
       evidence: external_exports2.string().describe("Concise evidence (1-2 sentences)").optional()
     })
-  ).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional()
+  ).describe("Per-aspect evaluation results \u2014 one entry per aspect checked").optional(),
+  details: external_exports2.record(external_exports2.unknown()).describe("Optional structured metadata for domain-specific metrics").optional()
 });
 var rubricCheckResultSchema = external_exports2.object({
   id: external_exports2.string().describe("The ID of the rubric item being checked"),
@@ -23965,7 +24128,7 @@ var LlmGraderEvaluator = class {
   async evaluateFreeform(context2, graderProvider) {
     const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
     const variables = {
-      [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context2.evalCase.input_segments, null, 2),
+      [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(context2.evalCase.input, null, 2),
       [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(
         context2.evalCase.expected_output,
         null,
@@ -24008,6 +24171,7 @@ ${context2.fileChanges}`;
         expectedAspectCount: Math.max(assertions.length, 1),
         evaluatorRawRequest,
         graderTarget: graderProvider.targetName,
+        details: data.details,
         tokenUsage
       };
     } catch (e) {
@@ -24427,7 +24591,7 @@ ${outputSchema2}`;
         expectedAspectCount: Math.max(assertions.length, 1),
         evaluatorRawRequest,
         graderTarget,
-        details
+        details: data.details && Object.keys(data.details).length > 0 ? { ...details, ...data.details } : details
       };
     } catch {
       return {
@@ -24574,7 +24738,8 @@ function buildOutputSchema() {
     '      "passed": <boolean>,',
     '      "evidence": "<concise evidence, 1-2 sentences, optional>"',
     "    }",
-    "  ]",
+    "  ],",
+    '  "details": {<optional object with domain-specific structured metrics>}',
     "}"
   ].join("\n");
 }
@@ -25778,12 +25943,31 @@ var COPILOT_MATCHER = {
   readToolPrefixes: ["Viewing "],
   readInputFields: ["file_path", "path"]
 };
+var PI_CODING_AGENT_MATCHER = {
+  skillTools: [],
+  skillInputField: "skill",
+  readTools: ["read"],
+  readInputField: "path",
+  readInputFields: ["path", "file_path", "filePath"]
+};
+var CODEX_MATCHER = {
+  skillTools: [],
+  skillInputField: "skill",
+  readTools: ["command_execution"],
+  readInputField: "command",
+  skillToolPrefixes: ["mcp:"],
+  readToolPrefixes: ["mcp:"],
+  readInputFields: ["command", "path", "file_path", "filePath"]
+};
 var PROVIDER_TOOL_SEMANTICS = {
   claude: CLAUDE_MATCHER,
   "claude-cli": CLAUDE_MATCHER,
   "claude-sdk": CLAUDE_MATCHER,
-  "pi-coding-agent": CLAUDE_MATCHER,
-  "pi-agent-sdk": CLAUDE_MATCHER,
+  codex: CODEX_MATCHER,
+  "pi-coding-agent": PI_CODING_AGENT_MATCHER,
+  // pi-agent-sdk has no tools, so skill detection is a no-op. Kept for completeness.
+  // TODO: consider removing pi-agent-sdk provider entirely.
+  "pi-agent-sdk": PI_CODING_AGENT_MATCHER,
   "copilot-cli": COPILOT_MATCHER,
   "copilot-sdk": COPILOT_MATCHER,
   vscode: COPILOT_MATCHER,
@@ -25807,33 +25991,37 @@ var SkillTriggerEvaluator = class {
     const shouldTrigger = this.config.should_trigger !== false;
     const providerKind = context2.provider?.kind;
     const matcher = this.resolveMatcher(providerKind);
-    const firstTool = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0];
+    const allToolCalls = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? []);
     let triggered = false;
     let evidence = "";
-    if (firstTool) {
-      const input = firstTool.input ?? {};
-      if (matcher.skillTools.includes(firstTool.tool)) {
+    for (const toolCall of allToolCalls) {
+      const input = toolCall.input ?? {};
+      if (matcher.skillTools.includes(toolCall.tool)) {
         const skillArg = String(input[matcher.skillInputField] ?? "");
         if (skillArg.includes(skillName)) {
           triggered = true;
           evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
+          break;
         }
       } else if (matcher.skillToolPrefixes?.some(
-        (prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
+        (prefix) => toolCall.tool.startsWith(prefix) && toolCall.tool.includes(skillName)
       )) {
         triggered = true;
-        evidence = `Skill tool invoked via tool name "${firstTool.tool}"`;
-      } else if (matcher.readTools.includes(firstTool.tool)) {
+        evidence = `Skill tool invoked via tool name "${toolCall.tool}"`;
+        break;
+      } else if (matcher.readTools.includes(toolCall.tool)) {
         const filePath = this.readPathFromInput(input, matcher);
         if (filePath.includes(skillName)) {
           triggered = true;
           evidence = `Read tool loaded skill file: ${filePath}`;
+          break;
         }
       } else if (matcher.readToolPrefixes?.some(
-        (prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
+        (prefix) => toolCall.tool.startsWith(prefix) && toolCall.tool.includes(skillName)
       )) {
         triggered = true;
-        evidence = `Read tool loaded skill file via tool name "${firstTool.tool}"`;
+        evidence = `Read tool loaded skill file via tool name "${toolCall.tool}"`;
+        break;
       }
     }
     const pass = triggered === shouldTrigger;
@@ -25855,7 +26043,7 @@ var SkillTriggerEvaluator = class {
       verdict: "fail",
       assertions: [
         {
-          text: shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not a skill/read tool for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`,
+          text: shouldTrigger ? allToolCalls.length > 0 ? `Skill "${skillName}" not found in ${allToolCalls.length} tool call(s)` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`,
           passed: false
         }
       ],
@@ -25901,7 +26089,7 @@ function assembleLlmGraderPrompt(input) {
 function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evaluatorTemplateOverride) {
   const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
   const variables = {
-    [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input_segments, null, 2),
+    [TEMPLATE_VARIABLES.INPUT]: JSON.stringify(evalCase.input, null, 2),
     [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: JSON.stringify(evalCase.expected_output, null, 2),
     [TEMPLATE_VARIABLES.OUTPUT]: JSON.stringify([], null, 2),
     [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
@@ -28115,6 +28303,18 @@ var QUALITY_PASS_THRESHOLD = 0.8;
 function classifyQualityStatus(score) {
   return score >= QUALITY_PASS_THRESHOLD ? "ok" : "quality_failure";
 }
+function buildSkippedEvaluatorError(scores) {
+  const skippedScores = scores?.filter((score) => score.verdict === "skip") ?? [];
+  if (skippedScores.length === 0) {
+    return void 0;
+  }
+  const messages = skippedScores.map((score) => {
+    const label = score.name || score.type;
+    const assertionMessage = score.assertions.find((assertion) => !assertion.passed)?.text ?? "Evaluator skipped";
+    return `${label}: ${assertionMessage}`;
+  });
+  return messages.length === 1 ? messages[0] : `Evaluators skipped: ${messages.join(" | ")}`;
+}
 function usesFileReferencePrompt(provider) {
   return isAgentProvider(provider) || provider.kind === "cli";
 }
@@ -29379,7 +29579,8 @@ async function runEvalCase(options) {
       durationMs: totalDurationMs,
       ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
     };
-    const executionStatus = providerError ? "execution_error" : classifyQualityStatus(result.score);
+    const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
+    const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score);
     const finalResult = providerError ? {
       ...result,
       evalRun,
@@ -29391,7 +29592,26 @@ async function runEvalCase(options) {
       beforeAllOutput,
       beforeEachOutput,
       afterEachOutput
-    } : { ...result, evalRun, executionStatus, beforeAllOutput, beforeEachOutput, afterEachOutput };
+    } : skippedEvaluatorError ? {
+      ...result,
+      score: 0,
+      evalRun,
+      error: skippedEvaluatorError,
+      executionStatus,
+      failureStage: "evaluator",
+      failureReasonCode: "evaluator_error",
+      executionError: { message: skippedEvaluatorError, stage: "evaluator" },
+      beforeAllOutput,
+      beforeEachOutput,
+      afterEachOutput
+    } : {
+      ...result,
+      evalRun,
+      executionStatus,
+      beforeAllOutput,
+      beforeEachOutput,
+      afterEachOutput
+    };
     const isFailure = !!finalResult.error || finalResult.score < 0.5;
     if (workspacePath && !isSharedWorkspace) {
       if (forceCleanup) {
@@ -30128,11 +30348,6 @@ async function evaluate(config) {
     evalCases = (config.tests ?? []).map((test) => {
       const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
       const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
-      const inputSegments = input.map((m) => ({
-        type: "text",
-        value: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
-        messageIndex: 0
-      }));
       const expectedOutputValue = test.expectedOutput ?? test.expected_output;
       const expectedOutput = expectedOutputValue ? [
         { role: "assistant", content: expectedOutputValue }
@@ -30161,7 +30376,6 @@ async function evaluate(config) {
         criteria: test.criteria ?? "",
         question: String(question),
         input,
-        input_segments: inputSegments,
         expected_output: expectedOutput,
         reference_answer: expectedOutputValue,
         file_paths: [],
@@ -31062,4 +31276,4 @@ export {
   OtelStreamingObserver,
   createAgentKernel
 };
-//# sourceMappingURL=chunk-KGK5NUFG.js.map
+//# sourceMappingURL=chunk-EZGWZVVK.js.map