npm - @agentv/core - Versions diffs - 0.7.5 → 0.10.0 - Mend

@agentv/core 0.7.5 → 0.10.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.

Files changed (14)

package/dist/{chunk-7XM7HYRS.js → chunk-YQBJAT5I.js} +97 -67
package/dist/chunk-YQBJAT5I.js.map +1 -0
package/dist/evaluation/validation/index.cjs +61 -69
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +51 -58
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +538 -192
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +136 -58
package/dist/index.d.ts +136 -58
package/dist/index.js +443 -127
package/dist/index.js.map +1 -1
package/package.json +1 -2
package/dist/chunk-7XM7HYRS.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -434,14 +434,11 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
       continue;
     }
-    if (!Array.isArray(expectedMessagesValue)) {
-      logWarning(`Eval case '${id}' missing expected_messages array`);
-      continue;
-    }
+    const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
     const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
-    const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
-    if (expectedMessages.length === 0) {
-      logWarning(`No expected message found for eval case: ${id}`);
+    const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
+    if (hasExpectedMessages && expectedMessages.length === 0) {
+      logWarning(`No valid expected message found for eval case: ${id}`);
       continue;
     }
     if (expectedMessages.length > 1) {
@@ -459,17 +456,17 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       messageType: "input",
       verbose
     });
-    const outputSegments = await processMessages({
+    const outputSegments = hasExpectedMessages ? await processMessages({
       messages: expectedMessages,
       searchRoots,
       repoRootPath,
       guidelinePatterns,
       messageType: "output",
       verbose
-    });
+    }) : [];
     const codeSnippets = extractCodeBlocks(inputSegments);
     const expectedContent = expectedMessages[0]?.content;
-    const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
+    const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
     const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
     const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
@@ -488,6 +485,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
       dataset: datasetName,
       conversation_id: conversationId,
       question,
+      input_messages: inputMessages,
       input_segments: inputSegments,
       output_segments: outputSegments,
       reference_answer: referenceAnswer,
@@ -515,6 +513,54 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
   }
   return results;
 }
+function needsRoleMarkers(messages, processedSegmentsByMessage) {
+  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
+    return true;
+  }
+  let messagesWithContent = 0;
+  for (const segments of processedSegmentsByMessage) {
+    if (hasVisibleContent(segments)) {
+      messagesWithContent++;
+    }
+  }
+  return messagesWithContent > 1;
+}
+function hasVisibleContent(segments) {
+  return segments.some((segment) => {
+    const type = asString(segment.type);
+    if (type === "text") {
+      const value = asString(segment.value);
+      return value !== void 0 && value.trim().length > 0;
+    }
+    if (type === "guideline_ref") {
+      return false;
+    }
+    if (type === "file") {
+      const text = asString(segment.text);
+      return text !== void 0 && text.trim().length > 0;
+    }
+    return false;
+  });
+}
+function formatSegment(segment) {
+  const type = asString(segment.type);
+  if (type === "text") {
+    return asString(segment.value);
+  }
+  if (type === "guideline_ref") {
+    const refPath = asString(segment.path);
+    return refPath ? `<Attached: ${refPath}>` : void 0;
+  }
+  if (type === "file") {
+    const text = asString(segment.text);
+    const filePath = asString(segment.path);
+    if (text && filePath) {
+      return `=== ${filePath} ===
+${text}`;
+    }
+  }
+  return void 0;
+}
 async function buildPromptInputs(testCase) {
   const guidelineContents = [];
   for (const rawPath of testCase.guideline_paths) {
@@ -531,36 +577,168 @@ ${content}`);
       logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
     }
   }
-  const questionParts = [];
+  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
+  const segmentsByMessage = [];
+  const fileContentsByPath = /* @__PURE__ */ new Map();
   for (const segment of testCase.input_segments) {
-    const typeValue = segment.type;
-    if (typeof typeValue === "string" && typeValue === "file") {
-      const pathValue = segment.path;
-      const textValue = segment.text;
-      const label = typeof pathValue === "string" ? pathValue : "file";
-      const body = typeof textValue === "string" ? textValue : "";
-      questionParts.push(`=== ${label} ===
-${body}`);
-      continue;
+    if (segment.type === "file" && typeof segment.path === "string" && typeof segment.text === "string") {
+      fileContentsByPath.set(segment.path, segment.text);
     }
-    if (typeof typeValue === "string" && typeValue === "text") {
-      const value = segment.value;
-      if (typeof value === "string") {
-        questionParts.push(value);
+  }
+  for (const message of testCase.input_messages) {
+    const messageSegments = [];
+    if (typeof message.content === "string") {
+      if (message.content.trim().length > 0) {
+        messageSegments.push({ type: "text", value: message.content });
+      }
+    } else if (Array.isArray(message.content)) {
+      for (const segment of message.content) {
+        if (typeof segment === "string") {
+          if (segment.trim().length > 0) {
+            messageSegments.push({ type: "text", value: segment });
+          }
+        } else if (isJsonObject(segment)) {
+          const type = asString(segment.type);
+          if (type === "file") {
+            const value = asString(segment.value);
+            if (!value) continue;
+            if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
+              messageSegments.push({ type: "guideline_ref", path: value });
+              continue;
+            }
+            const fileText = fileContentsByPath.get(value);
+            if (fileText !== void 0) {
+              messageSegments.push({ type: "file", text: fileText, path: value });
+            }
+          } else if (type === "text") {
+            const textValue = asString(segment.value);
+            if (textValue && textValue.trim().length > 0) {
+              messageSegments.push({ type: "text", value: textValue });
+            }
+          }
+        }
+      }
+    }
+    segmentsByMessage.push(messageSegments);
+  }
+  const useRoleMarkers = needsRoleMarkers(testCase.input_messages, segmentsByMessage);
+  let question;
+  if (useRoleMarkers) {
+    const messageParts = [];
+    for (let i = 0; i < testCase.input_messages.length; i++) {
+      const message = testCase.input_messages[i];
+      const segments = segmentsByMessage[i];
+      if (!hasVisibleContent(segments)) {
+        continue;
+      }
+      const roleLabel = message.role.charAt(0).toUpperCase() + message.role.slice(1);
+      const contentParts = [];
+      for (const segment of segments) {
+        const formattedContent = formatSegment(segment);
+        if (formattedContent) {
+          contentParts.push(formattedContent);
+        }
+      }
+      if (contentParts.length > 0) {
+        const messageContent = contentParts.join("\n");
+        messageParts.push(`@[${roleLabel}]:
+${messageContent}`);
       }
-      continue;
     }
-    const genericValue = segment.value;
-    if (typeof genericValue === "string") {
-      questionParts.push(genericValue);
+    question = messageParts.join("\n\n");
+  } else {
+    const questionParts = [];
+    for (const segment of testCase.input_segments) {
+      const formattedContent = formatSegment(segment);
+      if (formattedContent) {
+        questionParts.push(formattedContent);
+      }
     }
+    if (testCase.code_snippets.length > 0) {
+      questionParts.push(testCase.code_snippets.join("\n"));
+    }
+    question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
   }
-  if (testCase.code_snippets.length > 0) {
-    questionParts.push(testCase.code_snippets.join("\n"));
+  const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
+    messages: testCase.input_messages,
+    segmentsByMessage,
+    guidelinePatterns: testCase.guideline_patterns,
+    guidelineContent: guidelines
+  }) : void 0;
+  return { question, guidelines, chatPrompt };
+}
+function buildChatPromptFromSegments(options) {
+  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
+  if (messages.length === 0) {
+    return void 0;
   }
-  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
-  return { question, guidelines };
+  const systemSegments = [];
+  if (systemPrompt && systemPrompt.trim().length > 0) {
+    systemSegments.push(systemPrompt.trim());
+  }
+  if (guidelineContent && guidelineContent.trim().length > 0) {
+    systemSegments.push(`[[ ## Guidelines ## ]]
+${guidelineContent.trim()}`);
+  }
+  let startIndex = 0;
+  while (startIndex < messages.length && messages[startIndex].role === "system") {
+    const segments = segmentsByMessage[startIndex];
+    const contentParts = [];
+    for (const segment of segments) {
+      const formatted = formatSegment(segment);
+      if (formatted) {
+        contentParts.push(formatted);
+      }
+    }
+    if (contentParts.length > 0) {
+      systemSegments.push(contentParts.join("\n"));
+    }
+    startIndex += 1;
+  }
+  const chatPrompt = [];
+  if (systemSegments.length > 0) {
+    chatPrompt.push({
+      role: "system",
+      content: systemSegments.join("\n\n")
+    });
+  }
+  for (let i = startIndex; i < messages.length; i++) {
+    const message = messages[i];
+    const segments = segmentsByMessage[i];
+    const contentParts = [];
+    let role = message.role;
+    let name;
+    if (role === "system") {
+      role = "assistant";
+      contentParts.push("@[System]:");
+    } else if (role === "tool") {
+      role = "function";
+      name = "tool";
+    }
+    for (const segment of segments) {
+      if (segment.type === "guideline_ref") {
+        continue;
+      }
+      const formatted = formatSegment(segment);
+      if (formatted) {
+        const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
+        if (isGuidelineRef) {
+          continue;
+        }
+        contentParts.push(formatted);
+      }
+    }
+    if (contentParts.length === 0) {
+      continue;
+    }
+    chatPrompt.push({
+      role,
+      content: contentParts.join("\n"),
+      ...name ? { name } : {}
+    });
+  }
+  return chatPrompt.length > 0 ? chatPrompt : void 0;
 }
 async function fileExists2(absolutePath) {
   try {
@@ -757,21 +935,14 @@ var import_ax = require("@ax-llm/ax");
 var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
 function buildChatPrompt(request) {
   if (request.chatPrompt) {
-    return request.chatPrompt;
-  }
-  const systemSegments = [];
-  const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
-  if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
-    systemSegments.push(metadataSystemPrompt.trim());
-  } else {
-    systemSegments.push(DEFAULT_SYSTEM_PROMPT);
-  }
-  if (request.guidelines && request.guidelines.trim().length > 0) {
-    systemSegments.push(`[[ ## Guidelines ## ]]
-${request.guidelines.trim()}`);
+    const hasSystemMessage = request.chatPrompt.some((message) => message.role === "system");
+    if (hasSystemMessage) {
+      return request.chatPrompt;
+    }
+    const systemContent2 = resolveSystemContent(request);
+    return [{ role: "system", content: systemContent2 }, ...request.chatPrompt];
   }
-  const systemContent = systemSegments.join("\n\n");
+  const systemContent = resolveSystemContent(request);
   const userContent = request.question.trim();
   const prompt = [
     {
@@ -785,6 +956,21 @@ ${request.guidelines.trim()}`);
   ];
   return prompt;
 }
+function resolveSystemContent(request) {
+  const systemSegments = [];
+  const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
+  if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
+    systemSegments.push(metadataSystemPrompt.trim());
+  } else {
+    systemSegments.push(DEFAULT_SYSTEM_PROMPT);
+  }
+  if (request.guidelines && request.guidelines.trim().length > 0) {
+    systemSegments.push(`[[ ## Guidelines ## ]]
+${request.guidelines.trim()}`);
+  }
+  return systemSegments.join("\n\n");
+}
 function extractModelConfig(request, defaults) {
   const temperature = request.temperature ?? defaults.temperature;
   const maxTokens = request.maxOutputTokens ?? defaults.maxOutputTokens;
@@ -828,6 +1014,67 @@ function ensureChatResponse(result) {
   }
   return result;
 }
+function isRetryableError(error, retryableStatusCodes) {
+  if (!error || typeof error !== "object") {
+    return false;
+  }
+  if ("status" in error && typeof error.status === "number") {
+    return retryableStatusCodes.includes(error.status);
+  }
+  if ("message" in error && typeof error.message === "string") {
+    const match = error.message.match(/HTTP (\d{3})/);
+    if (match) {
+      const status = Number.parseInt(match[1], 10);
+      return retryableStatusCodes.includes(status);
+    }
+  }
+  if ("name" in error && error.name === "AxAIServiceNetworkError") {
+    return true;
+  }
+  return false;
+}
+function calculateRetryDelay(attempt, config) {
+  const delay = Math.min(
+    config.maxDelayMs,
+    config.initialDelayMs * config.backoffFactor ** attempt
+  );
+  return delay * (0.75 + Math.random() * 0.5);
+}
+async function sleep(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+async function withRetry(fn, retryConfig, signal) {
+  const config = {
+    maxRetries: retryConfig?.maxRetries ?? 3,
+    initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
+    maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
+    backoffFactor: retryConfig?.backoffFactor ?? 2,
+    retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
+  };
+  let lastError;
+  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
+    if (signal?.aborted) {
+      throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
+    }
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+      if (attempt >= config.maxRetries) {
+        break;
+      }
+      if (!isRetryableError(error, config.retryableStatusCodes)) {
+        throw error;
+      }
+      const delay = calculateRetryDelay(attempt, config);
+      await sleep(delay);
+      if (signal?.aborted) {
+        throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
+      }
+    }
+  }
+  throw lastError;
+}
 var AzureProvider = class {
   constructor(targetName, config) {
     this.config = config;
@@ -837,6 +1084,7 @@ var AzureProvider = class {
       temperature: config.temperature,
       maxOutputTokens: config.maxOutputTokens
     };
+    this.retryConfig = config.retry;
     this.ai = import_ax.AxAI.create({
       name: "azure-openai",
       apiKey: config.apiKey,
@@ -853,16 +1101,21 @@ var AzureProvider = class {
   targetName;
   ai;
   defaults;
+  retryConfig;
   async invoke(request) {
     const chatPrompt = buildChatPrompt(request);
     const modelConfig = extractModelConfig(request, this.defaults);
-    const response = await this.ai.chat(
-      {
-        chatPrompt,
-        model: this.config.deploymentName,
-        ...modelConfig ? { modelConfig } : {}
-      },
-      request.signal ? { abortSignal: request.signal } : void 0
+    const response = await withRetry(
+      async () => await this.ai.chat(
+        {
+          chatPrompt,
+          model: this.config.deploymentName,
+          ...modelConfig ? { modelConfig } : {}
+        },
+        request.signal ? { abortSignal: request.signal } : void 0
+      ),
+      this.retryConfig,
+      request.signal
     );
     return mapResponse(ensureChatResponse(response));
   }
@@ -880,6 +1133,7 @@ var AnthropicProvider = class {
       maxOutputTokens: config.maxOutputTokens,
       thinkingBudget: config.thinkingBudget
     };
+    this.retryConfig = config.retry;
     this.ai = import_ax.AxAI.create({
       name: "anthropic",
       apiKey: config.apiKey
@@ -890,16 +1144,21 @@ var AnthropicProvider = class {
   targetName;
   ai;
   defaults;
+  retryConfig;
   async invoke(request) {
     const chatPrompt = buildChatPrompt(request);
     const modelConfig = extractModelConfig(request, this.defaults);
-    const response = await this.ai.chat(
-      {
-        chatPrompt,
-        model: this.config.model,
-        ...modelConfig ? { modelConfig } : {}
-      },
-      request.signal ? { abortSignal: request.signal } : void 0
+    const response = await withRetry(
+      async () => await this.ai.chat(
+        {
+          chatPrompt,
+          model: this.config.model,
+          ...modelConfig ? { modelConfig } : {}
+        },
+        request.signal ? { abortSignal: request.signal } : void 0
+      ),
+      this.retryConfig,
+      request.signal
     );
     return mapResponse(ensureChatResponse(response));
   }
@@ -916,6 +1175,7 @@ var GeminiProvider = class {
       temperature: config.temperature,
       maxOutputTokens: config.maxOutputTokens
     };
+    this.retryConfig = config.retry;
     this.ai = import_ax.AxAI.create({
       name: "google-gemini",
       apiKey: config.apiKey
@@ -926,16 +1186,21 @@ var GeminiProvider = class {
   targetName;
   ai;
   defaults;
+  retryConfig;
   async invoke(request) {
     const chatPrompt = buildChatPrompt(request);
     const modelConfig = extractModelConfig(request, this.defaults);
-    const response = await this.ai.chat(
-      {
-        chatPrompt,
-        model: this.config.model,
-        ...modelConfig ? { modelConfig } : {}
-      },
-      request.signal ? { abortSignal: request.signal } : void 0
+    const response = await withRetry(
+      async () => await this.ai.chat(
+        {
+          chatPrompt,
+          model: this.config.model,
+          ...modelConfig ? { modelConfig } : {}
+        },
+        request.signal ? { abortSignal: request.signal } : void 0
+      ),
+      this.retryConfig,
+      request.signal
     );
     return mapResponse(ensureChatResponse(response));
   }
@@ -1005,10 +1270,9 @@ var CliProvider = class {
     const outputFilePath = generateOutputFilePath(request.evalCaseId);
     const templateValues = buildTemplateValues(request, this.config, outputFilePath);
     const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
-    const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
-      env,
+      env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: request.signal
     });
@@ -1097,10 +1361,9 @@ var CliProvider = class {
         generateOutputFilePath("healthcheck")
       )
     );
-    const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
     const result = await this.runCommand(renderedCommand, {
       cwd: healthcheck.cwd ?? this.config.cwd,
-      env,
+      env: process.env,
       timeoutMs,
       signal
     });
@@ -2051,10 +2314,9 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID
 var BASE_TARGET_SCHEMA = import_zod.z.object({
   name: import_zod.z.string().min(1, "target name is required"),
   provider: import_zod.z.string().min(1, "provider is required"),
-  settings: import_zod.z.record(import_zod.z.unknown()).optional(),
   judge_target: import_zod.z.string().optional(),
   workers: import_zod.z.number().int().min(1).optional()
-});
+}).passthrough();
 var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
 function normalizeAzureApiVersion(value) {
   if (!value) {
@@ -2067,11 +2329,43 @@ function normalizeAzureApiVersion(value) {
   const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
   return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
 }
+function resolveRetryConfig(target) {
+  const maxRetries = resolveOptionalNumber(
+    target.max_retries ?? target.maxRetries,
+    `${target.name} max retries`
+  );
+  const initialDelayMs = resolveOptionalNumber(
+    target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
+    `${target.name} retry initial delay`
+  );
+  const maxDelayMs = resolveOptionalNumber(
+    target.retry_max_delay_ms ?? target.retryMaxDelayMs,
+    `${target.name} retry max delay`
+  );
+  const backoffFactor = resolveOptionalNumber(
+    target.retry_backoff_factor ?? target.retryBackoffFactor,
+    `${target.name} retry backoff factor`
+  );
+  const retryableStatusCodes = resolveOptionalNumberArray(
+    target.retry_status_codes ?? target.retryStatusCodes,
+    `${target.name} retry status codes`
+  );
+  if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
+    return void 0;
+  }
+  return {
+    maxRetries,
+    initialDelayMs,
+    maxDelayMs,
+    backoffFactor,
+    retryableStatusCodes
+  };
+}
 function resolveTargetDefinition(definition, env = process.env) {
   const parsed = BASE_TARGET_SCHEMA.parse(definition);
   const provider = parsed.provider.toLowerCase();
   const providerBatching = resolveOptionalBoolean(
-    parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
+    parsed.provider_batching ?? parsed.providerBatching
   );
   switch (provider) {
     case "azure":
@@ -2147,13 +2441,12 @@ function resolveTargetDefinition(definition, env = process.env) {
   }
 }
 function resolveAzureConfig(target, env) {
-  const settings = target.settings ?? {};
-  const endpointSource = settings.endpoint ?? settings.resource ?? settings.resourceName;
-  const apiKeySource = settings.api_key ?? settings.apiKey;
-  const deploymentSource = settings.deployment ?? settings.deploymentName ?? settings.model;
-  const versionSource = settings.version ?? settings.api_version;
-  const temperatureSource = settings.temperature;
-  const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
+  const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
+  const versionSource = target.version ?? target.api_version;
+  const temperatureSource = target.temperature;
+  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
   const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
   const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
   const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
@@ -2165,58 +2458,61 @@ function resolveAzureConfig(target, env) {
     maxTokensSource,
     `${target.name} max output tokens`
   );
+  const retry = resolveRetryConfig(target);
   return {
     resourceName,
     deploymentName,
     apiKey,
     version,
     temperature,
-    maxOutputTokens
+    maxOutputTokens,
+    retry
   };
 }
 function resolveAnthropicConfig(target, env) {
-  const settings = target.settings ?? {};
-  const apiKeySource = settings.api_key ?? settings.apiKey;
-  const modelSource = settings.model ?? settings.deployment ?? settings.variant;
-  const temperatureSource = settings.temperature;
-  const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
-  const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const modelSource = target.model ?? target.deployment ?? target.variant;
+  const temperatureSource = target.temperature;
+  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
   const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
+  const retry = resolveRetryConfig(target);
   return {
     apiKey,
     model,
     temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
     maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
-    thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
+    thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`),
+    retry
   };
 }
 function resolveGeminiConfig(target, env) {
-  const settings = target.settings ?? {};
-  const apiKeySource = settings.api_key ?? settings.apiKey;
-  const modelSource = settings.model ?? settings.deployment ?? settings.variant;
-  const temperatureSource = settings.temperature;
-  const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
+  const apiKeySource = target.api_key ?? target.apiKey;
+  const modelSource = target.model ?? target.deployment ?? target.variant;
+  const temperatureSource = target.temperature;
+  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
   const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
     allowLiteral: true,
     optionalEnv: true
   }) ?? "gemini-2.5-flash";
+  const retry = resolveRetryConfig(target);
   return {
     apiKey,
     model,
     temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
-    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
+    maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
+    retry
   };
 }
 function resolveCodexConfig(target, env) {
-  const settings = target.settings ?? {};
-  const executableSource = settings.executable ?? settings.command ?? settings.binary;
-  const argsSource = settings.args ?? settings.arguments;
-  const cwdSource = settings.cwd;
-  const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
-  const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
-  const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const executableSource = target.executable ?? target.command ?? target.binary;
+  const argsSource = target.args ?? target.arguments;
+  const cwdSource = target.cwd;
+  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
+  const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -2255,21 +2551,19 @@ function normalizeCodexLogFormat(value) {
   throw new Error("codex log format must be 'summary' or 'json'");
 }
 function resolveMockConfig(target) {
-  const settings = target.settings ?? {};
-  const response = typeof settings.response === "string" ? settings.response : void 0;
+  const response = typeof target.response === "string" ? target.response : void 0;
   return { response };
 }
 function resolveVSCodeConfig(target, env, insiders) {
-  const settings = target.settings ?? {};
-  const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
+  const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template ?? target.workspaceTemplate);
   const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
     allowLiteral: false,
     optionalEnv: true
   }) : void 0;
-  const commandSource = settings.vscode_cmd ?? settings.command;
-  const waitSource = settings.wait;
-  const dryRunSource = settings.dry_run ?? settings.dryRun;
-  const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
+  const commandSource = target.vscode_cmd ?? target.command;
+  const waitSource = target.wait;
+  const dryRunSource = target.dry_run ?? target.dryRun;
+  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
   const defaultCommand = insiders ? "code-insiders" : "code";
   const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
   return {
@@ -2284,18 +2578,16 @@ function resolveVSCodeConfig(target, env, insiders) {
   };
 }
 function resolveCliConfig(target, env) {
-  const settings = target.settings ?? {};
-  const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
+  const commandTemplateSource = target.command_template ?? target.commandTemplate;
   const filesFormat = resolveOptionalLiteralString(
-    settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
+    target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
   );
-  const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
+  const cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
     allowLiteral: true,
     optionalEnv: true
   });
-  const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
-  const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
-  const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
+  const timeoutMs = resolveTimeoutMs(target.timeout_seconds ?? target.timeoutSeconds, `${target.name} timeout`);
+  const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
   const commandTemplate = resolveString(
     commandTemplateSource,
     env,
@@ -2307,29 +2599,10 @@ function resolveCliConfig(target, env) {
     commandTemplate,
     filesFormat,
     cwd,
-    env: envOverrides,
     timeoutMs,
     healthcheck
   };
 }
-function resolveEnvOverrides(source, env, targetName) {
-  if (source === void 0 || source === null) {
-    return void 0;
-  }
-  if (typeof source !== "object" || Array.isArray(source)) {
-    throw new Error(`${targetName} env overrides must be an object map of strings`);
-  }
-  const entries = Object.entries(source);
-  const resolved = {};
-  for (const [key, value] of entries) {
-    if (typeof value !== "string") {
-      throw new Error(`${targetName} env override '${key}' must be a string`);
-    }
-    const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
-    resolved[key] = resolvedValue;
-  }
-  return Object.keys(resolved).length > 0 ? resolved : void 0;
-}
 function resolveTimeoutMs(source, description) {
   const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
   if (seconds === void 0) {
@@ -2525,6 +2798,26 @@ function resolveOptionalStringArray(source, env, description) {
   }
   return resolved.length > 0 ? resolved : void 0;
 }
+function resolveOptionalNumberArray(source, description) {
+  if (source === void 0 || source === null) {
+    return void 0;
+  }
+  if (!Array.isArray(source)) {
+    throw new Error(`${description} must be an array of numbers`);
+  }
+  if (source.length === 0) {
+    return void 0;
+  }
+  const resolved = [];
+  for (let i = 0; i < source.length; i++) {
+    const item = source[i];
+    if (typeof item !== "number" || !Number.isFinite(item)) {
+      throw new Error(`${description}[${i}] must be a number`);
+    }
+    resolved.push(item);
+  }
+  return resolved.length > 0 ? resolved : void 0;
+}
 // src/evaluation/providers/vscode.ts
 var import_node_path6 = __toESM(require("path"), 1);
@@ -2784,7 +3077,7 @@ var AGENT_PROVIDER_KINDS = [
   "vscode",
   "vscode-insiders"
 ];
-var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
+var TARGETS_SCHEMA_V2 = "agentv-targets-v2.2";
 function isAgentProvider(provider) {
   return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
 }
@@ -2827,20 +3120,13 @@ function assertTargetDefinition(value, index, filePath) {
   }
   const name = value.name;
   const provider = value.provider;
-  const settings = value.settings;
-  const judgeTarget = value.judge_target;
   if (typeof name !== "string" || name.trim().length === 0) {
     throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
   }
   if (typeof provider !== "string" || provider.trim().length === 0) {
     throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
   }
-  return {
-    name,
-    provider,
-    settings: isRecord(settings) ? settings : void 0,
-    judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
-  };
+  return value;
 }
 async function fileExists3(filePath) {
   try {
@@ -2920,19 +3206,21 @@ var LlmJudgeEvaluator = class {
     return this.evaluateWithPrompt(context, judgeProvider);
   }
   async evaluateWithPrompt(context, judgeProvider) {
-    let prompt = buildQualityPrompt(context.evalCase, context.candidate);
-    let systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
+    const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context.evalCase);
+    const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
+    let prompt = buildQualityPrompt(context.evalCase, context.candidate, formattedQuestion);
+    let systemPrompt = context.systemPrompt ?? this.customPrompt ?? buildSystemPrompt(hasReferenceAnswer);
     if (systemPrompt && hasTemplateVariables(systemPrompt)) {
       const variables = {
         input_messages: JSON.stringify(context.evalCase.input_segments, null, 2),
         output_messages: JSON.stringify(context.evalCase.output_segments, null, 2),
         candidate_answer: context.candidate,
-        reference_answer: context.evalCase.reference_answer,
+        reference_answer: context.evalCase.reference_answer ?? "",
         expected_outcome: context.evalCase.expected_outcome,
-        question: context.evalCase.question
+        question: formattedQuestion
       };
       prompt = substituteVariables(systemPrompt, variables);
-      systemPrompt = QUALITY_SYSTEM_PROMPT;
+      systemPrompt = buildSystemPrompt(hasReferenceAnswer);
     }
     const metadata = {
       ...systemPrompt !== void 0 ? { systemPrompt } : {},
@@ -2970,38 +3258,51 @@ var LlmJudgeEvaluator = class {
     };
   }
 };
-var QUALITY_SYSTEM_PROMPT = [
-  "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
-  "",
-  "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but it should capture the key points and follow the same spirit.",
-  "",
-  "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
-  "",
-  "You must respond with a single JSON object matching this schema:",
-  "",
-  "{",
-  '  "score": <number between 0.0 and 1.0>,',
-  '  "hits": [<array of strings, max 4 items, brief specific achievements>],',
-  '  "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
-  '  "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
-  "}"
-].join("\n");
-function buildQualityPrompt(evalCase, candidate) {
+function buildSystemPrompt(hasReferenceAnswer) {
+  const basePrompt = [
+    "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
+    ""
+  ];
+  if (hasReferenceAnswer) {
+    basePrompt.push(
+      "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
+      ""
+    );
+  }
+  basePrompt.push(
+    "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
+    "",
+    "You must respond with a single JSON object matching this schema:",
+    "",
+    "{",
+    '  "score": <number between 0.0 and 1.0>,',
+    '  "hits": [<array of strings, max 4 items, brief specific achievements>],',
+    '  "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
+    '  "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
+    "}"
+  );
+  return basePrompt.join("\n");
+}
+function buildQualityPrompt(evalCase, candidate, question) {
   const parts = [
     "[[ ## expected_outcome ## ]]",
     evalCase.expected_outcome.trim(),
     "",
     "[[ ## question ## ]]",
-    evalCase.question.trim(),
-    "",
-    "[[ ## reference_answer ## ]]",
-    evalCase.reference_answer.trim(),
-    "",
-    "[[ ## candidate_answer ## ]]",
-    candidate.trim(),
-    "",
-    "Respond with a single JSON object matching the schema described in the system prompt."
+    question.trim(),
+    ""
   ];
+  if (hasNonEmptyReferenceAnswer(evalCase)) {
+    parts.push(
+      "[[ ## reference_answer ## ]]",
+      evalCase.reference_answer.trim(),
+      ""
+    );
+  }
+  parts.push(
+    "[[ ## candidate_answer ## ]]",
+    candidate.trim()
+  );
   return parts.join("\n");
 }
 function clampScore(value) {
@@ -3084,6 +3385,9 @@ function extractJsonBlob(text) {
 function isNonEmptyString(value) {
   return typeof value === "string" && value.trim().length > 0;
 }
+function hasNonEmptyReferenceAnswer(evalCase) {
+  return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
+}
 var CodeEvaluator = class {
   kind = "code";
   script;
@@ -3481,10 +3785,11 @@ async function runEvaluation(options) {
           await onProgress({
             workerId,
             evalId: evalCase.id,
-            status: "completed",
+            status: result.error ? "failed" : "completed",
             startedAt: 0,
             // Not used for completed status
-            completedAt: Date.now()
+            completedAt: Date.now(),
+            error: result.error
           });
         }
         if (onResult) {
@@ -3741,11 +4046,27 @@ async function evaluateCandidate(options) {
     agentTimeoutMs
   });
   const completedAt = nowFn();
-  const rawRequest = {
-    question: promptInputs.question,
-    ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
-    guideline_paths: evalCase.guideline_paths
-  };
+  let agentProviderRequest;
+  let lmProviderRequest;
+  if (isAgentProvider(provider)) {
+    agentProviderRequest = {
+      question: promptInputs.question,
+      guideline_paths: evalCase.guideline_paths
+    };
+  } else {
+    if (promptInputs.chatPrompt) {
+      lmProviderRequest = {
+        chat_prompt: promptInputs.chatPrompt,
+        guideline_paths: evalCase.guideline_paths
+      };
+    } else {
+      lmProviderRequest = {
+        question: promptInputs.question,
+        guidelines: promptInputs.guidelines,
+        guideline_paths: evalCase.guideline_paths
+      };
+    }
+  }
   return {
     eval_id: evalCase.id,
     dataset: evalCase.dataset,
@@ -3759,7 +4080,8 @@ async function evaluateCandidate(options) {
     timestamp: completedAt.toISOString(),
     reasoning: score.reasoning,
     raw_aspects: score.rawAspects,
-    raw_request: rawRequest,
+    agent_provider_request: agentProviderRequest,
+    lm_provider_request: lmProviderRequest,
     evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
     evaluator_results: evaluatorResults
   };
@@ -3988,6 +4310,7 @@ async function invokeProvider(provider, options) {
       question: promptInputs.question,
       guidelines: promptInputs.guidelines,
       guideline_patterns: evalCase.guideline_patterns,
+      chatPrompt: promptInputs.chatPrompt,
       inputFiles: evalCase.file_paths,
       evalCaseId: evalCase.id,
       attempt,
@@ -4004,12 +4327,30 @@ async function invokeProvider(provider, options) {
 }
 function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
   const message = error instanceof Error ? error.message : String(error);
-  const rawRequest = {
-    question: promptInputs.question,
-    ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
-    guideline_paths: evalCase.guideline_paths,
-    error: message
-  };
+  let agentProviderRequest;
+  let lmProviderRequest;
+  if (isAgentProvider(provider)) {
+    agentProviderRequest = {
+      question: promptInputs.question,
+      guideline_paths: evalCase.guideline_paths,
+      error: message
+    };
+  } else {
+    if (promptInputs.chatPrompt) {
+      lmProviderRequest = {
+        chat_prompt: promptInputs.chatPrompt,
+        guideline_paths: evalCase.guideline_paths,
+        error: message
+      };
+    } else {
+      lmProviderRequest = {
+        question: promptInputs.question,
+        guidelines: promptInputs.guidelines,
+        guideline_paths: evalCase.guideline_paths,
+        error: message
+      };
+    }
+  }
   return {
     eval_id: evalCase.id,
     dataset: evalCase.dataset,
@@ -4022,7 +4363,9 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
     target: targetName,
     timestamp: timestamp.toISOString(),
     raw_aspects: [],
-    raw_request: rawRequest
+    agent_provider_request: agentProviderRequest,
+    lm_provider_request: lmProviderRequest,
+    error: message
   };
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
@@ -4033,6 +4376,9 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
   hash.update(promptInputs.question);
   hash.update(promptInputs.guidelines);
   hash.update(promptInputs.systemMessage ?? "");
+  if (promptInputs.chatPrompt) {
+    hash.update(JSON.stringify(promptInputs.chatPrompt));
+  }
   return hash.digest("hex");
 }
 function isTimeoutLike(error) {