npm - @agentv/core - Versions diffs - 0.7.4 → 0.9.0 - Mend

@agentv/core 0.7.4 → 0.9.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-L6RCDZ4Z.js → chunk-SNTZFB24.js} +102 -68
package/dist/chunk-SNTZFB24.js.map +1 -0
package/dist/evaluation/validation/index.cjs +32 -57
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +31 -55
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +211 -107
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +81 -3
package/dist/index.d.ts +81 -3
package/dist/index.js +112 -41
package/dist/index.js.map +1 -1
package/package.json +1 -2
package/dist/chunk-L6RCDZ4Z.js.map +0 -1

package/dist/index.d.cts CHANGED Viewed

@@ -132,6 +132,7 @@ interface EvaluationResult {
     readonly raw_request?: JsonObject;
     readonly evaluator_raw_request?: JsonObject;
     readonly evaluator_results?: readonly EvaluatorResult[];
+    readonly error?: string;
 }
 interface EvaluatorResult {
     readonly name: string;
@@ -174,6 +175,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
 }>;
 declare function fileExists(filePath: string): Promise<boolean>;
+/**
+ * Normalize line endings to LF (\n).
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
+ */
+declare function normalizeLineEndings(content: string): string;
 /**
  * Read a text file and normalize line endings to LF (\n).
  * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
@@ -247,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
 interface TargetDefinition {
     readonly name: string;
     readonly provider: ProviderKind | string;
-    readonly settings?: Record<string, unknown> | undefined;
     readonly judge_target?: string | undefined;
     readonly workers?: number | undefined;
+    readonly provider_batching?: boolean | undefined;
+    readonly providerBatching?: boolean | undefined;
+    readonly endpoint?: string | unknown | undefined;
+    readonly resource?: string | unknown | undefined;
+    readonly resourceName?: string | unknown | undefined;
+    readonly api_key?: string | unknown | undefined;
+    readonly apiKey?: string | unknown | undefined;
+    readonly deployment?: string | unknown | undefined;
+    readonly deploymentName?: string | unknown | undefined;
+    readonly model?: string | unknown | undefined;
+    readonly version?: string | unknown | undefined;
+    readonly api_version?: string | unknown | undefined;
+    readonly variant?: string | unknown | undefined;
+    readonly thinking_budget?: number | unknown | undefined;
+    readonly thinkingBudget?: number | unknown | undefined;
+    readonly temperature?: number | unknown | undefined;
+    readonly max_output_tokens?: number | unknown | undefined;
+    readonly maxTokens?: number | unknown | undefined;
+    readonly executable?: string | unknown | undefined;
+    readonly command?: string | unknown | undefined;
+    readonly binary?: string | unknown | undefined;
+    readonly args?: unknown | undefined;
+    readonly arguments?: unknown | undefined;
+    readonly cwd?: string | unknown | undefined;
+    readonly timeout_seconds?: number | unknown | undefined;
+    readonly timeoutSeconds?: number | unknown | undefined;
+    readonly log_dir?: string | unknown | undefined;
+    readonly logDir?: string | unknown | undefined;
+    readonly log_directory?: string | unknown | undefined;
+    readonly logDirectory?: string | unknown | undefined;
+    readonly log_format?: string | unknown | undefined;
+    readonly logFormat?: string | unknown | undefined;
+    readonly log_output_format?: string | unknown | undefined;
+    readonly logOutputFormat?: string | unknown | undefined;
+    readonly response?: string | unknown | undefined;
+    readonly delayMs?: number | unknown | undefined;
+    readonly delayMinMs?: number | unknown | undefined;
+    readonly delayMaxMs?: number | unknown | undefined;
+    readonly vscode_cmd?: string | unknown | undefined;
+    readonly wait?: boolean | unknown | undefined;
+    readonly dry_run?: boolean | unknown | undefined;
+    readonly dryRun?: boolean | unknown | undefined;
+    readonly subagent_root?: string | unknown | undefined;
+    readonly subagentRoot?: string | unknown | undefined;
+    readonly workspace_template?: string | unknown | undefined;
+    readonly workspaceTemplate?: string | unknown | undefined;
+    readonly command_template?: string | unknown | undefined;
+    readonly commandTemplate?: string | unknown | undefined;
+    readonly files_format?: string | unknown | undefined;
+    readonly filesFormat?: string | unknown | undefined;
+    readonly attachments_format?: string | unknown | undefined;
+    readonly attachmentsFormat?: string | unknown | undefined;
+    readonly env?: unknown | undefined;
+    readonly healthcheck?: unknown | undefined;
+    readonly max_retries?: number | unknown | undefined;
+    readonly maxRetries?: number | unknown | undefined;
+    readonly retry_initial_delay_ms?: number | unknown | undefined;
+    readonly retryInitialDelayMs?: number | unknown | undefined;
+    readonly retry_max_delay_ms?: number | unknown | undefined;
+    readonly retryMaxDelayMs?: number | unknown | undefined;
+    readonly retry_backoff_factor?: number | unknown | undefined;
+    readonly retryBackoffFactor?: number | unknown | undefined;
+    readonly retry_status_codes?: unknown | undefined;
+    readonly retryStatusCodes?: unknown | undefined;
 }
+interface RetryConfig {
+    readonly maxRetries?: number;
+    readonly initialDelayMs?: number;
+    readonly maxDelayMs?: number;
+    readonly backoffFactor?: number;
+    readonly retryableStatusCodes?: readonly number[];
+}
 interface AzureResolvedConfig {
     readonly resourceName: string;
     readonly deploymentName: string;
@@ -259,6 +335,7 @@ interface AzureResolvedConfig {
     readonly version?: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface AnthropicResolvedConfig {
     readonly apiKey: string;
@@ -266,12 +343,14 @@ interface AnthropicResolvedConfig {
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
     readonly thinkingBudget?: number;
+    readonly retry?: RetryConfig;
 }
 interface GeminiResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface CodexResolvedConfig {
     readonly executable: string;
@@ -308,7 +387,6 @@ interface CliResolvedConfig {
     readonly commandTemplate: string;
     readonly filesFormat?: string;
     readonly cwd?: string;
-    readonly env?: Record<string, string>;
     readonly timeoutMs?: number;
     readonly healthcheck?: CliHealthcheck;
 }
@@ -516,4 +594,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };

package/dist/index.d.ts CHANGED Viewed

@@ -132,6 +132,7 @@ interface EvaluationResult {
     readonly raw_request?: JsonObject;
     readonly evaluator_raw_request?: JsonObject;
     readonly evaluator_results?: readonly EvaluatorResult[];
+    readonly error?: string;
 }
 interface EvaluatorResult {
     readonly name: string;
@@ -174,6 +175,11 @@ declare function buildPromptInputs(testCase: EvalCase): Promise<{
 }>;
 declare function fileExists(filePath: string): Promise<boolean>;
+/**
+ * Normalize line endings to LF (\n).
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
+ */
+declare function normalizeLineEndings(content: string): string;
 /**
  * Read a text file and normalize line endings to LF (\n).
  * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
@@ -247,11 +253,81 @@ type EnvLookup = Readonly<Record<string, string | undefined>>;
 interface TargetDefinition {
     readonly name: string;
     readonly provider: ProviderKind | string;
-    readonly settings?: Record<string, unknown> | undefined;
     readonly judge_target?: string | undefined;
     readonly workers?: number | undefined;
+    readonly provider_batching?: boolean | undefined;
+    readonly providerBatching?: boolean | undefined;
+    readonly endpoint?: string | unknown | undefined;
+    readonly resource?: string | unknown | undefined;
+    readonly resourceName?: string | unknown | undefined;
+    readonly api_key?: string | unknown | undefined;
+    readonly apiKey?: string | unknown | undefined;
+    readonly deployment?: string | unknown | undefined;
+    readonly deploymentName?: string | unknown | undefined;
+    readonly model?: string | unknown | undefined;
+    readonly version?: string | unknown | undefined;
+    readonly api_version?: string | unknown | undefined;
+    readonly variant?: string | unknown | undefined;
+    readonly thinking_budget?: number | unknown | undefined;
+    readonly thinkingBudget?: number | unknown | undefined;
+    readonly temperature?: number | unknown | undefined;
+    readonly max_output_tokens?: number | unknown | undefined;
+    readonly maxTokens?: number | unknown | undefined;
+    readonly executable?: string | unknown | undefined;
+    readonly command?: string | unknown | undefined;
+    readonly binary?: string | unknown | undefined;
+    readonly args?: unknown | undefined;
+    readonly arguments?: unknown | undefined;
+    readonly cwd?: string | unknown | undefined;
+    readonly timeout_seconds?: number | unknown | undefined;
+    readonly timeoutSeconds?: number | unknown | undefined;
+    readonly log_dir?: string | unknown | undefined;
+    readonly logDir?: string | unknown | undefined;
+    readonly log_directory?: string | unknown | undefined;
+    readonly logDirectory?: string | unknown | undefined;
+    readonly log_format?: string | unknown | undefined;
+    readonly logFormat?: string | unknown | undefined;
+    readonly log_output_format?: string | unknown | undefined;
+    readonly logOutputFormat?: string | unknown | undefined;
+    readonly response?: string | unknown | undefined;
+    readonly delayMs?: number | unknown | undefined;
+    readonly delayMinMs?: number | unknown | undefined;
+    readonly delayMaxMs?: number | unknown | undefined;
+    readonly vscode_cmd?: string | unknown | undefined;
+    readonly wait?: boolean | unknown | undefined;
+    readonly dry_run?: boolean | unknown | undefined;
+    readonly dryRun?: boolean | unknown | undefined;
+    readonly subagent_root?: string | unknown | undefined;
+    readonly subagentRoot?: string | unknown | undefined;
+    readonly workspace_template?: string | unknown | undefined;
+    readonly workspaceTemplate?: string | unknown | undefined;
+    readonly command_template?: string | unknown | undefined;
+    readonly commandTemplate?: string | unknown | undefined;
+    readonly files_format?: string | unknown | undefined;
+    readonly filesFormat?: string | unknown | undefined;
+    readonly attachments_format?: string | unknown | undefined;
+    readonly attachmentsFormat?: string | unknown | undefined;
+    readonly env?: unknown | undefined;
+    readonly healthcheck?: unknown | undefined;
+    readonly max_retries?: number | unknown | undefined;
+    readonly maxRetries?: number | unknown | undefined;
+    readonly retry_initial_delay_ms?: number | unknown | undefined;
+    readonly retryInitialDelayMs?: number | unknown | undefined;
+    readonly retry_max_delay_ms?: number | unknown | undefined;
+    readonly retryMaxDelayMs?: number | unknown | undefined;
+    readonly retry_backoff_factor?: number | unknown | undefined;
+    readonly retryBackoffFactor?: number | unknown | undefined;
+    readonly retry_status_codes?: unknown | undefined;
+    readonly retryStatusCodes?: unknown | undefined;
 }
+interface RetryConfig {
+    readonly maxRetries?: number;
+    readonly initialDelayMs?: number;
+    readonly maxDelayMs?: number;
+    readonly backoffFactor?: number;
+    readonly retryableStatusCodes?: readonly number[];
+}
 interface AzureResolvedConfig {
     readonly resourceName: string;
     readonly deploymentName: string;
@@ -259,6 +335,7 @@ interface AzureResolvedConfig {
     readonly version?: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface AnthropicResolvedConfig {
     readonly apiKey: string;
@@ -266,12 +343,14 @@ interface AnthropicResolvedConfig {
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
     readonly thinkingBudget?: number;
+    readonly retry?: RetryConfig;
 }
 interface GeminiResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface CodexResolvedConfig {
     readonly executable: string;
@@ -308,7 +387,6 @@ interface CliResolvedConfig {
     readonly commandTemplate: string;
     readonly filesFormat?: string;
     readonly cwd?: string;
-    readonly env?: Record<string, string>;
     readonly timeoutMs?: number;
     readonly healthcheck?: CliHealthcheck;
 }
@@ -516,4 +594,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };

package/dist/index.js CHANGED Viewed

@@ -5,10 +5,11 @@ import {
   fileExists,
   findGitRoot,
   isAgentProvider,
+  normalizeLineEndings,
   readTextFile,
   resolveFileReference,
   resolveTargetDefinition
-} from "./chunk-L6RCDZ4Z.js";
+} from "./chunk-SNTZFB24.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -661,6 +662,67 @@ function ensureChatResponse(result) {
   }
   return result;
 }
+function isRetryableError(error, retryableStatusCodes) {
+  if (!error || typeof error !== "object") {
+    return false;
+  }
+  if ("status" in error && typeof error.status === "number") {
+    return retryableStatusCodes.includes(error.status);
+  }
+  if ("message" in error && typeof error.message === "string") {
+    const match = error.message.match(/HTTP (\d{3})/);
+    if (match) {
+      const status = Number.parseInt(match[1], 10);
+      return retryableStatusCodes.includes(status);
+    }
+  }
+  if ("name" in error && error.name === "AxAIServiceNetworkError") {
+    return true;
+  }
+  return false;
+}
+function calculateRetryDelay(attempt, config) {
+  const delay = Math.min(
+    config.maxDelayMs,
+    config.initialDelayMs * config.backoffFactor ** attempt
+  );
+  return delay * (0.75 + Math.random() * 0.5);
+}
+async function sleep(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+async function withRetry(fn, retryConfig, signal) {
+  const config = {
+    maxRetries: retryConfig?.maxRetries ?? 3,
+    initialDelayMs: retryConfig?.initialDelayMs ?? 1e3,
+    maxDelayMs: retryConfig?.maxDelayMs ?? 6e4,
+    backoffFactor: retryConfig?.backoffFactor ?? 2,
+    retryableStatusCodes: retryConfig?.retryableStatusCodes ?? [500, 408, 429, 502, 503, 504]
+  };
+  let lastError;
+  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
+    if (signal?.aborted) {
+      throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
+    }
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+      if (attempt >= config.maxRetries) {
+        break;
+      }
+      if (!isRetryableError(error, config.retryableStatusCodes)) {
+        throw error;
+      }
+      const delay = calculateRetryDelay(attempt, config);
+      await sleep(delay);
+      if (signal?.aborted) {
+        throw new Error(`Request aborted: ${signal.reason ?? "Unknown reason"}`);
+      }
+    }
+  }
+  throw lastError;
+}
 var AzureProvider = class {
   constructor(targetName, config) {
     this.config = config;
@@ -670,6 +732,7 @@ var AzureProvider = class {
       temperature: config.temperature,
       maxOutputTokens: config.maxOutputTokens
     };
+    this.retryConfig = config.retry;
     this.ai = AxAI.create({
       name: "azure-openai",
       apiKey: config.apiKey,
@@ -686,16 +749,21 @@ var AzureProvider = class {
   targetName;
   ai;
   defaults;
+  retryConfig;
   async invoke(request) {
     const chatPrompt = buildChatPrompt(request);
     const modelConfig = extractModelConfig(request, this.defaults);
-    const response = await this.ai.chat(
-      {
-        chatPrompt,
-        model: this.config.deploymentName,
-        ...modelConfig ? { modelConfig } : {}
-      },
-      request.signal ? { abortSignal: request.signal } : void 0
+    const response = await withRetry(
+      async () => await this.ai.chat(
+        {
+          chatPrompt,
+          model: this.config.deploymentName,
+          ...modelConfig ? { modelConfig } : {}
+        },
+        request.signal ? { abortSignal: request.signal } : void 0
+      ),
+      this.retryConfig,
+      request.signal
     );
     return mapResponse(ensureChatResponse(response));
   }
@@ -713,6 +781,7 @@ var AnthropicProvider = class {
       maxOutputTokens: config.maxOutputTokens,
       thinkingBudget: config.thinkingBudget
     };
+    this.retryConfig = config.retry;
     this.ai = AxAI.create({
       name: "anthropic",
       apiKey: config.apiKey
@@ -723,16 +792,21 @@ var AnthropicProvider = class {
   targetName;
   ai;
   defaults;
+  retryConfig;
   async invoke(request) {
     const chatPrompt = buildChatPrompt(request);
     const modelConfig = extractModelConfig(request, this.defaults);
-    const response = await this.ai.chat(
-      {
-        chatPrompt,
-        model: this.config.model,
-        ...modelConfig ? { modelConfig } : {}
-      },
-      request.signal ? { abortSignal: request.signal } : void 0
+    const response = await withRetry(
+      async () => await this.ai.chat(
+        {
+          chatPrompt,
+          model: this.config.model,
+          ...modelConfig ? { modelConfig } : {}
+        },
+        request.signal ? { abortSignal: request.signal } : void 0
+      ),
+      this.retryConfig,
+      request.signal
     );
     return mapResponse(ensureChatResponse(response));
   }
@@ -749,6 +823,7 @@ var GeminiProvider = class {
       temperature: config.temperature,
       maxOutputTokens: config.maxOutputTokens
     };
+    this.retryConfig = config.retry;
     this.ai = AxAI.create({
       name: "google-gemini",
       apiKey: config.apiKey
@@ -759,16 +834,21 @@ var GeminiProvider = class {
   targetName;
   ai;
   defaults;
+  retryConfig;
   async invoke(request) {
     const chatPrompt = buildChatPrompt(request);
     const modelConfig = extractModelConfig(request, this.defaults);
-    const response = await this.ai.chat(
-      {
-        chatPrompt,
-        model: this.config.model,
-        ...modelConfig ? { modelConfig } : {}
-      },
-      request.signal ? { abortSignal: request.signal } : void 0
+    const response = await withRetry(
+      async () => await this.ai.chat(
+        {
+          chatPrompt,
+          model: this.config.model,
+          ...modelConfig ? { modelConfig } : {}
+        },
+        request.signal ? { abortSignal: request.signal } : void 0
+      ),
+      this.retryConfig,
+      request.signal
     );
     return mapResponse(ensureChatResponse(response));
   }
@@ -796,7 +876,6 @@ async function defaultCommandRunner(command, options) {
   };
   try {
     const { stdout, stderr } = await execAsync(command, execOptions);
-    console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
     return {
       stdout,
       stderr,
@@ -807,8 +886,6 @@ async function defaultCommandRunner(command, options) {
     };
   } catch (error) {
     const execError = error;
-    console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
-    console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
     return {
       stdout: execError.stdout ?? "",
       stderr: execError.stderr ?? "",
@@ -841,10 +918,9 @@ var CliProvider = class {
     const outputFilePath = generateOutputFilePath(request.evalCaseId);
     const templateValues = buildTemplateValues(request, this.config, outputFilePath);
     const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
-    const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
     const result = await this.runCommand(renderedCommand, {
       cwd: this.config.cwd,
-      env,
+      env: process.env,
       timeoutMs: this.config.timeoutMs,
       signal: request.signal
     });
@@ -876,7 +952,7 @@ var CliProvider = class {
   }
   async readAndCleanupOutputFile(filePath) {
     try {
-      const content = await fs.readFile(filePath, "utf-8");
+      const content = await readTextFile(filePath);
       return content;
     } catch (error) {
       const errorMsg = error instanceof Error ? error.message : String(error);
@@ -933,10 +1009,9 @@ var CliProvider = class {
         generateOutputFilePath("healthcheck")
       )
     );
-    const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
     const result = await this.runCommand(renderedCommand, {
       cwd: healthcheck.cwd ?? this.config.cwd,
-      env,
+      env: process.env,
       timeoutMs,
       signal
     });
@@ -2169,20 +2244,13 @@ function assertTargetDefinition(value, index, filePath) {
   }
   const name = value.name;
   const provider = value.provider;
-  const settings = value.settings;
-  const judgeTarget = value.judge_target;
   if (typeof name !== "string" || name.trim().length === 0) {
     throw new Error(`targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`);
   }
   if (typeof provider !== "string" || provider.trim().length === 0) {
     throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
   }
-  return {
-    name,
-    provider,
-    settings: isRecord(settings) ? settings : void 0,
-    judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
-  };
+  return value;
 }
 async function fileExists3(filePath) {
   try {
@@ -2823,10 +2891,11 @@ async function runEvaluation(options) {
           await onProgress({
             workerId,
             evalId: evalCase.id,
-            status: "completed",
+            status: result.error ? "failed" : "completed",
             startedAt: 0,
             // Not used for completed status
-            completedAt: Date.now()
+            completedAt: Date.now(),
+            error: result.error
           });
         }
         if (onResult) {
@@ -3364,7 +3433,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
     target: targetName,
     timestamp: timestamp.toISOString(),
     raw_aspects: [],
-    raw_request: rawRequest
+    raw_request: rawRequest,
+    error: message
   };
 }
 function createCacheKey(provider, target, evalCase, promptInputs) {
@@ -3420,6 +3490,7 @@ export {
   isTestMessageRole,
   listTargetNames,
   loadEvalCases,
+  normalizeLineEndings,
   readTargetDefinitions,
   readTextFile,
   resolveAndCreateProvider,