npm - @agentv/core - Versions diffs - 0.7.5 → 0.10.0 - Mend

@agentv/core 0.7.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/dist/{chunk-7XM7HYRS.js → chunk-YQBJAT5I.js} +97 -67
package/dist/chunk-YQBJAT5I.js.map +1 -0
package/dist/evaluation/validation/index.cjs +61 -69
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +51 -58
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +538 -192
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +136 -58
package/dist/index.d.ts +136 -58
package/dist/index.js +443 -127
package/dist/index.js.map +1 -1
package/package.json +1 -2
package/dist/chunk-7XM7HYRS.js.map +0 -1

package/dist/index.d.cts CHANGED

@@ -102,9 +102,10 @@ interface EvalCase {
     readonly dataset?: string;
     readonly conversation_id?: string;
     readonly question: string;
+    readonly input_messages: readonly TestMessage[];
     readonly input_segments: readonly JsonObject[];
     readonly output_segments: readonly JsonObject[];
-    readonly reference_answer: string;
+    readonly reference_answer?: string;
     readonly guideline_paths: readonly string[];
     readonly guideline_patterns?: readonly string[];
     readonly file_paths: readonly string[];
@@ -129,9 +130,11 @@ interface EvaluationResult {
     readonly timestamp: string;
     readonly reasoning?: string;
     readonly raw_aspects?: readonly string[];
-    readonly raw_request?: JsonObject;
+    readonly agent_provider_request?: JsonObject;
+    readonly lm_provider_request?: JsonObject;
     readonly evaluator_raw_request?: JsonObject;
     readonly evaluator_results?: readonly EvaluatorResult[];
+    readonly error?: string;
 }
 interface EvaluatorResult {
     readonly name: string;
@@ -148,6 +151,119 @@ interface EvaluatorResult {
  */
 declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
+type ChatPrompt = AxChatRequest["chatPrompt"];
+type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
+interface ProviderRequest {
+    readonly question: string;
+    readonly guidelines?: string;
+    readonly guideline_patterns?: readonly string[];
+    readonly chatPrompt?: ChatPrompt;
+    readonly inputFiles?: readonly string[];
+    readonly evalCaseId?: string;
+    readonly attempt?: number;
+    readonly maxOutputTokens?: number;
+    readonly temperature?: number;
+    readonly metadata?: JsonObject;
+    readonly signal?: AbortSignal;
+}
+interface ProviderResponse {
+    readonly text: string;
+    readonly reasoning?: string;
+    readonly raw?: unknown;
+    readonly usage?: JsonObject;
+}
+interface Provider {
+    readonly id: string;
+    readonly kind: ProviderKind;
+    readonly targetName: string;
+    invoke(request: ProviderRequest): Promise<ProviderResponse>;
+    /**
+     * Optional capability marker for provider-managed batching (single session handling multiple requests).
+     */
+    readonly supportsBatch?: boolean;
+    /**
+     * Optional batch invocation hook. When defined alongside supportsBatch=true,
+     * the orchestrator may send multiple requests in a single provider session.
+     */
+    invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
+    /**
+     * Optional access to the underlying AxAI instance.
+     * This enables using advanced Ax features like structured output signatures.
+     */
+    getAxAI?(): AxAI;
+}
+type EnvLookup = Readonly<Record<string, string | undefined>>;
+interface TargetDefinition {
+    readonly name: string;
+    readonly provider: ProviderKind | string;
+    readonly judge_target?: string | undefined;
+    readonly workers?: number | undefined;
+    readonly provider_batching?: boolean | undefined;
+    readonly providerBatching?: boolean | undefined;
+    readonly endpoint?: string | unknown | undefined;
+    readonly resource?: string | unknown | undefined;
+    readonly resourceName?: string | unknown | undefined;
+    readonly api_key?: string | unknown | undefined;
+    readonly apiKey?: string | unknown | undefined;
+    readonly deployment?: string | unknown | undefined;
+    readonly deploymentName?: string | unknown | undefined;
+    readonly model?: string | unknown | undefined;
+    readonly version?: string | unknown | undefined;
+    readonly api_version?: string | unknown | undefined;
+    readonly variant?: string | unknown | undefined;
+    readonly thinking_budget?: number | unknown | undefined;
+    readonly thinkingBudget?: number | unknown | undefined;
+    readonly temperature?: number | unknown | undefined;
+    readonly max_output_tokens?: number | unknown | undefined;
+    readonly maxTokens?: number | unknown | undefined;
+    readonly executable?: string | unknown | undefined;
+    readonly command?: string | unknown | undefined;
+    readonly binary?: string | unknown | undefined;
+    readonly args?: unknown | undefined;
+    readonly arguments?: unknown | undefined;
+    readonly cwd?: string | unknown | undefined;
+    readonly timeout_seconds?: number | unknown | undefined;
+    readonly timeoutSeconds?: number | unknown | undefined;
+    readonly log_dir?: string | unknown | undefined;
+    readonly logDir?: string | unknown | undefined;
+    readonly log_directory?: string | unknown | undefined;
+    readonly logDirectory?: string | unknown | undefined;
+    readonly log_format?: string | unknown | undefined;
+    readonly logFormat?: string | unknown | undefined;
+    readonly log_output_format?: string | unknown | undefined;
+    readonly logOutputFormat?: string | unknown | undefined;
+    readonly response?: string | unknown | undefined;
+    readonly delayMs?: number | unknown | undefined;
+    readonly delayMinMs?: number | unknown | undefined;
+    readonly delayMaxMs?: number | unknown | undefined;
+    readonly vscode_cmd?: string | unknown | undefined;
+    readonly wait?: boolean | unknown | undefined;
+    readonly dry_run?: boolean | unknown | undefined;
+    readonly dryRun?: boolean | unknown | undefined;
+    readonly subagent_root?: string | unknown | undefined;
+    readonly subagentRoot?: string | unknown | undefined;
+    readonly workspace_template?: string | unknown | undefined;
+    readonly workspaceTemplate?: string | unknown | undefined;
+    readonly command_template?: string | unknown | undefined;
+    readonly commandTemplate?: string | unknown | undefined;
+    readonly files_format?: string | unknown | undefined;
+    readonly filesFormat?: string | unknown | undefined;
+    readonly attachments_format?: string | unknown | undefined;
+    readonly attachmentsFormat?: string | unknown | undefined;
+    readonly env?: unknown | undefined;
+    readonly healthcheck?: unknown | undefined;
+    readonly max_retries?: number | unknown | undefined;
+    readonly maxRetries?: number | unknown | undefined;
+    readonly retry_initial_delay_ms?: number | unknown | undefined;
+    readonly retryInitialDelayMs?: number | unknown | undefined;
+    readonly retry_max_delay_ms?: number | unknown | undefined;
+    readonly retryMaxDelayMs?: number | unknown | undefined;
+    readonly retry_backoff_factor?: number | unknown | undefined;
+    readonly retryBackoffFactor?: number | unknown | undefined;
+    readonly retry_status_codes?: unknown | undefined;
+    readonly retryStatusCodes?: unknown | undefined;
+}
 /**
  * Determine whether a path references guideline content (instructions or prompts).
  */
@@ -167,11 +283,13 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
 /**
  * Build prompt inputs by consolidating user request context and guideline content.
  */
-declare function buildPromptInputs(testCase: EvalCase): Promise<{
-    question: string;
-    guidelines: string;
-    systemMessage?: string;
-}>;
+interface PromptInputs {
+    readonly question: string;
+    readonly guidelines: string;
+    readonly chatPrompt?: ChatPrompt;
+    readonly systemMessage?: string;
+}
+declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
 declare function fileExists(filePath: string): Promise<boolean>;
 /**
@@ -207,56 +325,13 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
     readonly attempted: readonly string[];
 }>;
-type ChatPrompt = AxChatRequest["chatPrompt"];
-type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
-interface ProviderRequest {
-    readonly question: string;
-    readonly guidelines?: string;
-    readonly guideline_patterns?: readonly string[];
-    readonly chatPrompt?: ChatPrompt;
-    readonly inputFiles?: readonly string[];
-    readonly evalCaseId?: string;
-    readonly attempt?: number;
-    readonly maxOutputTokens?: number;
-    readonly temperature?: number;
-    readonly metadata?: JsonObject;
-    readonly signal?: AbortSignal;
-}
-interface ProviderResponse {
-    readonly text: string;
-    readonly reasoning?: string;
-    readonly raw?: unknown;
-    readonly usage?: JsonObject;
-}
-interface Provider {
-    readonly id: string;
-    readonly kind: ProviderKind;
-    readonly targetName: string;
-    invoke(request: ProviderRequest): Promise<ProviderResponse>;
-    /**
-     * Optional capability marker for provider-managed batching (single session handling multiple requests).
-     */
-    readonly supportsBatch?: boolean;
-    /**
-     * Optional batch invocation hook. When defined alongside supportsBatch=true,
-     * the orchestrator may send multiple requests in a single provider session.
-     */
-    invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
-    /**
-     * Optional access to the underlying AxAI instance.
-     * This enables using advanced Ax features like structured output signatures.
-     */
-    getAxAI?(): AxAI;
-}
-type EnvLookup = Readonly<Record<string, string | undefined>>;
-interface TargetDefinition {
-    readonly name: string;
-    readonly provider: ProviderKind | string;
-    readonly settings?: Record<string, unknown> | undefined;
-    readonly judge_target?: string | undefined;
-    readonly workers?: number | undefined;
+interface RetryConfig {
+    readonly maxRetries?: number;
+    readonly initialDelayMs?: number;
+    readonly maxDelayMs?: number;
+    readonly backoffFactor?: number;
+    readonly retryableStatusCodes?: readonly number[];
 }
 interface AzureResolvedConfig {
     readonly resourceName: string;
     readonly deploymentName: string;
@@ -264,6 +339,7 @@ interface AzureResolvedConfig {
     readonly version?: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface AnthropicResolvedConfig {
     readonly apiKey: string;
@@ -271,12 +347,14 @@ interface AnthropicResolvedConfig {
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
     readonly thinkingBudget?: number;
+    readonly retry?: RetryConfig;
 }
 interface GeminiResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface CodexResolvedConfig {
     readonly executable: string;
@@ -313,7 +391,6 @@ interface CliResolvedConfig {
     readonly commandTemplate: string;
     readonly filesFormat?: string;
     readonly cwd?: string;
-    readonly env?: Record<string, string>;
     readonly timeoutMs?: number;
     readonly healthcheck?: CliHealthcheck;
 }
@@ -413,6 +490,7 @@ interface EvaluationContext {
         readonly question: string;
         readonly guidelines: string;
         readonly systemMessage?: string;
+        readonly chatPrompt?: ChatPrompt;
     };
     readonly now: Date;
     readonly judgeProvider?: Provider;
@@ -521,4 +599,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };

package/dist/index.d.ts CHANGED

@@ -102,9 +102,10 @@ interface EvalCase {
     readonly dataset?: string;
     readonly conversation_id?: string;
     readonly question: string;
+    readonly input_messages: readonly TestMessage[];
     readonly input_segments: readonly JsonObject[];
     readonly output_segments: readonly JsonObject[];
-    readonly reference_answer: string;
+    readonly reference_answer?: string;
     readonly guideline_paths: readonly string[];
     readonly guideline_patterns?: readonly string[];
     readonly file_paths: readonly string[];
@@ -129,9 +130,11 @@ interface EvaluationResult {
     readonly timestamp: string;
     readonly reasoning?: string;
     readonly raw_aspects?: readonly string[];
-    readonly raw_request?: JsonObject;
+    readonly agent_provider_request?: JsonObject;
+    readonly lm_provider_request?: JsonObject;
     readonly evaluator_raw_request?: JsonObject;
     readonly evaluator_results?: readonly EvaluatorResult[];
+    readonly error?: string;
 }
 interface EvaluatorResult {
     readonly name: string;
@@ -148,6 +151,119 @@ interface EvaluatorResult {
  */
 declare function getHitCount(result: Pick<EvaluationResult, "hits">): number;
+type ChatPrompt = AxChatRequest["chatPrompt"];
+type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
+interface ProviderRequest {
+    readonly question: string;
+    readonly guidelines?: string;
+    readonly guideline_patterns?: readonly string[];
+    readonly chatPrompt?: ChatPrompt;
+    readonly inputFiles?: readonly string[];
+    readonly evalCaseId?: string;
+    readonly attempt?: number;
+    readonly maxOutputTokens?: number;
+    readonly temperature?: number;
+    readonly metadata?: JsonObject;
+    readonly signal?: AbortSignal;
+}
+interface ProviderResponse {
+    readonly text: string;
+    readonly reasoning?: string;
+    readonly raw?: unknown;
+    readonly usage?: JsonObject;
+}
+interface Provider {
+    readonly id: string;
+    readonly kind: ProviderKind;
+    readonly targetName: string;
+    invoke(request: ProviderRequest): Promise<ProviderResponse>;
+    /**
+     * Optional capability marker for provider-managed batching (single session handling multiple requests).
+     */
+    readonly supportsBatch?: boolean;
+    /**
+     * Optional batch invocation hook. When defined alongside supportsBatch=true,
+     * the orchestrator may send multiple requests in a single provider session.
+     */
+    invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
+    /**
+     * Optional access to the underlying AxAI instance.
+     * This enables using advanced Ax features like structured output signatures.
+     */
+    getAxAI?(): AxAI;
+}
+type EnvLookup = Readonly<Record<string, string | undefined>>;
+interface TargetDefinition {
+    readonly name: string;
+    readonly provider: ProviderKind | string;
+    readonly judge_target?: string | undefined;
+    readonly workers?: number | undefined;
+    readonly provider_batching?: boolean | undefined;
+    readonly providerBatching?: boolean | undefined;
+    readonly endpoint?: string | unknown | undefined;
+    readonly resource?: string | unknown | undefined;
+    readonly resourceName?: string | unknown | undefined;
+    readonly api_key?: string | unknown | undefined;
+    readonly apiKey?: string | unknown | undefined;
+    readonly deployment?: string | unknown | undefined;
+    readonly deploymentName?: string | unknown | undefined;
+    readonly model?: string | unknown | undefined;
+    readonly version?: string | unknown | undefined;
+    readonly api_version?: string | unknown | undefined;
+    readonly variant?: string | unknown | undefined;
+    readonly thinking_budget?: number | unknown | undefined;
+    readonly thinkingBudget?: number | unknown | undefined;
+    readonly temperature?: number | unknown | undefined;
+    readonly max_output_tokens?: number | unknown | undefined;
+    readonly maxTokens?: number | unknown | undefined;
+    readonly executable?: string | unknown | undefined;
+    readonly command?: string | unknown | undefined;
+    readonly binary?: string | unknown | undefined;
+    readonly args?: unknown | undefined;
+    readonly arguments?: unknown | undefined;
+    readonly cwd?: string | unknown | undefined;
+    readonly timeout_seconds?: number | unknown | undefined;
+    readonly timeoutSeconds?: number | unknown | undefined;
+    readonly log_dir?: string | unknown | undefined;
+    readonly logDir?: string | unknown | undefined;
+    readonly log_directory?: string | unknown | undefined;
+    readonly logDirectory?: string | unknown | undefined;
+    readonly log_format?: string | unknown | undefined;
+    readonly logFormat?: string | unknown | undefined;
+    readonly log_output_format?: string | unknown | undefined;
+    readonly logOutputFormat?: string | unknown | undefined;
+    readonly response?: string | unknown | undefined;
+    readonly delayMs?: number | unknown | undefined;
+    readonly delayMinMs?: number | unknown | undefined;
+    readonly delayMaxMs?: number | unknown | undefined;
+    readonly vscode_cmd?: string | unknown | undefined;
+    readonly wait?: boolean | unknown | undefined;
+    readonly dry_run?: boolean | unknown | undefined;
+    readonly dryRun?: boolean | unknown | undefined;
+    readonly subagent_root?: string | unknown | undefined;
+    readonly subagentRoot?: string | unknown | undefined;
+    readonly workspace_template?: string | unknown | undefined;
+    readonly workspaceTemplate?: string | unknown | undefined;
+    readonly command_template?: string | unknown | undefined;
+    readonly commandTemplate?: string | unknown | undefined;
+    readonly files_format?: string | unknown | undefined;
+    readonly filesFormat?: string | unknown | undefined;
+    readonly attachments_format?: string | unknown | undefined;
+    readonly attachmentsFormat?: string | unknown | undefined;
+    readonly env?: unknown | undefined;
+    readonly healthcheck?: unknown | undefined;
+    readonly max_retries?: number | unknown | undefined;
+    readonly maxRetries?: number | unknown | undefined;
+    readonly retry_initial_delay_ms?: number | unknown | undefined;
+    readonly retryInitialDelayMs?: number | unknown | undefined;
+    readonly retry_max_delay_ms?: number | unknown | undefined;
+    readonly retryMaxDelayMs?: number | unknown | undefined;
+    readonly retry_backoff_factor?: number | unknown | undefined;
+    readonly retryBackoffFactor?: number | unknown | undefined;
+    readonly retry_status_codes?: unknown | undefined;
+    readonly retryStatusCodes?: unknown | undefined;
+}
 /**
  * Determine whether a path references guideline content (instructions or prompts).
  */
@@ -167,11 +283,13 @@ declare function loadEvalCases(evalFilePath: string, repoRoot: URL | string, opt
 /**
  * Build prompt inputs by consolidating user request context and guideline content.
  */
-declare function buildPromptInputs(testCase: EvalCase): Promise<{
-    question: string;
-    guidelines: string;
-    systemMessage?: string;
-}>;
+interface PromptInputs {
+    readonly question: string;
+    readonly guidelines: string;
+    readonly chatPrompt?: ChatPrompt;
+    readonly systemMessage?: string;
+}
+declare function buildPromptInputs(testCase: EvalCase): Promise<PromptInputs>;
 declare function fileExists(filePath: string): Promise<boolean>;
 /**
@@ -207,56 +325,13 @@ declare function resolveFileReference(rawValue: string, searchRoots: readonly st
     readonly attempted: readonly string[];
 }>;
-type ChatPrompt = AxChatRequest["chatPrompt"];
-type ProviderKind = "azure" | "anthropic" | "gemini" | "codex" | "cli" | "mock" | "vscode" | "vscode-insiders";
-interface ProviderRequest {
-    readonly question: string;
-    readonly guidelines?: string;
-    readonly guideline_patterns?: readonly string[];
-    readonly chatPrompt?: ChatPrompt;
-    readonly inputFiles?: readonly string[];
-    readonly evalCaseId?: string;
-    readonly attempt?: number;
-    readonly maxOutputTokens?: number;
-    readonly temperature?: number;
-    readonly metadata?: JsonObject;
-    readonly signal?: AbortSignal;
-}
-interface ProviderResponse {
-    readonly text: string;
-    readonly reasoning?: string;
-    readonly raw?: unknown;
-    readonly usage?: JsonObject;
-}
-interface Provider {
-    readonly id: string;
-    readonly kind: ProviderKind;
-    readonly targetName: string;
-    invoke(request: ProviderRequest): Promise<ProviderResponse>;
-    /**
-     * Optional capability marker for provider-managed batching (single session handling multiple requests).
-     */
-    readonly supportsBatch?: boolean;
-    /**
-     * Optional batch invocation hook. When defined alongside supportsBatch=true,
-     * the orchestrator may send multiple requests in a single provider session.
-     */
-    invokeBatch?(requests: readonly ProviderRequest[]): Promise<readonly ProviderResponse[]>;
-    /**
-     * Optional access to the underlying AxAI instance.
-     * This enables using advanced Ax features like structured output signatures.
-     */
-    getAxAI?(): AxAI;
-}
-type EnvLookup = Readonly<Record<string, string | undefined>>;
-interface TargetDefinition {
-    readonly name: string;
-    readonly provider: ProviderKind | string;
-    readonly settings?: Record<string, unknown> | undefined;
-    readonly judge_target?: string | undefined;
-    readonly workers?: number | undefined;
+interface RetryConfig {
+    readonly maxRetries?: number;
+    readonly initialDelayMs?: number;
+    readonly maxDelayMs?: number;
+    readonly backoffFactor?: number;
+    readonly retryableStatusCodes?: readonly number[];
 }
 interface AzureResolvedConfig {
     readonly resourceName: string;
     readonly deploymentName: string;
@@ -264,6 +339,7 @@ interface AzureResolvedConfig {
     readonly version?: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface AnthropicResolvedConfig {
     readonly apiKey: string;
@@ -271,12 +347,14 @@ interface AnthropicResolvedConfig {
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
     readonly thinkingBudget?: number;
+    readonly retry?: RetryConfig;
 }
 interface GeminiResolvedConfig {
     readonly apiKey: string;
     readonly model: string;
     readonly temperature?: number;
     readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 interface CodexResolvedConfig {
     readonly executable: string;
@@ -313,7 +391,6 @@ interface CliResolvedConfig {
     readonly commandTemplate: string;
     readonly filesFormat?: string;
     readonly cwd?: string;
-    readonly env?: Record<string, string>;
     readonly timeoutMs?: number;
     readonly healthcheck?: CliHealthcheck;
 }
@@ -413,6 +490,7 @@ interface EvaluationContext {
         readonly question: string;
         readonly guidelines: string;
         readonly systemMessage?: string;
+        readonly chatPrompt?: ChatPrompt;
     };
     readonly now: Date;
     readonly judgeProvider?: Provider;
@@ -521,4 +599,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };
+export { type AgentKernel, type AnthropicResolvedConfig, type AssistantTestMessage, type AzureResolvedConfig, type CliResolvedConfig, CodeEvaluator, type CodeEvaluatorConfig, type CodeEvaluatorOptions, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EvalCase, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type Evaluator, type EvaluatorConfig, type EvaluatorKind, type EvaluatorResult, type GeminiResolvedConfig, type JsonObject, type JsonPrimitive, type JsonValue, LlmJudgeEvaluator, type LlmJudgeEvaluatorConfig, type LlmJudgeEvaluatorOptions, type MockResolvedConfig, type ProgressEvent, type PromptInputs, type Provider, type ProviderKind, type ProviderRequest, type ProviderResponse, type ResolvedTarget, type RunEvalCaseOptions, type RunEvaluationOptions, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetDefinition, type TestMessage, type TestMessageContent, type TestMessageRole, type ToolTestMessage, type UserTestMessage, type VSCodeResolvedConfig, buildDirectoryChain, buildPromptInputs, buildSearchRoots, consumeCodexLogEntries, createAgentKernel, createProvider, ensureVSCodeSubagents, extractCodeBlocks, fileExists, findGitRoot, getHitCount, isEvaluatorKind, isGuidelineFile, isJsonObject, isJsonValue, isTestMessage, isTestMessageRole, listTargetNames, loadEvalCases, normalizeLineEndings, readTargetDefinitions, readTextFile, resolveAndCreateProvider, resolveFileReference, resolveTargetDefinition, runEvalCase, runEvaluation, subscribeToCodexLogEntries };