npm - @agentv/core - Versions diffs - 4.18.0-next.1 → 4.19.0 - Mend

@agentv/core 4.18.0-next.1 → 4.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/{chunk-PYDBJOAO.js → chunk-24ND5HZC.js} +97 -97
package/dist/chunk-24ND5HZC.js.map +1 -0
package/dist/chunk-QXX3IBYV.js +19740 -0
package/dist/chunk-QXX3IBYV.js.map +1 -0
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +20086 -19073
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +861 -818
package/dist/index.d.ts +861 -818
package/dist/index.js +479 -19769
package/dist/index.js.map +1 -1
package/dist/ts-eval-loader-XFQ6S4DT.js +12 -0
package/dist/ts-eval-loader-XFQ6S4DT.js.map +1 -0
package/package.json +1 -1
package/dist/chunk-PYDBJOAO.js.map +0 -1

package/dist/index.d.cts (changed)

@@ -1455,156 +1455,558 @@ interface GraderResult {
     readonly endedAt?: string;
 }
-declare const MetadataSchema: z.ZodObject<{
-    name: z.ZodString;
-    description: z.ZodOptional<z.ZodString>;
-    version: z.ZodOptional<z.ZodString>;
-    author: z.ZodOptional<z.ZodString>;
-    tags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
-    license: z.ZodOptional<z.ZodString>;
-    requires: z.ZodOptional<z.ZodObject<{
-        agentv: z.ZodOptional<z.ZodString>;
-    }, "strip", z.ZodTypeAny, {
-        agentv?: string | undefined;
+/**
+ * Strict normalized schema for CLI target configuration.
+ * This is the final validated shape after environment variable resolution
+ * and internal field normalization.
+ *
+ * Uses .strict() to reject unknown properties, ensuring configuration
+ * errors are caught early rather than silently ignored.
+ *
+ * @example
+ * ```typescript
+ * const config: CliNormalizedConfig = {
+ *   command: 'agent run {PROMPT}',
+ *   timeoutMs: 120000,
+ *   verbose: true,
+ * };
+ * CliTargetConfigSchema.parse(config); // Validates the normalized config
+ * ```
+ */
+declare const CliTargetConfigSchema: z.ZodObject<{
+    command: z.ZodString;
+    filesFormat: z.ZodOptional<z.ZodString>;
+    cwd: z.ZodOptional<z.ZodString>;
+    timeoutMs: z.ZodOptional<z.ZodNumber>;
+    healthcheck: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
+        url: z.ZodString;
+        timeoutMs: z.ZodOptional<z.ZodNumber>;
+    }, "strict", z.ZodTypeAny, {
+        url: string;
+        timeoutMs?: number | undefined;
     }, {
-        agentv?: string | undefined;
-    }>>;
-}, "strip", z.ZodTypeAny, {
-    name: string;
-    description?: string | undefined;
-    version?: string | undefined;
-    author?: string | undefined;
-    tags?: string[] | undefined;
-    license?: string | undefined;
-    requires?: {
-        agentv?: string | undefined;
+        url: string;
+        timeoutMs?: number | undefined;
+    }>, z.ZodObject<{
+        command: z.ZodString;
+        cwd: z.ZodOptional<z.ZodString>;
+        timeoutMs: z.ZodOptional<z.ZodNumber>;
+    }, "strict", z.ZodTypeAny, {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
+    }, {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
+    }>]>>;
+    verbose: z.ZodOptional<z.ZodBoolean>;
+    keepTempFiles: z.ZodOptional<z.ZodBoolean>;
+}, "strict", z.ZodTypeAny, {
+    command: string;
+    timeoutMs?: number | undefined;
+    cwd?: string | undefined;
+    verbose?: boolean | undefined;
+    healthcheck?: {
+        url: string;
+        timeoutMs?: number | undefined;
+    } | {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
     } | undefined;
+    filesFormat?: string | undefined;
+    keepTempFiles?: boolean | undefined;
 }, {
-    name: string;
-    description?: string | undefined;
-    version?: string | undefined;
-    author?: string | undefined;
-    tags?: string[] | undefined;
-    license?: string | undefined;
-    requires?: {
-        agentv?: string | undefined;
+    command: string;
+    timeoutMs?: number | undefined;
+    cwd?: string | undefined;
+    verbose?: boolean | undefined;
+    healthcheck?: {
+        url: string;
+        timeoutMs?: number | undefined;
+    } | {
+        command: string;
+        timeoutMs?: number | undefined;
+        cwd?: string | undefined;
     } | undefined;
+    filesFormat?: string | undefined;
+    keepTempFiles?: boolean | undefined;
 }>;
-type EvalMetadata = z.infer<typeof MetadataSchema>;
-declare const DEFAULT_EVAL_PATTERNS: readonly string[];
-type ExecutionDefaults = {
-    readonly verbose?: boolean;
-    readonly keep_workspaces?: boolean;
-    readonly otel_file?: string;
-    readonly export_otel?: boolean;
-    readonly otel_backend?: string;
-    readonly otel_capture_content?: boolean;
-    readonly otel_group_turns?: boolean;
-    readonly pool_workspaces?: boolean;
-    readonly pool_slots?: number;
-};
-type ResultsExportConfig = {
-    readonly repo: string;
-    readonly path: string;
-    readonly auto_push?: boolean;
-    readonly branch_prefix?: string;
-};
-type AgentVConfig$1 = {
-    readonly required_version?: string;
-    readonly eval_patterns?: readonly string[];
-    readonly execution?: ExecutionDefaults;
-    readonly results?: {
-        readonly export?: ResultsExportConfig;
-    };
-};
-/**
- * Load optional .agentv/config.yaml configuration file.
- * Searches from eval file directory up to repo root.
- */
-declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
-/**
- * Extract target name from parsed eval suite (checks execution.target then falls back to root-level target).
- */
-declare function extractTargetFromSuite(suite: JsonObject): string | undefined;
-/**
- * Extract target refs from parsed eval suite.
- * Supports both string shorthand and object form with hooks.
- * Returns undefined when no targets array is specified.
- */
-declare function extractTargetRefsFromSuite(suite: JsonObject): readonly EvalTargetRef[] | undefined;
-/**
- * Extract target names from parsed eval suite (backward-compat wrapper).
- * Precedence: execution.targets (array) > execution.target (singular).
- * Returns undefined when no targets array is specified.
- */
-declare function extractTargetsFromSuite(suite: JsonObject): readonly string[] | undefined;
-/**
- * Extract workers count from suite-level execution block.
- */
-declare function extractWorkersFromSuite(suite: JsonObject): number | undefined;
-/**
- * Extract per-test targets array from a raw test case object.
- */
-declare function extractTargetsFromTestCase(testCase: JsonObject): readonly string[] | undefined;
-/**
- * Extract trials configuration from parsed eval suite's execution block.
- * Returns undefined when count is 1 or not specified (no-op).
- */
-declare function extractTrialsConfig(suite: JsonObject): TrialsConfig | undefined;
+type CliNormalizedConfig = z.infer<typeof CliTargetConfigSchema>;
 /**
- * Cache configuration parsed from execution block.
+ * Resolved CLI configuration type derived from CliTargetConfigSchema.
+ * This is the final validated shape used by the CLI provider at runtime.
+ * Using Readonly to ensure immutability for runtime safety.
  */
-interface CacheConfig {
-    readonly enabled: boolean;
-    readonly cachePath?: string;
+type CliResolvedConfig = Readonly<CliNormalizedConfig>;
+interface RetryConfig {
+    readonly maxRetries?: number;
+    readonly initialDelayMs?: number;
+    readonly maxDelayMs?: number;
+    readonly backoffFactor?: number;
+    readonly retryableStatusCodes?: readonly number[];
 }
 /**
- * Extract cache configuration from parsed eval suite's execution block.
- * Returns undefined when no cache config is specified.
- */
-declare function extractCacheConfig(suite: JsonObject): CacheConfig | undefined;
-/**
- * Extract `execution.fail_on_error` from parsed eval suite.
- * Accepts `true` or `false`.
- * Returns undefined when not specified.
+ * Selects which OpenAI-compatible API endpoint to use.
+ * - "chat" (default): POST /chat/completions — universally supported by all OpenAI-compatible providers.
+ * - "responses": POST /responses — only supported by api.openai.com.
+ *
+ * Maps to Vercel AI SDK methods: "chat" → provider.chat(model), "responses" → provider(model).
  */
-declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
+type ApiFormat = 'chat' | 'responses';
 /**
- * Extract `execution.threshold` from parsed eval suite.
- * Accepts a number in [0, 1] range.
- * Returns undefined when not specified.
+ * Azure OpenAI settings used by the Vercel AI SDK.
  */
-declare function extractThreshold(suite: JsonObject): number | undefined;
+interface AzureResolvedConfig {
+    readonly resourceName: string;
+    readonly deploymentName: string;
+    readonly apiKey: string;
+    readonly version?: string;
+    readonly apiFormat?: ApiFormat;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
+}
 /**
- * Formatting mode for segment content.
- * - 'agent': File references only (for providers with filesystem access)
- * - 'lm': Embedded file content with XML tags (for language model providers)
+ * OpenAI-compatible settings used by the Vercel AI SDK.
  */
-type FormattingMode = 'agent' | 'lm';
+interface OpenAIResolvedConfig {
+    readonly baseURL: string;
+    readonly apiKey: string;
+    readonly model: string;
+    readonly apiFormat?: ApiFormat;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
+}
 /**
- * Build prompt inputs by consolidating user request context.
+ * OpenRouter settings used by the Vercel AI SDK provider.
  */
-interface PromptInputs {
-    readonly question: string;
-    readonly chatPrompt?: ChatPrompt;
-    readonly systemMessage?: string;
+interface OpenRouterResolvedConfig {
+    readonly apiKey: string;
+    readonly model: string;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
 }
 /**
- * Build prompt inputs by consolidating user request context.
- *
- * @param testCase - The evaluation test case
- * @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
+ * Anthropic Claude settings used by the Vercel AI SDK.
  */
-declare function buildPromptInputs(testCase: EvalTest, mode?: FormattingMode): Promise<PromptInputs>;
+interface AnthropicResolvedConfig {
+    readonly apiKey: string;
+    readonly model: string;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    readonly thinkingBudget?: number;
+    readonly retry?: RetryConfig;
+}
 /**
- * Detect file format by extension.
+ * Google Gemini settings used by the Vercel AI SDK.
  */
-declare function detectFormat(filePath: string): 'yaml' | 'jsonl' | 'agent-skills-json';
+interface GeminiResolvedConfig {
+    readonly apiKey: string;
+    readonly model: string;
+    readonly temperature?: number;
+    readonly maxOutputTokens?: number;
+    readonly retry?: RetryConfig;
+}
+interface CodexResolvedConfig {
+    readonly model?: string;
+    readonly executable: string;
+    readonly args?: readonly string[];
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+interface CopilotCliResolvedConfig {
+    readonly executable: string;
+    readonly model?: string;
+    readonly args?: readonly string[];
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+interface CopilotSdkResolvedConfig {
+    readonly cliUrl?: string;
+    readonly cliPath?: string;
+    readonly githubToken?: string;
+    readonly model?: string;
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+    /** BYOK provider type: "azure", "openai", or "anthropic". */
+    readonly byokType?: string;
+    /** BYOK base URL for the provider endpoint. */
+    readonly byokBaseUrl?: string;
+    /** BYOK API key for authenticating with the provider. */
+    readonly byokApiKey?: string;
+    /** BYOK bearer token (takes precedence over apiKey when set). */
+    readonly byokBearerToken?: string;
+    /** BYOK Azure API version (e.g. "2024-10-21"). Only used when byokType is "azure". */
+    readonly byokApiVersion?: string;
+    /** BYOK wire API format: "completions" or "responses". */
+    readonly byokWireApi?: string;
+}
+interface CopilotLogResolvedConfig {
+    /** Explicit path to a session directory containing events.jsonl. */
+    readonly sessionDir?: string;
+    /** Session UUID — combined with sessionStateDir to build the path. */
+    readonly sessionId?: string;
+    /** Auto-discovery mode. 'latest' picks the most recent session. */
+    readonly discover?: 'latest';
+    /** Override the default ~/.copilot/session-state directory. */
+    readonly sessionStateDir?: string;
+    /** Filter discovery by working directory. */
+    readonly cwd?: string;
+}
+interface PiCodingAgentResolvedConfig {
+    readonly subprovider?: string;
+    readonly model?: string;
+    readonly apiKey?: string;
+    readonly baseUrl?: string;
+    readonly tools?: string;
+    readonly thinking?: string;
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+interface PiCliResolvedConfig {
+    readonly executable: string;
+    readonly subprovider?: string;
+    readonly model?: string;
+    readonly apiKey?: string;
+    readonly baseUrl?: string;
+    readonly tools?: string;
+    readonly thinking?: string;
+    readonly args?: readonly string[];
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+    readonly systemPrompt?: string;
+}
+interface ClaudeResolvedConfig {
+    readonly executable: string;
+    readonly model?: string;
+    readonly systemPrompt?: string;
+    readonly cwd?: string;
+    readonly timeoutMs?: number;
+    readonly maxTurns?: number;
+    readonly maxBudgetUsd?: number;
+    readonly logDir?: string;
+    readonly logFormat?: 'summary' | 'json';
+    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
+    readonly streamLog?: false | 'raw' | 'summary';
+}
+interface MockResolvedConfig {
+    readonly response?: string;
+    readonly delayMs?: number;
+    readonly delayMinMs?: number;
+    readonly delayMaxMs?: number;
+}
+interface VSCodeResolvedConfig {
+    readonly executable: string;
+    readonly waitForResponse: boolean;
+    readonly dryRun: boolean;
+    readonly subagentRoot?: string;
+    readonly timeoutMs?: number;
+}
+interface AgentVResolvedConfig {
+    readonly model: string;
+    readonly temperature: number;
+}
+/** Base fields shared by all resolved targets. */
+interface ResolvedTargetBase {
+    readonly name: string;
+    readonly graderTarget?: string;
+    readonly workers?: number;
+    readonly providerBatching?: boolean;
+    /**
+     * Whether this target can be executed via executor subagents in subagent mode.
+     * Defaults to `true` for all non-CLI providers. Set `false` in targets.yaml
+     * to force CLI invocation even in subagent mode.
+     */
+    readonly subagentModeAllowed?: boolean;
+    /**
+     * Ordered list of target names to try when the primary target fails after
+     * exhausting retries. Each fallback is attempted in order.
+     */
+    readonly fallbackTargets?: readonly string[];
+}
+type ResolvedTarget = (ResolvedTargetBase & {
+    readonly kind: 'openai';
+    readonly config: OpenAIResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'openrouter';
+    readonly config: OpenRouterResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'azure';
+    readonly config: AzureResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'anthropic';
+    readonly config: AnthropicResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'gemini';
+    readonly config: GeminiResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'codex';
+    readonly config: CodexResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'copilot-sdk';
+    readonly config: CopilotSdkResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'copilot-cli';
+    readonly config: CopilotCliResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'copilot-log';
+    readonly config: CopilotLogResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'pi-coding-agent';
+    readonly config: PiCodingAgentResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'pi-cli';
+    readonly config: PiCliResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'claude';
+    readonly config: ClaudeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'claude-cli';
+    readonly config: ClaudeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'claude-sdk';
+    readonly config: ClaudeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'mock';
+    readonly config: MockResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'vscode' | 'vscode-insiders';
+    readonly config: VSCodeResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'agentv';
+    readonly config: AgentVResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'cli';
+    readonly config: CliResolvedConfig;
+}) | (ResolvedTargetBase & {
+    readonly kind: 'transcript';
+    readonly config: Record<string, never>;
+});
+/**
+ * Optional settings accepted on ALL target definitions regardless of provider.
+ * Exported so the targets validator can reuse the same list — adding a field
+ * here automatically makes it valid in targets.yaml without a separate update.
+ */
+declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "subagent_mode_allowed", "fallback_targets"];
+declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
+declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string, options?: {
+    readonly emitDeprecationWarnings?: boolean;
+}): ResolvedTarget;
+/**
+ * Extensible provider registry.
+ *
+ * Replaces the hardcoded switch/case dispatch in createProvider() with
+ * a registry of named factory functions. Built-in providers are registered
+ * at startup; users can add custom providers via the registry API or by
+ * dropping files in `.agentv/providers/`.
+ */
+/**
+ * Factory function that creates a Provider instance from a resolved target.
+ */
+type ProviderFactoryFn = (target: ResolvedTarget) => Provider;
+/**
+ * Registry of provider factory functions keyed by provider kind.
+ *
+ * Built-in providers are registered at startup. Custom providers can be
+ * registered via the `register()` method.
+ */
+declare class ProviderRegistry {
+    private readonly factories;
+    /** Register a factory function for a provider kind. */
+    register(kind: string, factory: ProviderFactoryFn): this;
+    /** Get the factory function for a provider kind. */
+    get(kind: string): ProviderFactoryFn | undefined;
+    /** Check if a factory is registered for the given kind. */
+    has(kind: string): boolean;
+    /** List all registered provider kind names. */
+    list(): string[];
+    /**
+     * Create a provider instance from a resolved target.
+     * Falls back to CLI provider for unknown kinds (custom provider escape hatch).
+     */
+    create(target: ResolvedTarget): Provider;
+}
+declare const MetadataSchema: z.ZodObject<{
+    name: z.ZodString;
+    description: z.ZodOptional<z.ZodString>;
+    version: z.ZodOptional<z.ZodString>;
+    author: z.ZodOptional<z.ZodString>;
+    tags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
+    license: z.ZodOptional<z.ZodString>;
+    requires: z.ZodOptional<z.ZodObject<{
+        agentv: z.ZodOptional<z.ZodString>;
+    }, "strip", z.ZodTypeAny, {
+        agentv?: string | undefined;
+    }, {
+        agentv?: string | undefined;
+    }>>;
+}, "strip", z.ZodTypeAny, {
+    name: string;
+    description?: string | undefined;
+    version?: string | undefined;
+    author?: string | undefined;
+    tags?: string[] | undefined;
+    license?: string | undefined;
+    requires?: {
+        agentv?: string | undefined;
+    } | undefined;
+}, {
+    name: string;
+    description?: string | undefined;
+    version?: string | undefined;
+    author?: string | undefined;
+    tags?: string[] | undefined;
+    license?: string | undefined;
+    requires?: {
+        agentv?: string | undefined;
+    } | undefined;
+}>;
+type EvalMetadata = z.infer<typeof MetadataSchema>;
+declare const DEFAULT_EVAL_PATTERNS: readonly string[];
+type ExecutionDefaults = {
+    readonly verbose?: boolean;
+    readonly keep_workspaces?: boolean;
+    readonly otel_file?: string;
+    readonly export_otel?: boolean;
+    readonly otel_backend?: string;
+    readonly otel_capture_content?: boolean;
+    readonly otel_group_turns?: boolean;
+    readonly pool_workspaces?: boolean;
+    readonly pool_slots?: number;
+};
+type ResultsExportConfig = {
+    readonly repo: string;
+    readonly path: string;
+    readonly auto_push?: boolean;
+    readonly branch_prefix?: string;
+};
+type AgentVConfig$1 = {
+    readonly required_version?: string;
+    readonly eval_patterns?: readonly string[];
+    readonly execution?: ExecutionDefaults;
+    readonly results?: {
+        readonly export?: ResultsExportConfig;
+    };
+};
+/**
+ * Load optional .agentv/config.yaml configuration file.
+ * Searches from eval file directory up to repo root.
+ */
+declare function loadConfig(evalFilePath: string, repoRoot: string): Promise<AgentVConfig$1 | null>;
+/**
+ * Extract target name from parsed eval suite (checks execution.target then falls back to root-level target).
+ */
+declare function extractTargetFromSuite(suite: JsonObject): string | undefined;
+/**
+ * Extract target refs from parsed eval suite.
+ * Supports both string shorthand and object form with hooks.
+ * Returns undefined when no targets array is specified.
+ */
+declare function extractTargetRefsFromSuite(suite: JsonObject): readonly EvalTargetRef[] | undefined;
+/**
+ * Extract target names from parsed eval suite (backward-compat wrapper).
+ * Precedence: execution.targets (array) > execution.target (singular).
+ * Returns undefined when no targets array is specified.
+ */
+declare function extractTargetsFromSuite(suite: JsonObject): readonly string[] | undefined;
+/**
+ * Extract workers count from suite-level execution block.
+ */
+declare function extractWorkersFromSuite(suite: JsonObject): number | undefined;
+/**
+ * Extract per-test targets array from a raw test case object.
+ */
+declare function extractTargetsFromTestCase(testCase: JsonObject): readonly string[] | undefined;
+/**
+ * Extract trials configuration from parsed eval suite's execution block.
+ * Returns undefined when count is 1 or not specified (no-op).
+ */
+declare function extractTrialsConfig(suite: JsonObject): TrialsConfig | undefined;
+/**
+ * Cache configuration parsed from execution block.
+ */
+interface CacheConfig {
+    readonly enabled: boolean;
+    readonly cachePath?: string;
+}
+/**
+ * Extract cache configuration from parsed eval suite's execution block.
+ * Returns undefined when no cache config is specified.
+ */
+declare function extractCacheConfig(suite: JsonObject): CacheConfig | undefined;
+/**
+ * Extract `execution.fail_on_error` from parsed eval suite.
+ * Accepts `true` or `false`.
+ * Returns undefined when not specified.
+ */
+declare function extractFailOnError(suite: JsonObject): FailOnError | undefined;
+/**
+ * Extract `execution.threshold` from parsed eval suite.
+ * Accepts a number in [0, 1] range.
+ * Returns undefined when not specified.
+ */
+declare function extractThreshold(suite: JsonObject): number | undefined;
+/**
+ * Formatting mode for segment content.
+ * - 'agent': File references only (for providers with filesystem access)
+ * - 'lm': Embedded file content with XML tags (for language model providers)
+ */
+type FormattingMode = 'agent' | 'lm';
+/**
+ * Build prompt inputs by consolidating user request context.
+ */
+interface PromptInputs {
+    readonly question: string;
+    readonly chatPrompt?: ChatPrompt;
+    readonly systemMessage?: string;
+}
+/**
+ * Build prompt inputs by consolidating user request context.
+ *
+ * @param testCase - The evaluation test case
+ * @param mode - Formatting mode: 'agent' for file references, 'lm' for embedded content (default: 'lm')
+ */
+declare function buildPromptInputs(testCase: EvalTest, mode?: FormattingMode): Promise<PromptInputs>;
+/**
+ * Detect file format by extension.
+ */
+declare function detectFormat(filePath: string): 'yaml' | 'jsonl' | 'agent-skills-json' | 'typescript';
 type LoadOptions = {
     readonly verbose?: boolean;
     /** Filter tests by ID pattern(s) (glob supported, e.g., "summary-*"). Arrays use OR logic. */
@@ -1647,6 +2049,10 @@ type EvalSuiteResult = {
     readonly threshold?: number;
     /** Resolved workspace.path from the eval YAML (after env-var expansion), if set */
     readonly workspacePath?: string;
+    /** Inline target definition from a TS eval config. */
+    readonly inlineTarget?: TargetDefinition;
+    /** Custom provider factory from a TS eval config task(). */
+    readonly providerFactory?: ProviderFactoryFn;
 };
 /**
  * Load tests and suite metadata from a single parse.
@@ -1693,495 +2099,370 @@ declare function isAgentSkillsFormat(parsed: unknown): parsed is AgentSkillsEval
 declare function parseAgentSkillsEvals(parsed: unknown, source?: string, baseDir?: string): readonly EvalTest[];
 /**
- * EVAL.yaml → evals.json transpiler.
+ * Types for inline assertion functions used in the evaluate() API.
  *
- * Converts an AgentV EVAL.yaml file into Agent Skills evals.json format
- * for consumption by the skill-creator pipeline.
+ * Inline functions are the escape hatch for custom evaluation logic
+ * that doesn't fit a built-in grader type. For built-in assertions
+ * (contains, regex, is-json, etc.), use config objects instead:
  *
- * Handles both `assertions:` (current) and `assert:` (deprecated alias).
- */
-interface EvalsJsonCase {
-    id: number;
-    prompt: string;
-    expected_output?: string;
-    files?: string[];
-    should_trigger?: boolean;
-    assertions: string[];
-}
-interface EvalsJsonFile {
-    skill_name: string;
-    evals: EvalsJsonCase[];
-}
-/**
- * Result of transpiling a single EVAL.yaml.
- * May produce multiple evals.json files (one per skill).
- */
-interface TranspileResult {
-    /** Map from skill_name → EvalsJsonFile */
-    files: Map<string, EvalsJsonFile>;
-    /** Warning messages accumulated during transpilation */
-    warnings: string[];
-}
-/**
- * Transpile a parsed EVAL.yaml object into one or more evals.json objects.
+ *   assert: [{ type: 'contains', value: 'hello' }]
  *
- * @param suite  Parsed YAML object (already loaded, no file I/O here)
- * @param source Source identifier for error messages (e.g. file path)
- */
-declare function transpileEvalYaml(suite: unknown, source?: string): TranspileResult;
-/**
- * Transpile an EVAL.yaml file into one or more evals.json objects.
- * Returns a map from output filename → JSON content.
+ * Inline functions are for custom logic:
  *
- * @param evalYamlPath  Absolute path to the EVAL.yaml file
- */
-declare function transpileEvalYamlFile(evalYamlPath: string): TranspileResult;
-/**
- * Determine the output filename(s) for a transpile result.
- * Single skill → "evals.json"
- * Multiple skills → "<skill>.evals.json"
- */
-declare function getOutputFilenames(result: TranspileResult): Map<string, string>;
-declare function fileExists(filePath: string): Promise<boolean>;
-/**
- * Normalize line endings to LF (\n).
- * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
- */
-declare function normalizeLineEndings(content: string): string;
-/**
- * Read a text file and normalize line endings to LF (\n).
- * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
- */
-declare function readTextFile(filePath: string): Promise<string>;
-/**
- * Read a JSON file and parse it.
- */
-declare function readJsonFile<T = unknown>(filePath: string): Promise<T>;
-/**
- * Find git repository root by walking up the directory tree.
- */
-declare function findGitRoot(startPath: string): Promise<string | null>;
-/**
- * Build a chain of directories walking from a file's location up to repo root.
- * Used for discovering configuration files like targets.yaml or config.yaml.
- */
-declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
-/**
- * Build search roots for file resolution, matching yaml-parser behavior.
- * Searches from eval file directory up to repo root.
- */
-declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
-/**
- * Resolve a file reference using search roots, matching yaml-parser behavior.
+ *   assert: [({ output }) => ({ name: 'len', score: output.length > 5 ? 1 : 0 })]
  */
-declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
-    readonly displayPath: string;
-    readonly resolvedPath?: string;
-    readonly attempted: readonly string[];
-}>;
+/** Context passed to inline assertion functions */
+interface AssertContext {
+    readonly input: string;
+    readonly output: string;
+    readonly expectedOutput?: string;
+    readonly criteria?: string;
+    readonly metadata?: Record<string, unknown>;
+}
+/** Result from an inline assertion function */
+interface AssertResult {
+    readonly name: string;
+    readonly score: number;
+    readonly metadata?: Record<string, unknown>;
+}
+/** Inline assertion function signature */
+type AssertFn = (ctx: AssertContext) => AssertResult | Promise<AssertResult>;
 /**
- * Strict normalized schema for CLI target configuration.
- * This is the final validated shape after environment variable resolution
- * and internal field normalization.
+ * Programmatic API for running evaluations.
  *
- * Uses .strict() to reject unknown properties, ensuring configuration
- * errors are caught early rather than silently ignored.
+ * Provides `evaluate()` — a high-level function for using AgentV as a library
+ * instead of a CLI. The config shape mirrors the YAML structure for easy
+ * translation between file-based and programmatic usage.
  *
- * @example
+ * @example Inline tests with config objects
  * ```typescript
- * const config: CliNormalizedConfig = {
- *   command: 'agent run {PROMPT}',
- *   timeoutMs: 120000,
- *   verbose: true,
- * };
- * CliTargetConfigSchema.parse(config); // Validates the normalized config
+ * import { evaluate } from '@agentv/core';
+ *
+ * const results = await evaluate({
+ *   tests: [
+ *     {
+ *       id: 'capital',
+ *       input: 'What is the capital of France?',
+ *       expectedOutput: 'Paris',
+ *       assert: [{ type: 'contains', value: 'Paris' }],
+ *     },
+ *   ],
+ *   target: { provider: 'mock_agent' },
+ * });
+ *
+ * console.log(results.summary.passed, 'passed');
+ * ```
+ *
+ * @example Inline tests with task function and custom assertion
+ * ```typescript
+ * import { evaluate } from '@agentv/core';
+ *
+ * const { summary } = await evaluate({
+ *   tests: [
+ *     {
+ *       id: 'echo',
+ *       input: 'hello',
+ *       expectedOutput: 'Echo: hello',
+ *       assert: [
+ *         { type: 'contains', value: 'hello' },
+ *         { type: 'equals' },
+ *         ({ output }) => ({ name: 'custom', score: output.length > 0 ? 1 : 0 }),
+ *       ],
+ *     },
+ *   ],
+ *   task: async (input) => `Echo: ${input}`,
+ * });
  * ```
- */
-declare const CliTargetConfigSchema: z.ZodObject<{
-    command: z.ZodString;
-    filesFormat: z.ZodOptional<z.ZodString>;
-    cwd: z.ZodOptional<z.ZodString>;
-    timeoutMs: z.ZodOptional<z.ZodNumber>;
-    healthcheck: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
-        url: z.ZodString;
-        timeoutMs: z.ZodOptional<z.ZodNumber>;
-    }, "strict", z.ZodTypeAny, {
-        url: string;
-        timeoutMs?: number | undefined;
-    }, {
-        url: string;
-        timeoutMs?: number | undefined;
-    }>, z.ZodObject<{
-        command: z.ZodString;
-        cwd: z.ZodOptional<z.ZodString>;
-        timeoutMs: z.ZodOptional<z.ZodNumber>;
-    }, "strict", z.ZodTypeAny, {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    }, {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    }>]>>;
-    verbose: z.ZodOptional<z.ZodBoolean>;
-    keepTempFiles: z.ZodOptional<z.ZodBoolean>;
-}, "strict", z.ZodTypeAny, {
-    command: string;
-    timeoutMs?: number | undefined;
-    cwd?: string | undefined;
-    verbose?: boolean | undefined;
-    healthcheck?: {
-        url: string;
-        timeoutMs?: number | undefined;
-    } | {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    } | undefined;
-    filesFormat?: string | undefined;
-    keepTempFiles?: boolean | undefined;
-}, {
-    command: string;
-    timeoutMs?: number | undefined;
-    cwd?: string | undefined;
-    verbose?: boolean | undefined;
-    healthcheck?: {
-        url: string;
-        timeoutMs?: number | undefined;
-    } | {
-        command: string;
-        timeoutMs?: number | undefined;
-        cwd?: string | undefined;
-    } | undefined;
-    filesFormat?: string | undefined;
-    keepTempFiles?: boolean | undefined;
-}>;
-type CliNormalizedConfig = z.infer<typeof CliTargetConfigSchema>;
-/**
- * Resolved CLI configuration type derived from CliTargetConfigSchema.
- * This is the final validated shape used by the CLI provider at runtime.
- * Using Readonly to ensure immutability for runtime safety.
- */
-type CliResolvedConfig = Readonly<CliNormalizedConfig>;
-interface RetryConfig {
-    readonly maxRetries?: number;
-    readonly initialDelayMs?: number;
-    readonly maxDelayMs?: number;
-    readonly backoffFactor?: number;
-    readonly retryableStatusCodes?: readonly number[];
-}
-/**
- * Selects which OpenAI-compatible API endpoint to use.
- * - "chat" (default): POST /chat/completions — universally supported by all OpenAI-compatible providers.
- * - "responses": POST /responses — only supported by api.openai.com.
  *
- * Maps to Vercel AI SDK methods: "chat" → provider.chat(model), "responses" → provider(model).
+ * @example File-based
+ * ```typescript
+ * const results = await evaluate({
+ *   specFile: './evals/EVAL.yaml',
+ *   target: { provider: 'claude_agent' },
+ * });
+ * ```
+ *
+ * @module
  */
-type ApiFormat = 'chat' | 'responses';
 /**
- * Azure OpenAI settings used by the Vercel AI SDK.
+ * Inline test definition for the programmatic API.
+ * Mirrors the YAML test structure.
  */
-interface AzureResolvedConfig {
-    readonly resourceName: string;
-    readonly deploymentName: string;
-    readonly apiKey: string;
-    readonly version?: string;
-    readonly apiFormat?: ApiFormat;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
+interface EvalTestInput {
+    /** Unique test identifier */
+    readonly id: string;
+    /** What the response should accomplish */
+    readonly criteria?: string;
+    /** Input to the agent (string or message array). Omit when using turns[]. */
+    readonly input?: string | readonly {
+        role: string;
+        content: string;
+    }[];
+    /** Expected reference output (camelCase preferred) */
+    readonly expectedOutput?: string;
+    /** @deprecated Use `expectedOutput` instead */
+    readonly expected_output?: string;
+    /** Assertion graders — accepts factory functions, config objects, or inline functions */
+    readonly assert?: readonly AssertEntry[];
+    /** Arbitrary metadata */
+    readonly metadata?: Record<string, unknown>;
+    /** Enable multi-turn conversation mode. Inferred automatically when turns[] is provided. */
+    readonly mode?: 'conversation';
+    /** Ordered turns for conversation evaluation. Each turn generates a fresh LLM call. */
+    readonly turns?: readonly ConversationTurnInput[];
+    /** Score aggregation across turns: 'mean' (default), 'min', or 'max'. */
+    readonly aggregation?: ConversationAggregation;
 }
 /**
- * OpenAI-compatible settings used by the Vercel AI SDK.
+ * A single turn in a multi-turn conversation evaluation (programmatic API).
+ * Mirrors the YAML `turns` structure with camelCase naming.
  */
-interface OpenAIResolvedConfig {
-    readonly baseURL: string;
-    readonly apiKey: string;
-    readonly model: string;
-    readonly apiFormat?: ApiFormat;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
+interface ConversationTurnInput {
+    /** Input for this turn (string or message array) */
+    readonly input: string | readonly {
+        role: string;
+        content: string;
+    }[];
+    /** Expected reference output for this turn */
+    readonly expectedOutput?: string;
+    /** @deprecated Use `expectedOutput` instead */
+    readonly expected_output?: string;
+    /** Per-turn assertions (inline functions or grader config objects) */
+    readonly assert?: readonly AssertEntry[];
 }
 /**
- * OpenRouter settings used by the Vercel AI SDK provider.
+ * Inline assertion definition for the programmatic API.
+ * Matches the YAML `assert` block structure.
  */
-interface OpenRouterResolvedConfig {
-    readonly apiKey: string;
-    readonly model: string;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
+interface EvalAssertionInput {
+    /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
+    readonly type: string;
+    /** Display name */
+    readonly name?: string;
+    /** Value for deterministic assertions (contains, equals, regex) */
+    readonly value?: string;
+    /** Weight for scoring */
+    readonly weight?: number;
+    /** Whether this assertion is required to pass */
+    readonly required?: boolean | number;
+    /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
+    readonly min_score?: number;
+    /** Prompt file for the 'llm-grader' type */
+    readonly prompt?: string;
+    /** Script for the 'code-grader' type */
+    readonly script?: string | readonly string[];
+    /** Additional config passed to the assertion */
+    readonly config?: Record<string, unknown>;
+    /** Nested assertions for composite type */
+    readonly assert?: readonly EvalAssertionInput[];
+    /** Rubric criteria for rubrics type */
+    readonly criteria?: readonly (string | {
+        id?: string;
+        outcome: string;
+        weight?: number;
+    })[];
+    /** Additional properties */
+    readonly [key: string]: unknown;
 }
+/** Assert entry: inline function or config object */
+type AssertEntry = AssertFn | EvalAssertionInput;
 /**
- * Anthropic Claude settings used by the Vercel AI SDK.
+ * Configuration for `evaluate()`.
+ * Accepts either inline tests or a spec file path.
  */
-interface AnthropicResolvedConfig {
-    readonly apiKey: string;
-    readonly model: string;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly thinkingBudget?: number;
-    readonly retry?: RetryConfig;
+interface EvalConfig {
+    /** Inline test definitions (mutually exclusive with specFile) */
+    readonly tests?: readonly EvalTestInput[];
+    /** Path to an EVAL.yaml spec file (mutually exclusive with tests) */
+    readonly specFile?: string;
+    /** Target provider configuration */
+    readonly target?: TargetDefinition;
+    /** Custom task function — mutually exclusive with target */
+    readonly task?: (input: string) => string | Promise<string>;
+    /** Suite-level assertions applied to all tests */
+    readonly assert?: readonly AssertEntry[];
+    /** Optional suite metadata used by CLI discovery, tagging, and reporting. */
+    readonly metadata?: EvalMetadata;
+    /** Filter tests by ID pattern(s) (glob supported). Arrays use OR logic. */
+    readonly filter?: string | readonly string[];
+    /** Maximum concurrent workers (default: 3) */
+    readonly workers?: number;
+    /** Maximum retries on failure (default: 2) */
+    readonly maxRetries?: number;
+    /** Agent timeout in milliseconds. No timeout if not set. */
+    readonly agentTimeoutMs?: number;
+    /** Enable response caching */
+    readonly cache?: boolean;
+    /** Verbose logging */
+    readonly verbose?: boolean;
+    /** Callback for each completed result */
+    readonly onResult?: (result: EvaluationResult) => void;
+    /** Score threshold for pass/fail (0-1). Default: 0.8 (DEFAULT_THRESHOLD). */
+    readonly threshold?: number;
+    /** Command(s) to run once before the suite starts. Same semantics as YAML before_all. */
+    readonly beforeAll?: string | readonly string[];
+    /** Suite-level cost cap in USD. Stops dispatching new tests when exceeded. */
+    readonly budgetUsd?: number;
 }
 /**
- * Google Gemini settings used by the Vercel AI SDK.
+ * Summary statistics for an evaluation run.
  */
-interface GeminiResolvedConfig {
-    readonly apiKey: string;
-    readonly model: string;
-    readonly temperature?: number;
-    readonly maxOutputTokens?: number;
-    readonly retry?: RetryConfig;
-}
-interface CodexResolvedConfig {
-    readonly model?: string;
-    readonly executable: string;
-    readonly args?: readonly string[];
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-}
-interface CopilotCliResolvedConfig {
-    readonly executable: string;
-    readonly model?: string;
-    readonly args?: readonly string[];
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-}
-interface CopilotSdkResolvedConfig {
-    readonly cliUrl?: string;
-    readonly cliPath?: string;
-    readonly githubToken?: string;
-    readonly model?: string;
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-    /** BYOK provider type: "azure", "openai", or "anthropic". */
-    readonly byokType?: string;
-    /** BYOK base URL for the provider endpoint. */
-    readonly byokBaseUrl?: string;
-    /** BYOK API key for authenticating with the provider. */
-    readonly byokApiKey?: string;
-    /** BYOK bearer token (takes precedence over apiKey when set). */
-    readonly byokBearerToken?: string;
-    /** BYOK Azure API version (e.g. "2024-10-21"). Only used when byokType is "azure". */
-    readonly byokApiVersion?: string;
-    /** BYOK wire API format: "completions" or "responses". */
-    readonly byokWireApi?: string;
-}
-interface CopilotLogResolvedConfig {
-    /** Explicit path to a session directory containing events.jsonl. */
-    readonly sessionDir?: string;
-    /** Session UUID — combined with sessionStateDir to build the path. */
-    readonly sessionId?: string;
-    /** Auto-discovery mode. 'latest' picks the most recent session. */
-    readonly discover?: 'latest';
-    /** Override the default ~/.copilot/session-state directory. */
-    readonly sessionStateDir?: string;
-    /** Filter discovery by working directory. */
-    readonly cwd?: string;
-}
-interface PiCodingAgentResolvedConfig {
-    readonly subprovider?: string;
-    readonly model?: string;
-    readonly apiKey?: string;
-    readonly baseUrl?: string;
-    readonly tools?: string;
-    readonly thinking?: string;
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
-}
-interface PiCliResolvedConfig {
-    readonly executable: string;
-    readonly subprovider?: string;
-    readonly model?: string;
-    readonly apiKey?: string;
-    readonly baseUrl?: string;
-    readonly tools?: string;
-    readonly thinking?: string;
-    readonly args?: readonly string[];
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
-    readonly systemPrompt?: string;
+interface EvalSummary {
+    /** Total number of test cases */
+    readonly total: number;
+    /** Number of passing test cases (score >= threshold) */
+    readonly passed: number;
+    /** Number of failing test cases (score < threshold) */
+    readonly failed: number;
+    /** Total duration in milliseconds */
+    readonly durationMs: number;
+    /** Mean score across all cases */
+    readonly meanScore: number;
 }
-interface ClaudeResolvedConfig {
-    readonly executable: string;
-    readonly model?: string;
-    readonly systemPrompt?: string;
-    readonly cwd?: string;
-    readonly timeoutMs?: number;
-    readonly maxTurns?: number;
-    readonly maxBudgetUsd?: number;
-    readonly logDir?: string;
-    readonly logFormat?: 'summary' | 'json';
-    /** New stream_log field. false=no stream log (default), 'raw'=per-event, 'summary'=consolidated. */
-    readonly streamLog?: false | 'raw' | 'summary';
+/**
+ * Result of an `evaluate()` call.
+ */
+interface EvalRunResult {
+    /** Individual test case results */
+    readonly results: readonly EvaluationResult[];
+    /** Aggregate summary statistics */
+    readonly summary: EvalSummary;
 }
-interface MockResolvedConfig {
-    readonly response?: string;
-    readonly delayMs?: number;
-    readonly delayMinMs?: number;
-    readonly delayMaxMs?: number;
+/**
+ * Run an evaluation suite against a target provider.
+ *
+ * Accepts either inline test definitions or a path to an EVAL.yaml spec file.
+ * The config shape mirrors the YAML structure — users can translate between
+ * file-based and programmatic usage 1:1.
+ *
+ * @param config - Evaluation configuration
+ * @returns Typed evaluation results with summary statistics
+ *
+ * @example Inline tests with assertions
+ * ```typescript
+ * const { results, summary } = await evaluate({
+ *   tests: [
+ *     {
+ *       id: 'greeting',
+ *       input: 'Say hello',
+ *       assert: [{ type: 'contains', value: 'hello' }],
+ *     },
+ *   ],
+ *   target: { provider: 'mock_agent' },
+ * });
+ * console.log(`${summary.passed}/${summary.total} passed`);
+ * ```
+ *
+ * @example Load from YAML
+ * ```typescript
+ * const { summary } = await evaluate({
+ *   specFile: './evals/my-eval.yaml',
+ *   filter: 'greeting-*',
+ * });
+ * ```
+ */
+declare function evaluate(config: EvalConfig): Promise<EvalRunResult>;
+interface TsEvalResult {
+    readonly config: EvalConfig;
+    readonly filePath: string;
 }
-interface VSCodeResolvedConfig {
-    readonly executable: string;
-    readonly waitForResponse: boolean;
-    readonly dryRun: boolean;
-    readonly subagentRoot?: string;
-    readonly timeoutMs?: number;
+/**
+ * Import a *.eval.ts file and extract the EvalConfig export.
+ * Tries the default export, then the `config` and `evalConfig` named exports, in priority order.
+ */
+declare function loadTsEvalFile(filePath: string): Promise<TsEvalResult>;
+/**
+ * EVAL.yaml → evals.json transpiler.
+ *
+ * Converts an AgentV EVAL.yaml file into Agent Skills evals.json format
+ * for consumption by the skill-creator pipeline.
+ *
+ * Handles both `assertions:` (current) and `assert:` (deprecated alias).
+ */
+interface EvalsJsonCase {
+    id: number;
+    prompt: string;
+    expected_output?: string;
+    files?: string[];
+    should_trigger?: boolean;
+    assertions: string[];
 }
-interface AgentVResolvedConfig {
-    readonly model: string;
-    readonly temperature: number;
+interface EvalsJsonFile {
+    skill_name: string;
+    evals: EvalsJsonCase[];
 }
-/** Base fields shared by all resolved targets. */
-interface ResolvedTargetBase {
-    readonly name: string;
-    readonly graderTarget?: string;
-    readonly workers?: number;
-    readonly providerBatching?: boolean;
-    /**
-     * Whether this target can be executed via executor subagents in subagent mode.
-     * Defaults to `true` for all non-CLI providers. Set `false` in targets.yaml
-     * to force CLI invocation even in subagent mode.
-     */
-    readonly subagentModeAllowed?: boolean;
-    /**
-     * Ordered list of target names to try when the primary target fails after
-     * exhausting retries. Each fallback is attempted in order.
-     */
-    readonly fallbackTargets?: readonly string[];
+/**
+ * Result of transpiling a single EVAL.yaml.
+ * May produce multiple evals.json files (one per skill).
+ */
+interface TranspileResult {
+    /** Map from skill_name → EvalsJsonFile */
+    files: Map<string, EvalsJsonFile>;
+    /** Warning messages accumulated during transpilation */
+    warnings: string[];
 }
-type ResolvedTarget = (ResolvedTargetBase & {
-    readonly kind: 'openai';
-    readonly config: OpenAIResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'openrouter';
-    readonly config: OpenRouterResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'azure';
-    readonly config: AzureResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'anthropic';
-    readonly config: AnthropicResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'gemini';
-    readonly config: GeminiResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'codex';
-    readonly config: CodexResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'copilot-sdk';
-    readonly config: CopilotSdkResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'copilot-cli';
-    readonly config: CopilotCliResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'copilot-log';
-    readonly config: CopilotLogResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'pi-coding-agent';
-    readonly config: PiCodingAgentResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'pi-cli';
-    readonly config: PiCliResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'claude';
-    readonly config: ClaudeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'claude-cli';
-    readonly config: ClaudeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'claude-sdk';
-    readonly config: ClaudeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'mock';
-    readonly config: MockResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'vscode' | 'vscode-insiders';
-    readonly config: VSCodeResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'agentv';
-    readonly config: AgentVResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'cli';
-    readonly config: CliResolvedConfig;
-}) | (ResolvedTargetBase & {
-    readonly kind: 'transcript';
-    readonly config: Record<string, never>;
-});
 /**
- * Optional settings accepted on ALL target definitions regardless of provider.
- * Exported so the targets validator can reuse the same list — adding a field
- * here automatically makes it valid in targets.yaml without a separate update.
+ * Transpile a parsed EVAL.yaml object into one or more evals.json objects.
+ *
+ * @param suite  Parsed YAML object (already loaded, no file I/O here)
+ * @param source Source identifier for error messages (e.g. file path)
  */
-declare const COMMON_TARGET_SETTINGS: readonly ["use_target", "provider_batching", "subagent_mode_allowed", "fallback_targets"];
-declare function resolveDelegatedTargetDefinition(name: string, definitions: ReadonlyMap<string, TargetDefinition>, env?: EnvLookup): TargetDefinition | undefined;
-declare function resolveTargetDefinition(definition: TargetDefinition, env?: EnvLookup, evalFilePath?: string, options?: {
-    readonly emitDeprecationWarnings?: boolean;
-}): ResolvedTarget;
+declare function transpileEvalYaml(suite: unknown, source?: string): TranspileResult;
 /**
- * Extensible provider registry.
+ * Transpile an EVAL.yaml file into one or more evals.json objects.
+ * Returns a TranspileResult whose `files` map is keyed by skill name; use getOutputFilenames() to derive output file names.
  *
- * Replaces the hardcoded switch/case dispatch in createProvider() with
- * a registry of named factory functions. Built-in providers are registered
- * at startup; users can add custom providers via the registry API or by
- * dropping files in `.agentv/providers/`.
+ * @param evalYamlPath  Absolute path to the EVAL.yaml file
+ */
+declare function transpileEvalYamlFile(evalYamlPath: string): TranspileResult;
+/**
+ * Determine the output filename(s) for a transpile result.
+ * Single skill → "evals.json"
+ * Multiple skills → "<skill>.evals.json"
  */
+declare function getOutputFilenames(result: TranspileResult): Map<string, string>;
+declare function fileExists(filePath: string): Promise<boolean>;
 /**
- * Factory function that creates a Provider instance from a resolved target.
+ * Normalize line endings to LF (\n).
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
  */
-type ProviderFactoryFn = (target: ResolvedTarget) => Provider;
+declare function normalizeLineEndings(content: string): string;
 /**
- * Registry of provider factory functions keyed by provider kind.
- *
- * Built-in providers are registered at startup. Custom providers can be
- * registered via the `register()` method.
+ * Read a text file and normalize line endings to LF (\n).
+ * This ensures consistent behavior across Windows (CRLF) and Unix (LF) systems.
  */
-declare class ProviderRegistry {
-    private readonly factories;
-    /** Register a factory function for a provider kind. */
-    register(kind: string, factory: ProviderFactoryFn): this;
-    /** Get the factory function for a provider kind. */
-    get(kind: string): ProviderFactoryFn | undefined;
-    /** Check if a factory is registered for the given kind. */
-    has(kind: string): boolean;
-    /** List all registered provider kind names. */
-    list(): string[];
-    /**
-     * Create a provider instance from a resolved target.
-     * Falls back to CLI provider for unknown kinds (custom provider escape hatch).
-     */
-    create(target: ResolvedTarget): Provider;
-}
+declare function readTextFile(filePath: string): Promise<string>;
+/**
+ * Read a JSON file and parse it.
+ */
+declare function readJsonFile<T = unknown>(filePath: string): Promise<T>;
+/**
+ * Find git repository root by walking up the directory tree.
+ */
+declare function findGitRoot(startPath: string): Promise<string | null>;
+/**
+ * Build a chain of directories walking from a file's location up to repo root.
+ * Used for discovering configuration files like targets.yaml or config.yaml.
+ */
+declare function buildDirectoryChain(filePath: string, repoRoot: string): readonly string[];
+/**
+ * Build search roots for file resolution, matching yaml-parser behavior.
+ * Searches from eval file directory up to repo root.
+ */
+declare function buildSearchRoots(evalPath: string, repoRoot: string): readonly string[];
+/**
+ * Resolve a file reference using search roots, matching yaml-parser behavior.
+ */
+declare function resolveFileReference(rawValue: string, searchRoots: readonly string[]): Promise<{
+    readonly displayPath: string;
+    readonly resolvedPath?: string;
+    readonly attempted: readonly string[];
+}>;
 declare function readTargetDefinitions(filePath: string): Promise<readonly TargetDefinition[]>;
 declare function listTargetNames(definitions: readonly TargetDefinition[]): readonly string[];
@@ -2631,26 +2912,26 @@ declare const rubricEvaluationSchema: z.ZodObject<{
         reasoning: z.ZodString;
     }, "strip", z.ZodTypeAny, {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }, {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }>, "many">;
     overall_reasoning: z.ZodString;
 }, "strip", z.ZodTypeAny, {
     checks: {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }[];
     overall_reasoning: string;
 }, {
     checks: {
         id: string;
-        reasoning: string;
         satisfied: boolean;
+        reasoning: string;
     }[];
     overall_reasoning: string;
 }>;
@@ -3105,244 +3386,6 @@ interface RunEvaluationOptions {
 declare function runEvaluation(options: RunEvaluationOptions): Promise<readonly EvaluationResult[]>;
 declare function runEvalCase(options: RunEvalCaseOptions): Promise<EvaluationResult>;
-/**
- * Types for inline assertion functions used in the evaluate() API.
- *
- * Inline functions are the escape hatch for custom evaluation logic
- * that doesn't fit a built-in grader type. For built-in assertions
- * (contains, regex, is-json, etc.), use config objects instead:
- *
- *   assert: [{ type: 'contains', value: 'hello' }]
- *
- * Inline functions are for custom logic:
- *
- *   assert: [({ output }) => ({ name: 'len', score: output.length > 5 ? 1 : 0 })]
- */
-/** Context passed to inline assertion functions */
-interface AssertContext {
-    readonly input: string;
-    readonly output: string;
-    readonly expectedOutput?: string;
-    readonly criteria?: string;
-    readonly metadata?: Record<string, unknown>;
-}
-/** Result from an inline assertion function */
-interface AssertResult {
-    readonly name: string;
-    readonly score: number;
-    readonly metadata?: Record<string, unknown>;
-}
-/** Inline assertion function signature */
-type AssertFn = (ctx: AssertContext) => AssertResult | Promise<AssertResult>;
-/**
- * Programmatic API for running evaluations.
- *
- * Provides `evaluate()` — a high-level function for using AgentV as a library
- * instead of a CLI. The config shape mirrors the YAML structure for easy
- * translation between file-based and programmatic usage.
- *
- * @example Inline tests with config objects
- * ```typescript
- * import { evaluate } from '@agentv/core';
- *
- * const results = await evaluate({
- *   tests: [
- *     {
- *       id: 'capital',
- *       input: 'What is the capital of France?',
- *       expectedOutput: 'Paris',
- *       assert: [{ type: 'contains', value: 'Paris' }],
- *     },
- *   ],
- *   target: { provider: 'mock_agent' },
- * });
- *
- * console.log(results.summary.passed, 'passed');
- * ```
- *
- * @example Inline tests with task function and custom assertion
- * ```typescript
- * import { evaluate } from '@agentv/core';
- *
- * const { summary } = await evaluate({
- *   tests: [
- *     {
- *       id: 'echo',
- *       input: 'hello',
- *       expectedOutput: 'Echo: hello',
- *       assert: [
- *         { type: 'contains', value: 'hello' },
- *         { type: 'equals' },
- *         ({ output }) => ({ name: 'custom', score: output.length > 0 ? 1 : 0 }),
- *       ],
- *     },
- *   ],
- *   task: async (input) => `Echo: ${input}`,
- * });
- * ```
- *
- * @example File-based
- * ```typescript
- * const results = await evaluate({
- *   specFile: './evals/EVAL.yaml',
- *   target: { provider: 'claude_agent' },
- * });
- * ```
- *
- * @module
- */
-/**
- * Inline test definition for the programmatic API.
- * Mirrors the YAML test structure.
- */
-interface EvalTestInput {
-    /** Unique test identifier */
-    readonly id: string;
-    /** What the response should accomplish */
-    readonly criteria?: string;
-    /** Input to the agent (string or message array) */
-    readonly input: string | readonly {
-        role: string;
-        content: string;
-    }[];
-    /** Expected reference output (camelCase preferred) */
-    readonly expectedOutput?: string;
-    /** @deprecated Use `expectedOutput` instead */
-    readonly expected_output?: string;
-    /** Assertion graders — accepts factory functions, config objects, or inline functions */
-    readonly assert?: readonly AssertEntry[];
-    /** Arbitrary metadata */
-    readonly metadata?: Record<string, unknown>;
-}
-/**
- * Inline assertion definition for the programmatic API.
- * Matches the YAML `assert` block structure.
- */
-interface EvalAssertionInput {
-    /** Assertion type (e.g., 'contains', 'llm-grader', 'code-grader') */
-    readonly type: string;
-    /** Display name */
-    readonly name?: string;
-    /** Value for deterministic assertions (contains, equals, regex) */
-    readonly value?: string;
-    /** Weight for scoring */
-    readonly weight?: number;
-    /** Whether this assertion is required to pass */
-    readonly required?: boolean | number;
-    /** Minimum score (0-1) for this evaluator to pass. Independent of `required` gate. */
-    readonly min_score?: number;
-    /** Prompt file for llm_grader */
-    readonly prompt?: string;
-    /** Script for code_grader */
-    readonly script?: string | readonly string[];
-    /** Additional config passed to the assertion */
-    readonly config?: Record<string, unknown>;
-    /** Nested assertions for composite type */
-    readonly assert?: readonly EvalAssertionInput[];
-    /** Rubric criteria for rubrics type */
-    readonly criteria?: readonly (string | {
-        id?: string;
-        outcome: string;
-        weight?: number;
-    })[];
-    /** Additional properties */
-    readonly [key: string]: unknown;
-}
-/** Assert entry: inline function or config object */
-type AssertEntry = AssertFn | EvalAssertionInput;
-/**
- * Configuration for `evaluate()`.
- * Accepts either inline tests or a spec file path.
- */
-interface EvalConfig {
-    /** Inline test definitions (mutually exclusive with specFile) */
-    readonly tests?: readonly EvalTestInput[];
-    /** Path to an EVAL.yaml spec file (mutually exclusive with tests) */
-    readonly specFile?: string;
-    /** Target provider configuration */
-    readonly target?: TargetDefinition;
-    /** Custom task function — mutually exclusive with target */
-    readonly task?: (input: string) => string | Promise<string>;
-    /** Suite-level assertions applied to all tests */
-    readonly assert?: readonly AssertEntry[];
-    /** Filter tests by ID pattern(s) (glob supported). Arrays use OR logic. */
-    readonly filter?: string | readonly string[];
-    /** Maximum concurrent workers (default: 3) */
-    readonly workers?: number;
-    /** Maximum retries on failure (default: 2) */
-    readonly maxRetries?: number;
-    /** Agent timeout in milliseconds. No timeout if not set. */
-    readonly agentTimeoutMs?: number;
-    /** Enable response caching */
-    readonly cache?: boolean;
-    /** Verbose logging */
-    readonly verbose?: boolean;
-    /** Callback for each completed result */
-    readonly onResult?: (result: EvaluationResult) => void;
-    /** Score threshold for pass/fail (0-1). Default: 0.8 (DEFAULT_THRESHOLD). */
-    readonly threshold?: number;
-}
-/**
- * Summary statistics for an evaluation run.
- */
-interface EvalSummary {
-    /** Total number of test cases */
-    readonly total: number;
-    /** Number of passing test cases (score >= threshold) */
-    readonly passed: number;
-    /** Number of failing test cases (score < threshold) */
-    readonly failed: number;
-    /** Total duration in milliseconds */
-    readonly durationMs: number;
-    /** Mean score across all cases */
-    readonly meanScore: number;
-}
-/**
- * Result of an `evaluate()` call.
- */
-interface EvalRunResult {
-    /** Individual test case results */
-    readonly results: readonly EvaluationResult[];
-    /** Aggregate summary statistics */
-    readonly summary: EvalSummary;
-}
-/**
- * Run an evaluation suite against a target provider.
- *
- * Accepts either inline test definitions or a path to an EVAL.yaml spec file.
- * The config shape mirrors the YAML structure — users can translate between
- * file-based and programmatic usage 1:1.
- *
- * @param config - Evaluation configuration
- * @returns Typed evaluation results with summary statistics
- *
- * @example Inline tests with assertions
- * ```typescript
- * const { results, summary } = await evaluate({
- *   tests: [
- *     {
- *       id: 'greeting',
- *       input: 'Say hello',
- *       assert: [{ type: 'contains', value: 'hello' }],
- *     },
- *   ],
- *   target: { provider: 'mock_agent' },
- * });
- * console.log(`${summary.passed}/${summary.total} passed`);
- * ```
- *
- * @example Load from YAML
- * ```typescript
- * const { summary } = await evaluate({
- *   specFile: './evals/my-eval.yaml',
- *   filter: 'greeting-*',
- * });
- * ```
- */
-declare function evaluate(config: EvalConfig): Promise<EvalRunResult>;
 /**
  * Typed configuration file support for AgentV.
  *
@@ -4553,4 +4596,4 @@ type AgentKernel = {
 };
 declare function createAgentKernel(): AgentKernel;
-export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type BenchmarkEntry, type BenchmarkRegistry, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type EvalRunResult, type EvalSuiteResult, 
type EvalSummary, type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexGraderConfig, type RepoCheckout, type RepoClone, type 
RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsExportConfig, type ResultsRepoCachePaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addBenchmark, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, createBuiltinProviderRegistry, createBuiltinRegistry, 
createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveBenchmarkId, deriveCategory, detectFormat, directorySizeBytes, discoverAssertions, discoverBenchmarks, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getBenchmark, getBenchmarksRegistryPath, getOutputFilenames, getResultsRepoCachePaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadBenchmarkRegistry, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsExportConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseJsonFromText, parseJsonSafe, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeBenchmark, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, 
runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveBenchmarkRegistry, scanRepoDeps, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchBenchmark, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };
+export { type AcquireWorkspaceOptions, type AgentKernel, type AgentVConfig$1 as AgentVConfig, type AgentVResolvedConfig, type AgentVConfig as AgentVTsConfig, type AgentVConfig$1 as AgentVYamlConfig, type AnthropicResolvedConfig, type ApiFormat, type ArgsMatchMode, type AssertContext, type AssertEntry, type AssertFn, type AssertResult, type AssertionEntry, type AssertionResult, type AssistantTestMessage, type AzureResolvedConfig, type BenchmarkEntry, type BenchmarkRegistry, COMMON_TARGET_SETTINGS, type CacheConfig, type CheckedOutResultsRepoBranch, type ChildGraderResult, type ClaudeDiscoverOptions, type ClaudeResolvedConfig, type ClaudeSession, type CliResolvedConfig, CodeGrader, type CodeGraderConfig, type CodeGraderOptions, type CodexDiscoverOptions, type CodexSession, type CommandExecutor, type CompositeAggregatorConfig, CompositeGrader, type CompositeGraderConfig, type CompositeGraderOptions, type ConfidenceIntervalAggregation, type ContainsAllGraderConfig, type ContainsAnyGraderConfig, type ContainsGraderConfig, type Content, type ContentFile, type ContentImage, type ContentPreprocessorConfig, type ContentText, type ConversationAggregation, type ConversationMode, type ConversationTurn, type ConversationTurnInput, type CopilotCliResolvedConfig, type DiscoverOptions as CopilotDiscoverOptions, type CopilotLogResolvedConfig, type CopilotSdkResolvedConfig, type CopilotSession, type CopilotSessionMeta, CostGrader, type CostGraderConfig, type CostGraderOptions, type CreateContainerOptions, DEFAULT_CATEGORY, DEFAULT_EVAL_PATTERNS, DEFAULT_EXPLORATION_TOOLS, DEFAULT_GRADER_TEMPLATE, DEFAULT_THRESHOLD, type DependencyFailurePolicy, type DependencyResult, type DepsScanResult, DeterministicAssertionGrader, type DockerWorkspaceConfig, DockerWorkspaceProvider, type EndsWithGraderConfig, type EnsureSubagentsOptions, type EnsureSubagentsResult, type EnvLookup, type EqualsGraderConfig, type EvalAssertionInput, type EvalCase, type EvalConfig, type EvalMetadata, type 
EvalRunResult, type EvalSuiteResult, type EvalSummary, type EvalTargetRef, type EvalTest, type EvalTestInput, type EvalsJsonCase, type EvalsJsonFile, type EvaluationCache, type EvaluationContext, type EvaluationResult, type EvaluationScore, type EvaluationVerdict, type ExecInContainerOptions, type ExecResult, type ExecutionDefaults, type ExecutionError, type ExecutionMetrics, ExecutionMetricsGrader, type ExecutionMetricsGraderConfig, type ExecutionMetricsGraderOptions, type ExecutionStatus, type FailOnError, type FailureStage, FieldAccuracyGrader, type FieldAccuracyGraderConfig, type FieldAccuracyGraderOptions, type FieldAggregationType, type FieldConfig, type FieldMatchType, type GeminiResolvedConfig, type GenerateRubricsOptions, type Grader, type GraderConfig, type GraderDispatchContext, type GraderFactory, type GraderFactoryFn, type GraderKind, GraderRegistry, type GraderResult, type IcontainsAllGraderConfig, type IcontainsAnyGraderConfig, type IcontainsGraderConfig, type InlineAssertEvaluatorConfig, type IsJsonGraderConfig, type JsonObject, type JsonPrimitive, type JsonValue, LatencyGrader, type LatencyGraderConfig, type LatencyGraderOptions, LlmGrader, type LlmGraderConfig, type LlmGraderOptions, type LlmGraderPromptAssembly, type LocalPathValidationError, type MeanAggregation, type Message, type MockResolvedConfig, OTEL_BACKEND_PRESETS, type OpenAIResolvedConfig, type OpenRouterResolvedConfig, type OtelBackendPreset, type OtelExportOptions, OtelStreamingObserver, OtelTraceExporter, OtlpJsonFileExporter, type OutputMessage, PASS_THRESHOLD, type ParsedCopilotSession, type PassAtKAggregation, type PiCliResolvedConfig, type PiCodingAgentResolvedConfig, type PoolSlot, type PreparedResultsRepoBranch, type ProgressEvent, type PromptInputs, type PromptScriptConfig, type Provider, type ProviderFactoryFn, type ProviderKind, ProviderRegistry, type ProviderRequest, type ProviderResponse, type ProviderStreamCallbacks, type ProviderTokenUsage, type RegexGraderConfig, type 
RepoCheckout, type RepoClone, type RepoConfig, type RepoDep, RepoManager, type RepoSource, type ResolvedTarget, type ResolvedWorkspaceTemplate, ResponseCache, type ResultsExportConfig, type ResultsRepoCachePaths, type ResultsRepoStatus, type RubricItem, type RubricsEvaluatorConfig, type RunEvalCaseOptions, type RunEvaluationOptions, type ScoreRange, type ScriptExecutionContext, SkillTriggerGrader, type SkillTriggerGraderConfig, type StartsWithGraderConfig, type SystemTestMessage, TEST_MESSAGE_ROLES, type TargetAccessConfig, type TargetDefinition, type TargetHooksConfig, TemplateNotDirectoryError, TemplateNotFoundError, type TestMessage, type TestMessageContent, type TestMessageRole, type TokenUsage, TokenUsageGrader, type TokenUsageGraderConfig, type TokenUsageGraderOptions, type ToolCall, type ToolTestMessage, type ToolTrajectoryExpectedItem, ToolTrajectoryGrader, type ToolTrajectoryGraderConfig, type ToolTrajectoryGraderOptions, type TraceComputeResult, type TraceSummary, type TranscriptEntry, type TranscriptJsonLine, TranscriptProvider, type TranscriptReplayEntry, type TranscriptSource, type TranspileResult, type TrialAggregation, type TrialResult, type TrialStrategy, type TrialsConfig, type TsEvalResult, type TurnFailurePolicy, type UserTestMessage, type VSCodeResolvedConfig, type WorkspaceConfig, WorkspaceCreationError, type WorkspaceHookConfig, type WorkspaceHooksConfig, WorkspacePoolManager, type WorkspaceScriptConfig, addBenchmark, assembleLlmGraderPrompt, avgToolDurationMs, buildDirectoryChain, buildOutputSchema, buildPromptInputs, buildRubricOutputSchema, buildScoreRangeOutputSchema, buildSearchRoots, calculateRubricScore, captureFileChanges, checkoutResultsRepoBranch, clampScore, cleanupEvalWorkspaces, cleanupWorkspace, commitAndPushResultsBranch, computeTraceSummary, computeWorkspaceFingerprint, consumeClaudeLogEntries, consumeCodexLogEntries, consumeCopilotCliLogEntries, consumeCopilotSdkLogEntries, consumePiLogEntries, createAgentKernel, 
createBuiltinProviderRegistry, createBuiltinRegistry, createDraftResultsPr, createProvider, createTempWorkspace, deepEqual, defineConfig, deriveBenchmarkId, deriveCategory, detectFormat, directorySizeBytes, discoverAssertions, discoverBenchmarks, discoverClaudeSessions, discoverCodexSessions, discoverCopilotSessions, discoverGraders, discoverProviders, ensureResultsRepoClone, ensureVSCodeSubagents, evaluate, executeScript, executeWorkspaceScript, explorationRatio, extractCacheConfig, extractFailOnError, extractImageBlocks, extractJsonBlob, extractLastAssistantContent, extractTargetFromSuite, extractTargetRefsFromSuite, extractTargetsFromSuite, extractTargetsFromTestCase, extractThreshold, extractTrialsConfig, extractWorkersFromSuite, fileExists, findGitRoot, freeformEvaluationSchema, generateRubrics, getAgentvConfigDir, getAgentvHome, getBenchmark, getBenchmarksRegistryPath, getOutputFilenames, getResultsRepoCachePaths, getResultsRepoStatus, getSubagentsRoot, getTextContent, getTraceStateRoot, getWorkspacePath, getWorkspacePoolRoot, getWorkspacesRoot, groupTranscriptJsonLines, initializeBaseline, isAgentSkillsFormat, isContent, isContentArray, isGraderKind, isJsonObject, isJsonValue, isNonEmptyString, isTestMessage, isTestMessageRole, listTargetNames, loadBenchmarkRegistry, loadConfig, loadEvalCaseById, loadEvalCases, loadEvalSuite, loadTestById, loadTestSuite, loadTests, loadTsConfig, loadTsEvalFile, mergeExecutionMetrics, negateScore, normalizeLineEndings, normalizeResultsExportConfig, parseAgentSkillsEvals, parseClaudeSession, parseCodexSession, parseCopilotEvents, parseJsonFromText, parseJsonSafe, prepareResultsRepoBranch, pushResultsRepoBranch, readJsonFile, readTargetDefinitions, readTestSuiteMetadata, readTextFile, readTranscriptFile, readTranscriptJsonl, removeBenchmark, resolveAndCreateProvider, resolveDelegatedTargetDefinition, resolveFileReference, resolveResultsRepoRunsDir, resolveResultsRepoUrl, resolveTargetDefinition, resolveWorkspaceTemplate, 
rubricEvaluationSchema, runContainsAllAssertion, runContainsAnyAssertion, runContainsAssertion, runEndsWithAssertion, runEqualsAssertion, runEvalCase, runEvaluation, runIcontainsAllAssertion, runIcontainsAnyAssertion, runIcontainsAssertion, runIsJsonAssertion, runRegexAssertion, runStartsWithAssertion, saveBenchmarkRegistry, scanRepoDeps, scoreToVerdict, shouldEnableCache, shouldSkipCacheForTemperature, stageResultsArtifacts, subscribeToClaudeLogEntries, subscribeToCodexLogEntries, subscribeToCopilotCliLogEntries, subscribeToCopilotSdkLogEntries, subscribeToPiLogEntries, substituteVariables, syncResultsRepo, toCamelCaseDeep, toSnakeCaseDeep, toTranscriptJsonLines, tokensPerTool, touchBenchmark, transpileEvalYaml, transpileEvalYamlFile, trimBaselineResult };