npm - @gleanwork/mcp-server-tester - Versions diffs - 1.0.0-beta.2 → 1.0.0-beta.4 - Mend

@gleanwork/mcp-server-tester 1.0.0-beta.2 → 1.0.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/README.md +24 -22
package/dist/cli/index.js +38 -12
package/dist/fixtures/mcp.d.ts +14 -6
package/dist/fixtures/mcp.js +9 -6
package/dist/fixtures/mcp.js.map +1 -1
package/dist/index.cjs +69 -47
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +208 -1175
package/dist/index.d.ts +208 -1175
package/dist/index.js +69 -47
package/dist/index.js.map +1 -1
package/dist/reporters/mcpReporter.cjs.map +1 -1
package/dist/reporters/mcpReporter.js.map +1 -1
package/dist/reporters/ui-dist/app.js +107 -7
package/dist/reporters/ui-dist/styles.css +1 -1
package/package.json +11 -8
package/src/reporters/ui-dist/app.js +0 -174
package/src/reporters/ui-dist/index.html +0 -28
package/src/reporters/ui-dist/styles.css +0 -1

package/dist/index.d.ts (changed)

@@ -233,314 +233,64 @@ type MCPConfig = StdioMCPConfig | HttpMCPConfig;
 /**
  * Union schema for MCPConfig (validates based on transport type)
  */
-declare const MCPConfigSchema: z.ZodDiscriminatedUnion<"transport", [z.ZodObject<{
+declare const MCPConfigSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
     transport: z.ZodLiteral<"stdio">;
     command: z.ZodString;
-    args: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
+    args: z.ZodOptional<z.ZodArray<z.ZodString>>;
     cwd: z.ZodOptional<z.ZodString>;
     env: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
     capabilities: z.ZodOptional<z.ZodObject<{
         sampling: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
         roots: z.ZodOptional<z.ZodObject<{
             listChanged: z.ZodBoolean;
-        }, "strip", z.ZodTypeAny, {
-            listChanged: boolean;
-        }, {
-            listChanged: boolean;
-        }>>;
-    }, "strip", z.ZodTypeAny, {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    }, {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    }>>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>>;
     connectTimeoutMs: z.ZodOptional<z.ZodNumber>;
     requestTimeoutMs: z.ZodOptional<z.ZodNumber>;
     callTimeoutMs: z.ZodOptional<z.ZodNumber>;
     quiet: z.ZodOptional<z.ZodBoolean>;
-}, "strip", z.ZodTypeAny, {
-    transport: "stdio";
-    command: string;
-    args?: string[] | undefined;
-    cwd?: string | undefined;
-    env?: Record<string, string> | undefined;
-    capabilities?: {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    } | undefined;
-    connectTimeoutMs?: number | undefined;
-    requestTimeoutMs?: number | undefined;
-    callTimeoutMs?: number | undefined;
-    quiet?: boolean | undefined;
-}, {
-    transport: "stdio";
-    command: string;
-    args?: string[] | undefined;
-    cwd?: string | undefined;
-    env?: Record<string, string> | undefined;
-    capabilities?: {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    } | undefined;
-    connectTimeoutMs?: number | undefined;
-    requestTimeoutMs?: number | undefined;
-    callTimeoutMs?: number | undefined;
-    quiet?: boolean | undefined;
-}>, z.ZodObject<{
+}, z.core.$strip>, z.ZodObject<{
     transport: z.ZodLiteral<"http">;
-    serverUrl: z.ZodEffects<z.ZodString, string, string>;
+    serverUrl: z.ZodString;
     headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
     capabilities: z.ZodOptional<z.ZodObject<{
         sampling: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
         roots: z.ZodOptional<z.ZodObject<{
             listChanged: z.ZodBoolean;
-        }, "strip", z.ZodTypeAny, {
-            listChanged: boolean;
-        }, {
-            listChanged: boolean;
-        }>>;
-    }, "strip", z.ZodTypeAny, {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    }, {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    }>>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>>;
     connectTimeoutMs: z.ZodOptional<z.ZodNumber>;
     requestTimeoutMs: z.ZodOptional<z.ZodNumber>;
     callTimeoutMs: z.ZodOptional<z.ZodNumber>;
-    auth: z.ZodOptional<z.ZodEffects<z.ZodObject<{
+    auth: z.ZodOptional<z.ZodObject<{
         accessToken: z.ZodOptional<z.ZodString>;
         oauth: z.ZodOptional<z.ZodObject<{
             serverUrl: z.ZodString;
-            scopes: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
+            scopes: z.ZodOptional<z.ZodArray<z.ZodString>>;
             resource: z.ZodOptional<z.ZodString>;
             authStatePath: z.ZodOptional<z.ZodString>;
             clientId: z.ZodOptional<z.ZodString>;
             clientSecret: z.ZodOptional<z.ZodString>;
             redirectUri: z.ZodOptional<z.ZodString>;
-        }, "strip", z.ZodTypeAny, {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        }, {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        }>>;
+        }, z.core.$strip>>;
         clientCredentials: z.ZodOptional<z.ZodObject<{
             clientId: z.ZodOptional<z.ZodString>;
             clientSecret: z.ZodOptional<z.ZodString>;
             tokenEndpoint: z.ZodOptional<z.ZodString>;
-            scopes: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
-        }, "strip", z.ZodTypeAny, {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        }, {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        }>>;
-    }, "strip", z.ZodTypeAny, {
-        accessToken?: string | undefined;
-        oauth?: {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        } | undefined;
-        clientCredentials?: {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        } | undefined;
-    }, {
-        accessToken?: string | undefined;
-        oauth?: {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        } | undefined;
-        clientCredentials?: {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        } | undefined;
-    }>, {
-        accessToken?: string | undefined;
-        oauth?: {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        } | undefined;
-        clientCredentials?: {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        } | undefined;
-    }, {
-        accessToken?: string | undefined;
-        oauth?: {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        } | undefined;
-        clientCredentials?: {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        } | undefined;
-    }>>;
+            scopes: z.ZodOptional<z.ZodArray<z.ZodString>>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>>;
     proxy: z.ZodOptional<z.ZodObject<{
         url: z.ZodString;
-    }, "strip", z.ZodTypeAny, {
-        url: string;
-    }, {
-        url: string;
-    }>>;
+    }, z.core.$strip>>;
     retryAttempts: z.ZodOptional<z.ZodNumber>;
     tls: z.ZodOptional<z.ZodObject<{
         ca: z.ZodOptional<z.ZodString>;
         cert: z.ZodOptional<z.ZodString>;
         key: z.ZodOptional<z.ZodString>;
         rejectUnauthorized: z.ZodOptional<z.ZodBoolean>;
-    }, "strip", z.ZodTypeAny, {
-        ca?: string | undefined;
-        cert?: string | undefined;
-        key?: string | undefined;
-        rejectUnauthorized?: boolean | undefined;
-    }, {
-        ca?: string | undefined;
-        cert?: string | undefined;
-        key?: string | undefined;
-        rejectUnauthorized?: boolean | undefined;
-    }>>;
-}, "strip", z.ZodTypeAny, {
-    serverUrl: string;
-    transport: "http";
-    capabilities?: {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    } | undefined;
-    connectTimeoutMs?: number | undefined;
-    requestTimeoutMs?: number | undefined;
-    callTimeoutMs?: number | undefined;
-    headers?: Record<string, string> | undefined;
-    auth?: {
-        accessToken?: string | undefined;
-        oauth?: {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        } | undefined;
-        clientCredentials?: {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        } | undefined;
-    } | undefined;
-    proxy?: {
-        url: string;
-    } | undefined;
-    retryAttempts?: number | undefined;
-    tls?: {
-        ca?: string | undefined;
-        cert?: string | undefined;
-        key?: string | undefined;
-        rejectUnauthorized?: boolean | undefined;
-    } | undefined;
-}, {
-    serverUrl: string;
-    transport: "http";
-    capabilities?: {
-        sampling?: Record<string, unknown> | undefined;
-        roots?: {
-            listChanged: boolean;
-        } | undefined;
-    } | undefined;
-    connectTimeoutMs?: number | undefined;
-    requestTimeoutMs?: number | undefined;
-    callTimeoutMs?: number | undefined;
-    headers?: Record<string, string> | undefined;
-    auth?: {
-        accessToken?: string | undefined;
-        oauth?: {
-            serverUrl: string;
-            scopes?: string[] | undefined;
-            resource?: string | undefined;
-            authStatePath?: string | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            redirectUri?: string | undefined;
-        } | undefined;
-        clientCredentials?: {
-            scopes?: string[] | undefined;
-            clientId?: string | undefined;
-            clientSecret?: string | undefined;
-            tokenEndpoint?: string | undefined;
-        } | undefined;
-    } | undefined;
-    proxy?: {
-        url: string;
-    } | undefined;
-    retryAttempts?: number | undefined;
-    tls?: {
-        ca?: string | undefined;
-        cert?: string | undefined;
-        key?: string | undefined;
-        rejectUnauthorized?: boolean | undefined;
-    } | undefined;
-}>]>;
+    }, z.core.$strip>>;
+}, z.core.$strip>], "transport">;
 /**
  * Validates an MCPConfig object
  *
@@ -1790,9 +1540,9 @@ declare function validateError(response: unknown, expected?: boolean | string |
 declare function validateSize(response: unknown, options: SizeValidatorOptions): ValidationResult;
 /**
- * Tool call validators for llm_host simulation results.
+ * Tool call validators for mcp_host simulation results.
  *
- * These validators extract the tool call trace from an LLMHostSimulationResult
+ * These validators extract the tool call trace from an MCPHostSimulationResult
  * and apply assertions against expected call lists and counts.
  */
@@ -1811,16 +1561,16 @@ interface ToolCallCountOptions {
     exact?: number;
 }
 /**
- * Validates tool calls made during an LLM host simulation.
+ * Validates tool calls made during an MCP host simulation.
  *
- * @param response - Must be an LLMHostSimulationResult (from llm_host mode)
+ * @param response - Must be an MCPHostSimulationResult (from mcp_host mode)
  * @param expectation - Expected tool call specification
  */
 declare function validateToolCalls(response: unknown, expectation: ToolCallExpectation): ValidationResult;
 /**
- * Validates the number of tool calls made during an LLM host simulation.
+ * Validates the number of tool calls made during an MCP host simulation.
  *
- * @param response - Must be an LLMHostSimulationResult (from llm_host mode)
+ * @param response - Must be an MCPHostSimulationResult (from mcp_host mode)
  * @param options - Count constraints (min, max, exact)
  */
 declare function validateToolCallCount(response: unknown, options: ToolCallCountOptions): ValidationResult;
@@ -2223,7 +1973,7 @@ declare global {
              */
             toSatisfyToolPredicate(predicate: ToolPredicate, description?: string): Promise<R>;
             /**
-             * Validates which tools the LLM called during an llm_host simulation.
+             * Validates which tools the LLM called during a mcp_host simulation.
              *
              * @example
              * ```typescript
@@ -2235,7 +1985,7 @@ declare global {
              */
             toHaveToolCalls(expectation: ToolCallExpectation): R;
             /**
-             * Validates the number of tool calls made during an llm_host simulation.
+             * Validates the number of tool calls made during a mcp_host simulation.
              *
              * @example
              * ```typescript
@@ -2486,6 +2236,14 @@ declare function toMatchToolPattern(this: {
 /**
  * Creates the toMatchToolSnapshot matcher function
  *
+ * @remarks
+ * **Requires Playwright test context.** This matcher calls `expect(content).toMatchSnapshot()`
+ * internally, which only works inside a Playwright test (i.e., when `testInfo` is available).
+ * Calling it outside a Playwright test will throw a cryptic context error.
+ *
+ * To test sanitizer logic without a Playwright context, use the exported `applySanitizers`
+ * function directly.
+ *
  * Note: This is an async matcher that uses Playwright's snapshot testing.
  */
 declare function toMatchToolSnapshot(this: {
@@ -2595,7 +2353,7 @@ declare function toSatisfyToolPredicate(this: {
 /**
  * toHaveToolCalls Matcher
  *
- * Validates which tools the LLM called during an llm_host simulation.
+ * Validates which tools the LLM called during a mcp_host simulation.
  */
 /**
@@ -2611,7 +2369,7 @@ declare function toHaveToolCalls(this: {
 /**
  * toHaveToolCallCount Matcher
  *
- * Validates the number of tool calls made during an llm_host simulation.
+ * Validates the number of tool calls made during a mcp_host simulation.
  */
 /**
@@ -2720,9 +2478,9 @@ interface MCPAuthFixtures {
 declare const test: playwright_test.TestType<playwright_test.PlaywrightTestArgs & playwright_test.PlaywrightTestOptions & MCPAuthFixtures, playwright_test.PlaywrightWorkerArgs & playwright_test.PlaywrightWorkerOptions>;
 /**
- * Types and interfaces for LLM host simulation mode
+ * Types and interfaces for MCP host simulation mode
  *
- * This module provides types for testing MCP servers through LLM hosts,
+ * This module provides types for testing MCP servers through MCP hosts,
  * validating tool descriptions, parameter clarity, and discoverability.
  */
@@ -2751,9 +2509,9 @@ type LLMProvider = 'openai' | 'anthropic' | 'azure' | 'google' | 'mistral' | 'de
  */
  | 'vertex-anthropic';
 /**
- * Configuration for LLM host simulation
+ * Configuration for MCP host simulation
  */
-interface LLMHostConfig {
+interface MCPHostConfig {
     /**
      * LLM provider to use
      */
@@ -2793,9 +2551,9 @@ interface LLMToolCall {
     id?: string;
 }
 /**
- * Result from an LLM host simulation
+ * Result from an MCP host simulation
  */
-interface LLMHostSimulationResult {
+interface MCPHostSimulationResult {
     /** Whether the simulation succeeded */
     success: boolean;
     /** Tool calls made by the LLM */
@@ -2823,33 +2581,33 @@ interface LLMHostSimulationResult {
     mcpDurationMs?: number;
 }
 /**
- * Interface for LLM host simulators.
+ * Interface for MCP host simulators.
  *
  * The only built-in implementation is the Vercel AI SDK orchestrator
- * (src/evals/llmHost/adapters/vercel.ts). Custom implementations can be
+ * (src/evals/mcpHost/adapters/vercel.ts). Custom implementations can be
  * created for specialised testing needs.
  */
-interface LLMHostSimulator {
+interface MCPHostSimulator {
     /**
-     * Simulates an LLM host interacting with an MCP server
+     * Simulates an MCP host interacting with an MCP server
      *
      * @param mcp - MCP fixture API
      * @param scenario - Natural language prompt describing what the LLM should do
-     * @param config - LLM host configuration
+     * @param config - MCP host configuration
      * @returns Simulation result with tool calls and response
      */
-    simulate(mcp: MCPFixtureApi, scenario: string, config: LLMHostConfig): Promise<LLMHostSimulationResult>;
+    simulate(mcp: MCPFixtureApi, scenario: string, config: MCPHostConfig): Promise<MCPHostSimulationResult>;
 }
 /**
  * Evaluation mode
  */
-type EvalMode = 'direct' | 'llm_host';
+type EvalMode = 'direct' | 'mcp_host';
 /**
  * A single eval test case
  *
  * For 'direct' mode: toolName and args are required
- * For 'llm_host' mode: scenario and llmHostConfig are required
+ * For 'mcp_host' mode: scenario and mcpHostConfig are required
  */
 interface EvalCase {
     /**
@@ -2863,40 +2621,40 @@ interface EvalCase {
     /**
      * Evaluation mode
      * - 'direct': Direct API calls to MCP tools (default)
-     * - 'llm_host': LLM-driven tool selection via natural language
+     * - 'mcp_host': LLM-driven tool selection via natural language
      *
      * @default 'direct'
      */
     mode?: EvalMode;
     /**
-     * Name of the MCP tool to call (required for 'direct' mode, optional for 'llm_host' mode)
+     * Name of the MCP tool to call (required for 'direct' mode, optional for 'mcp_host' mode)
      */
     toolName?: string;
     /**
-     * Arguments to pass to the tool (required for 'direct' mode, optional for 'llm_host' mode)
+     * Arguments to pass to the tool (required for 'direct' mode, optional for 'mcp_host' mode)
      */
     args?: Record<string, unknown>;
     /**
-     * Natural language scenario for LLM to execute (optional, required for 'llm_host' mode)
+     * Natural language scenario for LLM to execute (optional, required for 'mcp_host' mode)
      *
      * @example "Get the weather for London and tell me if I need an umbrella"
      */
     scenario?: string;
     /**
-     * LLM host configuration (optional for 'llm_host' mode)
+     * MCP host configuration (optional for 'mcp_host' mode)
      *
      * If not specified, uses default configuration from test environment
      */
-    llmHostConfig?: LLMHostConfig;
+    mcpHostConfig?: MCPHostConfig;
     /**
      * Additional metadata for this test case
      *
-     * For 'llm_host' mode, can include 'expectedToolCalls' for validation
+     * For 'mcp_host' mode, can include 'expectedToolCalls' for validation
      */
     metadata?: Record<string, unknown>;
     /**
-     * Number of times to run this case and compute an accuracy score.
-     * When > 1, `EvalCaseResult.accuracy` is populated and `pass` is determined
+     * Number of times to run this case and compute an assertion pass rate.
+     * When > 1, `EvalCaseResult.assertionPassRate` is populated and `pass` is determined
      * by `accuracyThreshold` rather than a single run.
      * @default 1
      */
@@ -3027,8 +2785,8 @@ interface EvalExpectBlock {
         minBytes?: number;
     };
     /**
-     * Asserts which tools the LLM called during an llm_host simulation.
-     * Only meaningful for llm_host mode — direct mode has no tool call trace.
+     * Asserts which tools the LLM called during a mcp_host simulation.
+     * Only meaningful for mcp_host mode — direct mode has no tool call trace.
      */
     toolsTriggered?: {
         /** Expected tool calls */
@@ -3049,7 +2807,7 @@ interface EvalExpectBlock {
         exclusive?: boolean;
     };
     /**
-     * Asserts the number of tool calls made during an llm_host simulation.
+     * Asserts the number of tool calls made during a mcp_host simulation.
      */
     toolCallCount?: {
         /** Minimum number of tool calls */
@@ -3088,399 +2846,109 @@ interface EvalDataset {
 /**
  * Zod schema for EvalCase
  *
- * toolName and args are optional for llm_host mode (which uses scenario instead)
+ * toolName and args are optional for mcp_host mode (which uses scenario instead)
  */
 declare const EvalCaseSchema: z.ZodObject<{
     id: z.ZodString;
     description: z.ZodOptional<z.ZodString>;
-    mode: z.ZodOptional<z.ZodEnum<["direct", "llm_host"]>>;
+    mode: z.ZodOptional<z.ZodEnum<{
+        direct: "direct";
+        mcp_host: "mcp_host";
+    }>>;
     toolName: z.ZodOptional<z.ZodString>;
     args: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
     scenario: z.ZodOptional<z.ZodString>;
-    llmHostConfig: z.ZodOptional<z.ZodObject<{
-        provider: z.ZodEnum<["openai", "anthropic", "azure", "google", "mistral", "deepseek", "openrouter", "xai", "vertex-anthropic"]>;
+    mcpHostConfig: z.ZodOptional<z.ZodObject<{
+        provider: z.ZodEnum<{
+            openai: "openai";
+            anthropic: "anthropic";
+            azure: "azure";
+            google: "google";
+            mistral: "mistral";
+            deepseek: "deepseek";
+            openrouter: "openrouter";
+            xai: "xai";
+            "vertex-anthropic": "vertex-anthropic";
+        }>;
         apiKeyEnvVar: z.ZodOptional<z.ZodString>;
         model: z.ZodOptional<z.ZodString>;
         maxTokens: z.ZodOptional<z.ZodNumber>;
         temperature: z.ZodOptional<z.ZodNumber>;
         maxToolCalls: z.ZodOptional<z.ZodNumber>;
-    }, "strip", z.ZodTypeAny, {
-        provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-        model?: string | undefined;
-        maxTokens?: number | undefined;
-        apiKeyEnvVar?: string | undefined;
-        temperature?: number | undefined;
-        maxToolCalls?: number | undefined;
-    }, {
-        provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-        model?: string | undefined;
-        maxTokens?: number | undefined;
-        apiKeyEnvVar?: string | undefined;
-        temperature?: number | undefined;
-        maxToolCalls?: number | undefined;
-    }>>;
+    }, z.core.$strip>>;
     metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
     iterations: z.ZodOptional<z.ZodNumber>;
     accuracyThreshold: z.ZodOptional<z.ZodNumber>;
     judgeReps: z.ZodOptional<z.ZodNumber>;
     canonicalAnswer: z.ZodOptional<z.ZodString>;
-    tags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
+    tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
     expect: z.ZodOptional<z.ZodObject<{
         response: z.ZodOptional<z.ZodUnknown>;
         schema: z.ZodOptional<z.ZodString>;
-        containsText: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
-        matchesPattern: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
+        containsText: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
+        matchesPattern: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
         snapshot: z.ZodOptional<z.ZodString>;
-        snapshotSanitizers: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodEnum<["timestamp", "uuid", "iso-date", "objectId", "jwt"]>, z.ZodObject<{
+        snapshotSanitizers: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodEnum<{
+            timestamp: "timestamp";
+            uuid: "uuid";
+            "iso-date": "iso-date";
+            objectId: "objectId";
+            jwt: "jwt";
+        }>, z.ZodObject<{
             pattern: z.ZodString;
             replacement: z.ZodOptional<z.ZodString>;
-        }, "strip", z.ZodTypeAny, {
-            pattern: string;
-            replacement?: string | undefined;
-        }, {
-            pattern: string;
-            replacement?: string | undefined;
-        }>, z.ZodObject<{
-            remove: z.ZodArray<z.ZodString, "many">;
-        }, "strip", z.ZodTypeAny, {
-            remove: string[];
-        }, {
-            remove: string[];
-        }>]>, "many">>;
-        isError: z.ZodOptional<z.ZodUnion<[z.ZodBoolean, z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
+        }, z.core.$strip>, z.ZodObject<{
+            remove: z.ZodArray<z.ZodString>;
+        }, z.core.$strip>]>>>;
+        isError: z.ZodOptional<z.ZodUnion<readonly [z.ZodBoolean, z.ZodString, z.ZodArray<z.ZodString>]>>;
         passesJudge: z.ZodOptional<z.ZodObject<{
-            rubric: z.ZodUnion<[z.ZodEnum<["correctness", "completeness", "groundedness", "instruction-following", "conciseness"]>, z.ZodObject<{
+            rubric: z.ZodUnion<readonly [z.ZodEnum<{
+                correctness: "correctness";
+                completeness: "completeness";
+                groundedness: "groundedness";
+                "instruction-following": "instruction-following";
+                conciseness: "conciseness";
+            }>, z.ZodObject<{
                 text: z.ZodString;
-            }, "strip", z.ZodTypeAny, {
-                text: string;
-            }, {
-                text: string;
-            }>]>;
+            }, z.core.$strip>]>;
             reference: z.ZodOptional<z.ZodUnknown>;
             threshold: z.ZodOptional<z.ZodNumber>;
             reps: z.ZodOptional<z.ZodNumber>;
-            provider: z.ZodOptional<z.ZodEnum<["anthropic", "openai", "google"]>>;
+            provider: z.ZodOptional<z.ZodEnum<{
+                openai: "openai";
+                anthropic: "anthropic";
+                google: "google";
+            }>>;
             model: z.ZodOptional<z.ZodString>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
             maxTokens: z.ZodOptional<z.ZodNumber>;
             temperature: z.ZodOptional<z.ZodNumber>;
             maxBudgetUsd: z.ZodOptional<z.ZodNumber>;
             maxToolOutputSize: z.ZodOptional<z.ZodNumber>;
-        }, "strip", z.ZodTypeAny, {
-            rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                text: string;
-            };
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            maxBudgetUsd?: number | undefined;
-            reference?: unknown;
-            threshold?: number | undefined;
-            reps?: number | undefined;
-            provider?: "openai" | "anthropic" | "google" | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolOutputSize?: number | undefined;
-        }, {
-            rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                text: string;
-            };
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            maxBudgetUsd?: number | undefined;
-            reference?: unknown;
-            threshold?: number | undefined;
-            reps?: number | undefined;
-            provider?: "openai" | "anthropic" | "google" | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolOutputSize?: number | undefined;
-        }>>;
+        }, z.core.$strip>>;
         responseSize: z.ZodOptional<z.ZodObject<{
             maxBytes: z.ZodOptional<z.ZodNumber>;
             minBytes: z.ZodOptional<z.ZodNumber>;
-        }, "strip", z.ZodTypeAny, {
-            maxBytes?: number | undefined;
-            minBytes?: number | undefined;
-        }, {
-            maxBytes?: number | undefined;
-            minBytes?: number | undefined;
-        }>>;
+        }, z.core.$strip>>;
         toolsTriggered: z.ZodOptional<z.ZodObject<{
             calls: z.ZodArray<z.ZodObject<{
                 name: z.ZodString;
                 arguments: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
                 required: z.ZodOptional<z.ZodBoolean>;
-            }, "strip", z.ZodTypeAny, {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }, {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }>, "many">;
-            order: z.ZodOptional<z.ZodEnum<["strict", "any"]>>;
+            }, z.core.$strip>>;
+            order: z.ZodOptional<z.ZodEnum<{
+                any: "any";
+                strict: "strict";
+            }>>;
             exclusive: z.ZodOptional<z.ZodBoolean>;
-        }, "strip", z.ZodTypeAny, {
-            calls: {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }[];
-            order?: "strict" | "any" | undefined;
-            exclusive?: boolean | undefined;
-        }, {
-            calls: {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }[];
-            order?: "strict" | "any" | undefined;
-            exclusive?: boolean | undefined;
-        }>>;
+        }, z.core.$strip>>;
         toolCallCount: z.ZodOptional<z.ZodObject<{
             min: z.ZodOptional<z.ZodNumber>;
             max: z.ZodOptional<z.ZodNumber>;
             exact: z.ZodOptional<z.ZodNumber>;
-        }, "strip", z.ZodTypeAny, {
-            exact?: number | undefined;
-            min?: number | undefined;
-            max?: number | undefined;
-        }, {
-            exact?: number | undefined;
-            min?: number | undefined;
-            max?: number | undefined;
-        }>>;
-    }, "strip", z.ZodTypeAny, {
-        response?: unknown;
-        isError?: string | boolean | string[] | undefined;
-        schema?: string | undefined;
-        snapshot?: string | undefined;
-        toolsTriggered?: {
-            calls: {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }[];
-            order?: "strict" | "any" | undefined;
-            exclusive?: boolean | undefined;
-        } | undefined;
-        toolCallCount?: {
-            exact?: number | undefined;
-            min?: number | undefined;
-            max?: number | undefined;
-        } | undefined;
-        containsText?: string | string[] | undefined;
-        matchesPattern?: string | string[] | undefined;
-        snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-            pattern: string;
-            replacement?: string | undefined;
-        } | {
-            remove: string[];
-        })[] | undefined;
-        passesJudge?: {
-            rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                text: string;
-            };
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            maxBudgetUsd?: number | undefined;
-            reference?: unknown;
-            threshold?: number | undefined;
-            reps?: number | undefined;
-            provider?: "openai" | "anthropic" | "google" | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolOutputSize?: number | undefined;
-        } | undefined;
-        responseSize?: {
-            maxBytes?: number | undefined;
-            minBytes?: number | undefined;
-        } | undefined;
-    }, {
-        response?: unknown;
-        isError?: string | boolean | string[] | undefined;
-        schema?: string | undefined;
-        snapshot?: string | undefined;
-        toolsTriggered?: {
-            calls: {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }[];
-            order?: "strict" | "any" | undefined;
-            exclusive?: boolean | undefined;
-        } | undefined;
-        toolCallCount?: {
-            exact?: number | undefined;
-            min?: number | undefined;
-            max?: number | undefined;
-        } | undefined;
-        containsText?: string | string[] | undefined;
-        matchesPattern?: string | string[] | undefined;
-        snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-            pattern: string;
-            replacement?: string | undefined;
-        } | {
-            remove: string[];
-        })[] | undefined;
-        passesJudge?: {
-            rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                text: string;
-            };
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            maxBudgetUsd?: number | undefined;
-            reference?: unknown;
-            threshold?: number | undefined;
-            reps?: number | undefined;
-            provider?: "openai" | "anthropic" | "google" | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolOutputSize?: number | undefined;
-        } | undefined;
-        responseSize?: {
-            maxBytes?: number | undefined;
-            minBytes?: number | undefined;
-        } | undefined;
-    }>>;
-}, "strip", z.ZodTypeAny, {
-    id: string;
-    args?: Record<string, unknown> | undefined;
-    mode?: "direct" | "llm_host" | undefined;
-    metadata?: Record<string, unknown> | undefined;
-    description?: string | undefined;
-    toolName?: string | undefined;
-    scenario?: string | undefined;
-    llmHostConfig?: {
-        provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-        model?: string | undefined;
-        maxTokens?: number | undefined;
-        apiKeyEnvVar?: string | undefined;
-        temperature?: number | undefined;
-        maxToolCalls?: number | undefined;
-    } | undefined;
-    iterations?: number | undefined;
-    accuracyThreshold?: number | undefined;
-    judgeReps?: number | undefined;
-    canonicalAnswer?: string | undefined;
-    tags?: string[] | undefined;
-    expect?: {
-        response?: unknown;
-        isError?: string | boolean | string[] | undefined;
-        schema?: string | undefined;
-        snapshot?: string | undefined;
-        toolsTriggered?: {
-            calls: {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }[];
-            order?: "strict" | "any" | undefined;
-            exclusive?: boolean | undefined;
-        } | undefined;
-        toolCallCount?: {
-            exact?: number | undefined;
-            min?: number | undefined;
-            max?: number | undefined;
-        } | undefined;
-        containsText?: string | string[] | undefined;
-        matchesPattern?: string | string[] | undefined;
-        snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-            pattern: string;
-            replacement?: string | undefined;
-        } | {
-            remove: string[];
-        })[] | undefined;
-        passesJudge?: {
-            rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                text: string;
-            };
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            maxBudgetUsd?: number | undefined;
-            reference?: unknown;
-            threshold?: number | undefined;
-            reps?: number | undefined;
-            provider?: "openai" | "anthropic" | "google" | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolOutputSize?: number | undefined;
-        } | undefined;
-        responseSize?: {
-            maxBytes?: number | undefined;
-            minBytes?: number | undefined;
-        } | undefined;
-    } | undefined;
-}, {
-    id: string;
-    args?: Record<string, unknown> | undefined;
-    mode?: "direct" | "llm_host" | undefined;
-    metadata?: Record<string, unknown> | undefined;
-    description?: string | undefined;
-    toolName?: string | undefined;
-    scenario?: string | undefined;
-    llmHostConfig?: {
-        provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-        model?: string | undefined;
-        maxTokens?: number | undefined;
-        apiKeyEnvVar?: string | undefined;
-        temperature?: number | undefined;
-        maxToolCalls?: number | undefined;
-    } | undefined;
-    iterations?: number | undefined;
-    accuracyThreshold?: number | undefined;
-    judgeReps?: number | undefined;
-    canonicalAnswer?: string | undefined;
-    tags?: string[] | undefined;
-    expect?: {
-        response?: unknown;
-        isError?: string | boolean | string[] | undefined;
-        schema?: string | undefined;
-        snapshot?: string | undefined;
-        toolsTriggered?: {
-            calls: {
-                name: string;
-                required?: boolean | undefined;
-                arguments?: Record<string, unknown> | undefined;
-            }[];
-            order?: "strict" | "any" | undefined;
-            exclusive?: boolean | undefined;
-        } | undefined;
-        toolCallCount?: {
-            exact?: number | undefined;
-            min?: number | undefined;
-            max?: number | undefined;
-        } | undefined;
-        containsText?: string | string[] | undefined;
-        matchesPattern?: string | string[] | undefined;
-        snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-            pattern: string;
-            replacement?: string | undefined;
-        } | {
-            remove: string[];
-        })[] | undefined;
-        passesJudge?: {
-            rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                text: string;
-            };
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            maxBudgetUsd?: number | undefined;
-            reference?: unknown;
-            threshold?: number | undefined;
-            reps?: number | undefined;
-            provider?: "openai" | "anthropic" | "google" | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolOutputSize?: number | undefined;
-        } | undefined;
-        responseSize?: {
-            maxBytes?: number | undefined;
-            minBytes?: number | undefined;
-        } | undefined;
-    } | undefined;
-}>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>>;
+}, z.core.$strip>;
 /**
  * Zod schema for EvalDataset (without schemas field, as schemas aren't serializable)
  */
@@ -3490,542 +2958,106 @@ declare const EvalDatasetSchema: z.ZodObject<{
     cases: z.ZodArray<z.ZodObject<{
         id: z.ZodString;
         description: z.ZodOptional<z.ZodString>;
-        mode: z.ZodOptional<z.ZodEnum<["direct", "llm_host"]>>;
+        mode: z.ZodOptional<z.ZodEnum<{
+            direct: "direct";
+            mcp_host: "mcp_host";
+        }>>;
         toolName: z.ZodOptional<z.ZodString>;
         args: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
         scenario: z.ZodOptional<z.ZodString>;
-        llmHostConfig: z.ZodOptional<z.ZodObject<{
-            provider: z.ZodEnum<["openai", "anthropic", "azure", "google", "mistral", "deepseek", "openrouter", "xai", "vertex-anthropic"]>;
+        mcpHostConfig: z.ZodOptional<z.ZodObject<{
+            provider: z.ZodEnum<{
+                openai: "openai";
+                anthropic: "anthropic";
+                azure: "azure";
+                google: "google";
+                mistral: "mistral";
+                deepseek: "deepseek";
+                openrouter: "openrouter";
+                xai: "xai";
+                "vertex-anthropic": "vertex-anthropic";
+            }>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
             model: z.ZodOptional<z.ZodString>;
             maxTokens: z.ZodOptional<z.ZodNumber>;
             temperature: z.ZodOptional<z.ZodNumber>;
             maxToolCalls: z.ZodOptional<z.ZodNumber>;
-        }, "strip", z.ZodTypeAny, {
-            provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolCalls?: number | undefined;
-        }, {
-            provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolCalls?: number | undefined;
-        }>>;
+        }, z.core.$strip>>;
         metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
         iterations: z.ZodOptional<z.ZodNumber>;
         accuracyThreshold: z.ZodOptional<z.ZodNumber>;
         judgeReps: z.ZodOptional<z.ZodNumber>;
         canonicalAnswer: z.ZodOptional<z.ZodString>;
-        tags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
+        tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
         expect: z.ZodOptional<z.ZodObject<{
             response: z.ZodOptional<z.ZodUnknown>;
             schema: z.ZodOptional<z.ZodString>;
-            containsText: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
-            matchesPattern: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
+            containsText: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
+            matchesPattern: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>;
             snapshot: z.ZodOptional<z.ZodString>;
-            snapshotSanitizers: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodEnum<["timestamp", "uuid", "iso-date", "objectId", "jwt"]>, z.ZodObject<{
+            snapshotSanitizers: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodEnum<{
+                timestamp: "timestamp";
+                uuid: "uuid";
+                "iso-date": "iso-date";
+                objectId: "objectId";
+                jwt: "jwt";
+            }>, z.ZodObject<{
                 pattern: z.ZodString;
                 replacement: z.ZodOptional<z.ZodString>;
-            }, "strip", z.ZodTypeAny, {
-                pattern: string;
-                replacement?: string | undefined;
-            }, {
-                pattern: string;
-                replacement?: string | undefined;
-            }>, z.ZodObject<{
-                remove: z.ZodArray<z.ZodString, "many">;
-            }, "strip", z.ZodTypeAny, {
-                remove: string[];
-            }, {
-                remove: string[];
-            }>]>, "many">>;
-            isError: z.ZodOptional<z.ZodUnion<[z.ZodBoolean, z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
+            }, z.core.$strip>, z.ZodObject<{
+                remove: z.ZodArray<z.ZodString>;
+            }, z.core.$strip>]>>>;
+            isError: z.ZodOptional<z.ZodUnion<readonly [z.ZodBoolean, z.ZodString, z.ZodArray<z.ZodString>]>>;
             passesJudge: z.ZodOptional<z.ZodObject<{
-                rubric: z.ZodUnion<[z.ZodEnum<["correctness", "completeness", "groundedness", "instruction-following", "conciseness"]>, z.ZodObject<{
+                rubric: z.ZodUnion<readonly [z.ZodEnum<{
+                    correctness: "correctness";
+                    completeness: "completeness";
+                    groundedness: "groundedness";
+                    "instruction-following": "instruction-following";
+                    conciseness: "conciseness";
+                }>, z.ZodObject<{
                     text: z.ZodString;
-                }, "strip", z.ZodTypeAny, {
-                    text: string;
-                }, {
-                    text: string;
-                }>]>;
+                }, z.core.$strip>]>;
                 reference: z.ZodOptional<z.ZodUnknown>;
                 threshold: z.ZodOptional<z.ZodNumber>;
                 reps: z.ZodOptional<z.ZodNumber>;
-                provider: z.ZodOptional<z.ZodEnum<["anthropic", "openai", "google"]>>;
+                provider: z.ZodOptional<z.ZodEnum<{
+                    openai: "openai";
+                    anthropic: "anthropic";
+                    google: "google";
+                }>>;
                 model: z.ZodOptional<z.ZodString>;
                 apiKeyEnvVar: z.ZodOptional<z.ZodString>;
                 maxTokens: z.ZodOptional<z.ZodNumber>;
                 temperature: z.ZodOptional<z.ZodNumber>;
                 maxBudgetUsd: z.ZodOptional<z.ZodNumber>;
                 maxToolOutputSize: z.ZodOptional<z.ZodNumber>;
-            }, "strip", z.ZodTypeAny, {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            }, {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            }>>;
+            }, z.core.$strip>>;
             responseSize: z.ZodOptional<z.ZodObject<{
                 maxBytes: z.ZodOptional<z.ZodNumber>;
                 minBytes: z.ZodOptional<z.ZodNumber>;
-            }, "strip", z.ZodTypeAny, {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            }, {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            }>>;
+            }, z.core.$strip>>;
             toolsTriggered: z.ZodOptional<z.ZodObject<{
                 calls: z.ZodArray<z.ZodObject<{
                     name: z.ZodString;
                     arguments: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
                     required: z.ZodOptional<z.ZodBoolean>;
-                }, "strip", z.ZodTypeAny, {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }, {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }>, "many">;
-                order: z.ZodOptional<z.ZodEnum<["strict", "any"]>>;
+                }, z.core.$strip>>;
+                order: z.ZodOptional<z.ZodEnum<{
+                    any: "any";
+                    strict: "strict";
+                }>>;
                 exclusive: z.ZodOptional<z.ZodBoolean>;
-            }, "strip", z.ZodTypeAny, {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            }, {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            }>>;
+            }, z.core.$strip>>;
             toolCallCount: z.ZodOptional<z.ZodObject<{
                 min: z.ZodOptional<z.ZodNumber>;
                 max: z.ZodOptional<z.ZodNumber>;
                 exact: z.ZodOptional<z.ZodNumber>;
-            }, "strip", z.ZodTypeAny, {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            }, {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            }>>;
-        }, "strip", z.ZodTypeAny, {
-            response?: unknown;
-            isError?: string | boolean | string[] | undefined;
-            schema?: string | undefined;
-            snapshot?: string | undefined;
-            toolsTriggered?: {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            } | undefined;
-            toolCallCount?: {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            } | undefined;
-            containsText?: string | string[] | undefined;
-            matchesPattern?: string | string[] | undefined;
-            snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-                pattern: string;
-                replacement?: string | undefined;
-            } | {
-                remove: string[];
-            })[] | undefined;
-            passesJudge?: {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            } | undefined;
-            responseSize?: {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            } | undefined;
-        }, {
-            response?: unknown;
-            isError?: string | boolean | string[] | undefined;
-            schema?: string | undefined;
-            snapshot?: string | undefined;
-            toolsTriggered?: {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            } | undefined;
-            toolCallCount?: {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            } | undefined;
-            containsText?: string | string[] | undefined;
-            matchesPattern?: string | string[] | undefined;
-            snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-                pattern: string;
-                replacement?: string | undefined;
-            } | {
-                remove: string[];
-            })[] | undefined;
-            passesJudge?: {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            } | undefined;
-            responseSize?: {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            } | undefined;
-        }>>;
-    }, "strip", z.ZodTypeAny, {
-        id: string;
-        args?: Record<string, unknown> | undefined;
-        mode?: "direct" | "llm_host" | undefined;
-        metadata?: Record<string, unknown> | undefined;
-        description?: string | undefined;
-        toolName?: string | undefined;
-        scenario?: string | undefined;
-        llmHostConfig?: {
-            provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolCalls?: number | undefined;
-        } | undefined;
-        iterations?: number | undefined;
-        accuracyThreshold?: number | undefined;
-        judgeReps?: number | undefined;
-        canonicalAnswer?: string | undefined;
-        tags?: string[] | undefined;
-        expect?: {
-            response?: unknown;
-            isError?: string | boolean | string[] | undefined;
-            schema?: string | undefined;
-            snapshot?: string | undefined;
-            toolsTriggered?: {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            } | undefined;
-            toolCallCount?: {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            } | undefined;
-            containsText?: string | string[] | undefined;
-            matchesPattern?: string | string[] | undefined;
-            snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-                pattern: string;
-                replacement?: string | undefined;
-            } | {
-                remove: string[];
-            })[] | undefined;
-            passesJudge?: {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            } | undefined;
-            responseSize?: {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            } | undefined;
-        } | undefined;
-    }, {
-        id: string;
-        args?: Record<string, unknown> | undefined;
-        mode?: "direct" | "llm_host" | undefined;
-        metadata?: Record<string, unknown> | undefined;
-        description?: string | undefined;
-        toolName?: string | undefined;
-        scenario?: string | undefined;
-        llmHostConfig?: {
-            provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolCalls?: number | undefined;
-        } | undefined;
-        iterations?: number | undefined;
-        accuracyThreshold?: number | undefined;
-        judgeReps?: number | undefined;
-        canonicalAnswer?: string | undefined;
-        tags?: string[] | undefined;
-        expect?: {
-            response?: unknown;
-            isError?: string | boolean | string[] | undefined;
-            schema?: string | undefined;
-            snapshot?: string | undefined;
-            toolsTriggered?: {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            } | undefined;
-            toolCallCount?: {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            } | undefined;
-            containsText?: string | string[] | undefined;
-            matchesPattern?: string | string[] | undefined;
-            snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-                pattern: string;
-                replacement?: string | undefined;
-            } | {
-                remove: string[];
-            })[] | undefined;
-            passesJudge?: {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            } | undefined;
-            responseSize?: {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            } | undefined;
-        } | undefined;
-    }>, "many">;
+            }, z.core.$strip>>;
+        }, z.core.$strip>>;
+    }, z.core.$strip>>;
     metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
-}, "strip", z.ZodTypeAny, {
-    name: string;
-    cases: {
-        id: string;
-        args?: Record<string, unknown> | undefined;
-        mode?: "direct" | "llm_host" | undefined;
-        metadata?: Record<string, unknown> | undefined;
-        description?: string | undefined;
-        toolName?: string | undefined;
-        scenario?: string | undefined;
-        llmHostConfig?: {
-            provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolCalls?: number | undefined;
-        } | undefined;
-        iterations?: number | undefined;
-        accuracyThreshold?: number | undefined;
-        judgeReps?: number | undefined;
-        canonicalAnswer?: string | undefined;
-        tags?: string[] | undefined;
-        expect?: {
-            response?: unknown;
-            isError?: string | boolean | string[] | undefined;
-            schema?: string | undefined;
-            snapshot?: string | undefined;
-            toolsTriggered?: {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            } | undefined;
-            toolCallCount?: {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            } | undefined;
-            containsText?: string | string[] | undefined;
-            matchesPattern?: string | string[] | undefined;
-            snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-                pattern: string;
-                replacement?: string | undefined;
-            } | {
-                remove: string[];
-            })[] | undefined;
-            passesJudge?: {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            } | undefined;
-            responseSize?: {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            } | undefined;
-        } | undefined;
-    }[];
-    metadata?: Record<string, unknown> | undefined;
-    description?: string | undefined;
-}, {
-    name: string;
-    cases: {
-        id: string;
-        args?: Record<string, unknown> | undefined;
-        mode?: "direct" | "llm_host" | undefined;
-        metadata?: Record<string, unknown> | undefined;
-        description?: string | undefined;
-        toolName?: string | undefined;
-        scenario?: string | undefined;
-        llmHostConfig?: {
-            provider: "openai" | "anthropic" | "azure" | "google" | "mistral" | "deepseek" | "openrouter" | "xai" | "vertex-anthropic";
-            model?: string | undefined;
-            maxTokens?: number | undefined;
-            apiKeyEnvVar?: string | undefined;
-            temperature?: number | undefined;
-            maxToolCalls?: number | undefined;
-        } | undefined;
-        iterations?: number | undefined;
-        accuracyThreshold?: number | undefined;
-        judgeReps?: number | undefined;
-        canonicalAnswer?: string | undefined;
-        tags?: string[] | undefined;
-        expect?: {
-            response?: unknown;
-            isError?: string | boolean | string[] | undefined;
-            schema?: string | undefined;
-            snapshot?: string | undefined;
-            toolsTriggered?: {
-                calls: {
-                    name: string;
-                    required?: boolean | undefined;
-                    arguments?: Record<string, unknown> | undefined;
-                }[];
-                order?: "strict" | "any" | undefined;
-                exclusive?: boolean | undefined;
-            } | undefined;
-            toolCallCount?: {
-                exact?: number | undefined;
-                min?: number | undefined;
-                max?: number | undefined;
-            } | undefined;
-            containsText?: string | string[] | undefined;
-            matchesPattern?: string | string[] | undefined;
-            snapshotSanitizers?: ("uuid" | "jwt" | "timestamp" | "iso-date" | "objectId" | {
-                pattern: string;
-                replacement?: string | undefined;
-            } | {
-                remove: string[];
-            })[] | undefined;
-            passesJudge?: {
-                rubric: "correctness" | "completeness" | "groundedness" | "instruction-following" | "conciseness" | {
-                    text: string;
-                };
-                model?: string | undefined;
-                maxTokens?: number | undefined;
-                maxBudgetUsd?: number | undefined;
-                reference?: unknown;
-                threshold?: number | undefined;
-                reps?: number | undefined;
-                provider?: "openai" | "anthropic" | "google" | undefined;
-                apiKeyEnvVar?: string | undefined;
-                temperature?: number | undefined;
-                maxToolOutputSize?: number | undefined;
-            } | undefined;
-            responseSize?: {
-                maxBytes?: number | undefined;
-                minBytes?: number | undefined;
-            } | undefined;
-        } | undefined;
-    }[];
-    metadata?: Record<string, unknown> | undefined;
-    description?: string | undefined;
-}>;
+}, z.core.$strip>;
 /**
  * Type for serialized eval dataset (without Zod schemas)
  */
@@ -4122,15 +3154,15 @@ interface EvalRunMetadata {
     timestamp: string;
     /** Package version from package.json */
     packageVersion: string;
-    /** LLM host model identifier (if llm_host mode) */
-    llmHostModel?: string;
+    /** MCP host model identifier (if mcp_host mode) */
+    mcpHostModel?: string;
     /** Judge model identifier (if judge was used) */
     judgeModel?: string;
 }
 /**
  * Individual conformance check result
  */
-interface MCPConformanceCheck$1 {
+interface MCPConformanceCheck {
     /**
      * Check name (e.g., 'server_info_present', 'list_tools_succeeds')
      */
@@ -4159,7 +3191,7 @@ interface MCPConformanceResultData {
     /**
      * Individual check results
      */
-    checks: MCPConformanceCheck$1[];
+    checks: MCPConformanceCheck[];
     /**
      * Server info if available
      */
@@ -4282,12 +3314,6 @@ interface EvalCaseResult {
      * Only present when the case was run with `iterations > 1`.
      */
     infrastructureErrorRate?: number;
-    /**
-     * Accuracy score (0–1) across all iterations.
-     * Alias for `assertionPassRate`. Only present when the case was run with `iterations > 1`.
-     * @deprecated Use `assertionPassRate` for clarity; this field is kept for backward compatibility.
-     */
-    accuracy?: number;
     /**
      * Per-iteration pass/fail breakdown.
      * Only present when the case was run with `iterations > 1`.
@@ -4300,7 +3326,7 @@ interface EvalCaseResult {
     /**
      * Precision of tool calls made (0–1).
      * 1.0 means every tool called was expected; <1.0 means unexpected tools were called.
-     * Only populated when exclusive: true in toolsTriggered and the expectation was evaluated.
+     * Populated whenever a `toolsTriggered` expectation is evaluated.
      */
     toolPrecision?: number;
     /**
@@ -4319,6 +3345,23 @@ interface EvalCaseResult {
      * Only present when the case was run with `iterations > 1`.
      */
     infrastructureErrorCount?: number;
+    /**
+     * Ordered trace of tool calls made by the LLM in mcp_host mode.
+     * Only populated when the eval case uses toolsTriggered expectations.
+     */
+    mcpHostTrace?: {
+        /** The ordered sequence of tool calls made by the LLM */
+        calls: Array<{
+            name: string;
+            arguments: Record<string, unknown>;
+            /** 'expected' = was in the expected set, 'unexpected' = was not expected */
+            status: 'expected' | 'unexpected';
+        }>;
+        /** Tools that were required but never called */
+        missed: Array<{
+            name: string;
+        }>;
+    };
 }
 /**
  * Aggregated MCP eval run data
@@ -4462,13 +3505,13 @@ interface EvalRunnerResult {
      */
     improvements?: number;
     /**
-     * Average tool precision across all llm_host cases that have a
+     * Average tool precision across all mcp_host cases that have a
      * `toolsTriggered` expectation (precision = fraction of called tools
      * that were expected). Only present when at least one such case ran.
      */
     datasetToolPrecision?: number;
     /**
-     * Average tool recall across all llm_host cases that have a
+     * Average tool recall across all mcp_host cases that have a
      * `toolsTriggered` expectation (recall = fraction of required tools
      * that were actually called). Only present when at least one such case ran.
      */
@@ -4523,7 +3566,7 @@ interface EvalRunnerOptions {
      */
     concurrency?: number;
     /**
-     * Default iteration count for `llm_host` mode cases that do not specify
+     * Default iteration count for `mcp_host` mode cases that do not specify
      * `iterations` explicitly. Has no effect on `direct` mode cases (which are
      * deterministic and always default to 1 iteration).
      *
@@ -4534,7 +3577,7 @@ interface EvalRunnerOptions {
      *
      * @example
      * ```typescript
-     * // Run all llm_host cases 10 times each by default
+     * // Run all mcp_host cases 10 times each by default
      * await runEvalDataset({ dataset, defaultLlmIterations: 10 }, { mcp });
      * ```
      */
@@ -4567,12 +3610,12 @@ interface EvalRunnerOptions {
      */
     baselineResultsFrom?: string;
     /**
-     * LLM host model identifier to record in run metadata.
-     * Use this to identify which model was used when running llm_host cases.
+     * MCP host model identifier to record in run metadata.
+     * Use this to identify which model was used when running mcp_host cases.
      *
      * @example 'claude-opus-4-20250514'
      */
-    llmHostModel?: string;
+    mcpHostModel?: string;
     /**
      * Judge model identifier to record in run metadata.
      * Use this to identify which model was used for judge evaluations.
@@ -4660,8 +3703,6 @@ interface ServerComparisonResult {
     ties: number;
     /** Cases where both failed */
     bothFail: number;
-    /** Raw count of cases where both servers failed (same as bothFail) */
-    bothFailCount: number;
     /** Cases with a decisive outcome (aWins + bWins + ties, excludes BOTH_FAIL) */
     decidedCases: number;
     /** Fraction of total cases where both servers failed (bothFail / total) */
@@ -4712,7 +3753,7 @@ type ServerComparisonOptions = Omit<EvalRunnerOptions, 'saveResultsTo' | 'baseli
 declare function runServerComparison(options: ServerComparisonOptions, contextA: EvalContext, contextB: EvalContext): Promise<ServerComparisonResult>;
 /**
- * LLM Host Simulation - Main entry point
+ * MCP Host Simulation - Main entry point
  *
  * All providers (openai, anthropic, google, azure, mistral, deepseek,
  * openrouter, xai) run through the Vercel AI SDK orchestrator, which uses
@@ -4731,7 +3772,7 @@ declare function runServerComparison(options: ServerComparisonOptions, contextA:
  */
 /**
- * Simulates an LLM host interacting with an MCP server.
+ * Simulates an MCP host interacting with an MCP server.
  *
  * The LLM chooses which tools to call based solely on their descriptions and
  * schemas, testing discoverability and parameter clarity at the level a real
@@ -4743,12 +3784,12 @@ declare function runServerComparison(options: ServerComparisonOptions, contextA:
  *
  * @param mcp - MCP fixture API
  * @param scenario - Natural language prompt describing what the LLM should do
- * @param config - LLM host configuration (provider, model, temperature, etc.)
+ * @param config - MCP host configuration (provider, model, temperature, etc.)
  * @returns Simulation result with tool calls, final response, and latency data
  *
  * @example
  * ```typescript
- * const result = await simulateLLMHost(mcp,
+ * const result = await simulateMCPHost(mcp,
  *   "Find recent documents about MCP testing frameworks",
  *   { provider: 'anthropic', model: 'claude-3-5-sonnet-20241022' }
  * );
@@ -4757,7 +3798,7 @@ declare function runServerComparison(options: ServerComparisonOptions, contextA:
  * expect(result.toolCalls.map(c => c.name)).toContain('search');
  * ```
  */
-declare function simulateLLMHost(mcp: MCPFixtureApi, scenario: string, config: LLMHostConfig): Promise<LLMHostSimulationResult>;
+declare function simulateMCPHost(mcp: MCPFixtureApi, scenario: string, config: MCPHostConfig): Promise<MCPHostSimulationResult>;
 /**
  * Returns true if the given provider is supported.
  *
@@ -4836,14 +3877,6 @@ interface MCPConformanceOptions {
      */
     checkPrompts?: boolean;
 }
-/**
- * Individual check result
- */
-interface MCPConformanceCheck {
-    name: string;
-    pass: boolean;
-    message: string;
-}
 /**
  * Raw MCP responses for snapshotting
  */
@@ -4976,4 +4009,4 @@ interface MCPEvalReporterConfig {
     includeAutoTracking?: boolean;
 }
-export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMHostConfig, type LLMHostSimulationResult, type LLMHostSimulator, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, 
type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateLLMHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
+export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, 
type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };