npm - @gleanwork/mcp-server-tester - Versions diffs - 1.0.0-beta.8 → 1.0.1-beta.0 - Mend

@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +20 -1
package/dist/cli/index.js +12 -1
package/dist/fixtures/mcp.js +71 -14
package/dist/fixtures/mcp.js.map +1 -1
package/dist/index.cjs +142 -24
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +118 -16
package/dist/index.d.ts +118 -16
package/dist/index.js +142 -25
package/dist/index.js.map +1 -1
package/dist/reporters/mcpReporter.cjs +34 -1
package/dist/reporters/mcpReporter.cjs.map +1 -1
package/dist/reporters/mcpReporter.d.cts +90 -0
package/dist/reporters/mcpReporter.d.ts +90 -0
package/dist/reporters/mcpReporter.js +34 -1
package/dist/reporters/mcpReporter.js.map +1 -1
package/package.json +1 -1

package/dist/index.d.cts CHANGED

@@ -735,6 +735,34 @@ interface AuthServerMetadata {
      */
     issuer: string;
 }
+/**
+ * Configuration for token refresh
+ */
+interface TokenRefreshConfig {
+    /**
+     * Authorization server metadata
+     */
+    authServer: AuthServerMetadata;
+    /**
+     * Client ID
+     */
+    clientId: string;
+    /**
+     * Client secret (for confidential clients)
+     */
+    clientSecret?: string;
+    /**
+     * Refresh token
+     */
+    refreshToken: string;
+}
+/**
+ * Refreshes an access token using a refresh token
+ *
+ * @param config - Token refresh configuration
+ * @returns New token result
+ */
+declare function refreshAccessToken(config: TokenRefreshConfig): Promise<TokenResult>;
 /**
  * Configuration for client credentials grant
  */
@@ -2131,7 +2159,7 @@ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResul
 /**
  * Breakdown of expectation types used in a run
  */
-type ExpectationBreakdown = Record<ExpectationType, number>;
+type ExpectationBreakdown = Partial<Record<ExpectationType, number>>;
 /**
  * Options for creating an MCP fixture
@@ -2656,6 +2684,56 @@ interface CLIConfig {
      */
     timeout?: number;
 }
+/**
+ * A cookie to inject into the browser context before running the script.
+ * Matches the shape expected by Playwright's `BrowserContext.addCookies()`.
+ */
+interface BrowserCookie {
+    name: string;
+    value: string;
+    url?: string;
+    domain?: string;
+    path?: string;
+    expires?: number;
+    httpOnly?: boolean;
+    secure?: boolean;
+    sameSite?: 'Strict' | 'Lax' | 'None';
+    partitionKey?: string;
+}
+/**
+ * Configuration for a browser-based host.
+ *
+ * Uses Playwright to launch a Chromium instance, inject auth state,
+ * and execute a user-provided script that drives a web-based MCP host
+ * (e.g., claude.ai).
+ */
+interface BrowserConfig {
+    /**
+     * Path to the browser script (resolved relative to cwd).
+     * The script must default-export an async function
+     * `(page: Page, scenario: string) => Promise<MCPHostSimulationResult>`.
+     */
+    script: string;
+    /**
+     * Timeout in milliseconds for the browser script.
+     * @default 120000 (2 minutes)
+     */
+    timeout?: number;
+    /**
+     * Whether to launch in headless mode.
+     * @default true
+     */
+    headless?: boolean;
+    /**
+     * Path to a Playwright storage state JSON file (cookies + localStorage).
+     * Resolved relative to cwd.
+     */
+    storageState?: string;
+    /**
+     * Extra cookies to inject into the browser context.
+     */
+    cookies?: BrowserCookie[];
+}
 /**
  * Configuration for MCP host simulation
  */
@@ -2701,6 +2779,10 @@ interface MCPHostConfig {
      * CLI host configuration (required for 'cli' host type).
      */
     cli?: CLIConfig;
+    /**
+     * Browser host configuration (required for 'browser' host type).
+     */
+    browser?: BrowserConfig;
 }
 /**
  * A tool call made by the LLM
@@ -2742,6 +2824,11 @@ interface MCPHostSimulationResult {
      * (excludes LLM response time)
      */
     mcpDurationMs?: number;
+    /**
+     * Token usage from the LLM during simulation.
+     * Populated by SDK-based hosts from the AI SDK response.
+     */
+    usage?: UsageMetrics;
 }
 /**
  * Interface for MCP host simulators.
@@ -3043,15 +3130,15 @@ declare const EvalCaseSchema: z.ZodObject<{
             desktop: "desktop";
         }>>;
         provider: z.ZodOptional<z.ZodEnum<{
-            openai: "openai";
             anthropic: "anthropic";
-            azure: "azure";
+            "vertex-anthropic": "vertex-anthropic";
+            openai: "openai";
             google: "google";
+            azure: "azure";
             mistral: "mistral";
             deepseek: "deepseek";
             openrouter: "openrouter";
             xai: "xai";
-            "vertex-anthropic": "vertex-anthropic";
         }>>;
         apiKeyEnvVar: z.ZodOptional<z.ZodString>;
         model: z.ZodOptional<z.ZodString>;
@@ -3108,11 +3195,11 @@ declare const EvalCaseSchema: z.ZodObject<{
             threshold: z.ZodOptional<z.ZodNumber>;
             reps: z.ZodOptional<z.ZodNumber>;
             provider: z.ZodOptional<z.ZodEnum<{
-                openai: "openai";
                 anthropic: "anthropic";
-                google: "google";
                 "vertex-anthropic": "vertex-anthropic";
                 "anthropic-agent-sdk": "anthropic-agent-sdk";
+                openai: "openai";
+                google: "google";
             }>>;
             model: z.ZodOptional<z.ZodString>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3135,11 +3222,11 @@ declare const EvalCaseSchema: z.ZodObject<{
             threshold: z.ZodOptional<z.ZodNumber>;
             reps: z.ZodOptional<z.ZodNumber>;
             provider: z.ZodOptional<z.ZodEnum<{
-                openai: "openai";
                 anthropic: "anthropic";
-                google: "google";
                 "vertex-anthropic": "vertex-anthropic";
                 "anthropic-agent-sdk": "anthropic-agent-sdk";
+                openai: "openai";
+                google: "google";
             }>>;
             model: z.ZodOptional<z.ZodString>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3195,15 +3282,15 @@ declare const EvalDatasetSchema: z.ZodObject<{
                 desktop: "desktop";
             }>>;
             provider: z.ZodOptional<z.ZodEnum<{
-                openai: "openai";
                 anthropic: "anthropic";
-                azure: "azure";
+                "vertex-anthropic": "vertex-anthropic";
+                openai: "openai";
                 google: "google";
+                azure: "azure";
                 mistral: "mistral";
                 deepseek: "deepseek";
                 openrouter: "openrouter";
                 xai: "xai";
-                "vertex-anthropic": "vertex-anthropic";
             }>>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
             model: z.ZodOptional<z.ZodString>;
@@ -3260,11 +3347,11 @@ declare const EvalDatasetSchema: z.ZodObject<{
                 threshold: z.ZodOptional<z.ZodNumber>;
                 reps: z.ZodOptional<z.ZodNumber>;
                 provider: z.ZodOptional<z.ZodEnum<{
-                    openai: "openai";
                     anthropic: "anthropic";
-                    google: "google";
                     "vertex-anthropic": "vertex-anthropic";
                     "anthropic-agent-sdk": "anthropic-agent-sdk";
+                    openai: "openai";
+                    google: "google";
                 }>>;
                 model: z.ZodOptional<z.ZodString>;
                 apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3287,11 +3374,11 @@ declare const EvalDatasetSchema: z.ZodObject<{
                 threshold: z.ZodOptional<z.ZodNumber>;
                 reps: z.ZodOptional<z.ZodNumber>;
                 provider: z.ZodOptional<z.ZodEnum<{
-                    openai: "openai";
                     anthropic: "anthropic";
-                    google: "google";
                     "vertex-anthropic": "vertex-anthropic";
                     "anthropic-agent-sdk": "anthropic-agent-sdk";
+                    openai: "openai";
+                    google: "google";
                 }>>;
                 model: z.ZodOptional<z.ZodString>;
                 apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3534,6 +3621,8 @@ interface IterationResult {
             name: string;
         }>;
     };
+    /** Token usage from mcp_host LLM simulation in this iteration */
+    hostUsage?: UsageMetrics;
 }
 /**
  * Request data captured from the eval case input.
@@ -3682,6 +3771,11 @@ interface EvalCaseResult {
             name: string;
         }>;
     };
+    /**
+     * Aggregate token usage from mcp_host LLM simulation for this case.
+     * Summed across all iterations. Only populated for mcp_host mode cases.
+     */
+    hostUsage?: UsageMetrics;
 }
 /**
  * Aggregated MCP eval run data
@@ -3731,6 +3825,10 @@ interface MCPEvalRunData {
          * Expectation type breakdown
          */
         expectationBreakdown: ExpectationBreakdown;
+        /**
+         * Aggregate token usage from all mcp_host LLM simulations in this run.
+         */
+        totalHostUsage?: UsageMetrics;
     };
     /**
      * All eval results from this run
@@ -3845,6 +3943,10 @@ interface EvalRunnerResult {
      * Experiment tracking metadata captured at run time.
      */
     metadata?: EvalRunMetadata;
+    /**
+     * Aggregate token usage from all mcp_host LLM simulations across all cases.
+     */
+    totalHostUsage?: UsageMetrics;
 }
 /**
  * Options for running eval dataset
@@ -4455,4 +4557,4 @@ interface MCPEvalReporterConfig {
     includeAutoTracking?: boolean;
 }
-export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type 
ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
+export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseRequest, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunMetadata, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, 
type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };

package/dist/index.d.ts CHANGED

@@ -735,6 +735,34 @@ interface AuthServerMetadata {
      */
     issuer: string;
 }
+/**
+ * Configuration for token refresh
+ */
+interface TokenRefreshConfig {
+    /**
+     * Authorization server metadata
+     */
+    authServer: AuthServerMetadata;
+    /**
+     * Client ID
+     */
+    clientId: string;
+    /**
+     * Client secret (for confidential clients)
+     */
+    clientSecret?: string;
+    /**
+     * Refresh token
+     */
+    refreshToken: string;
+}
+/**
+ * Refreshes an access token using a refresh token
+ *
+ * @param config - Token refresh configuration
+ * @returns New token result
+ */
+declare function refreshAccessToken(config: TokenRefreshConfig): Promise<TokenResult>;
 /**
  * Configuration for client credentials grant
  */
@@ -2131,7 +2159,7 @@ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResul
 /**
  * Breakdown of expectation types used in a run
  */
-type ExpectationBreakdown = Record<ExpectationType, number>;
+type ExpectationBreakdown = Partial<Record<ExpectationType, number>>;
 /**
  * Options for creating an MCP fixture
@@ -2656,6 +2684,56 @@ interface CLIConfig {
      */
     timeout?: number;
 }
+/**
+ * A cookie to inject into the browser context before running the script.
+ * Matches the shape expected by Playwright's `BrowserContext.addCookies()`.
+ */
+interface BrowserCookie {
+    name: string;
+    value: string;
+    url?: string;
+    domain?: string;
+    path?: string;
+    expires?: number;
+    httpOnly?: boolean;
+    secure?: boolean;
+    sameSite?: 'Strict' | 'Lax' | 'None';
+    partitionKey?: string;
+}
+/**
+ * Configuration for a browser-based host.
+ *
+ * Uses Playwright to launch a Chromium instance, inject auth state,
+ * and execute a user-provided script that drives a web-based MCP host
+ * (e.g., claude.ai).
+ */
+interface BrowserConfig {
+    /**
+     * Path to the browser script (resolved relative to cwd).
+     * The script must default-export an async function
+     * `(page: Page, scenario: string) => Promise<MCPHostSimulationResult>`.
+     */
+    script: string;
+    /**
+     * Timeout in milliseconds for the browser script.
+     * @default 120000 (2 minutes)
+     */
+    timeout?: number;
+    /**
+     * Whether to launch in headless mode.
+     * @default true
+     */
+    headless?: boolean;
+    /**
+     * Path to a Playwright storage state JSON file (cookies + localStorage).
+     * Resolved relative to cwd.
+     */
+    storageState?: string;
+    /**
+     * Extra cookies to inject into the browser context.
+     */
+    cookies?: BrowserCookie[];
+}
 /**
  * Configuration for MCP host simulation
  */
@@ -2701,6 +2779,10 @@ interface MCPHostConfig {
      * CLI host configuration (required for 'cli' host type).
      */
     cli?: CLIConfig;
+    /**
+     * Browser host configuration (required for 'browser' host type).
+     */
+    browser?: BrowserConfig;
 }
 /**
  * A tool call made by the LLM
@@ -2742,6 +2824,11 @@ interface MCPHostSimulationResult {
      * (excludes LLM response time)
      */
     mcpDurationMs?: number;
+    /**
+     * Token usage from the LLM during simulation.
+     * Populated by SDK-based hosts from the AI SDK response.
+     */
+    usage?: UsageMetrics;
 }
 /**
  * Interface for MCP host simulators.
@@ -3043,15 +3130,15 @@ declare const EvalCaseSchema: z.ZodObject<{
             desktop: "desktop";
         }>>;
         provider: z.ZodOptional<z.ZodEnum<{
-            openai: "openai";
             anthropic: "anthropic";
-            azure: "azure";
+            "vertex-anthropic": "vertex-anthropic";
+            openai: "openai";
             google: "google";
+            azure: "azure";
             mistral: "mistral";
             deepseek: "deepseek";
             openrouter: "openrouter";
             xai: "xai";
-            "vertex-anthropic": "vertex-anthropic";
         }>>;
         apiKeyEnvVar: z.ZodOptional<z.ZodString>;
         model: z.ZodOptional<z.ZodString>;
@@ -3108,11 +3195,11 @@ declare const EvalCaseSchema: z.ZodObject<{
             threshold: z.ZodOptional<z.ZodNumber>;
             reps: z.ZodOptional<z.ZodNumber>;
             provider: z.ZodOptional<z.ZodEnum<{
-                openai: "openai";
                 anthropic: "anthropic";
-                google: "google";
                 "vertex-anthropic": "vertex-anthropic";
                 "anthropic-agent-sdk": "anthropic-agent-sdk";
+                openai: "openai";
+                google: "google";
             }>>;
             model: z.ZodOptional<z.ZodString>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3135,11 +3222,11 @@ declare const EvalCaseSchema: z.ZodObject<{
             threshold: z.ZodOptional<z.ZodNumber>;
             reps: z.ZodOptional<z.ZodNumber>;
             provider: z.ZodOptional<z.ZodEnum<{
-                openai: "openai";
                 anthropic: "anthropic";
-                google: "google";
                 "vertex-anthropic": "vertex-anthropic";
                 "anthropic-agent-sdk": "anthropic-agent-sdk";
+                openai: "openai";
+                google: "google";
             }>>;
             model: z.ZodOptional<z.ZodString>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3195,15 +3282,15 @@ declare const EvalDatasetSchema: z.ZodObject<{
                 desktop: "desktop";
             }>>;
             provider: z.ZodOptional<z.ZodEnum<{
-                openai: "openai";
                 anthropic: "anthropic";
-                azure: "azure";
+                "vertex-anthropic": "vertex-anthropic";
+                openai: "openai";
                 google: "google";
+                azure: "azure";
                 mistral: "mistral";
                 deepseek: "deepseek";
                 openrouter: "openrouter";
                 xai: "xai";
-                "vertex-anthropic": "vertex-anthropic";
             }>>;
             apiKeyEnvVar: z.ZodOptional<z.ZodString>;
             model: z.ZodOptional<z.ZodString>;
@@ -3260,11 +3347,11 @@ declare const EvalDatasetSchema: z.ZodObject<{
                 threshold: z.ZodOptional<z.ZodNumber>;
                 reps: z.ZodOptional<z.ZodNumber>;
                 provider: z.ZodOptional<z.ZodEnum<{
-                    openai: "openai";
                     anthropic: "anthropic";
-                    google: "google";
                     "vertex-anthropic": "vertex-anthropic";
                     "anthropic-agent-sdk": "anthropic-agent-sdk";
+                    openai: "openai";
+                    google: "google";
                 }>>;
                 model: z.ZodOptional<z.ZodString>;
                 apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3287,11 +3374,11 @@ declare const EvalDatasetSchema: z.ZodObject<{
                 threshold: z.ZodOptional<z.ZodNumber>;
                 reps: z.ZodOptional<z.ZodNumber>;
                 provider: z.ZodOptional<z.ZodEnum<{
-                    openai: "openai";
                     anthropic: "anthropic";
-                    google: "google";
                     "vertex-anthropic": "vertex-anthropic";
                     "anthropic-agent-sdk": "anthropic-agent-sdk";
+                    openai: "openai";
+                    google: "google";
                 }>>;
                 model: z.ZodOptional<z.ZodString>;
                 apiKeyEnvVar: z.ZodOptional<z.ZodString>;
@@ -3534,6 +3621,8 @@ interface IterationResult {
             name: string;
         }>;
     };
+    /** Token usage from mcp_host LLM simulation in this iteration */
+    hostUsage?: UsageMetrics;
 }
 /**
  * Request data captured from the eval case input.
@@ -3682,6 +3771,11 @@ interface EvalCaseResult {
             name: string;
         }>;
     };
+    /**
+     * Aggregate token usage from mcp_host LLM simulation for this case.
+     * Summed across all iterations. Only populated for mcp_host mode cases.
+     */
+    hostUsage?: UsageMetrics;
 }
 /**
  * Aggregated MCP eval run data
@@ -3731,6 +3825,10 @@ interface MCPEvalRunData {
          * Expectation type breakdown
          */
         expectationBreakdown: ExpectationBreakdown;
+        /**
+         * Aggregate token usage from all mcp_host LLM simulations in this run.
+         */
+        totalHostUsage?: UsageMetrics;
     };
     /**
      * All eval results from this run
@@ -3845,6 +3943,10 @@ interface EvalRunnerResult {
      * Experiment tracking metadata captured at run time.
      */
     metadata?: EvalRunMetadata;
+    /**
+     * Aggregate token usage from all mcp_host LLM simulations across all cases.
+     */
+    totalHostUsage?: UsageMetrics;
 }
 /**
  * Options for running eval dataset
@@ -4455,4 +4557,4 @@ interface MCPEvalReporterConfig {
     includeAutoTracking?: boolean;
 }
-export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type 
ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
+export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseRequest, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunMetadata, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, 
type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };