@gleanwork/mcp-server-tester 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3857 @@
1
+ import { z, ZodType } from 'zod';
2
+ import { OAuthClientProvider } from '@modelcontextprotocol/sdk/client/auth.js';
3
+ import { OAuthClientMetadata, OAuthClientInformationFull, OAuthTokens } from '@modelcontextprotocol/sdk/shared/auth.js';
4
+ import * as oauth from 'oauth4webapi';
5
+ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
6
+ import { CallToolResult, Tool, Implementation, ServerCapabilities, Resource, Prompt } from '@modelcontextprotocol/sdk/types.js';
7
+ import { TestInfo, Expect } from '@playwright/test';
8
+ import * as playwright_test from 'playwright/test';
9
+
10
+ /**
11
+ * OAuth configuration for MCP authentication (the `oauth` member of MCPAuthConfig); only `serverUrl` is required, every other field is optional
12
+ */
13
+ interface MCPOAuthConfig {
14
+ /**
15
+ * OAuth authorization server metadata URL
16
+ * (e.g., https://auth.example.com/.well-known/oauth-authorization-server)
17
+ */
18
+ serverUrl: string;
19
+ /**
20
+ * Scopes to request during authorization
21
+ */
22
+ scopes?: Array<string>;
23
+ /**
24
+ * Resource indicator (RFC 8707). The MCP 2025-06-18 spec requires clients to send one, although this field is typed optional here
25
+ */
26
+ resource?: string;
27
+ /**
28
+ * Path to Playwright auth state file
29
+ * (e.g., playwright/.auth/oauth-state.json)
30
+ */
31
+ authStatePath?: string;
32
+ /**
33
+ * Client ID (if pre-registered; otherwise uses Dynamic Client Registration)
34
+ */
35
+ clientId?: string;
36
+ /**
37
+ * Client secret (for confidential clients)
38
+ */
39
+ clientSecret?: string;
40
+ /**
41
+ * Redirect URI for OAuth callback
42
+ */
43
+ redirectUri?: string;
44
+ }
45
+ /**
46
+ * Authentication configuration for MCP connections
47
+ */
48
+ interface MCPAuthConfig {
49
+ /**
50
+ * Pre-acquired access token (simplest authentication mode)
51
+ */
52
+ accessToken?: string;
53
+ /**
54
+ * Full OAuth configuration for browser-based authentication
55
+ */
56
+ oauth?: MCPOAuthConfig;
57
+ }
58
+ /**
59
+ * MCP host capabilities that can be registered with the server
60
+ */
61
+ interface MCPHostCapabilities {
62
+ /**
63
+ * Sampling capabilities (for LLM sampling)
64
+ */
65
+ sampling?: Record<string, unknown>;
66
+ /**
67
+ * Roots capabilities (for file system roots)
68
+ */
69
+ roots?: {
70
+ /**
71
+ * Whether the client can notify the server when roots change
72
+ */
73
+ listChanged: boolean;
74
+ };
75
+ }
76
+ /**
77
+ * Configuration for MCP client connection
78
+ *
79
+ * Supports both stdio (local) and HTTP (remote) transports. Every transport-specific field is typed optional on this interface; MCPConfigSchema declares the stricter per-transport shapes.
80
+ */
81
+ interface MCPConfig {
82
+ /**
83
+ * Transport type (the discriminator used by MCPConfigSchema)
84
+ */
85
+ transport: 'http' | 'stdio';
86
+ /**
87
+ * Server URL (required when transport === 'http'; typed optional here, but a required string in the 'http' variant of MCPConfigSchema)
88
+ */
89
+ serverUrl?: string;
90
+ /**
91
+ * HTTP headers (optional for http transport, e.g., Authorization); not a key of the 'stdio' variant of MCPConfigSchema
92
+ */
93
+ headers?: Record<string, string>;
94
+ /**
95
+ * Command to execute (required when transport === 'stdio'; typed optional here, but a required string in the 'stdio' variant of MCPConfigSchema)
96
+ */
97
+ command?: string;
98
+ /**
99
+ * Command arguments (optional for stdio)
100
+ */
101
+ args?: Array<string>;
102
+ /**
103
+ * Working directory for the command (optional for stdio)
104
+ */
105
+ cwd?: string;
106
+ /**
107
+ * Host capabilities to register with the server (accepted by both transport variants)
108
+ */
109
+ capabilities?: MCPHostCapabilities;
110
+ /**
111
+ * Connection timeout in milliseconds (accepted by both transport variants)
112
+ */
113
+ connectTimeoutMs?: number;
114
+ /**
115
+ * Request timeout in milliseconds (accepted by both transport variants)
116
+ */
117
+ requestTimeoutMs?: number;
118
+ /**
119
+ * Suppress stderr output from the server process (stdio only; not a key of the 'http' variant of MCPConfigSchema)
120
+ * When true, server stderr is ignored instead of inherited
121
+ */
122
+ quiet?: boolean;
123
+ /**
124
+ * Authentication configuration (optional for http transport; not a key of the 'stdio' variant of MCPConfigSchema)
125
+ */
126
+ auth?: MCPAuthConfig;
127
+ }
128
+ /**
129
+ * Union schema for MCPConfig (validates based on transport type)
130
+ */
131
+ declare const MCPConfigSchema: z.ZodDiscriminatedUnion<"transport", [z.ZodObject<{
132
+ transport: z.ZodLiteral<"stdio">;
133
+ command: z.ZodString;
134
+ args: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
135
+ cwd: z.ZodOptional<z.ZodString>;
136
+ capabilities: z.ZodOptional<z.ZodObject<{
137
+ sampling: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
138
+ roots: z.ZodOptional<z.ZodObject<{
139
+ listChanged: z.ZodBoolean;
140
+ }, "strip", z.ZodTypeAny, {
141
+ listChanged: boolean;
142
+ }, {
143
+ listChanged: boolean;
144
+ }>>;
145
+ }, "strip", z.ZodTypeAny, {
146
+ sampling?: Record<string, unknown> | undefined;
147
+ roots?: {
148
+ listChanged: boolean;
149
+ } | undefined;
150
+ }, {
151
+ sampling?: Record<string, unknown> | undefined;
152
+ roots?: {
153
+ listChanged: boolean;
154
+ } | undefined;
155
+ }>>;
156
+ connectTimeoutMs: z.ZodOptional<z.ZodNumber>;
157
+ requestTimeoutMs: z.ZodOptional<z.ZodNumber>;
158
+ quiet: z.ZodOptional<z.ZodBoolean>;
159
+ }, "strip", z.ZodTypeAny, {
160
+ transport: "stdio";
161
+ command: string;
162
+ args?: string[] | undefined;
163
+ cwd?: string | undefined;
164
+ capabilities?: {
165
+ sampling?: Record<string, unknown> | undefined;
166
+ roots?: {
167
+ listChanged: boolean;
168
+ } | undefined;
169
+ } | undefined;
170
+ connectTimeoutMs?: number | undefined;
171
+ requestTimeoutMs?: number | undefined;
172
+ quiet?: boolean | undefined;
173
+ }, {
174
+ transport: "stdio";
175
+ command: string;
176
+ args?: string[] | undefined;
177
+ cwd?: string | undefined;
178
+ capabilities?: {
179
+ sampling?: Record<string, unknown> | undefined;
180
+ roots?: {
181
+ listChanged: boolean;
182
+ } | undefined;
183
+ } | undefined;
184
+ connectTimeoutMs?: number | undefined;
185
+ requestTimeoutMs?: number | undefined;
186
+ quiet?: boolean | undefined;
187
+ }>, z.ZodObject<{
188
+ transport: z.ZodLiteral<"http">;
189
+ serverUrl: z.ZodString;
190
+ headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
191
+ capabilities: z.ZodOptional<z.ZodObject<{
192
+ sampling: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
193
+ roots: z.ZodOptional<z.ZodObject<{
194
+ listChanged: z.ZodBoolean;
195
+ }, "strip", z.ZodTypeAny, {
196
+ listChanged: boolean;
197
+ }, {
198
+ listChanged: boolean;
199
+ }>>;
200
+ }, "strip", z.ZodTypeAny, {
201
+ sampling?: Record<string, unknown> | undefined;
202
+ roots?: {
203
+ listChanged: boolean;
204
+ } | undefined;
205
+ }, {
206
+ sampling?: Record<string, unknown> | undefined;
207
+ roots?: {
208
+ listChanged: boolean;
209
+ } | undefined;
210
+ }>>;
211
+ connectTimeoutMs: z.ZodOptional<z.ZodNumber>;
212
+ requestTimeoutMs: z.ZodOptional<z.ZodNumber>;
213
+ auth: z.ZodOptional<z.ZodEffects<z.ZodObject<{
214
+ accessToken: z.ZodOptional<z.ZodString>;
215
+ oauth: z.ZodOptional<z.ZodObject<{
216
+ serverUrl: z.ZodString;
217
+ scopes: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
218
+ resource: z.ZodOptional<z.ZodString>;
219
+ authStatePath: z.ZodOptional<z.ZodString>;
220
+ clientId: z.ZodOptional<z.ZodString>;
221
+ clientSecret: z.ZodOptional<z.ZodString>;
222
+ redirectUri: z.ZodOptional<z.ZodString>;
223
+ }, "strip", z.ZodTypeAny, {
224
+ serverUrl: string;
225
+ scopes?: string[] | undefined;
226
+ resource?: string | undefined;
227
+ authStatePath?: string | undefined;
228
+ clientId?: string | undefined;
229
+ clientSecret?: string | undefined;
230
+ redirectUri?: string | undefined;
231
+ }, {
232
+ serverUrl: string;
233
+ scopes?: string[] | undefined;
234
+ resource?: string | undefined;
235
+ authStatePath?: string | undefined;
236
+ clientId?: string | undefined;
237
+ clientSecret?: string | undefined;
238
+ redirectUri?: string | undefined;
239
+ }>>;
240
+ }, "strip", z.ZodTypeAny, {
241
+ accessToken?: string | undefined;
242
+ oauth?: {
243
+ serverUrl: string;
244
+ scopes?: string[] | undefined;
245
+ resource?: string | undefined;
246
+ authStatePath?: string | undefined;
247
+ clientId?: string | undefined;
248
+ clientSecret?: string | undefined;
249
+ redirectUri?: string | undefined;
250
+ } | undefined;
251
+ }, {
252
+ accessToken?: string | undefined;
253
+ oauth?: {
254
+ serverUrl: string;
255
+ scopes?: string[] | undefined;
256
+ resource?: string | undefined;
257
+ authStatePath?: string | undefined;
258
+ clientId?: string | undefined;
259
+ clientSecret?: string | undefined;
260
+ redirectUri?: string | undefined;
261
+ } | undefined;
262
+ }>, {
263
+ accessToken?: string | undefined;
264
+ oauth?: {
265
+ serverUrl: string;
266
+ scopes?: string[] | undefined;
267
+ resource?: string | undefined;
268
+ authStatePath?: string | undefined;
269
+ clientId?: string | undefined;
270
+ clientSecret?: string | undefined;
271
+ redirectUri?: string | undefined;
272
+ } | undefined;
273
+ }, {
274
+ accessToken?: string | undefined;
275
+ oauth?: {
276
+ serverUrl: string;
277
+ scopes?: string[] | undefined;
278
+ resource?: string | undefined;
279
+ authStatePath?: string | undefined;
280
+ clientId?: string | undefined;
281
+ clientSecret?: string | undefined;
282
+ redirectUri?: string | undefined;
283
+ } | undefined;
284
+ }>>;
285
+ }, "strip", z.ZodTypeAny, {
286
+ serverUrl: string;
287
+ transport: "http";
288
+ capabilities?: {
289
+ sampling?: Record<string, unknown> | undefined;
290
+ roots?: {
291
+ listChanged: boolean;
292
+ } | undefined;
293
+ } | undefined;
294
+ connectTimeoutMs?: number | undefined;
295
+ requestTimeoutMs?: number | undefined;
296
+ headers?: Record<string, string> | undefined;
297
+ auth?: {
298
+ accessToken?: string | undefined;
299
+ oauth?: {
300
+ serverUrl: string;
301
+ scopes?: string[] | undefined;
302
+ resource?: string | undefined;
303
+ authStatePath?: string | undefined;
304
+ clientId?: string | undefined;
305
+ clientSecret?: string | undefined;
306
+ redirectUri?: string | undefined;
307
+ } | undefined;
308
+ } | undefined;
309
+ }, {
310
+ serverUrl: string;
311
+ transport: "http";
312
+ capabilities?: {
313
+ sampling?: Record<string, unknown> | undefined;
314
+ roots?: {
315
+ listChanged: boolean;
316
+ } | undefined;
317
+ } | undefined;
318
+ connectTimeoutMs?: number | undefined;
319
+ requestTimeoutMs?: number | undefined;
320
+ headers?: Record<string, string> | undefined;
321
+ auth?: {
322
+ accessToken?: string | undefined;
323
+ oauth?: {
324
+ serverUrl: string;
325
+ scopes?: string[] | undefined;
326
+ resource?: string | undefined;
327
+ authStatePath?: string | undefined;
328
+ clientId?: string | undefined;
329
+ clientSecret?: string | undefined;
330
+ redirectUri?: string | undefined;
331
+ } | undefined;
332
+ } | undefined;
333
+ }>]>;
334
+ /**
335
+ * Validates an MCPConfig object
336
+ *
337
+ * @param config - The config to validate (typed `unknown`, so any value may be passed)
338
+ * @returns The validated config, typed as the broad MCPConfig interface; use isStdioConfig / isHttpConfig to narrow it to one transport
339
+ * @throws {z.ZodError} If validation fails
340
+ */
341
+ declare function validateMCPConfig(config: unknown): MCPConfig;
342
+ /**
343
+ * Type guard to check if a config is for stdio transport; when it returns true, `config` is narrowed so that `transport` is 'stdio' and `command` is a required string
344
+ */
345
+ declare function isStdioConfig(config: MCPConfig): config is MCPConfig & {
346
+ transport: 'stdio';
347
+ command: string;
348
+ };
349
+ /**
350
+ * Type guard to check if a config is for HTTP transport; when it returns true, `config` is narrowed so that `transport` is 'http' and `serverUrl` is a required string
351
+ */
352
+ declare function isHttpConfig(config: MCPConfig): config is MCPConfig & {
353
+ transport: 'http';
354
+ serverUrl: string;
355
+ };
356
+
357
+ /**
358
+ * Auth types for MCP OAuth integration
359
+ */
360
+ /**
361
+ * Stored OAuth tokens
362
+ */
363
+ interface StoredTokens {
364
+ /**
365
+ * OAuth access token
366
+ */
367
+ accessToken: string;
368
+ /**
369
+ * OAuth refresh token (if provided)
370
+ */
371
+ refreshToken?: string;
372
+ /**
373
+ * Token expiration timestamp (Unix milliseconds)
374
+ */
375
+ expiresAt?: number;
376
+ /**
377
+ * Token type (typically "Bearer")
378
+ */
379
+ tokenType: string;
380
+ /**
381
+ * Client ID that was used to obtain these tokens.
382
+ * Required for token refresh since refresh tokens are bound to the client.
383
+ */
384
+ clientId?: string;
385
+ }
386
+ /**
387
+ * Stored client information from Dynamic Client Registration (DCR); only `clientId` is required
388
+ */
389
+ interface StoredClientInfo {
390
+ /**
391
+ * Client ID from DCR
392
+ */
393
+ clientId: string;
394
+ /**
395
+ * Client secret from DCR (for confidential clients)
396
+ */
397
+ clientSecret?: string;
398
+ /**
399
+ * Timestamp at which the client ID was issued. NOTE(review): the unit is not stated here; RFC 7591 `client_id_issued_at` is in seconds since the epoch, whereas StoredTokens.expiresAt is documented as milliseconds - confirm
400
+ */
401
+ clientIdIssuedAt?: number;
402
+ /**
403
+ * Client secret expiration timestamp. NOTE(review): the unit is not stated here; RFC 7591 `client_secret_expires_at` is in seconds since the epoch, with 0 meaning "never expires" - confirm whether that convention is kept
404
+ */
405
+ clientSecretExpiresAt?: number;
406
+ }
407
+ /**
408
+ * Complete OAuth state persisted to disk for Playwright auth state pattern
409
+ */
410
+ interface StoredOAuthState {
411
+ /**
412
+ * OAuth tokens
413
+ */
414
+ tokens?: StoredTokens;
415
+ /**
416
+ * DCR client information
417
+ */
418
+ clientInfo?: StoredClientInfo;
419
+ /**
420
+ * PKCE code verifier (used during authorization flow)
421
+ */
422
+ codeVerifier?: string;
423
+ /**
424
+ * OAuth state parameter (for CSRF protection)
425
+ */
426
+ state?: string;
427
+ /**
428
+ * Timestamp when this state was saved
429
+ */
430
+ savedAt: number;
431
+ }
432
+ /**
433
+ * Configuration for OAuth setup flow
434
+ */
435
+ interface OAuthSetupConfig {
436
+ /**
437
+ * OAuth authorization server metadata URL
438
+ */
439
+ authServerUrl: string;
440
+ /**
441
+ * Scopes to request
442
+ */
443
+ scopes: Array<string>;
444
+ /**
445
+ * Resource indicator (RFC 8707)
446
+ */
447
+ resource?: string;
448
+ /**
449
+ * Login form selectors for automation
450
+ */
451
+ loginSelectors: {
452
+ /**
453
+ * Selector for username/email input field
454
+ */
455
+ usernameInput: string;
456
+ /**
457
+ * Selector for password input field
458
+ */
459
+ passwordInput: string;
460
+ /**
461
+ * Selector for login submit button
462
+ */
463
+ submitButton: string;
464
+ /**
465
+ * Selector for consent/authorize button (optional)
466
+ */
467
+ consentButton?: string;
468
+ };
469
+ /**
470
+ * Test user credentials
471
+ */
472
+ credentials: {
473
+ username: string;
474
+ password: string;
475
+ };
476
+ /**
477
+ * Path to save OAuth state file
478
+ */
479
+ outputPath: string;
480
+ /**
481
+ * Pre-registered client ID (optional, uses DCR if not provided)
482
+ */
483
+ clientId?: string;
484
+ /**
485
+ * Pre-registered client secret (optional)
486
+ */
487
+ clientSecret?: string;
488
+ /**
489
+ * Redirect URI for OAuth callback
490
+ */
491
+ redirectUri?: string;
492
+ /**
493
+ * Timeout for login flow in milliseconds (default: 30000)
494
+ */
495
+ timeoutMs?: number;
496
+ }
497
+ /**
498
+ * Result of token exchange or refresh
499
+ */
500
+ interface TokenResult {
501
+ /**
502
+ * Access token
503
+ */
504
+ accessToken: string;
505
+ /**
506
+ * Token type (typically "Bearer")
507
+ */
508
+ tokenType: string;
509
+ /**
510
+ * Expires in seconds
511
+ */
512
+ expiresIn?: number;
513
+ /**
514
+ * Refresh token (if provided)
515
+ */
516
+ refreshToken?: string;
517
+ /**
518
+ * Granted scopes (space-separated)
519
+ */
520
+ scope?: string;
521
+ }
522
+
523
+ /**
524
+ * OAuth client provider implementation for MCP SDK
525
+ *
526
+ * Implements the MCP SDK's OAuthClientProvider interface using file-based storage
527
+ * for integration with Playwright's auth state pattern.
528
+ */
529
+
530
+ /**
531
+ * Configuration for the Playwright OAuth client provider
532
+ */
533
+ interface PlaywrightOAuthClientProviderConfig {
534
+ /**
535
+ * Path to the auth state file (e.g., playwright/.auth/oauth-state.json)
536
+ */
537
+ storagePath: string;
538
+ /**
539
+ * OAuth redirect URI for callback
540
+ */
541
+ redirectUri: string;
542
+ /**
543
+ * Client metadata for DCR or display
544
+ */
545
+ clientMetadata?: Partial<OAuthClientMetadata>;
546
+ /**
547
+ * Pre-registered client ID (if not using DCR)
548
+ */
549
+ clientId?: string;
550
+ /**
551
+ * Pre-registered client secret (if not using DCR)
552
+ */
553
+ clientSecret?: string;
554
+ }
555
+ /**
556
+ * OAuth client provider that implements the MCP SDK's OAuthClientProvider interface
557
+ *
558
+ * Uses file-based storage for integration with Playwright's auth state pattern.
559
+ * Auth state is persisted to disk so it can be reused across test runs.
560
+ *
561
+ * @example
562
+ * ```typescript
563
+ * const provider = new PlaywrightOAuthClientProvider({
564
+ * storagePath: 'playwright/.auth/oauth-state.json',
565
+ * redirectUri: 'http://localhost:3000/callback',
566
+ * });
567
+ *
568
+ * const transport = new StreamableHTTPClientTransport(serverUrl, {
569
+ * authProvider: provider,
570
+ * });
571
+ * ```
572
+ */
573
+ declare class PlaywrightOAuthClientProvider implements OAuthClientProvider {
574
+ private readonly config;
575
+ private cachedState;
576
+ private stateParam;
577
+ constructor(config: PlaywrightOAuthClientProviderConfig);
578
+ /**
579
+ * The URL to redirect the user agent to after authorization
580
+ */
581
+ get redirectUrl(): string;
582
+ /**
583
+ * Metadata about this OAuth client
584
+ */
585
+ get clientMetadata(): OAuthClientMetadata;
586
+ /**
587
+ * Returns an OAuth2 state parameter
588
+ */
589
+ state(): string;
590
+ /**
591
+ * Loads information about this OAuth client
592
+ */
593
+ clientInformation(): Promise<OAuthClientInformationFull | undefined>;
594
+ /**
595
+ * Saves client information from Dynamic Client Registration
596
+ */
597
+ saveClientInformation(clientInformation: OAuthClientInformationFull): Promise<void>;
598
+ /**
599
+ * Loads any existing OAuth tokens for the current session
600
+ */
601
+ tokens(): Promise<OAuthTokens | undefined>;
602
+ /**
603
+ * Stores new OAuth tokens for the current session
604
+ */
605
+ saveTokens(tokens: OAuthTokens): Promise<void>;
606
+ /**
607
+ * Invoked to redirect the user agent to the given URL
608
+ *
609
+ * In a testing context, this is typically handled by Playwright automation.
610
+ * This implementation throws an error to signal that the caller needs to
611
+ * handle the redirect externally.
612
+ */
613
+ redirectToAuthorization(authorizationUrl: URL): Promise<void>;
614
+ /**
615
+ * Saves a PKCE code verifier for the current session
616
+ */
617
+ saveCodeVerifier(codeVerifier: string): Promise<void>;
618
+ /**
619
+ * Loads the PKCE code verifier for the current session
620
+ */
621
+ codeVerifier(): Promise<string>;
622
+ /**
623
+ * Invalidates the specified credentials
624
+ */
625
+ invalidateCredentials(scope: 'all' | 'client' | 'tokens' | 'verifier'): Promise<void>;
626
+ private loadState;
627
+ private saveState;
628
+ private deleteState;
629
+ private createEmptyState;
630
+ private generateRandomString;
631
+ }
632
+
633
+ /**
634
+ * Static token authentication utilities
635
+ *
636
+ * Simple utilities for pre-acquired token authentication
637
+ */
638
+ /**
639
+ * Creates HTTP headers for static token authentication
640
+ *
641
+ * @param accessToken - The pre-acquired access token
642
+ * @param tokenType - The token type (default: "Bearer")
643
+ * @returns HTTP headers with Authorization header
644
+ *
645
+ * @example
646
+ * ```typescript
647
+ * const headers = createTokenAuthHeaders(process.env.MCP_ACCESS_TOKEN);
648
+ * // { Authorization: 'Bearer eyJ...' }
649
+ * ```
650
+ */
651
+ declare function createTokenAuthHeaders(accessToken: string, tokenType?: string): Record<string, string>;
652
+ /**
653
+ * Validates that an access token is present and non-empty
654
+ *
655
+ * @param accessToken - The access token to validate
656
+ * @throws Error if token is missing or empty
657
+ */
658
+ declare function validateAccessToken(accessToken: string | undefined): void;
659
+ /**
660
+ * Checks if a token appears to be expired based on common JWT structure
661
+ *
662
+ * Note: This is a best-effort check and may not work for all token formats.
663
+ * For reliable expiration checking, use the token's associated expiration time.
664
+ *
665
+ * @param accessToken - The access token to check
666
+ * @returns true if the token appears to be expired, false otherwise
667
+ */
668
+ declare function isTokenExpired(accessToken: string): boolean;
669
+ /**
670
+ * Checks if a token will expire within the specified buffer time
671
+ *
672
+ * @param expiresAt - Token expiration timestamp in milliseconds; may be undefined (NOTE(review): the result for an undefined timestamp is not visible from this declaration - confirm before relying on it)
673
+ * @param bufferMs - Buffer time in milliseconds (default: 60000 = 1 minute)
674
+ * @returns true if the token will expire within the buffer time
675
+ */
676
+ declare function isTokenExpiringSoon(expiresAt: number | undefined, bufferMs?: number): boolean;
677
+
678
+ /**
679
+ * OAuth setup utility for Playwright globalSetup
680
+ *
681
+ * Performs the browser-based OAuth flow and saves the auth state
682
+ * for reuse across tests following Playwright's auth state pattern.
683
+ */
684
+
685
+ /**
686
+ * Performs the OAuth authorization flow using Playwright browser automation
687
+ *
688
+ * This function is designed to be used in Playwright's globalSetup to
689
+ * authenticate once before running tests. The resulting auth state is
690
+ * saved to disk and reused across tests.
691
+ *
692
+ * @param config - OAuth setup configuration
693
+ *
694
+ * @example
695
+ * ```typescript
696
+ * // global-setup.ts
697
+ * import { performOAuthSetup } from '@gleanwork/mcp-server-tester';
698
+ *
699
+ * export default async function globalSetup() {
700
+ * await performOAuthSetup({
701
+ * authServerUrl: 'https://auth.example.com',
702
+ * scopes: ['mcp:read', 'mcp:write'],
703
+ * loginSelectors: {
704
+ * usernameInput: '#username',
705
+ * passwordInput: '#password',
706
+ * submitButton: 'button[type="submit"]',
707
+ * },
708
+ * credentials: {
709
+ * username: process.env.TEST_USER!,
710
+ * password: process.env.TEST_PASSWORD!,
711
+ * },
712
+ * outputPath: 'playwright/.auth/oauth-state.json',
713
+ * });
714
+ * }
715
+ * ```
716
+ */
717
+ declare function performOAuthSetup(config: OAuthSetupConfig): Promise<void>;
718
+ /**
719
+ * Performs OAuth setup only if valid state doesn't already exist
720
+ *
721
+ * Use this in globalSetup to avoid re-authenticating on every test run.
722
+ *
723
+ * @param config - OAuth setup configuration
724
+ *
725
+ * @example
726
+ * ```typescript
727
+ * // global-setup.ts
728
+ * export default async function globalSetup() {
729
+ * await performOAuthSetupIfNeeded({
730
+ * authServerUrl: 'https://auth.example.com',
731
+ * scopes: ['mcp:read'],
732
+ * loginSelectors: { ... },
733
+ * credentials: { ... },
734
+ * outputPath: 'playwright/.auth/oauth-state.json',
735
+ * });
736
+ * }
737
+ * ```
738
+ */
739
+ declare function performOAuthSetupIfNeeded(config: OAuthSetupConfig): Promise<void>;
740
+
741
+ /**
742
+ * OAuth flow utilities using oauth4webapi
743
+ *
744
+ * Implements OAuth 2.1 with PKCE as required by MCP specification
745
+ */
746
+
747
+ /**
748
+ * Discovered OAuth authorization server metadata
749
+ */
750
+ interface AuthServerMetadata {
751
+ /**
752
+ * The oauth4webapi AuthorizationServer object
753
+ */
754
+ server: oauth.AuthorizationServer;
755
+ /**
756
+ * Issuer URL
757
+ */
758
+ issuer: string;
759
+ }
760
+
761
+ /**
762
+ * OAuth Protected Resource and Authorization Server discovery
763
+ *
764
+ * Implements RFC 9728 (OAuth Protected Resource Metadata) and
765
+ * RFC 8414 (Authorization Server Metadata) for MCP servers.
766
+ */
767
+
768
+ /**
769
+ * MCP Protocol version header value
770
+ */
771
+ declare const MCP_PROTOCOL_VERSION = "2025-06-18";
772
+ /**
773
+ * Protected Resource Metadata (RFC 9728); field names keep the snake_case spelling of the RFC's JSON document, and only `resource` is required
774
+ */
775
+ interface ProtectedResourceMetadata {
776
+ /**
777
+ * The protected resource's resource identifier (a URL)
778
+ */
779
+ resource: string;
780
+ /**
781
+ * Issuer identifiers (URLs) of the authorization servers that can be used with this protected resource
782
+ */
783
+ authorization_servers?: Array<string>;
784
+ /**
785
+ * Scopes supported by the protected resource
786
+ */
787
+ scopes_supported?: Array<string>;
788
+ /**
789
+ * Supported methods of sending a bearer token to the resource (RFC 9728 defines the values "header", "body" and "query"); these are delivery methods, not token formats
790
+ */
791
+ bearer_methods_supported?: Array<string>;
792
+ /**
793
+ * URL of human-readable documentation for the protected resource
794
+ */
795
+ resource_documentation?: string;
796
+ /**
797
+ * JWS signing algorithms the protected resource supports for signing its responses
798
+ */
799
+ resource_signing_alg_values_supported?: Array<string>;
800
+ }
801
+ /**
802
+ * Result of protected resource discovery
803
+ */
804
+ interface ProtectedResourceDiscoveryResult {
805
+ /**
806
+ * The discovered metadata
807
+ */
808
+ metadata: ProtectedResourceMetadata;
809
+ /**
810
+ * The URL where metadata was found
811
+ */
812
+ discoveryUrl: string;
813
+ /**
814
+ * Whether path-aware discovery was used (vs base discovery)
815
+ */
816
+ usedPathAwareDiscovery: boolean;
817
+ }
818
+ /**
819
+ * Discovers protected resource metadata per RFC 9728
820
+ *
821
+ * Follows RFC 9728 Section 3.1 (Protected Resource Metadata Request) for path-aware discovery:
822
+ * 1. First tries: {origin}/.well-known/oauth-protected-resource{pathname}
823
+ * 2. Falls back to: {origin}/.well-known/oauth-protected-resource
824
+ *
825
+ * @param mcpServerUrl - The MCP server URL
826
+ * @returns Protected resource discovery result: the metadata, the URL it was found at, and whether path-aware discovery was used
827
+ * @throws Error if discovery fails completely (NOTE(review): presumably a DiscoveryError - confirm against the implementation)
828
+ *
829
+ * @example
830
+ * const result = await discoverProtectedResource('https://api.example.com/mcp/default');
831
+ * console.log(result.metadata.authorization_servers);
832
+ */
833
+ declare function discoverProtectedResource(mcpServerUrl: string): Promise<ProtectedResourceDiscoveryResult>;
834
+ /**
835
+ * Error thrown when discovery fails; optionally carries a numeric `status` and a `url` string (NOTE(review): presumably the HTTP status code and the URL that was requested - confirm against the implementation)
836
+ */
837
+ declare class DiscoveryError extends Error {
838
+ readonly status?: number | undefined;
839
+ readonly url?: string | undefined;
840
+ constructor(message: string, status?: number | undefined, url?: string | undefined);
841
+ }
842
+ /**
843
+ * Discovers OAuth Authorization Server metadata per RFC 8414
844
+ *
845
+ * Wraps oauth4webapi's discovery with MCP-specific headers.
846
+ *
847
+ * @param authServerUrl - The authorization server URL
848
+ * @returns Authorization server metadata
849
+ * @throws Error if discovery fails
850
+ *
851
+ * @example
852
+ * const authServer = await discoverAuthorizationServer('https://auth.example.com');
853
+ * console.log(authServer.server.token_endpoint);
854
+ */
855
+ declare function discoverAuthorizationServer(authServerUrl: string): Promise<AuthServerMetadata>;
856
+
857
+ /**
858
+ * OAuth token storage with environment variable support for CI/CD
859
+ *
860
+ * Provides file-based storage for OAuth state per MCP server, with support
861
+ * for token injection via environment variables for automated testing.
862
+ */
863
+
864
+ /**
865
+ * Combined server metadata (auth server + protected resource)
866
+ */
867
+ interface StoredServerMetadata {
868
+ /**
869
+ * Authorization server metadata
870
+ */
871
+ authServer: AuthServerMetadata;
872
+ /**
873
+ * Protected resource metadata
874
+ */
875
+ protectedResource: ProtectedResourceMetadata;
876
+ /**
877
+ * Timestamp when metadata was discovered
878
+ */
879
+ discoveredAt: number;
880
+ }
881
+ /**
882
+ * Environment variable names for CI/CD token injection
883
+ */
884
+ declare const ENV_VAR_NAMES: {
885
+ readonly accessToken: "MCP_ACCESS_TOKEN";
886
+ readonly refreshToken: "MCP_REFRESH_TOKEN";
887
+ readonly tokenType: "MCP_TOKEN_TYPE";
888
+ readonly expiresAt: "MCP_TOKEN_EXPIRES_AT";
889
+ };
890
+ /**
891
+ * Reads tokens from environment variables (for CI/CD)
892
+ *
893
+ * @returns StoredTokens if MCP_ACCESS_TOKEN is set, null otherwise
894
+ */
895
+ declare function loadTokensFromEnv(): StoredTokens | null;
896
+ /**
897
+ * Programmatically inject tokens into storage (for CI/CD setup)
898
+ *
899
+ * @param serverUrl - The MCP server URL
900
+ * @param tokens - The tokens to inject
901
+ * @param stateDir - Optional custom state directory
902
+ */
903
+ declare function injectTokens(serverUrl: string, tokens: StoredTokens, stateDir?: string): Promise<void>;
904
+ /**
905
+ * Load stored OAuth tokens for an MCP server
906
+ *
907
+ * Reads tokens from the standard storage location for the given server URL.
908
+ * Tokens are stored by `mcp-server-tester login` or `injectTokens()`.
909
+ *
910
+ * @param serverUrl - The MCP server URL
911
+ * @param stateDir - Optional custom state directory
912
+ * @returns StoredTokens if found, null otherwise
913
+ *
914
+ * @example
915
+ * ```typescript
916
+ * // After running: npx mcp-server-tester login https://api.example.com/mcp
917
+ * const tokens = await loadTokens('https://api.example.com/mcp');
918
+ * if (tokens) {
919
+ * console.log('Access token:', tokens.accessToken);
920
+ * }
921
+ * ```
922
+ */
923
+ declare function loadTokens(serverUrl: string, stateDir?: string): Promise<StoredTokens | null>;
924
+ /**
925
+ * Check if valid OAuth tokens exist for an MCP server
926
+ *
927
+ * Returns true if tokens exist and are not expired (with buffer).
928
+ * Use this to check if authentication is needed before making requests.
929
+ *
930
+ * @param serverUrl - The MCP server URL
931
+ * @param options - Optional configuration
932
+ * @param options.stateDir - Custom state directory
933
+ * @param options.bufferMs - Buffer time before expiration (default: 60000ms)
934
+ * @returns true if valid (non-expired) tokens exist
935
+ *
936
+ * @example
937
+ * ```typescript
938
+ * if (await hasValidTokens('https://api.example.com/mcp')) {
939
+ * // Use stored tokens
940
+ * const tokens = await loadTokens('https://api.example.com/mcp');
941
+ * } else {
942
+ * console.log('Run: npx mcp-server-tester login https://api.example.com/mcp');
943
+ * }
944
+ * ```
945
+ */
946
+ declare function hasValidTokens(serverUrl: string, options?: {
947
+ stateDir?: string;
948
+ bufferMs?: number;
949
+ }): Promise<boolean>;
950
+
951
+ /**
952
+ * CLI OAuth client for command-line authentication flows
953
+ *
954
+ * Provides browser-based OAuth authentication for CLI environments,
955
+ * with support for environment variable token injection for CI/CD.
956
+ */
957
+ /**
958
+ * Configuration for CLI OAuth client
959
+ */
960
+ interface CLIOAuthClientConfig {
961
+ /**
962
+ * MCP server URL (for protected resource discovery)
963
+ */
964
+ mcpServerUrl: string;
965
+ /**
966
+ * Scopes to request (optional, uses discovered scopes if not provided)
967
+ */
968
+ scopes?: Array<string>;
969
+ /**
970
+ * Custom storage directory
971
+ */
972
+ stateDir?: string;
973
+ /**
974
+ * Pre-registered client ID (skips DCR if provided)
975
+ */
976
+ clientId?: string;
977
+ /**
978
+ * Pre-registered client secret
979
+ */
980
+ clientSecret?: string;
981
+ /**
982
+ * Preferred callback port (default: random available port)
983
+ */
984
+ callbackPort?: number;
985
+ /**
986
+ * Timeout for OAuth flow in milliseconds (default: 300000 = 5 min)
987
+ */
988
+ timeoutMs?: number;
989
+ /**
990
+ * Client name for DCR registration
991
+ */
992
+ clientName?: string;
993
+ }
994
+ /**
995
+ * Result of CLI OAuth authentication
996
+ */
997
+ interface CLIOAuthResult {
998
+ /**
999
+ * Access token
1000
+ */
1001
+ accessToken: string;
1002
+ /**
1003
+ * Token type (typically "Bearer")
1004
+ */
1005
+ tokenType: string;
1006
+ /**
1007
+ * Expiration timestamp (Unix ms)
1008
+ */
1009
+ expiresAt?: number;
1010
+ /**
1011
+ * Whether token was refreshed vs newly acquired
1012
+ */
1013
+ refreshed: boolean;
1014
+ /**
1015
+ * Scopes that were requested (only set for new authentications)
1016
+ */
1017
+ requestedScopes?: string[];
1018
+ /**
1019
+ * Whether token came from environment variables
1020
+ */
1021
+ fromEnv: boolean;
1022
+ }
1023
+ /**
1024
+ * CLI OAuth client for command-line authentication flows
1025
+ */
1026
+ declare class CLIOAuthClient {
1027
+ private readonly config;
1028
+ private readonly storage;
1029
+ constructor(config: CLIOAuthClientConfig);
1030
+ /**
1031
+ * Get a valid access token, authenticating if necessary
1032
+ *
1033
+ * Token resolution priority:
1034
+ * 1. Check environment variables (for CI/CD)
1035
+ * 2. Check file storage for cached tokens
1036
+ * 3. Try to refresh if expired but refresh token exists
1037
+ * 4. Run full OAuth flow if needed
1038
+ */
1039
+ getAccessToken(): Promise<CLIOAuthResult>;
1040
+ /**
1041
+ * Try to get a valid access token without triggering browser auth
1042
+ *
1043
+ * Returns null if no valid token is available (no stored tokens,
1044
+ * expired without refresh token, or refresh failed). Unlike getAccessToken(),
1045
+ * this will NOT open a browser for authentication.
1046
+ *
1047
+ * Use this for CLI commands that should prompt the user to run `login`
1048
+ * instead of automatically starting the OAuth flow.
1049
+ */
1050
+ tryGetAccessToken(): Promise<CLIOAuthResult | null>;
1051
+ /**
1052
+ * Force a new authentication flow
1053
+ */
1054
+ authenticate(): Promise<CLIOAuthResult>;
1055
+ /**
1056
+ * Check if stored credentials exist (may be expired)
1057
+ */
1058
+ hasStoredCredentials(): Promise<boolean>;
1059
+ /**
1060
+ * Clear stored credentials
1061
+ */
1062
+ clearCredentials(): Promise<void>;
1063
+ /**
1064
+ * Discover protected resource and authorization server
1065
+ */
1066
+ private discoverServers;
1067
+ /**
1068
+ * Get existing client or register new one via DCR
1069
+ */
1070
+ private getOrRegisterClient;
1071
+ /**
1072
+ * Register a new client via Dynamic Client Registration
1073
+ */
1074
+ private registerClient;
1075
+ /**
1076
+ * Perform the full OAuth authorization flow
1077
+ */
1078
+ private performOAuthFlow;
1079
+ /**
1080
+ * Refresh an expired token
1081
+ *
1082
+ * Uses the clientId stored with the tokens (if available) to ensure
1083
+ * the refresh request uses the same client that obtained the original tokens.
1084
+ * This is important because refresh tokens are bound to the client_id.
1085
+ */
1086
+ private refreshStoredToken;
1087
+ /**
1088
+ * Start local callback server
1089
+ */
1090
+ private startCallbackServer;
1091
+ /**
1092
+ * Open browser or print URL for headless environments
1093
+ */
1094
+ private openBrowserOrPrintUrl;
1095
+ /**
1096
+ * Convert TokenResult to StoredTokens
1097
+ *
1098
+ * @param result - Token result from exchange or refresh
1099
+ * @param clientId - Client ID that was used to obtain these tokens
1100
+ */
1101
+ private tokenResultToStoredTokens;
1102
+ /**
1103
+ * HTML page for successful authentication
1104
+ */
1105
+ private successHtml;
1106
+ /**
1107
+ * HTML page for authentication error
1108
+ */
1109
+ private errorHtml;
1110
+ }
1111
+
1112
+ /**
1113
+ * Options for creating an MCP client
1114
+ */
1115
+ interface CreateMCPClientOptions {
1116
+ /**
1117
+ * Client information (name and version)
1118
+ */
1119
+ clientInfo?: {
1120
+ name?: string;
1121
+ version?: string;
1122
+ };
1123
+ /**
1124
+ * OAuth client provider for authentication
1125
+ *
1126
+ * When provided, the MCP SDK handles OAuth flow automatically.
1127
+ * This takes precedence over static token auth in config.auth.accessToken.
1128
+ */
1129
+ authProvider?: OAuthClientProvider;
1130
+ }
1131
+ /**
1132
+ * Creates and connects an MCP client based on the provided configuration
1133
+ *
1134
+ * @param config - MCP configuration (will be validated)
1135
+ * @param options - Optional client options including auth provider
1136
+ * @returns Connected MCP Client instance
1137
+ * @throws {Error} If config is invalid or connection fails
1138
+ *
1139
+ * @example
1140
+ * // Stdio transport
1141
+ * const client = await createMCPClientForConfig({
1142
+ * transport: 'stdio',
1143
+ * command: 'node',
1144
+ * args: ['server.js']
1145
+ * });
1146
+ *
1147
+ * @example
1148
+ * // HTTP transport with static token auth
1149
+ * const client = await createMCPClientForConfig({
1150
+ * transport: 'http',
1151
+ * serverUrl: 'http://localhost:3000/mcp',
1152
+ * auth: { accessToken: 'your-token' }
1153
+ * });
1154
+ *
1155
+ * @example
1156
+ * // HTTP transport with OAuth provider
1157
+ * const client = await createMCPClientForConfig(
1158
+ * { transport: 'http', serverUrl: 'http://localhost:3000/mcp' },
1159
+ * { authProvider: myOAuthProvider }
1160
+ * );
1161
+ */
1162
+ declare function createMCPClientForConfig(config: MCPConfig, options?: CreateMCPClientOptions): Promise<Client>;
1163
+ /**
1164
+ * Safely closes an MCP client connection
1165
+ *
1166
+ * @param client - The client to close
1167
+ */
1168
+ declare function closeMCPClient(client: Client): Promise<void>;
1169
+
1170
+ /**
1171
+ * A single content block from an MCP response
1172
+ */
1173
+ interface ContentBlock {
1174
+ type: string;
1175
+ text?: string;
1176
+ data?: unknown;
1177
+ mimeType?: string;
1178
+ }
1179
+ /**
1180
+ * Normalized representation of an MCP tool response
1181
+ *
1182
+ * This provides a consistent interface regardless of the response format
1183
+ * returned by the MCP server.
1184
+ */
1185
+ interface NormalizedToolResponse {
1186
+ /**
1187
+ * Extracted text content (concatenated from all text blocks)
1188
+ */
1189
+ text: string;
1190
+ /**
1191
+ * Original raw response from the MCP SDK
1192
+ */
1193
+ raw: CallToolResult;
1194
+ /**
1195
+ * Whether the tool call resulted in an error
1196
+ */
1197
+ isError: boolean;
1198
+ /**
1199
+ * Parsed content blocks from the response
1200
+ */
1201
+ contentBlocks: ContentBlock[];
1202
+ /**
1203
+ * Structured content if present (parsed JSON or raw data)
1204
+ */
1205
+ structuredContent: unknown;
1206
+ }
1207
+ /**
1208
+ * Normalizes an MCP CallToolResult into a consistent format
1209
+ *
1210
+ * @param result - Raw CallToolResult from the MCP SDK
1211
+ * @returns Normalized response with extracted text, content blocks, etc.
1212
+ *
1213
+ * @example
1214
+ * ```typescript
1215
+ * const result = await client.callTool({ name: 'read_file', arguments: { path: 'readme.txt' } });
1216
+ * const normalized = normalizeToolResponse(result);
1217
+ *
1218
+ * console.log(normalized.text); // "Hello World"
1219
+ * console.log(normalized.isError); // false
1220
+ * console.log(normalized.contentBlocks); // [{ type: 'text', text: 'Hello World' }]
1221
+ * ```
1222
+ */
1223
+ declare function normalizeToolResponse(result: CallToolResult): NormalizedToolResponse;
1224
+ /**
1225
+ * Extracts just the text content from a normalized or raw response
1226
+ *
1227
+ * This is a convenience function that works with any of:
1228
+ * - Raw CallToolResult from the MCP SDK
1229
+ * - NormalizedToolResponse from normalizeToolResponse()
1230
+ * - Plain strings or other legacy formats
1231
+ *
1232
+ * @param response - Response in any supported format
1233
+ * @returns Extracted text content
1234
+ */
1235
+ declare function extractText(response: unknown): string;
1236
+
1237
+ /**
1238
+ * Validator Types
1239
+ *
1240
+ * Core types for the unified assertion architecture.
1241
+ * These types are used by both Playwright matchers and the eval runner.
1242
+ */
1243
+
1244
+ /**
1245
+ * Result of a validation operation
1246
+ */
1247
+ interface ValidationResult {
1248
+ /** Whether the validation passed */
1249
+ pass: boolean;
1250
+ /** Human-readable message explaining the result */
1251
+ message: string;
1252
+ /** Additional structured details about the validation */
1253
+ details?: Record<string, unknown>;
1254
+ }
1255
+ /**
1256
+ * Options for text validation
1257
+ */
1258
+ interface TextValidatorOptions {
1259
+ /** Whether to perform case-sensitive matching (default: true) */
1260
+ caseSensitive?: boolean;
1261
+ }
1262
+ /**
1263
+ * Options for response size validation
1264
+ */
1265
+ interface SizeValidatorOptions {
1266
+ /** Maximum allowed size in bytes */
1267
+ maxBytes?: number;
1268
+ /** Minimum required size in bytes */
1269
+ minBytes?: number;
1270
+ }
1271
+ /**
1272
+ * Options for schema validation
1273
+ */
1274
+ interface SchemaValidatorOptions {
1275
+ /** Whether to use strict mode (fail on extra properties) */
1276
+ strict?: boolean;
1277
+ }
1278
+ /**
1279
+ * Options for pattern validation
1280
+ */
1281
+ interface PatternValidatorOptions {
1282
+ /** Whether to perform case-sensitive matching (default: true) */
1283
+ caseSensitive?: boolean;
1284
+ }
1285
+ /**
1286
+ * Built-in sanitizer names for common variable patterns
1287
+ */
1288
+ type BuiltInSanitizer = 'timestamp' | 'uuid' | 'iso-date' | 'objectId' | 'jwt';
1289
+ /**
1290
+ * Custom regex-based sanitizer
1291
+ */
1292
+ interface RegexSanitizer {
1293
+ /** Regex pattern to match */
1294
+ pattern: string | RegExp;
1295
+ /** Replacement string (default: "[SANITIZED]") */
1296
+ replacement?: string;
1297
+ }
1298
+ /**
1299
+ * Field removal sanitizer - removes specified fields from objects
1300
+ */
1301
+ interface FieldRemovalSanitizer {
1302
+ /** Field paths to remove (supports dot notation for nested fields) */
1303
+ remove: string[];
1304
+ }
1305
+ /**
1306
+ * Snapshot sanitizer configuration
1307
+ *
1308
+ * Sanitizers transform response data before snapshot comparison,
1309
+ * allowing variable content (timestamps, IDs, etc.) to be normalized.
1310
+ *
1311
+ * Can be:
1312
+ * - A built-in sanitizer name: 'timestamp', 'uuid', 'iso-date', 'objectId', 'jwt'
1313
+ * - A regex sanitizer: { pattern: /regex/, replacement: '[REPLACED]' }
1314
+ * - A field removal sanitizer: { remove: ['field1', 'nested.field'] }
1315
+ */
1316
+ type SnapshotSanitizer = BuiltInSanitizer | RegexSanitizer | FieldRemovalSanitizer;
1317
+ /**
1318
+ * Schema registry for named schemas in datasets
1319
+ */
1320
+ type SchemaRegistry = Record<string, ZodType>;
1321
+
1322
+ /**
1323
+ * Response Validator
1324
+ *
1325
+ * Validates that a response exactly matches an expected value.
1326
+ */
1327
+
1328
+ /**
1329
+ * Validates that a response exactly matches the expected value
1330
+ *
1331
+ * Performs deep equality comparison using JSON serialization.
1332
+ *
1333
+ * @param actual - The actual response
1334
+ * @param expected - The expected response
1335
+ * @returns Validation result
1336
+ *
1337
+ * @example
1338
+ * ```typescript
1339
+ * const result = validateResponse(response, { status: 'ok', count: 42 });
1340
+ * if (!result.pass) {
1341
+ * console.log(result.message);
1342
+ * }
1343
+ * ```
1344
+ */
1345
+ declare function validateResponse(actual: unknown, expected: unknown): ValidationResult;
1346
+
1347
+ /**
1348
+ * Schema Validator
1349
+ *
1350
+ * Validates that a response matches a Zod schema.
1351
+ */
1352
+
1353
+ /**
1354
+ * Validates that a response matches a Zod schema
1355
+ *
1356
+ * Attempts to parse the response with the provided Zod schema.
1357
+ * If the response is a text representation of JSON, it will be parsed first.
1358
+ *
1359
+ * @param response - The response to validate
1360
+ * @param schema - The Zod schema to validate against
1361
+ * @param options - Validation options
1362
+ * @returns Validation result
1363
+ *
1364
+ * @example
1365
+ * ```typescript
1366
+ * import { z } from 'zod';
1367
+ *
1368
+ * const WeatherSchema = z.object({
1369
+ * temperature: z.number(),
1370
+ * conditions: z.string(),
1371
+ * });
1372
+ *
1373
+ * const result = validateSchema(response, WeatherSchema);
1374
+ * if (!result.pass) {
1375
+ * console.log(result.message);
1376
+ * }
1377
+ * ```
1378
+ */
1379
+ declare function validateSchema(response: unknown, schema: ZodType, options?: SchemaValidatorOptions): ValidationResult;
1380
+
1381
+ /**
1382
+ * Text Validator
1383
+ *
1384
+ * Validates that a response contains expected text substrings.
1385
+ */
1386
+
1387
+ /**
1388
+ * Validates that a response contains all expected text substrings
1389
+ *
1390
+ * Extracts text from the response and checks that each expected substring
1391
+ * is present. By default, matching is case-sensitive.
1392
+ *
1393
+ * @param response - The response to validate
1394
+ * @param expected - Expected substring(s) to find
1395
+ * @param options - Validation options
1396
+ * @returns Validation result
1397
+ *
1398
+ * @example
1399
+ * ```typescript
1400
+ * const result = validateText(response, ['temperature', 'conditions']);
1401
+ * if (!result.pass) {
1402
+ * console.log(result.message);
1403
+ * }
1404
+ *
1405
+ * // Case-insensitive matching
1406
+ * const result2 = validateText(response, 'HELLO', { caseSensitive: false });
1407
+ * ```
1408
+ */
1409
+ declare function validateText(response: unknown, expected: string | string[], options?: TextValidatorOptions): ValidationResult;
1410
+
1411
+ /**
1412
+ * Pattern Validator
1413
+ *
1414
+ * Validates that a response matches regex patterns.
1415
+ */
1416
+
1417
+ /**
1418
+ * Validates that a response matches all expected regex patterns
1419
+ *
1420
+ * Extracts text from the response and checks that each pattern matches.
1421
+ * Patterns can be strings (which are compiled to RegExp) or RegExp objects.
1422
+ *
1423
+ * @param response - The response to validate
1424
+ * @param patterns - Expected pattern(s) to match
1425
+ * @param options - Validation options
1426
+ * @returns Validation result
1427
+ *
1428
+ * @example
1429
+ * ```typescript
1430
+ * // String pattern
1431
+ * const result = validatePattern(response, 'temperature: \\d+');
1432
+ *
1433
+ * // RegExp pattern
1434
+ * const result2 = validatePattern(response, /temperature: \d+/);
1435
+ *
1436
+ * // Multiple patterns
1437
+ * const result3 = validatePattern(response, [
1438
+ * /temperature: \d+/,
1439
+ * /humidity: \d+%/,
1440
+ * ]);
1441
+ *
1442
+ * // Case-insensitive matching
1443
+ * const result4 = validatePattern(response, 'HELLO', { caseSensitive: false });
1444
+ * ```
1445
+ */
1446
+ declare function validatePattern(response: unknown, patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): ValidationResult;
1447
+
1448
+ /**
1449
+ * Error Validator
1450
+ *
1451
+ * Validates error response behavior.
1452
+ */
1453
+
1454
+ /**
1455
+ * Validates that a response is (or is not) an error
1456
+ *
1457
+ * Can check for:
1458
+ * - Any error (expected = true)
1459
+ * - No error (expected = false)
1460
+ * - Error with specific message(s) (expected = string or string[])
1461
+ *
1462
+ * @param response - The response to validate
1463
+ * @param expected - What to expect (true for any error, false for no error, string for a specific message, string[] for any one of several messages)
1464
+ * @returns Validation result
1465
+ *
1466
+ * @example
1467
+ * ```typescript
1468
+ * // Expect any error
1469
+ * const result = validateError(response, true);
1470
+ *
1471
+ * // Expect no error
1472
+ * const result2 = validateError(response, false);
1473
+ *
1474
+ * // Expect error with specific message
1475
+ * const result3 = validateError(response, 'File not found');
1476
+ *
1477
+ * // Expect error containing one of several messages
1478
+ * const result4 = validateError(response, ['not found', 'does not exist']);
1479
+ * ```
1480
+ */
1481
+ declare function validateError(response: unknown, expected?: boolean | string | string[]): ValidationResult;
1482
+
1483
+ /**
1484
+ * Size Validator
1485
+ *
1486
+ * Validates that a response meets size constraints.
1487
+ */
1488
+
1489
+ /**
1490
+ * Validates that a response meets size constraints
1491
+ *
1492
+ * Checks that the response size in bytes is within the specified bounds.
1493
+ * At least one of minBytes or maxBytes must be provided.
1494
+ *
1495
+ * @param response - The response to validate
1496
+ * @param options - Size constraints
1497
+ * @returns Validation result
1498
+ *
1499
+ * @example
1500
+ * ```typescript
1501
+ * // Maximum size check
1502
+ * const result = validateSize(response, { maxBytes: 10000 });
1503
+ *
1504
+ * // Minimum size check
1505
+ * const result2 = validateSize(response, { minBytes: 100 });
1506
+ *
1507
+ * // Both bounds
1508
+ * const result3 = validateSize(response, { minBytes: 100, maxBytes: 10000 });
1509
+ * ```
1510
+ */
1511
+ declare function validateSize(response: unknown, options: SizeValidatorOptions): ValidationResult;
1512
+
1513
+ /**
1514
+ * Validator Utilities
1515
+ *
1516
+ * Shared utility functions for validation operations.
1517
+ * Re-exports core utilities from mcp/response.ts and adds validation-specific helpers.
1518
+ */
1519
+
1520
+ /**
1521
+ * Gets the size of a response in bytes
1522
+ *
1523
+ * Serializes the response to JSON (with pretty printing for consistency)
1524
+ * and returns the byte length using UTF-8 encoding.
1525
+ *
1526
+ * @param response - Response in any format
1527
+ * @returns Size in bytes
1528
+ */
1529
+ declare function getResponseSizeBytes(response: unknown): number;
1530
+ /**
1531
+ * Normalizes whitespace in text for consistent comparison
1532
+ *
1533
+ * Collapses multiple whitespace characters (spaces, tabs, newlines) into single spaces
1534
+ * and trims leading/trailing whitespace.
1535
+ *
1536
+ * @param text - Text to normalize
1537
+ * @returns Normalized text with collapsed whitespace
1538
+ *
1539
+ * @example
1540
+ * ```typescript
1541
+ * normalizeWhitespace(' hello\n\n world ');
1542
+ * // Returns: "hello world"
1543
+ * ```
1544
+ */
1545
+ declare function normalizeWhitespace(text: string): string;
1546
+
1547
+ /**
1548
+ * Usage metrics from Claude Agent SDK response
1549
+ */
1550
+ interface UsageMetrics {
1551
+ /**
1552
+ * Number of input tokens consumed
1553
+ */
1554
+ inputTokens: number;
1555
+ /**
1556
+ * Number of output tokens generated
1557
+ */
1558
+ outputTokens: number;
1559
+ /**
1560
+ * Total cost in USD
1561
+ */
1562
+ totalCostUsd: number;
1563
+ /**
1564
+ * Execution duration in milliseconds
1565
+ */
1566
+ durationMs: number;
1567
+ /**
1568
+ * API call duration in milliseconds (excluding network overhead)
1569
+ */
1570
+ durationApiMs?: number;
1571
+ /**
1572
+ * Number of tokens read from cache
1573
+ */
1574
+ cacheReadInputTokens?: number;
1575
+ /**
1576
+ * Number of tokens written to cache
1577
+ */
1578
+ cacheCreationInputTokens?: number;
1579
+ }
1580
+ /**
1581
+ * Supported LLM provider types
1582
+ */
1583
+ type ProviderKind = 'claude' | 'anthropic' | 'openai' | 'custom-http';
1584
+ /**
1585
+ * Configuration for an LLM judge
1586
+ */
1587
+ interface JudgeConfig {
1588
+ /**
1589
+ * LLM provider to use
1590
+ * @default 'claude'
1591
+ */
1592
+ provider?: ProviderKind;
1593
+ /**
1594
+ * Environment variable name containing the API key
1595
+ * @default 'ANTHROPIC_API_KEY'
1596
+ */
1597
+ apiKeyEnvVar?: string;
1598
+ /**
1599
+ * Model to use for judging
1600
+ * @default 'claude-sonnet-4-20250514'
1601
+ */
1602
+ model?: string;
1603
+ /**
1604
+ * Maximum tokens for response
1605
+ * @default 1000
1606
+ */
1607
+ maxTokens?: number;
1608
+ /**
1609
+ * Temperature (0-1, lower is more deterministic)
1610
+ * @default 0.0
1611
+ */
1612
+ temperature?: number;
1613
+ /**
1614
+ * Maximum budget in USD for the judge evaluation
1615
+ * @default 0.10
1616
+ */
1617
+ maxBudgetUsd?: number;
1618
+ /**
1619
+ * Maximum size (in bytes) for tool output before failing the test
1620
+ * When set, the judge will fail if the candidate response exceeds this size
1621
+ */
1622
+ maxToolOutputSize?: number;
1623
+ }
1624
+ /**
1625
+ * Result from LLM judge evaluation
1626
+ */
1627
+ interface JudgeResult {
1628
+ /**
1629
+ * Whether the evaluation passed
1630
+ */
1631
+ pass: boolean;
1632
+ /**
1633
+ * Numeric score (0-1, where 1 is best)
1634
+ */
1635
+ score?: number;
1636
+ /**
1637
+ * Reasoning/explanation from the judge
1638
+ */
1639
+ reasoning?: string;
1640
+ /**
1641
+ * Usage metrics from the Claude Agent SDK
1642
+ */
1643
+ usage?: UsageMetrics;
1644
+ /**
1645
+ * Size of the candidate response in bytes (for maxToolOutputSize tracking)
1646
+ */
1647
+ candidateSizeBytes?: number;
1648
+ /**
1649
+ * Whether the candidate exceeded maxToolOutputSize
1650
+ */
1651
+ exceedsMaxToolOutputSize?: boolean;
1652
+ }
1653
+ /**
1654
+ * LLM judge client interface
1655
+ */
1656
+ interface Judge {
1657
+ /**
1658
+ * Evaluates a candidate response against a reference
1659
+ *
1660
+ * @param candidate - The actual response to evaluate
1661
+ * @param reference - The expected/reference response (or null if not applicable)
1662
+ * @param rubric - The evaluation rubric/criteria
1663
+ * @returns Evaluation result with usage metrics
1664
+ */
1665
+ evaluate(candidate: unknown, reference: unknown, rubric: string): Promise<JudgeResult>;
1666
+ }
1667
+
1668
+ /**
1669
+ * Matcher Types
1670
+ *
1671
+ * TypeScript declarations for custom Playwright matchers.
1672
+ */
1673
+
1674
+ /**
1675
+ * Options for the LLM judge matcher
1676
+ */
1677
+ interface JudgeMatcherOptions {
1678
+ /** Reference response to compare against */
1679
+ reference?: unknown;
1680
+ /** Score threshold for passing (default: 0.7) */
1681
+ passingThreshold?: number;
1682
+ /** Judge configuration override */
1683
+ judgeConfig?: JudgeConfig;
1684
+ }
1685
+ /**
1686
+ * Declaration merging for Playwright matchers
1687
+ */
1688
+ declare global {
1689
+ namespace PlaywrightTest {
1690
+ interface Matchers<R, T = unknown> {
1691
+ /**
1692
+ * Validates that a response exactly matches the expected value
1693
+ *
1694
+ * @param expected - The expected response value
1695
+ *
1696
+ * @example
1697
+ * ```typescript
1698
+ * expect(result).toMatchToolResponse({ status: 'ok', count: 42 });
1699
+ * ```
1700
+ */
1701
+ toMatchToolResponse(expected: unknown): R;
1702
+ /**
1703
+ * Validates that a response matches a Zod schema
1704
+ *
1705
+ * @param schema - The Zod schema to validate against
1706
+ * @param options - Validation options
1707
+ *
1708
+ * @example
1709
+ * ```typescript
1710
+ * const WeatherSchema = z.object({
1711
+ * temperature: z.number(),
1712
+ * conditions: z.string(),
1713
+ * });
1714
+ * expect(result).toMatchToolSchema(WeatherSchema);
1715
+ * ```
1716
+ */
1717
+ toMatchToolSchema(schema: ZodType, options?: SchemaValidatorOptions): R;
1718
+ /**
1719
+ * Validates that a response contains expected text substrings
1720
+ *
1721
+ * @param expected - Expected substring(s) to find
1722
+ * @param options - Validation options
1723
+ *
1724
+ * @example
1725
+ * ```typescript
1726
+ * expect(result).toContainToolText('temperature');
1727
+ * expect(result).toContainToolText(['temperature', 'conditions']);
1728
+ * expect(result).toContainToolText('HELLO', { caseSensitive: false });
1729
+ * ```
1730
+ */
1731
+ toContainToolText(expected: string | string[], options?: TextValidatorOptions): R;
1732
+ /**
1733
+ * Validates that a response matches regex patterns
1734
+ *
1735
+ * @param patterns - Expected pattern(s) to match
1736
+ * @param options - Validation options
1737
+ *
1738
+ * @example
1739
+ * ```typescript
1740
+ * expect(result).toMatchToolPattern(/temperature: \d+/);
1741
+ * expect(result).toMatchToolPattern(['temp: \\d+', 'humidity: \\d+%']);
1742
+ * ```
1743
+ */
1744
+ toMatchToolPattern(patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): R;
1745
+ /**
1746
+ * Validates that a response matches a saved snapshot
1747
+ *
1748
+ * @param name - Snapshot name
1749
+ * @param sanitizers - Optional sanitizers for non-deterministic values
1750
+ *
1751
+ * @example
1752
+ * ```typescript
1753
+ * expect(result).toMatchToolSnapshot('weather-response');
1754
+ * expect(result).toMatchToolSnapshot('user-data', [
1755
+ * { pattern: /\d{4}-\d{2}-\d{2}/, replacement: '[DATE]' },
1756
+ * ]);
1757
+ * ```
1758
+ */
1759
+ toMatchToolSnapshot(name: string, sanitizers?: SnapshotSanitizer[]): Promise<R>;
1760
+ /**
1761
+ * Validates that a response is (or is not) an error
1762
+ *
1763
+ * @param expected - What to expect (true for error, false for success, string for specific message)
1764
+ *
1765
+ * @example
1766
+ * ```typescript
1767
+ * expect(result).toBeToolError(); // Expects any error
1768
+ * expect(result).not.toBeToolError(); // Expects success
1769
+ * expect(result).toBeToolError('File not found'); // Expects specific error
1770
+ * ```
1771
+ */
1772
+ toBeToolError(expected?: boolean | string | string[]): R;
1773
+ /**
1774
+ * Validates that a response passes LLM-as-judge evaluation
1775
+ *
1776
+ * @param rubric - Evaluation rubric/criteria
1777
+ * @param options - Judge options
1778
+ *
1779
+ * @example
1780
+ * ```typescript
1781
+ * expect(result).toPassToolJudge('Response should be helpful and accurate');
1782
+ * expect(result).toPassToolJudge('Response should match reference', {
1783
+ * reference: expectedOutput,
1784
+ * passingThreshold: 0.8,
1785
+ * });
1786
+ * ```
1787
+ */
1788
+ toPassToolJudge(rubric: string, options?: JudgeMatcherOptions): Promise<R>;
1789
+ /**
1790
+ * Validates that a response meets size constraints
1791
+ *
1792
+ * @param options - Size constraints (maxBytes, minBytes)
1793
+ *
1794
+ * @example
1795
+ * ```typescript
1796
+ * expect(result).toHaveToolResponseSize({ maxBytes: 10000 });
1797
+ * expect(result).toHaveToolResponseSize({ minBytes: 100, maxBytes: 50000 });
1798
+ * ```
1799
+ */
1800
+ toHaveToolResponseSize(options: SizeValidatorOptions): R;
1801
+ /**
1802
+ * Validates that a response satisfies a custom predicate function
1803
+ *
1804
+ * Use this as an escape hatch when built-in matchers don't cover your use case.
1805
+ * The predicate receives both the raw response and extracted text for convenience.
1806
+ *
1807
+ * @param predicate - Function that validates the response
1808
+ * @param description - Optional description for error messages
1809
+ *
1810
+ * @example
1811
+ * ```typescript
1812
+ * // Simple boolean predicate
1813
+ * expect(result).toSatisfyToolPredicate((response) => {
1814
+ * return response.data?.items?.length > 0;
1815
+ * });
1816
+ *
1817
+ * // Predicate with custom message
1818
+ * expect(result).toSatisfyToolPredicate(
1819
+ * (response, text) => ({
1820
+ * pass: text.includes('success'),
1821
+ * message: 'Expected response to contain "success"',
1822
+ * }),
1823
+ * 'success check'
1824
+ * );
1825
+ *
1826
+ * // Async predicate
1827
+ * expect(result).toSatisfyToolPredicate(async (response) => {
1828
+ * return await validateWithExternalService(response);
1829
+ * });
1830
+ * ```
1831
+ */
1832
+ toSatisfyToolPredicate(predicate: ToolPredicate, description?: string): Promise<R>;
1833
+ }
1834
+ }
1835
+ }
1836
+ /**
1837
+ * Predicate result returned by the user's predicate function
1838
+ */
1839
+ interface PredicateResult {
1840
+ /** Whether the predicate passed */
1841
+ pass: boolean;
1842
+ /** Message explaining the result (shown on failure) */
1843
+ message?: string;
1844
+ }
1845
+ /**
1846
+ * A predicate function that validates a response
1847
+ */
1848
+ type ToolPredicate = (response: unknown, text: string) => boolean | PredicateResult | Promise<boolean | PredicateResult>;
1849
+
1850
+ /**
1851
+ * Canonical type definitions for @gleanwork/mcp-server-tester
1852
+ *
1853
+ * This module is the single source of truth for shared types.
1854
+ * All other modules should import from here rather than defining their own.
1855
+ *
1856
+ * @packageDocumentation
1857
+ */
1858
+ /**
1859
+ * Authentication type for MCP connections
1860
+ *
1861
+ * - 'oauth': Interactive OAuth 2.1 with PKCE (browser-based authentication)
1862
+ * - 'api-token': Static API token (e.g., from a dashboard or environment variable)
1863
+ * - 'none': No authentication
1864
+ */
1865
+ type AuthType = 'oauth' | 'api-token' | 'none';
1866
+ /**
1867
+ * Source of test results
1868
+ *
1869
+ * - 'eval': From runEvalDataset() using JSON eval datasets
1870
+ * - 'test': From direct API test tracking (MCP fixture calls)
1871
+ */
1872
+ type ResultSource = 'eval' | 'test';
1873
+ /**
1874
+ * Known expectation types supported by the framework
1875
+ */
1876
+ type ExpectationType = 'exact' | 'schema' | 'textContains' | 'regex' | 'snapshot' | 'judge' | 'error' | 'size';
1877
+ /**
1878
+ * Result of an expectation check
1879
+ */
1880
+ interface EvalExpectationResult {
1881
+ /**
1882
+ * Whether the expectation passed
1883
+ */
1884
+ pass: boolean;
1885
+ /**
1886
+ * Optional details about the result
1887
+ */
1888
+ details?: string;
1889
+ }
1890
+ /**
1891
+ * Map of expectation type to result
1892
+ */
1893
+ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResult>>;
1894
+ /**
1895
+ * Breakdown of expectation types used in a run
1896
+ */
1897
+ type ExpectationBreakdown = Record<ExpectationType, number>;
1898
+
1899
+ /**
1900
+ * Options for creating an MCP fixture
1901
+ */
1902
+ interface MCPFixtureOptions {
1903
+ /**
1904
+ * Authentication type used for this test
1905
+ * - 'oauth': Interactive OAuth 2.1 with PKCE (browser-based authentication)
1906
+ * - 'api-token': Static API token (e.g., from a dashboard or environment variable)
1907
+ * - 'none': No authentication
1908
+ */
1909
+ authType?: AuthType;
1910
+ /**
1911
+ * Playwright project name for this test
1912
+ * Used for filtering and grouping in the reporter
1913
+ */
1914
+ project?: string;
1915
+ }
1916
+ /**
1917
+ * High-level API for interacting with MCP servers in tests
1918
+ *
1919
+ * This interface wraps the raw MCP Client with test-friendly methods
1920
+ */
1921
+ interface MCPFixtureApi {
1922
+ /**
1923
+ * The underlying MCP client (for advanced usage)
1924
+ */
1925
+ client: Client;
1926
+ /**
1927
+ * Authentication type used for this test session
1928
+ */
1929
+ authType: AuthType;
1930
+ /**
1931
+ * Playwright project name for this test session
1932
+ */
1933
+ project?: string;
1934
+ /**
1935
+ * Lists all available tools from the MCP server
1936
+ *
1937
+ * @returns Array of tool definitions
1938
+ */
1939
+ listTools(): Promise<Array<Tool>>;
1940
+ /**
1941
+ * Calls a tool on the MCP server
1942
+ *
1943
+ * @param name - Tool name
1944
+ * @param args - Tool arguments
1945
+ * @returns Tool call result
1946
+ */
1947
+ callTool<TArgs extends Record<string, unknown> = Record<string, unknown>>(name: string, args: TArgs): Promise<CallToolResult>;
1948
+ /**
1949
+ * Gets information about the connected server
1950
+ */
1951
+ getServerInfo(): {
1952
+ name?: string;
1953
+ version?: string;
1954
+ } | null;
1955
+ }
1956
+ /**
1957
+ * Creates an MCP fixture wrapper around a Client
1958
+ *
1959
+ * When testInfo is provided, automatically tracks all MCP operations with test.step()
1960
+ * and creates attachments for the MCP Test Reporter.
1961
+ *
1962
+ * @param client - The MCP client to wrap
1963
+ * @param testInfo - Optional Playwright TestInfo for auto-tracking
+ * @param options - Optional fixture options (authentication type and Playwright project name; see MCPFixtureOptions)
1964
+ * @returns MCPFixtureApi instance
1965
+ *
1966
+ * @example
1967
+ * ```typescript
1968
+ * // With tracking (recommended)
1969
+ * const test = base.extend<{ mcp: MCPFixtureApi }>({
1970
+ * mcp: async ({}, use, testInfo) => {
1971
+ * const client = await createMCPClientForConfig(config);
1972
+ * const api = createMCPFixture(client, testInfo);
1973
+ * await use(api);
1974
+ * await closeMCPClient(client);
1975
+ * }
1976
+ * });
1977
+ *
1978
+ * // Without tracking
1979
+ * const api = createMCPFixture(client);
1980
+ * ```
1981
+ */
1982
+ declare function createMCPFixture(client: Client, testInfo?: TestInfo, options?: MCPFixtureOptions): MCPFixtureApi;
1983
+
1984
+ /**
1985
+ * toMatchToolResponse Matcher
1986
+ *
1987
+ * Validates that a response exactly matches an expected value.
1988
+ */
1989
+ /**
1990
+ * toMatchToolResponse matcher implementation: evaluates `received` against `expected` and returns the pass flag plus a message thunk
1991
+ */
1992
+ declare function toMatchToolResponse(this: {
1993
+ isNot: boolean;
1994
+ }, received: unknown, expected: unknown): {
1995
+ pass: boolean;
1996
+ message: () => string;
1997
+ };
1998
+
1999
+ /**
2000
+ * toMatchToolSchema Matcher
2001
+ *
2002
+ * Validates that a response matches a Zod schema.
2003
+ */
2004
+
2005
+ /**
2006
+ * toMatchToolSchema matcher implementation: evaluates `received` against `schema` and returns the pass flag plus a message thunk
2007
+ */
2008
+ declare function toMatchToolSchema(this: {
2009
+ isNot: boolean;
2010
+ }, received: unknown, schema: ZodType, options?: SchemaValidatorOptions): {
2011
+ pass: boolean;
2012
+ message: () => string;
2013
+ };
2014
+
2015
+ /**
2016
+ * toContainToolText Matcher
2017
+ *
2018
+ * Validates that a response contains expected text substrings.
2019
+ */
2020
+
2021
+ /**
2022
+ * Custom expect matcher: takes the `received` value, one expected substring or an array of them, and optional TextValidatorOptions.
2023
+ */
2024
+ declare function toContainToolText(this: {
2025
+ isNot: boolean;
2026
+ }, received: unknown, expected: string | string[], options?: TextValidatorOptions): {
2027
+ pass: boolean;
2028
+ message: () => string;
2029
+ };
2030
+
2031
+ /**
2032
+ * toMatchToolPattern Matcher
2033
+ *
2034
+ * Validates that a response matches regex patterns.
2035
+ */
2036
+
2037
+ /**
2038
+ * Custom expect matcher: takes the `received` value, one pattern or an array of patterns (each a string or a RegExp), and optional PatternValidatorOptions.
2039
+ */
2040
+ declare function toMatchToolPattern(this: {
2041
+ isNot: boolean;
2042
+ }, received: unknown, patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): {
2043
+ pass: boolean;
2044
+ message: () => string;
2045
+ };
2046
+
2047
+ /**
2048
+ * toMatchToolSnapshot Matcher
2049
+ *
2050
+ * Validates that a response matches a saved snapshot.
2051
+ * Uses Playwright's native snapshot testing functionality.
2052
+ */
2053
+
2054
+ /**
2055
+ * Custom expect matcher: takes the `received` value, the snapshot `name`, and optional snapshot `sanitizers`.
2056
+ *
2057
+ * Note: This is an async matcher (it returns a Promise) that uses Playwright's snapshot testing, so the expect() call must be awaited.
2058
+ */
2059
+ declare function toMatchToolSnapshot(this: {
2060
+ isNot: boolean;
2061
+ }, received: unknown, name: string, sanitizers?: SnapshotSanitizer[]): Promise<{
2062
+ pass: boolean;
2063
+ message: () => string;
2064
+ }>;
2065
+
2066
+ /**
2067
+ * toBeToolError Matcher
2068
+ *
2069
+ * Validates that a response is (or is not) an error.
2070
+ */
2071
+ /**
2072
+ * Custom expect matcher: takes the `received` value and an optional `expected` that may be a boolean, a string, or a string array.
2073
+ */
2074
+ declare function toBeToolError(this: {
2075
+ isNot: boolean;
2076
+ }, received: unknown, expected?: boolean | string | string[]): {
2077
+ pass: boolean;
2078
+ message: () => string;
2079
+ };
2080
+
2081
+ /**
2082
+ * toPassToolJudge Matcher
2083
+ *
2084
+ * Validates that a response passes LLM-as-judge evaluation.
2085
+ */
2086
+
2087
+ /**
2088
+ * Custom expect matcher: takes the `received` value, an evaluation `rubric`, and optional JudgeMatcherOptions.
2089
+ *
2090
+ * Note: This is an async matcher (it returns a Promise) that calls an LLM for evaluation, so the expect() call must be awaited.
2091
+ */
2092
+ declare function toPassToolJudge(this: {
2093
+ isNot: boolean;
2094
+ }, received: unknown, rubric: string, options?: JudgeMatcherOptions): Promise<{
2095
+ pass: boolean;
2096
+ message: () => string;
2097
+ }>;
2098
+
2099
+ /**
2100
+ * toHaveToolResponseSize Matcher
2101
+ *
2102
+ * Validates that a response meets size constraints.
2103
+ */
2104
+
2105
+ /**
2106
+ * Custom expect matcher: takes the `received` value and the required SizeValidatorOptions describing the size constraints.
2107
+ */
2108
+ declare function toHaveToolResponseSize(this: {
2109
+ isNot: boolean;
2110
+ }, received: unknown, options: SizeValidatorOptions): {
2111
+ pass: boolean;
2112
+ message: () => string;
2113
+ };
2114
+
2115
+ /**
2116
+ * toSatisfyToolPredicate Matcher
2117
+ *
2118
+ * Validates that a response satisfies a custom predicate function.
2119
+ * This is an escape hatch for custom validation logic when built-in
2120
+ * matchers don't cover the use case.
2121
+ */
2122
+
2123
+ /**
2124
+ * Custom expect matcher that delegates validation to a caller-supplied predicate.
2125
+ *
2126
+ * This matcher allows custom validation logic via a predicate function.
2127
+ * The predicate receives both the raw response and extracted text.
2128
+ * The matcher returns a Promise, so always await the expect() call. NOTE(review): the optional `description` is presumably a label for failure output - confirm.
2129
+ * @example
2130
+ * ```typescript
2131
+ * // Simple boolean predicate
2132
+ * await expect(result).toSatisfyToolPredicate((response) => {
2133
+ * return response.data?.length > 0;
2134
+ * });
2135
+ *
2136
+ * // Predicate with custom message
2137
+ * await expect(result).toSatisfyToolPredicate((response, text) => {
2138
+ * const hasTemperature = text.includes('temperature');
2139
+ * return {
2140
+ * pass: hasTemperature,
2141
+ * message: hasTemperature
2142
+ * ? 'Found temperature in response'
2143
+ * : 'Expected response to contain temperature',
2144
+ * };
2145
+ * });
2146
+ *
2147
+ * // Async predicate
2148
+ * await expect(result).toSatisfyToolPredicate(async (response) => {
2149
+ * const isValid = await validateWithExternalService(response);
2150
+ * return isValid;
2151
+ * });
2152
+ * ```
2153
+ */
2154
+ declare function toSatisfyToolPredicate(this: {
2155
+ isNot: boolean;
2156
+ }, received: unknown, predicate: ToolPredicate, description?: string): Promise<{
2157
+ pass: boolean;
2158
+ message: () => string;
2159
+ }>;
2160
+
2161
+ /**
2162
+ * Extended Playwright expect with MCP tool matchers.
2163
+ * toMatchToolSnapshot, toPassToolJudge and toSatisfyToolPredicate return promises and must be awaited; the other matchers are synchronous.
2164
+ * @example
2165
+ * ```typescript
2166
+ * import { expect } from '@gleanwork/mcp-server-tester';
2167
+ *
2168
+ * test('weather tool', async ({ mcp }) => {
2169
+ * const result = await mcp.callTool('get_weather', { city: 'London' });
2170
+ *
2171
+ * expect(result).toContainToolText('temperature');
2172
+ * expect(result).toMatchToolSchema(WeatherSchema);
2173
+ * expect(result).not.toBeToolError();
2174
+ * });
2175
+ * ```
2176
+ */
2177
+ declare const expect: playwright_test.Expect<{
2178
+ toMatchToolResponse: typeof toMatchToolResponse;
2179
+ toMatchToolSchema: typeof toMatchToolSchema;
2180
+ toContainToolText: typeof toContainToolText;
2181
+ toMatchToolPattern: typeof toMatchToolPattern;
2182
+ toMatchToolSnapshot: typeof toMatchToolSnapshot;
2183
+ toBeToolError: typeof toBeToolError;
2184
+ toPassToolJudge: typeof toPassToolJudge;
2185
+ toHaveToolResponseSize: typeof toHaveToolResponseSize;
2186
+ toSatisfyToolPredicate: typeof toSatisfyToolPredicate;
2187
+ }>;
2188
+
2189
+ /**
2190
+ * Internal fixture state used to pass the resolved auth type between fixtures
2191
+ */
2192
+ interface MCPFixtureState {
2193
+ /**
2194
+ * The resolved authentication type (may differ from the configured one if CLI tokens are used)
2195
+ */
2196
+ resolvedAuthType: AuthType;
2197
+ }
2198
+ /**
2199
+ * Test fixtures that the extended `test` adds for MCP testing
2200
+ */
2201
+ type MCPFixtures = {
2202
+ /**
2203
+ * Raw MCP client instance (automatically connected and cleaned up)
2204
+ */
2205
+ mcpClient: Client;
2206
+ /**
2207
+ * High-level MCP API for tests (an MCPFixtureApi)
2208
+ */
2209
+ mcp: MCPFixtureApi;
2210
+ /**
2211
+ * Internal fixture state (not for external use)
2212
+ */
2213
+ _mcpFixtureState: MCPFixtureState;
2214
+ };
2215
+ /**
2216
+ * Extended Playwright test whose test-scoped fixtures also include MCPFixtures (`mcpClient`, `mcp`)
2217
+ *
2218
+ * @example
2219
+ * import { test, expect } from '@gleanwork/mcp-server-tester';
2220
+ *
2221
+ * test('lists tools from MCP server', async ({ mcp }) => {
2222
+ * const tools = await mcp.listTools();
2223
+ * expect(tools.length).toBeGreaterThan(0);
2224
+ * });
2225
+ */
2226
+ declare const test: playwright_test.TestType<playwright_test.PlaywrightTestArgs & playwright_test.PlaywrightTestOptions & MCPFixtures, playwright_test.PlaywrightWorkerArgs & playwright_test.PlaywrightWorkerOptions>;
2227
+
2228
+ /**
2229
+ * Types and interfaces for LLM host simulation mode
2230
+ *
2231
+ * This module provides types for testing MCP servers through LLM hosts,
2232
+ * validating tool descriptions, parameter clarity, and discoverability.
2233
+ */
2234
+
2235
+ /**
2236
+ * Supported LLM providers for host simulation
2237
+ */
2238
+ type LLMProvider = 'openai' | 'anthropic';
2239
+ /**
2240
+ * Configuration for LLM host simulation; only `provider` is required
2241
+ */
2242
+ interface LLMHostConfig {
2243
+ /**
2244
+ * LLM provider to use
2245
+ */
2246
+ provider: LLMProvider;
2247
+ /**
2248
+ * Name of the environment variable that contains the API key
2249
+ * @default 'OPENAI_API_KEY' for openai, 'ANTHROPIC_API_KEY' for anthropic
2250
+ */
2251
+ apiKeyEnvVar?: string;
2252
+ /**
2253
+ * Model identifier to use
2254
+ * @default 'gpt-4' for openai, 'claude-3-5-sonnet-20241022' for anthropic
2255
+ */
2256
+ model?: string;
2257
+ /**
2258
+ * Maximum number of tokens for the response
2259
+ */
2260
+ maxTokens?: number;
2261
+ /**
2262
+ * Temperature (0-1, lower is more deterministic)
2263
+ * @default 0.0
2264
+ */
2265
+ temperature?: number;
2266
+ /**
2267
+ * Maximum number of tool calls to allow in a single conversation
2268
+ * @default 10
2269
+ */
2270
+ maxToolCalls?: number;
2271
+ }
2272
+ /**
2273
+ * A single tool call made by the LLM during host simulation
2274
+ */
2275
+ interface LLMToolCall {
2276
+ /**
2277
+ * Name of the tool that was called
2278
+ */
2279
+ name: string;
2280
+ /**
2281
+ * Tool arguments (as provided by LLM)
2282
+ */
2283
+ arguments: Record<string, unknown>;
2284
+ /**
2285
+ * Optional tool call ID (for tracking)
2286
+ */
2287
+ id?: string;
2288
+ }
2289
+ /**
2290
+ * Result of validating the tool calls an LLM made
2291
+ */
2292
+ interface ToolCallValidationResult {
2293
+ /**
2294
+ * Whether the tool calls passed validation
2295
+ */
2296
+ valid: boolean;
2297
+ /**
2298
+ * List of actual tool calls made
2299
+ */
2300
+ actualCalls: Array<LLMToolCall>;
2301
+ /**
2302
+ * Expected tool calls (if specified in eval case)
2303
+ */
2304
+ expectedCalls?: Array<LLMToolCall>;
2305
+ /**
2306
+ * Human-readable details about the validation (e.g., missing calls, incorrect arguments)
2307
+ */
2308
+ details?: string;
2309
+ }
2310
+ /**
2311
+ * Result returned by an LLM host simulation (see LLMHostSimulator.simulate)
2312
+ */
2313
+ interface LLMHostSimulationResult {
2314
+ /**
2315
+ * Whether the simulation succeeded
2316
+ */
2317
+ success: boolean;
2318
+ /**
2319
+ * Tool calls made by the LLM
2320
+ */
2321
+ toolCalls: Array<LLMToolCall>;
2322
+ /**
2323
+ * Final text response from the LLM, if any
2324
+ */
2325
+ response?: string;
2326
+ /**
2327
+ * Error message if simulation failed
2328
+ */
2329
+ error?: string;
2330
+ /**
2331
+ * Full conversation history as role/content pairs (for debugging)
2332
+ */
2333
+ conversationHistory?: Array<{
2334
+ role: 'user' | 'assistant' | 'tool';
2335
+ content: string;
2336
+ }>;
2337
+ }
2338
+ /**
2339
+ * Interface for LLM host simulators
2340
+ *
2341
+ * Implementations communicate with MCP servers via the actual MCP protocol
2342
+ */
2343
+ interface LLMHostSimulator {
2344
+ /**
2345
+ * Simulates an LLM host interacting with an MCP server
2346
+ *
2347
+ * @param mcp - MCP fixture API used to reach the server
2348
+ * @param scenario - Natural language prompt describing what the LLM should do
2349
+ * @param config - LLM host configuration
2350
+ * @returns Promise of the simulation result with tool calls and response
2351
+ */
2352
+ simulate(mcp: MCPFixtureApi, scenario: string, config: LLMHostConfig): Promise<LLMHostSimulationResult>;
2353
+ }
2354
+ /**
2355
+ * Specification of a tool call the LLM is expected to make (for validation)
2356
+ */
2357
+ interface ExpectedToolCall {
2358
+ /**
2359
+ * Name of the expected tool
2360
+ */
2361
+ name: string;
2362
+ /**
2363
+ * Expected arguments (partial match)
2364
+ */
2365
+ arguments?: Record<string, unknown>;
2366
+ /**
2367
+ * Whether this call is required
2368
+ * @default true
2369
+ */
2370
+ required?: boolean;
2371
+ }
2372
+
2373
+ /**
2374
+ * Evaluation mode: 'direct' for direct API calls to MCP tools, 'llm_host' for LLM-driven tool selection via natural language
2375
+ */
2376
+ type EvalMode = 'direct' | 'llm_host';
2377
+ /**
2378
+ * A single eval test case
2379
+ *
2380
+ * For 'direct' mode: toolName and args are required
2381
+ * For 'llm_host' mode: scenario is required; llmHostConfig is optional (defaults come from the test environment)
2382
+ */
2383
+ interface EvalCase {
2384
+ /**
2385
+ * Unique identifier for this test case
2386
+ */
2387
+ id: string;
2388
+ /**
2389
+ * Human-readable description of what this test case validates
2390
+ */
2391
+ description?: string;
2392
+ /**
2393
+ * Evaluation mode
2394
+ * - 'direct': Direct API calls to MCP tools (default)
2395
+ * - 'llm_host': LLM-driven tool selection via natural language
2396
+ *
2397
+ * @default 'direct'
2398
+ */
2399
+ mode?: EvalMode;
2400
+ /**
2401
+ * Name of the MCP tool to call (required for 'direct' mode, optional for 'llm_host' mode)
2402
+ */
2403
+ toolName?: string;
2404
+ /**
2405
+ * Arguments to pass to the tool (required for 'direct' mode, optional for 'llm_host' mode)
2406
+ */
2407
+ args?: Record<string, unknown>;
2408
+ /**
2409
+ * Natural language scenario for the LLM to execute (required for 'llm_host' mode)
2410
+ *
2411
+ * @example "Get the weather for London and tell me if I need an umbrella"
2412
+ */
2413
+ scenario?: string;
2414
+ /**
2415
+ * LLM host configuration (optional for 'llm_host' mode)
2416
+ *
2417
+ * If not specified, uses default configuration from test environment
2418
+ */
2419
+ llmHostConfig?: LLMHostConfig;
2420
+ /**
2421
+ * Additional metadata for this test case
2422
+ *
2423
+ * For 'llm_host' mode, can include 'expectedToolCalls' for validation
2424
+ */
2425
+ metadata?: Record<string, unknown>;
2426
+ /**
2427
+ * Expectations to validate against the tool response
2428
+ *
2429
+ * Multiple expectations can be combined and will all be validated.
2430
+ *
2431
+ * @example
2432
+ * ```json
2433
+ * {
2434
+ * "id": "weather-london",
2435
+ * "toolName": "get_weather",
2436
+ * "args": { "city": "London" },
2437
+ * "expect": {
2438
+ * "containsText": ["temperature", "conditions"],
2439
+ * "schema": "WeatherResponse",
2440
+ * "responseSize": { "maxBytes": 10000 },
2441
+ * "isError": false
2442
+ * }
2443
+ * }
2444
+ * ```
2445
+ */
2446
+ expect?: EvalExpectBlock;
2447
+ }
2448
+ /**
2449
+ * Unified expectation block for eval cases
2450
+ *
2451
+ * Mirrors the Playwright matcher API for consistency; each field names the matcher it corresponds to.
2452
+ */
2453
+ interface EvalExpectBlock {
2454
+ /**
2455
+ * Exact response match (toMatchToolResponse)
2456
+ */
2457
+ response?: unknown;
2458
+ /**
2459
+ * Name of the schema to validate against (toMatchToolSchema); a schema name, not a Zod object (see EvalDataset.schemas)
2460
+ */
2461
+ schema?: string;
2462
+ /**
2463
+ * Text substring(s) that must be present (toContainToolText)
2464
+ */
2465
+ containsText?: string | string[];
2466
+ /**
2467
+ * Regex pattern(s), given as strings, that must match (toMatchToolPattern)
2468
+ */
2469
+ matchesPattern?: string | string[];
2470
+ /**
2471
+ * Snapshot name for comparison (toMatchToolSnapshot)
2472
+ */
2473
+ snapshot?: string;
2474
+ /**
2475
+ * Snapshot sanitizers to apply
2476
+ */
2477
+ snapshotSanitizers?: SnapshotSanitizer[];
2478
+ /**
2479
+ * Error expectation (toBeToolError)
2480
+ * - true: expects any error
2481
+ * - false: expects no error
2482
+ * - string | string[]: expects error containing the given message(s) (NOTE(review): any-vs-all matching for arrays is not visible here - confirm)
2483
+ */
2484
+ isError?: boolean | string | string[];
2485
+ /**
2486
+ * LLM-as-judge evaluation (toPassToolJudge)
2487
+ */
2488
+ passesJudge?: {
2489
+ /** Evaluation rubric/criteria */
2490
+ rubric: string;
2491
+ /** Reference response to compare against */
2492
+ reference?: unknown;
2493
+ /** Score threshold for passing (0-1, default: 0.7) */
2494
+ threshold?: number;
2495
+ /** Judge configuration ID */
2496
+ configId?: string;
2497
+ };
2498
+ /**
2499
+ * Response size validation (toHaveToolResponseSize)
2500
+ */
2501
+ responseSize?: {
2502
+ /** Maximum allowed size in bytes */
2503
+ maxBytes?: number;
2504
+ /** Minimum required size in bytes */
2505
+ minBytes?: number;
2506
+ };
2507
+ }
2508
+ /**
2509
+ * A complete eval dataset containing multiple test cases
2510
+ */
2511
+ interface EvalDataset {
2512
+ /**
2513
+ * Dataset name
2514
+ */
2515
+ name: string;
2516
+ /**
2517
+ * Human-readable dataset description
2518
+ */
2519
+ description?: string;
2520
+ /**
2521
+ * Test cases in this dataset
2522
+ */
2523
+ cases: Array<EvalCase>;
2524
+ /**
2525
+ * Optional Zod schema definitions, keyed by name, referenced by test cases
2526
+ */
2527
+ schemas?: Record<string, z.ZodSchema>;
2528
+ /**
2529
+ * Additional dataset metadata
2530
+ */
2531
+ metadata?: Record<string, unknown>;
2532
+ }
2533
+ /**
2534
+ * Zod schema for EvalCase
2535
+ *
2536
+ * Only `id` is required here: toolName, args and scenario are all optional in this object schema, in both 'direct' and 'llm_host' (scenario-based) modes
2537
+ */
2538
+ declare const EvalCaseSchema: z.ZodObject<{
2539
+ id: z.ZodString;
2540
+ description: z.ZodOptional<z.ZodString>;
2541
+ mode: z.ZodOptional<z.ZodEnum<["direct", "llm_host"]>>;
2542
+ toolName: z.ZodOptional<z.ZodString>;
2543
+ args: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2544
+ scenario: z.ZodOptional<z.ZodString>;
2545
+ llmHostConfig: z.ZodOptional<z.ZodObject<{
2546
+ provider: z.ZodEnum<["openai", "anthropic"]>;
2547
+ apiKeyEnvVar: z.ZodOptional<z.ZodString>;
2548
+ model: z.ZodOptional<z.ZodString>;
2549
+ maxTokens: z.ZodOptional<z.ZodNumber>;
2550
+ temperature: z.ZodOptional<z.ZodNumber>;
2551
+ maxToolCalls: z.ZodOptional<z.ZodNumber>;
2552
+ }, "strip", z.ZodTypeAny, {
2553
+ provider: "anthropic" | "openai";
2554
+ model?: string | undefined;
2555
+ maxTokens?: number | undefined;
2556
+ apiKeyEnvVar?: string | undefined;
2557
+ temperature?: number | undefined;
2558
+ maxToolCalls?: number | undefined;
2559
+ }, {
2560
+ provider: "anthropic" | "openai";
2561
+ model?: string | undefined;
2562
+ maxTokens?: number | undefined;
2563
+ apiKeyEnvVar?: string | undefined;
2564
+ temperature?: number | undefined;
2565
+ maxToolCalls?: number | undefined;
2566
+ }>>;
2567
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2568
+ expect: z.ZodOptional<z.ZodObject<{
2569
+ response: z.ZodOptional<z.ZodUnknown>;
2570
+ schema: z.ZodOptional<z.ZodString>;
2571
+ containsText: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
2572
+ matchesPattern: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
2573
+ snapshot: z.ZodOptional<z.ZodString>;
2574
+ snapshotSanitizers: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodEnum<["timestamp", "uuid", "iso-date", "objectId", "jwt"]>, z.ZodObject<{
2575
+ pattern: z.ZodString;
2576
+ replacement: z.ZodOptional<z.ZodString>;
2577
+ }, "strip", z.ZodTypeAny, {
2578
+ pattern: string;
2579
+ replacement?: string | undefined;
2580
+ }, {
2581
+ pattern: string;
2582
+ replacement?: string | undefined;
2583
+ }>, z.ZodObject<{
2584
+ remove: z.ZodArray<z.ZodString, "many">;
2585
+ }, "strip", z.ZodTypeAny, {
2586
+ remove: string[];
2587
+ }, {
2588
+ remove: string[];
2589
+ }>]>, "many">>;
2590
+ isError: z.ZodOptional<z.ZodUnion<[z.ZodBoolean, z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
2591
+ passesJudge: z.ZodOptional<z.ZodObject<{
2592
+ rubric: z.ZodString;
2593
+ reference: z.ZodOptional<z.ZodUnknown>;
2594
+ threshold: z.ZodOptional<z.ZodNumber>;
2595
+ configId: z.ZodOptional<z.ZodString>;
2596
+ }, "strip", z.ZodTypeAny, {
2597
+ rubric: string;
2598
+ reference?: unknown;
2599
+ threshold?: number | undefined;
2600
+ configId?: string | undefined;
2601
+ }, {
2602
+ rubric: string;
2603
+ reference?: unknown;
2604
+ threshold?: number | undefined;
2605
+ configId?: string | undefined;
2606
+ }>>;
2607
+ responseSize: z.ZodOptional<z.ZodObject<{
2608
+ maxBytes: z.ZodOptional<z.ZodNumber>;
2609
+ minBytes: z.ZodOptional<z.ZodNumber>;
2610
+ }, "strip", z.ZodTypeAny, {
2611
+ maxBytes?: number | undefined;
2612
+ minBytes?: number | undefined;
2613
+ }, {
2614
+ maxBytes?: number | undefined;
2615
+ minBytes?: number | undefined;
2616
+ }>>;
2617
+ }, "strip", z.ZodTypeAny, {
2618
+ isError?: string | boolean | string[] | undefined;
2619
+ schema?: string | undefined;
2620
+ snapshot?: string | undefined;
2621
+ response?: unknown;
2622
+ containsText?: string | string[] | undefined;
2623
+ matchesPattern?: string | string[] | undefined;
2624
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2625
+ pattern: string;
2626
+ replacement?: string | undefined;
2627
+ } | {
2628
+ remove: string[];
2629
+ })[] | undefined;
2630
+ passesJudge?: {
2631
+ rubric: string;
2632
+ reference?: unknown;
2633
+ threshold?: number | undefined;
2634
+ configId?: string | undefined;
2635
+ } | undefined;
2636
+ responseSize?: {
2637
+ maxBytes?: number | undefined;
2638
+ minBytes?: number | undefined;
2639
+ } | undefined;
2640
+ }, {
2641
+ isError?: string | boolean | string[] | undefined;
2642
+ schema?: string | undefined;
2643
+ snapshot?: string | undefined;
2644
+ response?: unknown;
2645
+ containsText?: string | string[] | undefined;
2646
+ matchesPattern?: string | string[] | undefined;
2647
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2648
+ pattern: string;
2649
+ replacement?: string | undefined;
2650
+ } | {
2651
+ remove: string[];
2652
+ })[] | undefined;
2653
+ passesJudge?: {
2654
+ rubric: string;
2655
+ reference?: unknown;
2656
+ threshold?: number | undefined;
2657
+ configId?: string | undefined;
2658
+ } | undefined;
2659
+ responseSize?: {
2660
+ maxBytes?: number | undefined;
2661
+ minBytes?: number | undefined;
2662
+ } | undefined;
2663
+ }>>;
2664
+ }, "strip", z.ZodTypeAny, {
2665
+ id: string;
2666
+ args?: Record<string, unknown> | undefined;
2667
+ metadata?: Record<string, unknown> | undefined;
2668
+ mode?: "direct" | "llm_host" | undefined;
2669
+ description?: string | undefined;
2670
+ toolName?: string | undefined;
2671
+ scenario?: string | undefined;
2672
+ llmHostConfig?: {
2673
+ provider: "anthropic" | "openai";
2674
+ model?: string | undefined;
2675
+ maxTokens?: number | undefined;
2676
+ apiKeyEnvVar?: string | undefined;
2677
+ temperature?: number | undefined;
2678
+ maxToolCalls?: number | undefined;
2679
+ } | undefined;
2680
+ expect?: {
2681
+ isError?: string | boolean | string[] | undefined;
2682
+ schema?: string | undefined;
2683
+ snapshot?: string | undefined;
2684
+ response?: unknown;
2685
+ containsText?: string | string[] | undefined;
2686
+ matchesPattern?: string | string[] | undefined;
2687
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2688
+ pattern: string;
2689
+ replacement?: string | undefined;
2690
+ } | {
2691
+ remove: string[];
2692
+ })[] | undefined;
2693
+ passesJudge?: {
2694
+ rubric: string;
2695
+ reference?: unknown;
2696
+ threshold?: number | undefined;
2697
+ configId?: string | undefined;
2698
+ } | undefined;
2699
+ responseSize?: {
2700
+ maxBytes?: number | undefined;
2701
+ minBytes?: number | undefined;
2702
+ } | undefined;
2703
+ } | undefined;
2704
+ }, {
2705
+ id: string;
2706
+ args?: Record<string, unknown> | undefined;
2707
+ metadata?: Record<string, unknown> | undefined;
2708
+ mode?: "direct" | "llm_host" | undefined;
2709
+ description?: string | undefined;
2710
+ toolName?: string | undefined;
2711
+ scenario?: string | undefined;
2712
+ llmHostConfig?: {
2713
+ provider: "anthropic" | "openai";
2714
+ model?: string | undefined;
2715
+ maxTokens?: number | undefined;
2716
+ apiKeyEnvVar?: string | undefined;
2717
+ temperature?: number | undefined;
2718
+ maxToolCalls?: number | undefined;
2719
+ } | undefined;
2720
+ expect?: {
2721
+ isError?: string | boolean | string[] | undefined;
2722
+ schema?: string | undefined;
2723
+ snapshot?: string | undefined;
2724
+ response?: unknown;
2725
+ containsText?: string | string[] | undefined;
2726
+ matchesPattern?: string | string[] | undefined;
2727
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2728
+ pattern: string;
2729
+ replacement?: string | undefined;
2730
+ } | {
2731
+ remove: string[];
2732
+ })[] | undefined;
2733
+ passesJudge?: {
2734
+ rubric: string;
2735
+ reference?: unknown;
2736
+ threshold?: number | undefined;
2737
+ configId?: string | undefined;
2738
+ } | undefined;
2739
+ responseSize?: {
2740
+ maxBytes?: number | undefined;
2741
+ minBytes?: number | undefined;
2742
+ } | undefined;
2743
+ } | undefined;
2744
+ }>;
2745
+ /**
2746
+ * Zod schema for EvalDataset (without schemas field, as schemas aren't serializable)
2747
+ */
2748
+ declare const EvalDatasetSchema: z.ZodObject<{
2749
+ name: z.ZodString;
2750
+ description: z.ZodOptional<z.ZodString>;
2751
+ cases: z.ZodArray<z.ZodObject<{
2752
+ id: z.ZodString;
2753
+ description: z.ZodOptional<z.ZodString>;
2754
+ mode: z.ZodOptional<z.ZodEnum<["direct", "llm_host"]>>;
2755
+ toolName: z.ZodOptional<z.ZodString>;
2756
+ args: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2757
+ scenario: z.ZodOptional<z.ZodString>;
2758
+ llmHostConfig: z.ZodOptional<z.ZodObject<{
2759
+ provider: z.ZodEnum<["openai", "anthropic"]>;
2760
+ apiKeyEnvVar: z.ZodOptional<z.ZodString>;
2761
+ model: z.ZodOptional<z.ZodString>;
2762
+ maxTokens: z.ZodOptional<z.ZodNumber>;
2763
+ temperature: z.ZodOptional<z.ZodNumber>;
2764
+ maxToolCalls: z.ZodOptional<z.ZodNumber>;
2765
+ }, "strip", z.ZodTypeAny, {
2766
+ provider: "anthropic" | "openai";
2767
+ model?: string | undefined;
2768
+ maxTokens?: number | undefined;
2769
+ apiKeyEnvVar?: string | undefined;
2770
+ temperature?: number | undefined;
2771
+ maxToolCalls?: number | undefined;
2772
+ }, {
2773
+ provider: "anthropic" | "openai";
2774
+ model?: string | undefined;
2775
+ maxTokens?: number | undefined;
2776
+ apiKeyEnvVar?: string | undefined;
2777
+ temperature?: number | undefined;
2778
+ maxToolCalls?: number | undefined;
2779
+ }>>;
2780
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2781
+ expect: z.ZodOptional<z.ZodObject<{
2782
+ response: z.ZodOptional<z.ZodUnknown>;
2783
+ schema: z.ZodOptional<z.ZodString>;
2784
+ containsText: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
2785
+ matchesPattern: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
2786
+ snapshot: z.ZodOptional<z.ZodString>;
2787
+ snapshotSanitizers: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodEnum<["timestamp", "uuid", "iso-date", "objectId", "jwt"]>, z.ZodObject<{
2788
+ pattern: z.ZodString;
2789
+ replacement: z.ZodOptional<z.ZodString>;
2790
+ }, "strip", z.ZodTypeAny, {
2791
+ pattern: string;
2792
+ replacement?: string | undefined;
2793
+ }, {
2794
+ pattern: string;
2795
+ replacement?: string | undefined;
2796
+ }>, z.ZodObject<{
2797
+ remove: z.ZodArray<z.ZodString, "many">;
2798
+ }, "strip", z.ZodTypeAny, {
2799
+ remove: string[];
2800
+ }, {
2801
+ remove: string[];
2802
+ }>]>, "many">>;
2803
+ isError: z.ZodOptional<z.ZodUnion<[z.ZodBoolean, z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
2804
+ passesJudge: z.ZodOptional<z.ZodObject<{
2805
+ rubric: z.ZodString;
2806
+ reference: z.ZodOptional<z.ZodUnknown>;
2807
+ threshold: z.ZodOptional<z.ZodNumber>;
2808
+ configId: z.ZodOptional<z.ZodString>;
2809
+ }, "strip", z.ZodTypeAny, {
2810
+ rubric: string;
2811
+ reference?: unknown;
2812
+ threshold?: number | undefined;
2813
+ configId?: string | undefined;
2814
+ }, {
2815
+ rubric: string;
2816
+ reference?: unknown;
2817
+ threshold?: number | undefined;
2818
+ configId?: string | undefined;
2819
+ }>>;
2820
+ responseSize: z.ZodOptional<z.ZodObject<{
2821
+ maxBytes: z.ZodOptional<z.ZodNumber>;
2822
+ minBytes: z.ZodOptional<z.ZodNumber>;
2823
+ }, "strip", z.ZodTypeAny, {
2824
+ maxBytes?: number | undefined;
2825
+ minBytes?: number | undefined;
2826
+ }, {
2827
+ maxBytes?: number | undefined;
2828
+ minBytes?: number | undefined;
2829
+ }>>;
2830
+ }, "strip", z.ZodTypeAny, {
2831
+ isError?: string | boolean | string[] | undefined;
2832
+ schema?: string | undefined;
2833
+ snapshot?: string | undefined;
2834
+ response?: unknown;
2835
+ containsText?: string | string[] | undefined;
2836
+ matchesPattern?: string | string[] | undefined;
2837
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2838
+ pattern: string;
2839
+ replacement?: string | undefined;
2840
+ } | {
2841
+ remove: string[];
2842
+ })[] | undefined;
2843
+ passesJudge?: {
2844
+ rubric: string;
2845
+ reference?: unknown;
2846
+ threshold?: number | undefined;
2847
+ configId?: string | undefined;
2848
+ } | undefined;
2849
+ responseSize?: {
2850
+ maxBytes?: number | undefined;
2851
+ minBytes?: number | undefined;
2852
+ } | undefined;
2853
+ }, {
2854
+ isError?: string | boolean | string[] | undefined;
2855
+ schema?: string | undefined;
2856
+ snapshot?: string | undefined;
2857
+ response?: unknown;
2858
+ containsText?: string | string[] | undefined;
2859
+ matchesPattern?: string | string[] | undefined;
2860
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2861
+ pattern: string;
2862
+ replacement?: string | undefined;
2863
+ } | {
2864
+ remove: string[];
2865
+ })[] | undefined;
2866
+ passesJudge?: {
2867
+ rubric: string;
2868
+ reference?: unknown;
2869
+ threshold?: number | undefined;
2870
+ configId?: string | undefined;
2871
+ } | undefined;
2872
+ responseSize?: {
2873
+ maxBytes?: number | undefined;
2874
+ minBytes?: number | undefined;
2875
+ } | undefined;
2876
+ }>>;
2877
+ }, "strip", z.ZodTypeAny, {
2878
+ id: string;
2879
+ args?: Record<string, unknown> | undefined;
2880
+ metadata?: Record<string, unknown> | undefined;
2881
+ mode?: "direct" | "llm_host" | undefined;
2882
+ description?: string | undefined;
2883
+ toolName?: string | undefined;
2884
+ scenario?: string | undefined;
2885
+ llmHostConfig?: {
2886
+ provider: "anthropic" | "openai";
2887
+ model?: string | undefined;
2888
+ maxTokens?: number | undefined;
2889
+ apiKeyEnvVar?: string | undefined;
2890
+ temperature?: number | undefined;
2891
+ maxToolCalls?: number | undefined;
2892
+ } | undefined;
2893
+ expect?: {
2894
+ isError?: string | boolean | string[] | undefined;
2895
+ schema?: string | undefined;
2896
+ snapshot?: string | undefined;
2897
+ response?: unknown;
2898
+ containsText?: string | string[] | undefined;
2899
+ matchesPattern?: string | string[] | undefined;
2900
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2901
+ pattern: string;
2902
+ replacement?: string | undefined;
2903
+ } | {
2904
+ remove: string[];
2905
+ })[] | undefined;
2906
+ passesJudge?: {
2907
+ rubric: string;
2908
+ reference?: unknown;
2909
+ threshold?: number | undefined;
2910
+ configId?: string | undefined;
2911
+ } | undefined;
2912
+ responseSize?: {
2913
+ maxBytes?: number | undefined;
2914
+ minBytes?: number | undefined;
2915
+ } | undefined;
2916
+ } | undefined;
2917
+ }, {
2918
+ id: string;
2919
+ args?: Record<string, unknown> | undefined;
2920
+ metadata?: Record<string, unknown> | undefined;
2921
+ mode?: "direct" | "llm_host" | undefined;
2922
+ description?: string | undefined;
2923
+ toolName?: string | undefined;
2924
+ scenario?: string | undefined;
2925
+ llmHostConfig?: {
2926
+ provider: "anthropic" | "openai";
2927
+ model?: string | undefined;
2928
+ maxTokens?: number | undefined;
2929
+ apiKeyEnvVar?: string | undefined;
2930
+ temperature?: number | undefined;
2931
+ maxToolCalls?: number | undefined;
2932
+ } | undefined;
2933
+ expect?: {
2934
+ isError?: string | boolean | string[] | undefined;
2935
+ schema?: string | undefined;
2936
+ snapshot?: string | undefined;
2937
+ response?: unknown;
2938
+ containsText?: string | string[] | undefined;
2939
+ matchesPattern?: string | string[] | undefined;
2940
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2941
+ pattern: string;
2942
+ replacement?: string | undefined;
2943
+ } | {
2944
+ remove: string[];
2945
+ })[] | undefined;
2946
+ passesJudge?: {
2947
+ rubric: string;
2948
+ reference?: unknown;
2949
+ threshold?: number | undefined;
2950
+ configId?: string | undefined;
2951
+ } | undefined;
2952
+ responseSize?: {
2953
+ maxBytes?: number | undefined;
2954
+ minBytes?: number | undefined;
2955
+ } | undefined;
2956
+ } | undefined;
2957
+ }>, "many">;
2958
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2959
+ }, "strip", z.ZodTypeAny, {
2960
+ name: string;
2961
+ cases: {
2962
+ id: string;
2963
+ args?: Record<string, unknown> | undefined;
2964
+ metadata?: Record<string, unknown> | undefined;
2965
+ mode?: "direct" | "llm_host" | undefined;
2966
+ description?: string | undefined;
2967
+ toolName?: string | undefined;
2968
+ scenario?: string | undefined;
2969
+ llmHostConfig?: {
2970
+ provider: "anthropic" | "openai";
2971
+ model?: string | undefined;
2972
+ maxTokens?: number | undefined;
2973
+ apiKeyEnvVar?: string | undefined;
2974
+ temperature?: number | undefined;
2975
+ maxToolCalls?: number | undefined;
2976
+ } | undefined;
2977
+ expect?: {
2978
+ isError?: string | boolean | string[] | undefined;
2979
+ schema?: string | undefined;
2980
+ snapshot?: string | undefined;
2981
+ response?: unknown;
2982
+ containsText?: string | string[] | undefined;
2983
+ matchesPattern?: string | string[] | undefined;
2984
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
2985
+ pattern: string;
2986
+ replacement?: string | undefined;
2987
+ } | {
2988
+ remove: string[];
2989
+ })[] | undefined;
2990
+ passesJudge?: {
2991
+ rubric: string;
2992
+ reference?: unknown;
2993
+ threshold?: number | undefined;
2994
+ configId?: string | undefined;
2995
+ } | undefined;
2996
+ responseSize?: {
2997
+ maxBytes?: number | undefined;
2998
+ minBytes?: number | undefined;
2999
+ } | undefined;
3000
+ } | undefined;
3001
+ }[];
3002
+ metadata?: Record<string, unknown> | undefined;
3003
+ description?: string | undefined;
3004
+ }, {
3005
+ name: string;
3006
+ cases: {
3007
+ id: string;
3008
+ args?: Record<string, unknown> | undefined;
3009
+ metadata?: Record<string, unknown> | undefined;
3010
+ mode?: "direct" | "llm_host" | undefined;
3011
+ description?: string | undefined;
3012
+ toolName?: string | undefined;
3013
+ scenario?: string | undefined;
3014
+ llmHostConfig?: {
3015
+ provider: "anthropic" | "openai";
3016
+ model?: string | undefined;
3017
+ maxTokens?: number | undefined;
3018
+ apiKeyEnvVar?: string | undefined;
3019
+ temperature?: number | undefined;
3020
+ maxToolCalls?: number | undefined;
3021
+ } | undefined;
3022
+ expect?: {
3023
+ isError?: string | boolean | string[] | undefined;
3024
+ schema?: string | undefined;
3025
+ snapshot?: string | undefined;
3026
+ response?: unknown;
3027
+ containsText?: string | string[] | undefined;
3028
+ matchesPattern?: string | string[] | undefined;
3029
+ snapshotSanitizers?: ("timestamp" | "uuid" | "iso-date" | "objectId" | "jwt" | {
3030
+ pattern: string;
3031
+ replacement?: string | undefined;
3032
+ } | {
3033
+ remove: string[];
3034
+ })[] | undefined;
3035
+ passesJudge?: {
3036
+ rubric: string;
3037
+ reference?: unknown;
3038
+ threshold?: number | undefined;
3039
+ configId?: string | undefined;
3040
+ } | undefined;
3041
+ responseSize?: {
3042
+ maxBytes?: number | undefined;
3043
+ minBytes?: number | undefined;
3044
+ } | undefined;
3045
+ } | undefined;
3046
+ }[];
3047
+ metadata?: Record<string, unknown> | undefined;
3048
+ description?: string | undefined;
3049
+ }>;
3050
+ /**
3051
+ * Serialized (plain-data) form of an eval dataset: the type inferred from EvalDatasetSchema, so it carries no Zod schema objects
3052
+ */
3053
+ type SerializedEvalDataset = z.infer<typeof EvalDatasetSchema>;
3054
+ /**
3055
+ * Validates an eval case
3056
+ *
3057
+ * @param evalCase - The eval case to validate
3058
+ * @returns The validated eval case
3059
+ * @throws {z.ZodError} If validation fails
3060
+ */
3061
+ declare function validateEvalCase(evalCase: unknown): EvalCase;
3062
+ /**
3063
+ * Validates a serialized eval dataset
3064
+ *
3065
+ * @param dataset - The dataset to validate
3066
+ * @returns The validated dataset
3067
+ * @throws {z.ZodError} If validation fails
3068
+ */
3069
+ declare function validateEvalDataset(dataset: unknown): SerializedEvalDataset;
3070
+
3071
+ /**
3072
+ * Options accepted by loadEvalDataset() and loadEvalDatasetFromObject()
3073
+ */
3074
+ interface LoadDatasetOptions {
3075
+ /**
3076
+ * Optional schema definitions to attach to the dataset
3077
+ *
3078
+ * Keys should match the schema names referenced by `expect.schema` in eval cases
3079
+ */
3080
+ schemas?: Record<string, z.ZodSchema>;
3081
+ /**
3082
+ * Whether to validate the loaded dataset
3083
+ * @default true
3084
+ */
3085
+ validate?: boolean;
3086
+ }
3087
+ /**
3088
+ * Loads an eval dataset from a JSON file
3089
+ *
3090
+ * @param filePath - Absolute path to the JSON file (NOTE(review): the example below passes a relative path; confirm whether relative paths are supported and how they resolve)
3091
+ * @param options - Load options (schemas to attach, whether to validate)
3092
+ * @returns Promise resolving to the loaded dataset (validated by default; see `options.validate`)
3093
+ * @throws {Error} If file cannot be read or JSON is invalid
3094
+ * @throws {z.ZodError} If validation fails
3095
+ *
3096
+ * @example
3097
+ * const dataset = await loadEvalDataset('./data/my-evals.json', {
3098
+ * schemas: {
3099
+ * 'weather-response': WeatherResponseSchema,
3100
+ * },
3101
+ * });
3102
+ */
3103
+ declare function loadEvalDataset(filePath: string, options?: LoadDatasetOptions): Promise<EvalDataset>;
3104
+ /**
3105
+ * Loads an eval dataset from a plain object
3106
+ *
3107
+ * Useful for programmatically creating datasets in tests
3108
+ *
3109
+ * @param data - The dataset data
3110
+ * @param options - Load options
3111
+ * @returns The loaded and validated dataset
3112
+ * @throws {z.ZodError} If validation fails
3113
+ *
3114
+ * @example
3115
+ * const dataset = loadEvalDatasetFromObject({
3116
+ * name: 'my-test-dataset',
3117
+ * cases: [
3118
+ * {
3119
+ * id: 'case-1',
3120
+ * toolName: 'get_weather',
3121
+ * args: { city: 'London' },
3122
+ * },
3123
+ * ],
3124
+ * });
3125
+ */
3126
+ declare function loadEvalDatasetFromObject(data: unknown, options?: LoadDatasetOptions): EvalDataset;
3127
+
3128
+ /**
3129
+ * Context passed to the eval runner
3130
+ */
3131
+ interface EvalContext {
3132
+ /**
3133
+ * MCP fixture API for interacting with the server
3134
+ */
3135
+ mcp: MCPFixtureApi;
3136
+ /**
3137
+ * Optional Playwright TestInfo for reporter integration
3138
+ * When provided, eval results will be attached to the test for the MCP reporter
3139
+ */
3140
+ testInfo?: TestInfo;
3141
+ /**
3142
+ * Optional Playwright expect function for snapshot testing
3143
+ * Required for snapshot expectations to work properly
3144
+ */
3145
+ expect?: Expect;
3146
+ }
3147
+
3148
+ /**
3149
+ * Result of a single eval case
3150
+ */
3151
+ interface EvalCaseResult$1 {
3152
+ /**
3153
+ * Case ID
3154
+ */
3155
+ id: string;
3156
+ /**
3157
+ * Dataset name this case belongs to
3158
+ */
3159
+ datasetName: string;
3160
+ /**
3161
+ * MCP tool name that was called
3162
+ */
3163
+ toolName: string;
3164
+ /**
3165
+ * Evaluation mode (direct or llm_host)
3166
+ * @deprecated Mode is inferred from test context, not displayed in reports
3167
+ */
3168
+ mode?: 'direct' | 'llm_host';
3169
+ /**
3170
+ * Source of this result
3171
+ * - 'eval': From runEvalDataset() using JSON eval datasets
3172
+ * - 'test': From direct API test tracking (MCP fixture calls)
3173
+ */
3174
+ source: ResultSource;
3175
+ /**
3176
+ * Overall pass/fail status
3177
+ */
3178
+ pass: boolean;
3179
+ /**
3180
+ * Tool response
3181
+ */
3182
+ response?: unknown;
3183
+ /**
3184
+ * Error if tool call failed
3185
+ */
3186
+ error?: string;
3187
+ /**
3188
+ * Expectation results
3189
+ */
3190
+ expectations: Partial<Record<ExpectationType, EvalExpectationResult>>;
3191
+ /**
3192
+ * Authentication type used for this test
3193
+ */
3194
+ authType?: AuthType;
3195
+ /**
3196
+ * Playwright project name this test belongs to
3197
+ * Used for filtering/grouping results by project in the reporter
3198
+ */
3199
+ project?: string;
3200
+ /**
3201
+ * Execution time in milliseconds
3202
+ */
3203
+ durationMs: number;
3204
+ }
3205
+ /**
3206
+ * Overall result of running an eval dataset
3207
+ */
3208
+ interface EvalRunnerResult {
3209
+ /**
3210
+ * Total number of cases
3211
+ */
3212
+ total: number;
3213
+ /**
3214
+ * Number of passing cases
3215
+ */
3216
+ passed: number;
3217
+ /**
3218
+ * Number of failing cases
3219
+ */
3220
+ failed: number;
3221
+ /**
3222
+ * Individual case results
3223
+ */
3224
+ caseResults: Array<EvalCaseResult$1>;
3225
+ /**
3226
+ * Overall execution time in milliseconds
3227
+ */
3228
+ durationMs: number;
3229
+ }
3230
+ /**
3231
+ * Options for running eval dataset
3232
+ */
3233
+ interface EvalRunnerOptions {
3234
+ /**
3235
+ * The dataset to run
3236
+ */
3237
+ dataset: EvalDataset;
3238
+ /**
3239
+ * Schema registry for schema validation by name
3240
+ *
3241
+ * Maps schema names to Zod schemas for use with expect.schema
3242
+ *
3243
+ * @example
3244
+ * ```typescript
3245
+ * {
3246
+ * schemas: {
3247
+ * WeatherResponse: z.object({ temperature: z.number() }),
3248
+ * ErrorResponse: z.object({ error: z.string() }),
3249
+ * }
3250
+ * }
3251
+ * ```
3252
+ */
3253
+ schemas?: Record<string, ZodType>;
3254
+ /**
3255
+ * Judge configuration registry by ID
3256
+ *
3257
+ * Maps config IDs to JudgeConfig for use with expect.passesJudge.configId
3258
+ */
3259
+ judgeConfigs?: Record<string, JudgeConfig>;
3260
+ /**
3261
+ * Whether to stop on first failure
3262
+ * @default false
3263
+ */
3264
+ stopOnFailure?: boolean;
3265
+ /**
3266
+ * Optional callback called after each case
3267
+ */
3268
+ onCaseComplete?: (result: EvalCaseResult$1) => void | Promise<void>;
3269
+ }
3270
+ /**
3271
+ * Options for running a single eval case
3272
+ */
3273
+ interface EvalCaseOptions {
3274
+ /**
3275
+ * Dataset name for the result (defaults to 'single-case')
3276
+ */
3277
+ datasetName?: string;
3278
+ /**
3279
+ * Schema registry for schema validation by name
3280
+ */
3281
+ schemas?: Record<string, ZodType>;
3282
+ /**
3283
+ * Judge configuration registry by ID
3284
+ */
3285
+ judgeConfigs?: Record<string, JudgeConfig>;
3286
+ }
3287
+ /**
3288
+ * Runs a single eval case and returns the result
3289
+ *
3290
+ * @param evalCase - The eval case to run
3291
+ * @param context - Context containing mcp, testInfo, expect
3292
+ * @param options - Optional configuration (datasetName, schemas, judgeConfigs)
3293
+ * @returns The result of running the eval case
3294
+ *
3295
+ * @example
3296
+ * ```typescript
3297
+ * const result = await runEvalCase(
3298
+ * evalCase,
3299
+ * { mcp, testInfo, expect },
3300
+ * { schemas: { WeatherResponse: WeatherSchema } }
3301
+ * );
3302
+ *
3303
+ * expect(result.pass).toBe(true);
3304
+ * ```
3305
+ */
3306
+ declare function runEvalCase(evalCase: EvalCase, context: EvalContext, options?: EvalCaseOptions): Promise<EvalCaseResult$1>;
3307
+ /**
3308
+ * Runs an eval dataset against an MCP server
3309
+ *
3310
+ * This function composes runEvalCase() for each case in the dataset,
3311
+ * adding dataset-level features like stopOnFailure and callbacks.
3312
+ *
3313
+ * @param options - Eval runner options (dataset, schemas, judgeConfigs)
3314
+ * @param context - Eval context (mcp fixture, optional testInfo, optional expect)
3315
+ * @returns Eval results
3316
+ *
3317
+ * @example
3318
+ * // Basic usage
3319
+ * const result = await runEvalDataset(
3320
+ * {
3321
+ * dataset,
3322
+ * schemas: { WeatherResponse: WeatherSchema },
3323
+ * },
3324
+ * { mcp }
3325
+ * );
3326
+ *
3327
+ * @example
3328
+ * // With MCP reporter integration
3329
+ * test('eval dataset', async ({ mcp }, testInfo) => {
3330
+ * const result = await runEvalDataset(
3331
+ * { dataset },
3332
+ * { mcp, testInfo } // testInfo enables MCP reporter
3333
+ * );
3334
+ * });
3335
+ */
3336
+ declare function runEvalDataset(options: EvalRunnerOptions, context: EvalContext): Promise<EvalRunnerResult>;
3337
+
3338
+ /**
3339
+ * LLM Host Simulation - Main entry point
3340
+ *
3341
+ * Provides the public API for simulating LLM hosts interacting
3342
+ * with MCP servers through actual LLM providers.
3343
+ */
3344
+
3345
+ /**
3346
+ * Simulates an LLM host interacting with an MCP server
3347
+ *
3348
+ * This function uses actual LLM providers (OpenAI or Anthropic) to test
3349
+ * MCP servers through natural language scenarios. The LLM chooses which
3350
+ * tools to call based on their descriptions, testing discoverability and
3351
+ * parameter clarity.
3352
+ *
3353
+ * @param mcp - MCP fixture API
3354
+ * @param scenario - Natural language prompt describing what to do
3355
+ * @param config - LLM host configuration
3356
+ * @returns Simulation result with tool calls and final response
3357
+ *
3358
+ * @example
3359
+ * ```typescript
3360
+ * const result = await simulateLLMHost(mcp,
3361
+ * "Get the weather for London",
3362
+ * {
3363
+ * provider: 'openai',
3364
+ * model: 'gpt-4o'
3365
+ * }
3366
+ * );
3367
+ *
3368
+ * expect(result.success).toBe(true);
3369
+ * expect(result.toolCalls).toContainEqual({
3370
+ * name: 'get_weather',
3371
+ * arguments: { city: 'London' }
3372
+ * });
3373
+ * ```
3374
+ */
3375
+ declare function simulateLLMHost(mcp: MCPFixtureApi, scenario: string, config: LLMHostConfig): Promise<LLMHostSimulationResult>;
3376
+ /**
3377
+ * Checks whether the given LLM provider is supported, i.e. whether an adapter is registered for it
3378
+ *
3379
+ * This is a quick check that does not load the provider SDK, so (presumably) a `true` result does not confirm the SDK is installed.
3380
+ * The actual SDK loading happens in the adapter when simulation runs.
3381
+ *
3382
+ * @param provider - LLM provider to check
3383
+ * @returns true if an adapter is registered for the provider
3384
+ */
3385
+ declare function isProviderAvailable(provider: LLMProvider): boolean;
3386
+ /**
3387
+ * Gets a helpful error message for missing dependencies
3388
+ *
3389
+ * @param provider - LLM provider
3390
+ * @returns Error message with installation instructions
3391
+ */
3392
+ declare function getMissingDependencyMessage(provider: LLMProvider): string;
3393
+
3394
+ /**
3395
+ * Tool call validator for LLM host mode
3396
+ *
3397
+ * Validates that the LLM made the expected tool calls with correct arguments
3398
+ */
3399
+
3400
+ /**
3401
+ * Tool call validation function signature
3402
+ */
3403
+ type ToolCallValidator = (evalCase: EvalCase, response: unknown) => Promise<EvalExpectationResult>;
3404
+ /**
3405
+ * Creates a tool call validator for LLM host mode
3406
+ *
3407
+ * Validates that the LLM made the expected tool calls with correct arguments.
3408
+ * Supports partial argument matching and optional calls.
3409
+ *
3410
+ * @returns Validator function taking `(evalCase, response)` and resolving to an EvalExpectationResult
3411
+ *
3412
+ * @example
3413
+ * ```typescript
3414
+ * // In your eval case (NOTE(review): `expectedToolCalls` is not among the case fields of the serialized dataset shape declared in this file; confirm it is accepted by validation):
3415
+ * {
3416
+ * "id": "weather-london",
3417
+ * "mode": "llm_host",
3418
+ * "scenario": "Get the weather for London",
3419
+ * "expectedToolCalls": [
3420
+ * {
3421
+ * "name": "get_weather",
3422
+ * "arguments": { "city": "London" },
3423
+ * "required": true
3424
+ * }
3425
+ * ]
3426
+ * }
3427
+ * ```
3428
+ */
3429
+ declare function createToolCallValidator(): ToolCallValidator;
3430
+
3431
+ /**
3432
+ * Creates an LLM judge for evaluating tool responses
3433
+ *
3434
+ * Uses Claude Agent SDK for evaluation with usage metrics tracking.
3435
+ *
3436
+ * @param config - Judge configuration
3437
+ * @returns Judge instance
3438
+ * @throws {Error} If provider is unsupported or configuration is invalid
3439
+ *
3440
+ * @example
3441
+ * // Default Claude judge
3442
+ * const judge = createJudge();
3443
+ *
3444
+ * @example
3445
+ * // With configuration
3446
+ * const judge = createJudge({
3447
+ * model: 'claude-sonnet-4-20250514',
3448
+ * maxToolOutputSize: 50000, // Fail if response > 50KB
3449
+ * maxBudgetUsd: 0.05,
3450
+ * });
3451
+ *
3452
+ * // Evaluate a response
3453
+ * const result = await judge.evaluate(
3454
+ * candidateResponse,
3455
+ * referenceResponse,
3456
+ * 'Evaluate for accuracy and completeness'
3457
+ * );
3458
+ *
3459
+ * // Access usage metrics
3460
+ * console.log('Cost:', result.usage?.totalCostUsd);
3461
+ * console.log('Tokens:', result.usage?.inputTokens, result.usage?.outputTokens);
3462
+ */
3463
+ declare function createJudge(config?: JudgeConfig): Judge;
3464
+
3465
+ /**
3466
+ * Options for conformance checks
3467
+ */
3468
+ interface MCPConformanceOptions {
3469
+ /**
3470
+ * List of tools that must be present
3471
+ */
3472
+ requiredTools?: Array<string>;
3473
+ /**
3474
+ * Whether to validate tool schemas
3475
+ * @default true
3476
+ */
3477
+ validateSchemas?: boolean;
3478
+ /**
3479
+ * Whether to check server info is present
3480
+ * @default true
3481
+ */
3482
+ checkServerInfo?: boolean;
3483
+ /**
3484
+ * Whether to check resources capability (if declared by server)
3485
+ * @default true
3486
+ */
3487
+ checkResources?: boolean;
3488
+ /**
3489
+ * Whether to check prompts capability (if declared by server)
3490
+ * @default true
3491
+ */
3492
+ checkPrompts?: boolean;
3493
+ }
3494
+ /**
3495
+ * Individual check result, as listed in MCPConformanceResult.checks
3496
+ */
3497
+ interface MCPConformanceCheck$1 {
3498
+ name: string; // Name identifying the check
3499
+ pass: boolean; // Whether this check passed
3500
+ message: string; // Message accompanying the result (presumably human-readable, as in the reporter's MCPConformanceCheck; confirm)
3501
+ }
3502
+ /**
3503
+ * Raw MCP responses for snapshotting
3504
+ */
3505
+ interface MCPConformanceRaw {
3506
+ /**
3507
+ * Server info (name, version)
3508
+ * null if not available
3509
+ */
3510
+ serverInfo: Implementation | null;
3511
+ /**
3512
+ * Server capabilities
3513
+ * null if not available
3514
+ */
3515
+ capabilities: ServerCapabilities | null;
3516
+ /**
3517
+ * List of tools from the server
3518
+ */
3519
+ tools: Tool[];
3520
+ /**
3521
+ * List of resources from the server
3522
+ * null if server doesn't declare resources capability
3523
+ */
3524
+ resources: Resource[] | null;
3525
+ /**
3526
+ * List of prompts from the server
3527
+ * null if server doesn't declare prompts capability
3528
+ */
3529
+ prompts: Prompt[] | null;
3530
+ }
3531
+ /**
3532
+ * Result of conformance checks
3533
+ */
3534
+ interface MCPConformanceResult {
3535
+ /**
3536
+ * Whether all checks passed
3537
+ */
3538
+ pass: boolean;
3539
+ /**
3540
+ * List of check results
3541
+ */
3542
+ checks: MCPConformanceCheck$1[];
3543
+ /**
3544
+ * Raw MCP responses for snapshotting
3545
+ *
3546
+ * @example
3547
+ * ```typescript
3548
+ * const result = await runConformanceChecks(mcp);
3549
+ * expect(result.raw.tools).toMatchSnapshot();
3550
+ * expect(result.raw.capabilities).toMatchSnapshot();
3551
+ * ```
3552
+ */
3553
+ raw: MCPConformanceRaw;
3554
+ }
3555
+ /**
3556
+ * Runs MCP protocol conformance checks
3557
+ *
3558
+ * Validates that the MCP server conforms to expected protocol behavior.
3559
+ * Returns both assertion results and raw MCP responses for snapshotting.
3560
+ *
3561
+ * When testInfo is provided, results are automatically attached for the MCP reporter.
3562
+ *
3563
+ * @param mcp - MCP fixture API
3564
+ * @param options - Conformance check options
3565
+ * @param testInfo - Optional Playwright TestInfo for reporter integration
3566
+ * @returns Conformance check results with raw responses
3567
+ *
3568
+ * @example
3569
+ * ```typescript
3570
+ * // Basic usage
3571
+ * const result = await runConformanceChecks(mcp, {
3572
+ * requiredTools: ['get_weather', 'search_docs'],
3573
+ * validateSchemas: true,
3574
+ * });
3575
+ *
3576
+ * // Check assertions
3577
+ * expect(result.pass).toBe(true);
3578
+ *
3579
+ * // With reporter integration (recommended in Playwright tests)
3580
+ * const result = await runConformanceChecks(mcp, {
3581
+ * requiredTools: ['search'],
3582
+ * }, testInfo);
3583
+ *
3584
+ * // Snapshot raw responses
3585
+ * expect(result.raw.tools).toMatchSnapshot();
3586
+ * expect(result.raw.capabilities).toMatchSnapshot();
3587
+ * ```
3588
+ */
3589
+ declare function runConformanceChecks(mcp: MCPFixtureApi, options?: MCPConformanceOptions, testInfo?: TestInfo): Promise<MCPConformanceResult>;
3590
+
3591
+ /**
3592
+ * Reporter-specific type definitions
3593
+ *
3594
+ * These types are used by the MCP reporter and UI.
3595
+ *
3596
+ * @packageDocumentation
3597
+ */
3598
+
3599
+ /**
3600
+ * Individual conformance check result
3601
+ */
3602
+ interface MCPConformanceCheck {
3603
+ /**
3604
+ * Check name (e.g., 'server_info_present', 'list_tools_succeeds')
3605
+ */
3606
+ name: string;
3607
+ /**
3608
+ * Whether the check passed
3609
+ */
3610
+ pass: boolean;
3611
+ /**
3612
+ * Human-readable message describing the result
3613
+ */
3614
+ message: string;
3615
+ }
3616
+ /**
3617
+ * Conformance check result as stored in reporter data
3618
+ */
3619
+ interface MCPConformanceResultData {
3620
+ /**
3621
+ * Test title where conformance check was run
3622
+ */
3623
+ testTitle: string;
3624
+ /**
3625
+ * Whether all checks passed
3626
+ */
3627
+ pass: boolean;
3628
+ /**
3629
+ * Individual check results
3630
+ */
3631
+ checks: MCPConformanceCheck[];
3632
+ /**
3633
+ * Server info if available
3634
+ */
3635
+ serverInfo?: {
3636
+ name?: string;
3637
+ version?: string;
3638
+ };
3639
+ /**
3640
+ * Number of tools discovered
3641
+ */
3642
+ toolCount: number;
3643
+ /**
3644
+ * Auth type used for this check
3645
+ */
3646
+ authType?: AuthType;
3647
+ /**
3648
+ * Project name
3649
+ */
3650
+ project?: string;
3651
+ }
3652
+ /**
3653
+ * Server capabilities data from mcp-list-tools attachment
3654
+ */
3655
+ interface MCPServerCapabilitiesData {
3656
+ /**
3657
+ * Test title where listTools was called
3658
+ */
3659
+ testTitle: string;
3660
+ /**
3661
+ * List of tools available on the server
3662
+ */
3663
+ tools: Array<{
3664
+ name: string;
3665
+ description?: string;
3666
+ }>;
3667
+ /**
3668
+ * Total number of tools
3669
+ */
3670
+ toolCount: number;
3671
+ /**
3672
+ * Auth type used for this test
3673
+ */
3674
+ authType?: AuthType;
3675
+ /**
3676
+ * Project name
3677
+ */
3678
+ project?: string;
3679
+ }
3680
+ /**
3681
+ * Result of a single eval case
3682
+ */
3683
+ interface EvalCaseResult {
3684
+ /**
3685
+ * Case ID
3686
+ */
3687
+ id: string;
3688
+ /**
3689
+ * Dataset name this case belongs to
3690
+ */
3691
+ datasetName: string;
3692
+ /**
3693
+ * MCP tool name that was called
3694
+ */
3695
+ toolName: string;
3696
+ /**
3697
+ * Source of this result
3698
+ */
3699
+ source: ResultSource;
3700
+ /**
3701
+ * Overall pass/fail status
3702
+ */
3703
+ pass: boolean;
3704
+ /**
3705
+ * Tool response
3706
+ */
3707
+ response?: unknown;
3708
+ /**
3709
+ * Error if tool call failed
3710
+ */
3711
+ error?: string;
3712
+ /**
3713
+ * Expectation results
3714
+ */
3715
+ expectations: Partial<Record<ExpectationType, EvalExpectationResult>>;
3716
+ /**
3717
+ * Authentication type used for this test
3718
+ */
3719
+ authType?: AuthType;
3720
+ /**
3721
+ * Playwright project name this test belongs to
3722
+ */
3723
+ project?: string;
3724
+ /**
3725
+ * Execution time in milliseconds
3726
+ */
3727
+ durationMs: number;
3728
+ /**
3729
+ * @deprecated Mode is inferred from test context, not displayed in reports
3730
+ */
3731
+ mode?: 'direct' | 'llm_host';
3732
+ }
3733
+ /**
3734
+ * Aggregated MCP eval run data
3735
+ */
3736
+ interface MCPEvalRunData {
3737
+ /**
3738
+ * Run timestamp (ISO 8601)
3739
+ */
3740
+ timestamp: string;
3741
+ /**
3742
+ * Total duration in milliseconds
3743
+ */
3744
+ durationMs: number;
3745
+ /**
3746
+ * Environment info
3747
+ */
3748
+ environment: {
3749
+ ci: boolean;
3750
+ node: string;
3751
+ platform: string;
3752
+ };
3753
+ /**
3754
+ * Aggregate metrics
3755
+ */
3756
+ metrics: {
3757
+ /**
3758
+ * Total number of eval cases
3759
+ */
3760
+ total: number;
3761
+ /**
3762
+ * Number of passed cases
3763
+ */
3764
+ passed: number;
3765
+ /**
3766
+ * Number of failed cases
3767
+ */
3768
+ failed: number;
3769
+ /**
3770
+ * Pass rate (0-1)
3771
+ */
3772
+ passRate: number;
3773
+ /**
3774
+ * Dataset breakdown: dataset name -> count
3775
+ */
3776
+ datasetBreakdown: Record<string, number>;
3777
+ /**
3778
+ * Expectation type breakdown
3779
+ */
3780
+ expectationBreakdown: ExpectationBreakdown;
3781
+ };
3782
+ /**
3783
+ * All eval results from this run
3784
+ */
3785
+ results: EvalCaseResult[];
3786
+ /**
3787
+ * Conformance check results (optional)
3788
+ */
3789
+ conformanceChecks?: MCPConformanceResultData[];
3790
+ /**
3791
+ * Server capabilities discovered via listTools (optional)
3792
+ */
3793
+ serverCapabilities?: MCPServerCapabilitiesData[];
3794
+ }
3795
+ /**
3796
+ * Historical summary for trend charts. NOTE(review): field names mirror MCPEvalRunData (timestamp, durationMs) and its metrics (total, passed, failed, passRate); presumably they carry the same meanings and units; confirm
3797
+ */
3798
+ interface MCPEvalHistoricalSummary {
3799
+ timestamp: string;
3800
+ total: number;
3801
+ passed: number;
3802
+ failed: number;
3803
+ passRate: number;
3804
+ durationMs: number;
3805
+ }
3806
+ /**
3807
+ * Complete data structure passed to UI: one run's aggregated data plus historical summaries
3808
+ */
3809
+ interface MCPEvalData {
3810
+ runData: MCPEvalRunData; // Aggregated eval run data
3811
+ historical: MCPEvalHistoricalSummary[]; // Historical summaries used for trend charts
3812
+ }
3813
+
3814
+ /**
3815
+ * Reporter types - re-exported from canonical source
3816
+ *
3817
+ * This module re-exports types from the canonical types module for backwards compatibility.
3818
+ * All type definitions now live in src/types/.
3819
+ *
3820
+ * @packageDocumentation
3821
+ */
3822
+
3823
+ /**
3824
+ * Configuration options for MCP Eval Reporter
3825
+ */
3826
+ interface MCPEvalReporterConfig {
3827
+ /**
3828
+ * Output directory for reports and historical data
3829
+ * @default '.mcp-test-results'
3830
+ */
3831
+ outputDir?: string;
3832
+ /**
3833
+ * Auto-open report in browser after test run
3834
+ * @default true (disabled in CI)
3835
+ */
3836
+ autoOpen?: boolean;
3837
+ /**
3838
+ * Number of historical runs to keep
3839
+ * @default 10
3840
+ */
3841
+ historyLimit?: number;
3842
+ /**
3843
+ * Suppress console output (report still generated)
3844
+ * @default false
3845
+ */
3846
+ quiet?: boolean;
3847
+ /**
3848
+ * Include auto-tracked MCP tool calls from tests without explicit eval results.
3849
+ * When true, any test using the MCP fixture will have its tool calls
3850
+ * included in the report, even without using runEvalCase/runEvalDataset.
3851
+ * When false, only tests with explicit eval results are included.
3852
+ * @default true
3853
+ */
3854
+ includeAutoTracking?: boolean;
3855
+ }
3856
+
3857
+ export { type AuthType, type BuiltInSanitizer, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type ContentBlock, type CreateMCPClientOptions, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult$1 as EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type ExpectedToolCall, type FieldRemovalSanitizer, type Judge, type JudgeConfig, type JudgeMatcherOptions, type JudgeResult, type LLMHostConfig, type LLMHostSimulationResult, type LLMHostSimulator, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck$1 as MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type SizeValidatorOptions, type SnapshotSanitizer, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallValidationResult, type ToolCallValidator, type ToolPredicate, type UsageMetrics, type ValidationResult, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, 
createTokenAuthHeaders, createToolCallValidator, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, extractText as extractTextFromResponse, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, normalizeToolResponse, normalizeWhitespace, performOAuthSetup, performOAuthSetupIfNeeded, runConformanceChecks, runEvalCase, runEvalDataset, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText };