@gleanwork/mcp-server-tester 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,605 @@
1
+ import * as playwright_test from 'playwright/test';
2
+ import { ZodType } from 'zod';
3
+ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
4
+ import { Tool, CallToolResult } from '@modelcontextprotocol/sdk/types.js';
5
+
6
+ /**
7
+ * toMatchToolResponse Matcher
8
+ *
9
+ * Validates that a response exactly matches an expected value.
10
+ */
11
+ /**
12
+ * Matcher function registered on the extended `expect` as `toMatchToolResponse`. Takes the `received` value and the `expected` value and returns a result object with a `pass` flag and a `message` callback that produces the report text. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
13
+ */
14
+ declare function toMatchToolResponse(this: {
15
+ isNot: boolean;
16
+ }, received: unknown, expected: unknown): {
17
+ pass: boolean;
18
+ message: () => string;
19
+ };
20
+
21
+ /**
22
+ * Validator Types
23
+ *
24
+ * Core types for the unified assertion architecture.
25
+ * These types are used by both Playwright matchers and the eval runner.
26
+ */
27
+
28
+ /**
29
+ * Options for text validation, accepted as the optional last argument of the `toContainToolText` matcher
30
+ */
31
+ interface TextValidatorOptions {
32
+ /** Whether to perform case-sensitive matching (default: true) */
33
+ caseSensitive?: boolean;
34
+ }
35
+ /**
36
+ * Options for response size validation, passed to the `toHaveToolResponseSize` matcher. Both bounds are optional.
37
+ */
38
+ interface SizeValidatorOptions {
39
+ /** Maximum allowed size in bytes (NOTE(review): whether the bound is inclusive is not visible here - confirm) */
40
+ maxBytes?: number;
41
+ /** Minimum required size in bytes (NOTE(review): whether the bound is inclusive is not visible here - confirm) */
42
+ minBytes?: number;
43
+ }
44
+ /**
45
+ * Options for schema validation, accepted as the optional last argument of the `toMatchToolSchema` matcher
46
+ */
47
+ interface SchemaValidatorOptions {
48
+ /** Whether to use strict mode (fail on extra properties); the default is not stated in these declarations - TODO confirm */
49
+ strict?: boolean;
50
+ }
51
+ /**
52
+ * Options for pattern validation, accepted as the optional last argument of the `toMatchToolPattern` matcher
53
+ */
54
+ interface PatternValidatorOptions {
55
+ /** Whether to perform case-sensitive matching (default: true) */
56
+ caseSensitive?: boolean;
57
+ }
58
+ /**
59
+ * Built-in sanitizer names for common variable patterns; each name is one of the accepted forms of `SnapshotSanitizer`
60
+ */
61
+ type BuiltInSanitizer = 'timestamp' | 'uuid' | 'iso-date' | 'objectId' | 'jwt';
62
+ /**
63
+ * Custom regex-based sanitizer; one of the accepted forms of `SnapshotSanitizer`
64
+ */
65
+ interface RegexSanitizer {
66
+ /** Regex pattern to match, given either as a `RegExp` or as a string (NOTE(review): how string patterns are compiled, e.g. which flags apply, is not visible here - confirm) */
67
+ pattern: string | RegExp;
68
+ /** Replacement string substituted for each match (default: "[SANITIZED]") */
69
+ replacement?: string;
70
+ }
71
+ /**
72
+ * Field removal sanitizer - removes specified fields from objects; one of the accepted forms of `SnapshotSanitizer`
73
+ */
74
+ interface FieldRemovalSanitizer {
75
+ /** Field paths to remove (supports dot notation for nested fields, e.g. 'nested.field') */
76
+ remove: string[];
77
+ }
78
+ /**
79
+ * Snapshot sanitizer configuration (supplied as an array to the `toMatchToolSnapshot` matcher)
80
+ *
81
+ * Sanitizers transform response data before snapshot comparison,
82
+ * allowing variable content (timestamps, IDs, etc.) to be normalized.
83
+ *
84
+ * Each sanitizer can be one of:
85
+ * - A built-in sanitizer name (`BuiltInSanitizer`): 'timestamp', 'uuid', 'iso-date', 'objectId', 'jwt'
86
+ * - A regex sanitizer (`RegexSanitizer`): { pattern: /regex/, replacement: '[REPLACED]' }
87
+ * - A field removal sanitizer (`FieldRemovalSanitizer`): { remove: ['field1', 'nested.field'] }
88
+ */
89
+ type SnapshotSanitizer = BuiltInSanitizer | RegexSanitizer | FieldRemovalSanitizer;
90
+
91
+ /**
92
+ * toMatchToolSchema Matcher
93
+ *
94
+ * Validates that a response matches a Zod schema.
95
+ */
96
+
97
+ /**
98
+ * Matcher function registered on the extended `expect` as `toMatchToolSchema`. Takes the `received` value, the Zod `schema` and optional `options`, and returns a result object with a `pass` flag and a `message` callback that produces the report text. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
99
+ */
100
+ declare function toMatchToolSchema(this: {
101
+ isNot: boolean;
102
+ }, received: unknown, schema: ZodType, options?: SchemaValidatorOptions): {
103
+ pass: boolean;
104
+ message: () => string;
105
+ };
106
+
107
+ /**
108
+ * toContainToolText Matcher
109
+ *
110
+ * Validates that a response contains expected text substrings.
111
+ */
112
+
113
+ /**
114
+ * Matcher function registered on the extended `expect` as `toContainToolText`. Takes the `received` value, one or more `expected` substrings and optional `options`, and returns a result object with a `pass` flag and a `message` callback that produces the report text. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
115
+ */
116
+ declare function toContainToolText(this: {
117
+ isNot: boolean;
118
+ }, received: unknown, expected: string | string[], options?: TextValidatorOptions): {
119
+ pass: boolean;
120
+ message: () => string;
121
+ };
122
+
123
+ /**
124
+ * toMatchToolPattern Matcher
125
+ *
126
+ * Validates that a response matches regex patterns.
127
+ */
128
+
129
+ /**
130
+ * Matcher function registered on the extended `expect` as `toMatchToolPattern`. Takes the `received` value, one or more `patterns` (strings or `RegExp` objects) and optional `options`, and returns a result object with a `pass` flag and a `message` callback that produces the report text. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
131
+ */
132
+ declare function toMatchToolPattern(this: {
133
+ isNot: boolean;
134
+ }, received: unknown, patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): {
135
+ pass: boolean;
136
+ message: () => string;
137
+ };
138
+
139
+ /**
140
+ * toMatchToolSnapshot Matcher
141
+ *
142
+ * Validates that a response matches a saved snapshot.
143
+ * Uses Playwright's native snapshot testing functionality.
144
+ */
145
+
146
+ /**
147
+ * Matcher function registered on the extended `expect` as `toMatchToolSnapshot`. Takes the `received` value, the snapshot `name` and optional `sanitizers`. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
148
+ *
149
+ * Note: This is an async matcher that uses Playwright's snapshot testing; it returns a Promise of the `{ pass, message }` result, so assertions using it must be awaited.
150
+ */
151
+ declare function toMatchToolSnapshot(this: {
152
+ isNot: boolean;
153
+ }, received: unknown, name: string, sanitizers?: SnapshotSanitizer[]): Promise<{
154
+ pass: boolean;
155
+ message: () => string;
156
+ }>;
157
+
158
+ /**
159
+ * toBeToolError Matcher
160
+ *
161
+ * Validates that a response is (or is not) an error.
162
+ */
163
+ /**
164
+ * Matcher function registered on the extended `expect` as `toBeToolError`. Takes the `received` value and an optional `expected` (a boolean, a string, or an array of strings) and returns a result object with a `pass` flag and a `message` callback that produces the report text. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
165
+ */
166
+ declare function toBeToolError(this: {
167
+ isNot: boolean;
168
+ }, received: unknown, expected?: boolean | string | string[]): {
169
+ pass: boolean;
170
+ message: () => string;
171
+ };
172
+
173
+ /**
174
+ * Supported LLM provider types, used by `JudgeConfig.provider`. NOTE(review): how 'claude' differs from 'anthropic' is not visible in these declarations - confirm.
175
+ */
176
+ type ProviderKind = 'claude' | 'anthropic' | 'openai' | 'custom-http';
177
+ /**
178
+ * Configuration for an LLM judge. Every field is optional; it is supplied to the judge matcher through `JudgeMatcherOptions.judgeConfig`.
179
+ */
180
+ interface JudgeConfig {
181
+ /**
182
+ * LLM provider to use (one of the `ProviderKind` values)
183
+ * @default 'claude'
184
+ */
185
+ provider?: ProviderKind;
186
+ /**
187
+ * Name of the environment variable containing the API key (the variable's name, not the key itself)
188
+ * @default 'ANTHROPIC_API_KEY'
189
+ */
190
+ apiKeyEnvVar?: string;
191
+ /**
192
+ * Model to use for judging
193
+ * @default 'claude-sonnet-4-20250514'
194
+ */
195
+ model?: string;
196
+ /**
197
+ * Maximum tokens for the judge's response
198
+ * @default 1000
199
+ */
200
+ maxTokens?: number;
201
+ /**
202
+ * Sampling temperature (0-1, lower is more deterministic)
203
+ * @default 0.0
204
+ */
205
+ temperature?: number;
206
+ /**
207
+ * Maximum budget in USD for the judge evaluation
208
+ * @default 0.10
209
+ */
210
+ maxBudgetUsd?: number;
211
+ /**
212
+ * Maximum size (in bytes) for tool output before failing the test.
213
+ * When set, the judge will fail if the candidate response exceeds this size.
214
+ */
215
+ maxToolOutputSize?: number;
216
+ }
217
+
218
+ /**
219
+ * Matcher Types
220
+ *
221
+ * TypeScript declarations for custom Playwright matchers.
222
+ */
223
+
224
+ /**
225
+ * Options for the LLM judge matcher (`toPassToolJudge`); all fields are optional
226
+ */
227
+ interface JudgeMatcherOptions {
228
+ /** Reference response to compare against */
229
+ reference?: unknown;
230
+ /** Score threshold for passing (default: 0.7) (NOTE(review): score scale is presumably 0-1 - confirm) */
231
+ passingThreshold?: number;
232
+ /** Judge configuration override (see `JudgeConfig`) */
233
+ judgeConfig?: JudgeConfig;
234
+ }
235
+ /**
236
+ * Declaration merging for Playwright matchers: augments the global `PlaywrightTest.Matchers` interface with the MCP tool matchers. The received value comes from `expect(...)`, so it does not appear in these signatures. Matchers declared as returning `Promise<R>` must be awaited.
237
+ */
238
+ declare global {
239
+ namespace PlaywrightTest {
240
+ interface Matchers<R, T = unknown> {
241
+ /**
242
+ * Validates that a response exactly matches the expected value
243
+ *
244
+ * @param expected - The expected response value
245
+ *
246
+ * @example
247
+ * ```typescript
248
+ * expect(result).toMatchToolResponse({ status: 'ok', count: 42 });
249
+ * ```
250
+ */
251
+ toMatchToolResponse(expected: unknown): R;
252
+ /**
253
+ * Validates that a response matches a Zod schema
254
+ *
255
+ * @param schema - The Zod schema to validate against
256
+ * @param options - Validation options (see `SchemaValidatorOptions`)
257
+ *
258
+ * @example
259
+ * ```typescript
260
+ * const WeatherSchema = z.object({
261
+ * temperature: z.number(),
262
+ * conditions: z.string(),
263
+ * });
264
+ * expect(result).toMatchToolSchema(WeatherSchema);
265
+ * ```
266
+ */
267
+ toMatchToolSchema(schema: ZodType, options?: SchemaValidatorOptions): R;
268
+ /**
269
+ * Validates that a response contains expected text substrings
270
+ *
271
+ * @param expected - Expected substring(s) to find
272
+ * @param options - Validation options (see `TextValidatorOptions`)
273
+ *
274
+ * @example
275
+ * ```typescript
276
+ * expect(result).toContainToolText('temperature');
277
+ * expect(result).toContainToolText(['temperature', 'conditions']);
278
+ * expect(result).toContainToolText('HELLO', { caseSensitive: false });
279
+ * ```
280
+ */
281
+ toContainToolText(expected: string | string[], options?: TextValidatorOptions): R;
282
+ /**
283
+ * Validates that a response matches regex patterns
284
+ *
285
+ * @param patterns - Expected pattern(s) to match, as `RegExp` objects or pattern strings
286
+ * @param options - Validation options (see `PatternValidatorOptions`)
287
+ *
288
+ * @example
289
+ * ```typescript
290
+ * expect(result).toMatchToolPattern(/temperature: \d+/);
291
+ * expect(result).toMatchToolPattern(['temp: \\d+', 'humidity: \\d+%']);
292
+ * ```
293
+ */
294
+ toMatchToolPattern(patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): R;
295
+ /**
296
+ * Validates that a response matches a saved snapshot (async matcher: returns a Promise and must be awaited)
297
+ *
298
+ * @param name - Snapshot name
299
+ * @param sanitizers - Optional sanitizers for non-deterministic values
300
+ *
301
+ * @example
302
+ * ```typescript
303
+ * await expect(result).toMatchToolSnapshot('weather-response');
304
+ * await expect(result).toMatchToolSnapshot('user-data', [
305
+ * { pattern: /\d{4}-\d{2}-\d{2}/, replacement: '[DATE]' },
306
+ * ]);
307
+ * ```
308
+ */
309
+ toMatchToolSnapshot(name: string, sanitizers?: SnapshotSanitizer[]): Promise<R>;
310
+ /**
311
+ * Validates that a response is (or is not) an error
312
+ *
313
+ * @param expected - What to expect (true for error, false for success, string for specific message; an array of strings is also accepted)
314
+ *
315
+ * @example
316
+ * ```typescript
317
+ * expect(result).toBeToolError(); // Expects any error
318
+ * expect(result).not.toBeToolError(); // Expects success
319
+ * expect(result).toBeToolError('File not found'); // Expects specific error
320
+ * ```
321
+ */
322
+ toBeToolError(expected?: boolean | string | string[]): R;
323
+ /**
324
+ * Validates that a response passes LLM-as-judge evaluation (async matcher: returns a Promise and must be awaited)
325
+ *
326
+ * @param rubric - Evaluation rubric/criteria
327
+ * @param options - Judge options (see `JudgeMatcherOptions`)
328
+ *
329
+ * @example
330
+ * ```typescript
331
+ * await expect(result).toPassToolJudge('Response should be helpful and accurate');
332
+ * await expect(result).toPassToolJudge('Response should match reference', {
333
+ * reference: expectedOutput,
334
+ * passingThreshold: 0.8,
335
+ * });
336
+ * ```
337
+ */
338
+ toPassToolJudge(rubric: string, options?: JudgeMatcherOptions): Promise<R>;
339
+ /**
340
+ * Validates that a response meets size constraints
341
+ *
342
+ * @param options - Size constraints (maxBytes, minBytes)
343
+ *
344
+ * @example
345
+ * ```typescript
346
+ * expect(result).toHaveToolResponseSize({ maxBytes: 10000 });
347
+ * expect(result).toHaveToolResponseSize({ minBytes: 100, maxBytes: 50000 });
348
+ * ```
349
+ */
350
+ toHaveToolResponseSize(options: SizeValidatorOptions): R;
351
+ /**
352
+ * Validates that a response satisfies a custom predicate function (async matcher: returns a Promise and must be awaited)
353
+ *
354
+ * Use this as an escape hatch when built-in matchers don't cover your use case.
355
+ * The predicate receives both the raw response (typed `unknown`, so narrow it before reading properties) and extracted text for convenience.
356
+ *
357
+ * @param predicate - Function that validates the response; may return a boolean or a `PredicateResult`, directly or as a Promise
358
+ * @param description - Optional description for error messages
359
+ *
360
+ * @example
361
+ * ```typescript
362
+ * // Simple boolean predicate
363
+ * await expect(result).toSatisfyToolPredicate((response) => {
364
+ * return ((response as { data?: { items?: unknown[] } }).data?.items?.length ?? 0) > 0;
365
+ * });
366
+ *
367
+ * // Predicate with custom message
368
+ * await expect(result).toSatisfyToolPredicate(
369
+ * (response, text) => ({
370
+ * pass: text.includes('success'),
371
+ * message: 'Expected response to contain "success"',
372
+ * }),
373
+ * 'success check'
374
+ * );
375
+ *
376
+ * // Async predicate
377
+ * await expect(result).toSatisfyToolPredicate(async (response) => {
378
+ * return await validateWithExternalService(response);
379
+ * });
380
+ * ```
381
+ */
382
+ toSatisfyToolPredicate(predicate: ToolPredicate, description?: string): Promise<R>;
383
+ }
384
+ }
385
+ }
386
+ /**
387
+ * Predicate result returned by the user's predicate function (the object form of a `ToolPredicate` return value)
388
+ */
389
+ interface PredicateResult {
390
+ /** Whether the predicate passed */
391
+ pass: boolean;
392
+ /** Optional message explaining the result (shown on failure) */
393
+ message?: string;
394
+ }
395
+ /**
396
+ * A predicate function that validates a response. It receives the raw `response` (typed `unknown`, so it must be narrowed before properties are read) and the extracted `text`, and returns either a boolean or a `PredicateResult`, synchronously or wrapped in a Promise.
397
+ */
398
+ type ToolPredicate = (response: unknown, text: string) => boolean | PredicateResult | Promise<boolean | PredicateResult>;
399
+
400
+ /**
401
+ * toPassToolJudge Matcher
402
+ *
403
+ * Validates that a response passes LLM-as-judge evaluation.
404
+ */
405
+
406
+ /**
407
+ * Matcher function registered on the extended `expect` as `toPassToolJudge`. Takes the `received` value, the evaluation `rubric` and optional `options`. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
408
+ *
409
+ * Note: This is an async matcher that calls an LLM for evaluation; it returns a Promise of the `{ pass, message }` result, so assertions using it must be awaited.
410
+ */
411
+ declare function toPassToolJudge(this: {
412
+ isNot: boolean;
413
+ }, received: unknown, rubric: string, options?: JudgeMatcherOptions): Promise<{
414
+ pass: boolean;
415
+ message: () => string;
416
+ }>;
417
+
418
+ /**
419
+ * toHaveToolResponseSize Matcher
420
+ *
421
+ * Validates that a response meets size constraints.
422
+ */
423
+
424
+ /**
425
+ * Matcher function registered on the extended `expect` as `toHaveToolResponseSize`. Takes the `received` value and the required size `options` and returns a result object with a `pass` flag and a `message` callback that produces the report text. `this.isNot` presumably reflects `.not` negation (Playwright custom-matcher convention).
426
+ */
427
+ declare function toHaveToolResponseSize(this: {
428
+ isNot: boolean;
429
+ }, received: unknown, options: SizeValidatorOptions): {
430
+ pass: boolean;
431
+ message: () => string;
432
+ };
433
+
434
+ /**
435
+ * toSatisfyToolPredicate Matcher
436
+ *
437
+ * Validates that a response satisfies a custom predicate function.
438
+ * This is an escape hatch for custom validation logic when built-in
439
+ * matchers don't cover the use case.
440
+ */
441
+
442
+ /**
443
+ * Matcher function registered on the extended `expect` as `toSatisfyToolPredicate`. Returns a Promise of the `{ pass, message }` result, so assertions using it must be awaited.
444
+ *
445
+ * This matcher allows custom validation logic via a predicate function.
446
+ * The predicate receives both the raw response (typed `unknown`, so narrow it before reading properties) and extracted text.
447
+ *
448
+ * @example
449
+ * ```typescript
450
+ * // Simple boolean predicate
451
+ * await expect(result).toSatisfyToolPredicate((response) => {
452
+ * return ((response as { data?: unknown[] }).data?.length ?? 0) > 0;
453
+ * });
454
+ *
455
+ * // Predicate with custom message
456
+ * await expect(result).toSatisfyToolPredicate((response, text) => {
457
+ * const hasTemperature = text.includes('temperature');
458
+ * return {
459
+ * pass: hasTemperature,
460
+ * message: hasTemperature
461
+ * ? 'Found temperature in response'
462
+ * : 'Expected response to contain temperature',
463
+ * };
464
+ * });
465
+ *
466
+ * // Async predicate
467
+ * await expect(result).toSatisfyToolPredicate(async (response) => {
468
+ * const isValid = await validateWithExternalService(response);
469
+ * return isValid;
470
+ * });
471
+ * ```
472
+ */
473
+ declare function toSatisfyToolPredicate(this: {
474
+ isNot: boolean;
475
+ }, received: unknown, predicate: ToolPredicate, description?: string): Promise<{
476
+ pass: boolean;
477
+ message: () => string;
478
+ }>;
479
+
480
+ /**
481
+ * Extended Playwright expect with MCP tool matchers (typed as Playwright's `Expect` parameterized with the nine matcher functions listed below)
482
+ *
483
+ * @example
484
+ * ```typescript
485
+ * import { test, expect } from '@gleanwork/mcp-server-tester';
486
+ *
487
+ * test('weather tool', async ({ mcp }) => {
488
+ * const result = await mcp.callTool('get_weather', { city: 'London' });
489
+ *
490
+ * expect(result).toContainToolText('temperature');
491
+ * expect(result).toMatchToolSchema(WeatherSchema);
492
+ * expect(result).not.toBeToolError();
493
+ * });
494
+ * ```
495
+ */
496
+ declare const expect: playwright_test.Expect<{
497
+ toMatchToolResponse: typeof toMatchToolResponse;
498
+ toMatchToolSchema: typeof toMatchToolSchema;
499
+ toContainToolText: typeof toContainToolText;
500
+ toMatchToolPattern: typeof toMatchToolPattern;
501
+ toMatchToolSnapshot: typeof toMatchToolSnapshot;
502
+ toBeToolError: typeof toBeToolError;
503
+ toPassToolJudge: typeof toPassToolJudge;
504
+ toHaveToolResponseSize: typeof toHaveToolResponseSize;
505
+ toSatisfyToolPredicate: typeof toSatisfyToolPredicate;
506
+ }>;
507
+
508
+ /**
509
+ * Canonical type definitions for @gleanwork/mcp-server-tester
510
+ *
511
+ * This module is the single source of truth for shared types.
512
+ * All other modules should import from here rather than defining their own.
513
+ *
514
+ * @packageDocumentation
515
+ */
516
+ /**
517
+ * Authentication type for MCP connections (exposed to tests as `MCPFixtureApi.authType`)
518
+ *
519
+ * - 'oauth': Interactive OAuth 2.1 with PKCE (browser-based authentication)
520
+ * - 'api-token': Static API token (e.g., from a dashboard or environment variable)
521
+ * - 'none': No authentication
522
+ */
523
+ type AuthType = 'oauth' | 'api-token' | 'none';
524
+
525
+ /**
526
+ * High-level API for interacting with MCP servers in tests (provided to tests as the `mcp` fixture)
527
+ *
528
+ * This interface wraps the raw MCP Client with test-friendly methods
529
+ */
530
+ interface MCPFixtureApi {
531
+ /**
532
+ * The underlying MCP client (for advanced usage)
533
+ */
534
+ client: Client;
535
+ /**
536
+ * Authentication type used for this test session
537
+ */
538
+ authType: AuthType;
539
+ /**
540
+ * Playwright project name for this test session (optional; may be undefined)
541
+ */
542
+ project?: string;
543
+ /**
544
+ * Lists all available tools from the MCP server
545
+ *
546
+ * @returns Promise resolving to an array of tool definitions (`Tool` from the MCP SDK)
547
+ */
548
+ listTools(): Promise<Array<Tool>>;
549
+ /**
550
+ * Calls a tool on the MCP server
551
+ *
552
+ * @param name - Tool name
553
+ * @param args - Tool arguments as a string-keyed object; the parameter is required, so pass `{}` for a tool that takes no arguments
554
+ * @returns Promise resolving to the tool call result (`CallToolResult` from the MCP SDK)
555
+ */
556
+ callTool<TArgs extends Record<string, unknown> = Record<string, unknown>>(name: string, args: TArgs): Promise<CallToolResult>;
557
+ /**
558
+ * Gets information about the connected server. Returns synchronously; the result may be `null`, and both `name` and `version` are optional (NOTE(review): presumably `null` when the server reported no info - confirm).
559
+ */
560
+ getServerInfo(): {
561
+ name?: string;
562
+ version?: string;
563
+ } | null;
564
+ }
565
+
566
+ /**
567
+ * Internal fixture state for passing auth type between fixtures (exposed as the `_mcpFixtureState` fixture, which is marked not for external use)
568
+ */
569
+ interface MCPFixtureState {
570
+ /**
571
+ * The resolved authentication type (may differ from config if CLI tokens are used)
572
+ */
573
+ resolvedAuthType: AuthType;
574
+ }
575
+ /**
576
+ * Extended test fixtures for MCP testing; these are merged into the test-scoped fixture type of the exported `test`
577
+ */
578
+ type MCPFixtures = {
579
+ /**
580
+ * Raw MCP client instance (automatically connected and cleaned up)
581
+ */
582
+ mcpClient: Client;
583
+ /**
584
+ * High-level MCP API for tests (see `MCPFixtureApi`)
585
+ */
586
+ mcp: MCPFixtureApi;
587
+ /**
588
+ * Internal fixture state (not for external use)
589
+ */
590
+ _mcpFixtureState: MCPFixtureState;
591
+ };
592
+ /**
593
+ * Extended Playwright test with MCP fixtures: the test-scoped fixtures are Playwright's own test args and options plus `MCPFixtures` (`mcpClient`, `mcp`, `_mcpFixtureState`); the worker-scoped fixtures are Playwright's defaults.
594
+ *
595
+ * @example
596
+ * import { test, expect } from '@gleanwork/mcp-server-tester';
597
+ *
598
+ * test('lists tools from MCP server', async ({ mcp }) => {
599
+ * const tools = await mcp.listTools();
600
+ * expect(tools.length).toBeGreaterThan(0);
601
+ * });
602
+ */
603
+ declare const test: playwright_test.TestType<playwright_test.PlaywrightTestArgs & playwright_test.PlaywrightTestOptions & MCPFixtures, playwright_test.PlaywrightWorkerArgs & playwright_test.PlaywrightWorkerOptions>;
604
+
605
+ export { expect, test };