npm - @gleanwork/mcp-server-tester - Versions diffs - 0.12.0 - Mend

@gleanwork/mcp-server-tester 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/LICENSE +21 -0
package/README.md +421 -0
package/dist/cli/index.js +2785 -0
package/dist/fixtures/mcp.d.ts +605 -0
package/dist/fixtures/mcp.js +2378 -0
package/dist/fixtures/mcp.js.map +1 -0
package/dist/fixtures/mcpAuth.d.ts +31 -0
package/dist/fixtures/mcpAuth.js +317 -0
package/dist/fixtures/mcpAuth.js.map +1 -0
package/dist/index.cjs +3658 -0
package/dist/index.cjs.map +1 -0
package/dist/index.d.cts +3857 -0
package/dist/index.d.ts +3857 -0
package/dist/index.js +3582 -0
package/dist/index.js.map +1 -0
package/dist/reporters/mcpReporter.cjs +301 -0
package/dist/reporters/mcpReporter.cjs.map +1 -0
package/dist/reporters/mcpReporter.d.cts +85 -0
package/dist/reporters/mcpReporter.d.ts +85 -0
package/dist/reporters/mcpReporter.js +297 -0
package/dist/reporters/mcpReporter.js.map +1 -0
package/dist/reporters/ui-dist/app.js +174 -0
package/dist/reporters/ui-dist/index.html +28 -0
package/dist/reporters/ui-dist/styles.css +1 -0
package/package.json +138 -0
package/src/reporters/ui-dist/app.js +174 -0
package/src/reporters/ui-dist/index.html +28 -0
package/src/reporters/ui-dist/styles.css +1 -0

package/dist/fixtures/mcp.d.ts ADDED Viewed

@@ -0,0 +1,605 @@
+import * as playwright_test from 'playwright/test';
+import { ZodType } from 'zod';
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { Tool, CallToolResult } from '@modelcontextprotocol/sdk/types.js';
+/**
+ * toMatchToolResponse Matcher
+ *
+ * Validates that a response exactly matches an expected value.
+ */
+/**
+ * Matcher function exposed as `expect(value).toMatchToolResponse(expected)`.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param expected - The value `received` has to match exactly
+ * @returns Matcher outcome: a `pass` flag and a `message` callback returning the report text
+ */
+declare function toMatchToolResponse(this: {
+    isNot: boolean;
+}, received: unknown, expected: unknown): {
+    pass: boolean;
+    message: () => string;
+};
+/**
+ * Validator Types
+ *
+ * Core types for the unified assertion architecture.
+ * These types are used by both Playwright matchers and the eval runner.
+ */
+/**
+ * Options for text validation (the `options` argument of `toContainToolText`)
+ */
+interface TextValidatorOptions {
+    /**
+     * Whether to perform case-sensitive matching
+     * @default true
+     */
+    caseSensitive?: boolean;
+}
+/**
+ * Options for response size validation (the `options` argument of
+ * `toHaveToolResponseSize`)
+ *
+ * Both bounds are optional, so a caller may supply either one or both.
+ * NOTE(review): whether the bounds are inclusive is not visible from this
+ * declaration - confirm against the implementation.
+ */
+interface SizeValidatorOptions {
+    /** Maximum allowed size in bytes */
+    maxBytes?: number;
+    /** Minimum required size in bytes */
+    minBytes?: number;
+}
+/**
+ * Options for schema validation (the `options` argument of `toMatchToolSchema`)
+ */
+interface SchemaValidatorOptions {
+    /**
+     * Whether to use strict mode (fail on extra properties)
+     *
+     * NOTE(review): the value used when this is omitted is not stated in the
+     * declaration - confirm against the implementation.
+     */
+    strict?: boolean;
+}
+/**
+ * Options for pattern validation (the `options` argument of `toMatchToolPattern`)
+ */
+interface PatternValidatorOptions {
+    /**
+     * Whether to perform case-sensitive matching
+     * @default true
+     */
+    caseSensitive?: boolean;
+}
+/**
+ * Built-in sanitizer names for common variable patterns
+ *
+ * Each name is one of the three forms accepted by `SnapshotSanitizer`.
+ */
+type BuiltInSanitizer = 'timestamp' | 'uuid' | 'iso-date' | 'objectId' | 'jwt';
+/**
+ * Custom regex-based sanitizer
+ *
+ * One of the three forms accepted by `SnapshotSanitizer`.
+ */
+interface RegexSanitizer {
+    /**
+     * Regex pattern to match
+     *
+     * Accepts either a `RegExp` or a string.
+     * NOTE(review): presumably a string is compiled into a regular expression -
+     * confirm how flags and escaping are handled.
+     */
+    pattern: string | RegExp;
+    /**
+     * Replacement string
+     * @default "[SANITIZED]"
+     */
+    replacement?: string;
+}
+/**
+ * Field removal sanitizer - removes specified fields from objects
+ *
+ * One of the three forms accepted by `SnapshotSanitizer`,
+ * e.g. `{ remove: ['field1', 'nested.field'] }`.
+ */
+interface FieldRemovalSanitizer {
+    /** Field paths to remove (supports dot notation for nested fields) */
+    remove: string[];
+}
+/**
+ * Snapshot sanitizer configuration
+ *
+ * Sanitizers transform response data before snapshot comparison,
+ * allowing variable content (timestamps, IDs, etc.) to be normalized.
+ * They are supplied as the `sanitizers` array of `toMatchToolSnapshot`.
+ *
+ * Can be:
+ * - A built-in sanitizer name: 'timestamp', 'uuid', 'iso-date', 'objectId', 'jwt'
+ * - A regex sanitizer: { pattern: /regex/, replacement: '[REPLACED]' }
+ * - A field removal sanitizer: { remove: ['field1', 'nested.field'] }
+ */
+type SnapshotSanitizer = BuiltInSanitizer | RegexSanitizer | FieldRemovalSanitizer;
+/**
+ * toMatchToolSchema Matcher
+ *
+ * Validates that a response matches a Zod schema.
+ */
+/**
+ * Matcher function exposed as `expect(value).toMatchToolSchema(schema, options)`.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param schema - The Zod schema to validate against
+ * @param options - Optional validation options (`strict`)
+ * @returns Matcher outcome: a `pass` flag and a `message` callback returning the report text
+ */
+declare function toMatchToolSchema(this: {
+    isNot: boolean;
+}, received: unknown, schema: ZodType, options?: SchemaValidatorOptions): {
+    pass: boolean;
+    message: () => string;
+};
+/**
+ * toContainToolText Matcher
+ *
+ * Validates that a response contains expected text substrings.
+ */
+/**
+ * Matcher function exposed as `expect(value).toContainToolText(expected, options)`.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param expected - A single substring or an array of substrings to look for
+ * @param options - Optional text options (`caseSensitive`)
+ * @returns Matcher outcome: a `pass` flag and a `message` callback returning the report text
+ */
+declare function toContainToolText(this: {
+    isNot: boolean;
+}, received: unknown, expected: string | string[], options?: TextValidatorOptions): {
+    pass: boolean;
+    message: () => string;
+};
+/**
+ * toMatchToolPattern Matcher
+ *
+ * Validates that a response matches regex patterns.
+ */
+/**
+ * Matcher function exposed as `expect(value).toMatchToolPattern(patterns, options)`.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param patterns - One pattern or an array of patterns; each may be a string or a `RegExp`
+ * @param options - Optional pattern options (`caseSensitive`)
+ * @returns Matcher outcome: a `pass` flag and a `message` callback returning the report text
+ */
+declare function toMatchToolPattern(this: {
+    isNot: boolean;
+}, received: unknown, patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): {
+    pass: boolean;
+    message: () => string;
+};
+/**
+ * toMatchToolSnapshot Matcher
+ *
+ * Validates that a response matches a saved snapshot.
+ * Uses Playwright's native snapshot testing functionality.
+ */
+/**
+ * Matcher function exposed as `expect(value).toMatchToolSnapshot(name, sanitizers)`.
+ *
+ * Note: This is an async matcher that uses Playwright's snapshot testing.
+ * It returns a promise, so the assertion has to be awaited by the caller.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param name - Snapshot name
+ * @param sanitizers - Optional sanitizers applied before snapshot comparison
+ * @returns Promise of the matcher outcome: a `pass` flag and a `message` callback
+ */
+declare function toMatchToolSnapshot(this: {
+    isNot: boolean;
+}, received: unknown, name: string, sanitizers?: SnapshotSanitizer[]): Promise<{
+    pass: boolean;
+    message: () => string;
+}>;
+/**
+ * toBeToolError Matcher
+ *
+ * Validates that a response is (or is not) an error.
+ */
+/**
+ * Matcher function exposed as `expect(value).toBeToolError(expected)`.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param expected - Optional expectation: a boolean, a single string, or an
+ *   array of strings. NOTE(review): how an array of strings is matched
+ *   (any vs. all) is not visible from this declaration - confirm.
+ * @returns Matcher outcome: a `pass` flag and a `message` callback returning the report text
+ */
+declare function toBeToolError(this: {
+    isNot: boolean;
+}, received: unknown, expected?: boolean | string | string[]): {
+    pass: boolean;
+    message: () => string;
+};
+/**
+ * Supported LLM provider types
+ *
+ * Used as the type of `JudgeConfig.provider`, whose documented default is 'claude'.
+ * NOTE(review): both 'claude' and 'anthropic' are listed; how they differ is
+ * not visible from this declaration - confirm against the implementation.
+ */
+type ProviderKind = 'claude' | 'anthropic' | 'openai' | 'custom-http';
+/**
+ * Configuration for an LLM judge
+ *
+ * Every field is optional. Where a `@default` tag is present it states the
+ * value documented for an omitted field.
+ */
+interface JudgeConfig {
+    /**
+     * LLM provider to use
+     * @default 'claude'
+     */
+    provider?: ProviderKind;
+    /**
+     * Environment variable name containing the API key
+     * @default 'ANTHROPIC_API_KEY'
+     */
+    apiKeyEnvVar?: string;
+    /**
+     * Model to use for judging
+     * @default 'claude-sonnet-4-20250514'
+     */
+    model?: string;
+    /**
+     * Maximum tokens for response
+     * @default 1000
+     */
+    maxTokens?: number;
+    /**
+     * Temperature (0-1, lower is more deterministic)
+     * @default 0.0
+     */
+    temperature?: number;
+    /**
+     * Maximum budget in USD for the judge evaluation
+     * @default 0.10
+     */
+    maxBudgetUsd?: number;
+    /**
+     * Maximum size (in bytes) for tool output before failing the test
+     * When set, the judge will fail if the candidate response exceeds this size
+     *
+     * No default is documented for this field.
+     * NOTE(review): presumably no size limit applies when it is omitted - confirm.
+     */
+    maxToolOutputSize?: number;
+}
+/**
+ * Matcher Types
+ *
+ * TypeScript declarations for custom Playwright matchers.
+ */
+/**
+ * Options for the LLM judge matcher (the `options` argument of `toPassToolJudge`)
+ */
+interface JudgeMatcherOptions {
+    /** Reference response to compare against */
+    reference?: unknown;
+    /**
+     * Score threshold for passing
+     * @default 0.7
+     */
+    passingThreshold?: number;
+    /** Judge configuration override */
+    judgeConfig?: JudgeConfig;
+}
+/**
+ * Declaration merging for Playwright matchers
+ *
+ * Adds the MCP tool matchers to `PlaywrightTest.Matchers`. Matchers declared
+ * with a `Promise<R>` return type are asynchronous and have to be awaited.
+ */
+declare global {
+    namespace PlaywrightTest {
+        interface Matchers<R, T = unknown> {
+            /**
+             * Validates that a response exactly matches the expected value
+             *
+             * @param expected - The expected response value
+             *
+             * @example
+             * ```typescript
+             * expect(result).toMatchToolResponse({ status: 'ok', count: 42 });
+             * ```
+             */
+            toMatchToolResponse(expected: unknown): R;
+            /**
+             * Validates that a response matches a Zod schema
+             *
+             * @param schema - The Zod schema to validate against
+             * @param options - Validation options
+             *
+             * @example
+             * ```typescript
+             * const WeatherSchema = z.object({
+             *   temperature: z.number(),
+             *   conditions: z.string(),
+             * });
+             * expect(result).toMatchToolSchema(WeatherSchema);
+             * ```
+             */
+            toMatchToolSchema(schema: ZodType, options?: SchemaValidatorOptions): R;
+            /**
+             * Validates that a response contains expected text substrings
+             *
+             * @param expected - Expected substring(s) to find
+             * @param options - Validation options
+             *
+             * @example
+             * ```typescript
+             * expect(result).toContainToolText('temperature');
+             * expect(result).toContainToolText(['temperature', 'conditions']);
+             * expect(result).toContainToolText('HELLO', { caseSensitive: false });
+             * ```
+             */
+            toContainToolText(expected: string | string[], options?: TextValidatorOptions): R;
+            /**
+             * Validates that a response matches regex patterns
+             *
+             * @param patterns - Expected pattern(s) to match
+             * @param options - Validation options
+             *
+             * @example
+             * ```typescript
+             * expect(result).toMatchToolPattern(/temperature: \d+/);
+             * expect(result).toMatchToolPattern(['temp: \\d+', 'humidity: \\d+%']);
+             * ```
+             */
+            toMatchToolPattern(patterns: string | RegExp | (string | RegExp)[], options?: PatternValidatorOptions): R;
+            /**
+             * Validates that a response matches a saved snapshot
+             *
+             * Asynchronous: returns a promise, so await the assertion.
+             *
+             * @param name - Snapshot name
+             * @param sanitizers - Optional sanitizers for non-deterministic values
+             *
+             * @example
+             * ```typescript
+             * await expect(result).toMatchToolSnapshot('weather-response');
+             * await expect(result).toMatchToolSnapshot('user-data', [
+             *   { pattern: /\d{4}-\d{2}-\d{2}/, replacement: '[DATE]' },
+             * ]);
+             * ```
+             */
+            toMatchToolSnapshot(name: string, sanitizers?: SnapshotSanitizer[]): Promise<R>;
+            /**
+             * Validates that a response is (or is not) an error
+             *
+             * @param expected - What to expect (true for error, false for success, string for specific message).
+             *   An array of strings is also accepted. NOTE(review): whether any or all
+             *   of the strings must match is not visible from this declaration - confirm.
+             *
+             * @example
+             * ```typescript
+             * expect(result).toBeToolError();  // Expects any error
+             * expect(result).not.toBeToolError();  // Expects success
+             * expect(result).toBeToolError('File not found');  // Expects specific error
+             * ```
+             */
+            toBeToolError(expected?: boolean | string | string[]): R;
+            /**
+             * Validates that a response passes LLM-as-judge evaluation
+             *
+             * Asynchronous: returns a promise, so await the assertion.
+             *
+             * @param rubric - Evaluation rubric/criteria
+             * @param options - Judge options
+             *
+             * @example
+             * ```typescript
+             * await expect(result).toPassToolJudge('Response should be helpful and accurate');
+             * await expect(result).toPassToolJudge('Response should match reference', {
+             *   reference: expectedOutput,
+             *   passingThreshold: 0.8,
+             * });
+             * ```
+             */
+            toPassToolJudge(rubric: string, options?: JudgeMatcherOptions): Promise<R>;
+            /**
+             * Validates that a response meets size constraints
+             *
+             * @param options - Size constraints (maxBytes, minBytes)
+             *
+             * @example
+             * ```typescript
+             * expect(result).toHaveToolResponseSize({ maxBytes: 10000 });
+             * expect(result).toHaveToolResponseSize({ minBytes: 100, maxBytes: 50000 });
+             * ```
+             */
+            toHaveToolResponseSize(options: SizeValidatorOptions): R;
+            /**
+             * Validates that a response satisfies a custom predicate function
+             *
+             * Use this as an escape hatch when built-in matchers don't cover your use case.
+             * The predicate receives both the raw response and extracted text for convenience.
+             *
+             * Asynchronous: returns a promise, so await the assertion.
+             *
+             * @param predicate - Function that validates the response
+             * @param description - Optional description for error messages
+             *
+             * @example
+             * ```typescript
+             * // Simple boolean predicate (`response` is typed `unknown`, so narrow it first)
+             * await expect(result).toSatisfyToolPredicate((response) => {
+             *   const items = (response as { data?: { items?: unknown[] } }).data?.items;
+             *   return Array.isArray(items) && items.length > 0;
+             * });
+             *
+             * // Predicate with custom message
+             * await expect(result).toSatisfyToolPredicate(
+             *   (response, text) => ({
+             *     pass: text.includes('success'),
+             *     message: 'Expected response to contain "success"',
+             *   }),
+             *   'success check'
+             * );
+             *
+             * // Async predicate
+             * await expect(result).toSatisfyToolPredicate(async (response) => {
+             *   return await validateWithExternalService(response);
+             * });
+             * ```
+             */
+            toSatisfyToolPredicate(predicate: ToolPredicate, description?: string): Promise<R>;
+        }
+    }
+}
+/**
+ * Predicate result returned by the user's predicate function
+ *
+ * A `ToolPredicate` may return this object instead of a bare boolean when it
+ * wants to supply its own message.
+ */
+interface PredicateResult {
+    /** Whether the predicate passed */
+    pass: boolean;
+    /** Message explaining the result (shown on failure) */
+    message?: string;
+}
+/**
+ * A predicate function that validates a response
+ *
+ * `response` is typed `unknown`, so a predicate has to narrow or cast it
+ * before reading properties from it.
+ *
+ * @param response - The raw response under test
+ * @param text - Text extracted from the response
+ * @returns A boolean or a `PredicateResult`, either directly or wrapped in a promise
+ */
+type ToolPredicate = (response: unknown, text: string) => boolean | PredicateResult | Promise<boolean | PredicateResult>;
+/**
+ * toPassToolJudge Matcher
+ *
+ * Validates that a response passes LLM-as-judge evaluation.
+ */
+/**
+ * Matcher function exposed as `expect(value).toPassToolJudge(rubric, options)`.
+ *
+ * Note: This is an async matcher that calls an LLM for evaluation.
+ * It returns a promise, so the assertion has to be awaited by the caller.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param rubric - Evaluation rubric/criteria given to the judge
+ * @param options - Optional judge options (`reference`, `passingThreshold`, `judgeConfig`)
+ * @returns Promise of the matcher outcome: a `pass` flag and a `message` callback
+ */
+declare function toPassToolJudge(this: {
+    isNot: boolean;
+}, received: unknown, rubric: string, options?: JudgeMatcherOptions): Promise<{
+    pass: boolean;
+    message: () => string;
+}>;
+/**
+ * toHaveToolResponseSize Matcher
+ *
+ * Validates that a response meets size constraints.
+ */
+/**
+ * Matcher function exposed as `expect(value).toHaveToolResponseSize(options)`.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param options - Size constraints in bytes (`maxBytes`, `minBytes`); required argument
+ * @returns Matcher outcome: a `pass` flag and a `message` callback returning the report text
+ */
+declare function toHaveToolResponseSize(this: {
+    isNot: boolean;
+}, received: unknown, options: SizeValidatorOptions): {
+    pass: boolean;
+    message: () => string;
+};
+/**
+ * toSatisfyToolPredicate Matcher
+ *
+ * Validates that a response satisfies a custom predicate function.
+ * This is an escape hatch for custom validation logic when built-in
+ * matchers don't cover the use case.
+ */
+/**
+ * Matcher function exposed as `expect(value).toSatisfyToolPredicate(predicate, description)`.
+ *
+ * This matcher allows custom validation logic via a predicate function.
+ * The predicate receives both the raw response and extracted text.
+ * The matcher returns a promise, so the assertion has to be awaited.
+ *
+ * `this.isNot` is the matcher-context flag.
+ * NOTE(review): presumably `true` when the matcher is reached through `.not`,
+ * as in Playwright's `expect.extend` convention - confirm.
+ *
+ * @param received - The value that was passed to `expect(...)`
+ * @param predicate - Function that validates the response; may be sync or async
+ * @param description - Optional description for error messages
+ * @returns Promise of the matcher outcome: a `pass` flag and a `message` callback
+ *
+ * @example
+ * ```typescript
+ * // Simple boolean predicate (`response` is typed `unknown`, so narrow it first)
+ * await expect(result).toSatisfyToolPredicate((response) => {
+ *   const data = (response as { data?: unknown[] }).data;
+ *   return Array.isArray(data) && data.length > 0;
+ * });
+ *
+ * // Predicate with custom message
+ * await expect(result).toSatisfyToolPredicate((response, text) => {
+ *   const hasTemperature = text.includes('temperature');
+ *   return {
+ *     pass: hasTemperature,
+ *     message: hasTemperature
+ *       ? 'Found temperature in response'
+ *       : 'Expected response to contain temperature',
+ *   };
+ * });
+ *
+ * // Async predicate
+ * await expect(result).toSatisfyToolPredicate(async (response) => {
+ *   const isValid = await validateWithExternalService(response);
+ *   return isValid;
+ * });
+ * ```
+ */
+declare function toSatisfyToolPredicate(this: {
+    isNot: boolean;
+}, received: unknown, predicate: ToolPredicate, description?: string): Promise<{
+    pass: boolean;
+    message: () => string;
+}>;
+/**
+ * Extended Playwright expect with MCP tool matchers
+ *
+ * `toMatchToolSnapshot`, `toPassToolJudge` and `toSatisfyToolPredicate` are
+ * declared as returning promises, so assertions using them have to be awaited.
+ * The remaining matchers return their result synchronously.
+ *
+ * @example
+ * ```typescript
+ * import { expect } from '@gleanwork/mcp-server-tester';
+ *
+ * test('weather tool', async ({ mcp }) => {
+ *   const result = await mcp.callTool('get_weather', { city: 'London' });
+ *
+ *   expect(result).toContainToolText('temperature');
+ *   expect(result).toMatchToolSchema(WeatherSchema);
+ *   expect(result).not.toBeToolError();
+ *   await expect(result).toMatchToolSnapshot('weather-response');
+ * });
+ * ```
+ */
+declare const expect: playwright_test.Expect<{
+    toMatchToolResponse: typeof toMatchToolResponse;
+    toMatchToolSchema: typeof toMatchToolSchema;
+    toContainToolText: typeof toContainToolText;
+    toMatchToolPattern: typeof toMatchToolPattern;
+    toMatchToolSnapshot: typeof toMatchToolSnapshot;
+    toBeToolError: typeof toBeToolError;
+    toPassToolJudge: typeof toPassToolJudge;
+    toHaveToolResponseSize: typeof toHaveToolResponseSize;
+    toSatisfyToolPredicate: typeof toSatisfyToolPredicate;
+}>;
+/**
+ * Canonical type definitions for @gleanwork/mcp-server-tester
+ *
+ * This module is the single source of truth for shared types.
+ * All other modules should import from here rather than defining their own.
+ *
+ * @packageDocumentation
+ */
+/**
+ * Authentication type for MCP connections
+ *
+ * - 'oauth': Interactive OAuth 2.1 with PKCE (browser-based authentication)
+ * - 'api-token': Static API token (e.g., from a dashboard or environment variable)
+ * - 'none': No authentication
+ *
+ * Used as the type of `MCPFixtureApi.authType` and
+ * `MCPFixtureState.resolvedAuthType`.
+ */
+type AuthType = 'oauth' | 'api-token' | 'none';
+/**
+ * High-level API for interacting with MCP servers in tests
+ *
+ * This interface wraps the raw MCP Client with test-friendly methods
+ */
+interface MCPFixtureApi {
+    /**
+     * The underlying MCP client (for advanced usage)
+     */
+    client: Client;
+    /**
+     * Authentication type used for this test session
+     */
+    authType: AuthType;
+    /**
+     * Playwright project name for this test session
+     *
+     * Optional, so it may be absent.
+     */
+    project?: string;
+    /**
+     * Lists all available tools from the MCP server
+     *
+     * @returns Array of tool definitions
+     */
+    listTools(): Promise<Array<Tool>>;
+    /**
+     * Calls a tool on the MCP server
+     *
+     * @typeParam TArgs - Shape of the arguments object; defaults to `Record<string, unknown>`
+     * @param name - Tool name
+     * @param args - Tool arguments (required; a string-keyed object)
+     * @returns Tool call result
+     */
+    callTool<TArgs extends Record<string, unknown> = Record<string, unknown>>(name: string, args: TArgs): Promise<CallToolResult>;
+    /**
+     * Gets information about the connected server
+     *
+     * @returns An object whose `name` and `version` may each be absent, or `null`.
+     *   NOTE(review): presumably `null` means the server reported no info - confirm.
+     */
+    getServerInfo(): {
+        name?: string;
+        version?: string;
+    } | null;
+}
+/**
+ * Internal fixture state for passing auth type between fixtures
+ *
+ * Exposed to fixtures through `MCPFixtures._mcpFixtureState`.
+ */
+interface MCPFixtureState {
+    /**
+     * The resolved authentication type (may differ from config if CLI tokens are used)
+     */
+    resolvedAuthType: AuthType;
+}
+/**
+ * Extended test fixtures for MCP testing
+ *
+ * These are the fixture names intersected into the test-argument type of the
+ * exported `test` object.
+ */
+type MCPFixtures = {
+    /**
+     * Raw MCP client instance (automatically connected and cleaned up)
+     */
+    mcpClient: Client;
+    /**
+     * High-level MCP API for tests
+     */
+    mcp: MCPFixtureApi;
+    /**
+     * Internal fixture state (not for external use)
+     */
+    _mcpFixtureState: MCPFixtureState;
+};
+/**
+ * Extended Playwright test with MCP fixtures
+ *
+ * The test-argument type is Playwright's own test args and options
+ * intersected with `MCPFixtures` (`mcpClient`, `mcp`, `_mcpFixtureState`).
+ * The worker-argument type is Playwright's unchanged.
+ *
+ * @example
+ * ```typescript
+ * import { test, expect } from '@gleanwork/mcp-server-tester';
+ *
+ * test('lists tools from MCP server', async ({ mcp }) => {
+ *   const tools = await mcp.listTools();
+ *   expect(tools.length).toBeGreaterThan(0);
+ * });
+ * ```
+ */
+declare const test: playwright_test.TestType<playwright_test.PlaywrightTestArgs & playwright_test.PlaywrightTestOptions & MCPFixtures, playwright_test.PlaywrightWorkerArgs & playwright_test.PlaywrightWorkerOptions>;
+export { expect, test };