@gleanwork/mcp-server-tester 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,3582 @@
1
+ import * as fs from 'fs/promises';
2
+ import { readFile } from 'fs/promises';
3
+ import * as path2 from 'path';
4
+ import { z } from 'zod';
5
+ import { expect as expect$1, test as test$1, chromium } from '@playwright/test';
6
+ import { discoverAuthorizationServerMetadata, startAuthorization, exchangeAuthorization } from '@modelcontextprotocol/sdk/client/auth.js';
7
+ import createDebug from 'debug';
8
+ import * as oauth2 from 'oauth4webapi';
9
+ import { homedir } from 'os';
10
+ import * as http from 'http';
11
+ import { Client } from '@modelcontextprotocol/sdk/client/index.js';
12
+ import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
13
+ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
14
+ import { query } from '@anthropic-ai/claude-agent-sdk';
15
+
16
+ var __defProp = Object.defineProperty; // bundler runtime helper: alias used by __export below
17
+ var __getOwnPropNames = Object.getOwnPropertyNames; // bundler runtime helper: alias used by __esm below
18
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
19
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b] // property reads are forwarded to `require` when it exists at access time
20
+ }) : x)(function(x) {
21
+ if (typeof require !== "undefined") return require.apply(this, arguments); // delegate to a real `require` when one is in scope
22
+ throw Error('Dynamic require of "' + x + '" is not supported'); // no `require` available when called
23
+ });
24
+ var __esm = (fn, res) => function __init() { // wraps a single-keyed object of module initialisers into a run-once init function
25
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res; // first call runs the first own-property function and clears fn; later calls just return the cached res
26
+ };
27
+ var __export = (target, all) => { // defines one enumerable getter on target per key of all
28
+ for (var name in all)
29
+ __defProp(target, name, { get: all[name], enumerable: true }); // getter-only: values are read lazily through all[name]
30
+ };
31
+
32
+ // src/auth/oauthClientProvider.ts
33
+ var oauthClientProvider_exports = {}; // namespace object for src/auth/oauthClientProvider.ts, populated by __export below
34
+ __export(oauthClientProvider_exports, {
35
+ PlaywrightOAuthClientProvider: () => PlaywrightOAuthClientProvider, // lazy getter: the class is only assigned once init_oauthClientProvider() has run
36
+ loadOAuthState: () => loadOAuthState,
37
+ saveOAuthState: () => saveOAuthState
38
+ });
39
/**
 * Reads a persisted OAuth state file and parses it as JSON.
 *
 * @param {string} storagePath - Path of the JSON state file.
 * @returns {Promise<object|null>} The parsed state, or null when the file does not exist.
 * @throws Rethrows every read or parse failure other than ENOENT.
 */
async function loadOAuthState(storagePath) {
  let raw;
  try {
    raw = await fs.readFile(storagePath, "utf-8");
  } catch (error) {
    // A missing file simply means "no saved state yet".
    if (error.code === "ENOENT") {
      return null;
    }
    throw error;
  }
  return JSON.parse(raw);
}
50
/**
 * Persists OAuth state (tokens, client info, PKCE verifier) as pretty-printed JSON.
 *
 * The caller's `state` object is stamped with `savedAt` before it is written.
 * The file holds bearer credentials, so it is created with owner-only
 * permissions (0o600). The mode only takes effect when the file is created;
 * an existing file keeps whatever permissions it already has.
 *
 * @param {string} storagePath - Destination file; parent directories are created as needed.
 * @param {object} state - State to persist; mutated to carry the `savedAt` timestamp.
 * @returns {Promise<void>}
 */
async function saveOAuthState(storagePath, state) {
  state.savedAt = Date.now();
  const dir = path2.dirname(storagePath);
  await fs.mkdir(dir, { recursive: true });
  await fs.writeFile(storagePath, JSON.stringify(state, null, 2), {
    encoding: "utf-8",
    mode: 0o600
  });
}
56
+ var PlaywrightOAuthClientProvider;
57
+ var init_oauthClientProvider = __esm({
58
+ "src/auth/oauthClientProvider.ts"() {
59
+ PlaywrightOAuthClientProvider = class {
60
+ config;
61
+ cachedState = null;
62
+ stateParam = null;
63
+ constructor(config) {
64
+ this.config = config;
65
+ }
66
+ /**
67
+ * The URL to redirect the user agent to after authorization
68
+ */
69
+ get redirectUrl() {
70
+ return this.config.redirectUri;
71
+ }
72
+ /**
73
+ * Metadata about this OAuth client
74
+ */
75
+ get clientMetadata() {
76
+ return {
77
+ redirect_uris: [this.config.redirectUri],
78
+ token_endpoint_auth_method: this.config.clientSecret ? "client_secret_basic" : "none",
79
+ grant_types: ["authorization_code", "refresh_token"],
80
+ response_types: ["code"],
81
+ client_name: "@gleanwork/mcp-server-tester",
82
+ ...this.config.clientMetadata
83
+ };
84
+ }
85
+ /**
86
+ * Returns an OAuth2 state parameter
87
+ */
88
+ state() {
89
+ if (!this.stateParam) {
90
+ this.stateParam = this.generateRandomString(32);
91
+ }
92
+ return this.stateParam;
93
+ }
94
+ /**
95
+ * Loads information about this OAuth client
96
+ */
97
+ async clientInformation() {
98
+ if (this.config.clientId) {
99
+ return {
100
+ client_id: this.config.clientId,
101
+ client_secret: this.config.clientSecret,
102
+ redirect_uris: [this.config.redirectUri]
103
+ };
104
+ }
105
+ const state = await this.loadState();
106
+ if (state?.clientInfo) {
107
+ return {
108
+ client_id: state.clientInfo.clientId,
109
+ client_secret: state.clientInfo.clientSecret,
110
+ client_id_issued_at: state.clientInfo.clientIdIssuedAt,
111
+ client_secret_expires_at: state.clientInfo.clientSecretExpiresAt,
112
+ redirect_uris: [this.config.redirectUri]
113
+ };
114
+ }
115
+ return void 0;
116
+ }
117
+ /**
118
+ * Saves client information from Dynamic Client Registration
119
+ */
120
+ async saveClientInformation(clientInformation) {
121
+ const state = await this.loadState() ?? this.createEmptyState();
122
+ state.clientInfo = {
123
+ clientId: clientInformation.client_id,
124
+ clientSecret: clientInformation.client_secret,
125
+ clientIdIssuedAt: clientInformation.client_id_issued_at,
126
+ clientSecretExpiresAt: clientInformation.client_secret_expires_at
127
+ };
128
+ await this.saveState(state);
129
+ }
130
+ /**
131
+ * Loads any existing OAuth tokens for the current session
132
+ */
133
+ async tokens() {
134
+ const state = await this.loadState();
135
+ if (state?.tokens) {
136
+ return {
137
+ access_token: state.tokens.accessToken,
138
+ token_type: state.tokens.tokenType,
139
+ refresh_token: state.tokens.refreshToken,
140
+ expires_in: state.tokens.expiresAt ? Math.floor((state.tokens.expiresAt - Date.now()) / 1e3) : void 0
141
+ };
142
+ }
143
+ return void 0;
144
+ }
145
+ /**
146
+ * Stores new OAuth tokens for the current session
147
+ */
148
+ async saveTokens(tokens) {
149
+ const state = await this.loadState() ?? this.createEmptyState();
150
+ state.tokens = {
151
+ accessToken: tokens.access_token,
152
+ tokenType: tokens.token_type,
153
+ refreshToken: tokens.refresh_token,
154
+ expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
155
+ };
156
+ await this.saveState(state);
157
+ }
158
+ /**
159
+ * Invoked to redirect the user agent to the given URL
160
+ *
161
+ * In a testing context, this is typically handled by Playwright automation.
162
+ * This implementation throws an error to signal that the caller needs to
163
+ * handle the redirect externally.
164
+ */
165
+ async redirectToAuthorization(authorizationUrl) {
166
+ throw new Error(
167
+ `OAuth authorization required. Redirect to: ${authorizationUrl.toString()}
168
+ In a testing context, use performOAuthSetup() in your Playwright globalSetup to complete the OAuth flow before running tests.`
169
+ );
170
+ }
171
+ /**
172
+ * Saves a PKCE code verifier for the current session
173
+ */
174
+ async saveCodeVerifier(codeVerifier) {
175
+ const state = await this.loadState() ?? this.createEmptyState();
176
+ state.codeVerifier = codeVerifier;
177
+ await this.saveState(state);
178
+ }
179
+ /**
180
+ * Loads the PKCE code verifier for the current session
181
+ */
182
+ async codeVerifier() {
183
+ const state = await this.loadState();
184
+ if (!state?.codeVerifier) {
185
+ throw new Error("No code verifier found in auth state");
186
+ }
187
+ return state.codeVerifier;
188
+ }
189
+ /**
190
+ * Invalidates the specified credentials
191
+ */
192
+ async invalidateCredentials(scope) {
193
+ const state = await this.loadState();
194
+ if (!state) {
195
+ return;
196
+ }
197
+ switch (scope) {
198
+ case "all":
199
+ await this.deleteState();
200
+ break;
201
+ case "client":
202
+ delete state.clientInfo;
203
+ await this.saveState(state);
204
+ break;
205
+ case "tokens":
206
+ delete state.tokens;
207
+ await this.saveState(state);
208
+ break;
209
+ case "verifier":
210
+ delete state.codeVerifier;
211
+ await this.saveState(state);
212
+ break;
213
+ }
214
+ }
215
+ // ---- Private helper methods ----
216
+ async loadState() {
217
+ if (this.cachedState) {
218
+ return this.cachedState;
219
+ }
220
+ try {
221
+ const content = await fs.readFile(this.config.storagePath, "utf-8");
222
+ this.cachedState = JSON.parse(content);
223
+ return this.cachedState;
224
+ } catch (error) {
225
+ if (error.code === "ENOENT") {
226
+ return null;
227
+ }
228
+ throw error;
229
+ }
230
+ }
231
+ async saveState(state) {
232
+ state.savedAt = Date.now();
233
+ this.cachedState = state;
234
+ const dir = path2.dirname(this.config.storagePath);
235
+ await fs.mkdir(dir, { recursive: true });
236
+ await fs.writeFile(
237
+ this.config.storagePath,
238
+ JSON.stringify(state, null, 2),
239
+ "utf-8"
240
+ );
241
+ }
242
+ async deleteState() {
243
+ this.cachedState = null;
244
+ try {
245
+ await fs.unlink(this.config.storagePath);
246
+ } catch (error) {
247
+ if (error.code !== "ENOENT") {
248
+ throw error;
249
+ }
250
+ }
251
+ }
252
+ createEmptyState() {
253
+ return {
254
+ savedAt: Date.now()
255
+ };
256
+ }
257
+ generateRandomString(length) {
258
+ const chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
259
+ let result = "";
260
+ const randomValues = new Uint8Array(length);
261
+ crypto.getRandomValues(randomValues);
262
+ for (let i = 0; i < length; i++) {
263
+ const randomValue = randomValues[i] ?? 0;
264
+ result += chars[randomValue % chars.length];
265
+ }
266
+ return result;
267
+ }
268
+ };
269
+ }
270
+ });
271
+ var MCPHostCapabilitiesSchema = z.object({ // host capabilities: both fields are optional
272
+ sampling: z.record(z.unknown()).optional(), // record with values of any type
273
+ roots: z.object({
274
+ listChanged: z.boolean() // required whenever `roots` is present
275
+ }).optional()
276
+ });
277
+ var MCPOAuthConfigSchema = z.object({ // OAuth settings; only serverUrl is required
278
+ serverUrl: z.string().url("serverUrl must be a valid URL"),
279
+ scopes: z.array(z.string()).optional(),
280
+ resource: z.string().url().optional(),
281
+ authStatePath: z.string().optional(),
282
+ clientId: z.string().optional(),
283
+ clientSecret: z.string().optional(),
284
+ redirectUri: z.string().url().optional()
285
+ });
286
+ var MCPAuthConfigSchema = z.object({ // either a static access token or an OAuth config, never both
287
+ accessToken: z.string().optional(),
288
+ oauth: MCPOAuthConfigSchema.optional()
289
+ }).refine(
290
+ (data) => !(data.accessToken && data.oauth), // passes unless both fields are truthy
291
+ "Cannot specify both accessToken and oauth configuration"
292
+ );
293
+ var StdioConfigSchema = z.object({ // config variant selected by transport === "stdio"
294
+ transport: z.literal("stdio"),
295
+ command: z.string().min(1, "command is required for stdio transport"), // empty string is rejected
296
+ args: z.array(z.string()).optional(),
297
+ cwd: z.string().optional(),
298
+ capabilities: MCPHostCapabilitiesSchema.optional(),
299
+ connectTimeoutMs: z.number().positive().optional(),
300
+ requestTimeoutMs: z.number().positive().optional(),
301
+ quiet: z.boolean().optional()
302
+ });
303
+ var HttpConfigSchema = z.object({ // config variant selected by transport === "http"
304
+ transport: z.literal("http"),
305
+ serverUrl: z.string().url("serverUrl must be a valid URL"),
306
+ headers: z.record(z.string()).optional(),
307
+ capabilities: MCPHostCapabilitiesSchema.optional(),
308
+ connectTimeoutMs: z.number().positive().optional(),
309
+ requestTimeoutMs: z.number().positive().optional(),
310
+ auth: MCPAuthConfigSchema.optional() // auth is only part of the http variant
311
+ });
312
+ var MCPConfigSchema = z.discriminatedUnion("transport", [ // top-level schema used by validateMCPConfig; discriminates on `transport`
313
+ StdioConfigSchema,
314
+ HttpConfigSchema
315
+ ]);
316
/**
 * Validates a raw MCP configuration against MCPConfigSchema.
 *
 * @param {unknown} config - Candidate configuration object.
 * @returns The value produced by MCPConfigSchema.parse.
 * @throws Whatever MCPConfigSchema.parse throws for invalid input.
 */
function validateMCPConfig(config) {
  const parsed = MCPConfigSchema.parse(config);
  return parsed;
}
319
/**
 * Tells whether a config describes a stdio transport.
 *
 * @param {object} config - MCP configuration object.
 * @returns {boolean} true when `transport` is "stdio" and `command` is a string.
 */
function isStdioConfig(config) {
  if (config.transport !== "stdio") {
    return false;
  }
  return typeof config.command === "string";
}
322
/**
 * Tells whether a config describes an HTTP transport.
 *
 * @param {object} config - MCP configuration object.
 * @returns {boolean} true when `transport` is "http" and `serverUrl` is a string.
 */
function isHttpConfig(config) {
  if (config.transport !== "http") {
    return false;
  }
  return typeof config.serverUrl === "string";
}
325
+
326
+ // src/index.ts
327
+ init_oauthClientProvider(); // runs the wrapped src/auth/oauthClientProvider.ts initialiser, which assigns PlaywrightOAuthClientProvider
328
+
329
+ // src/auth/tokenAuth.ts
330
/**
 * Builds the HTTP Authorization header for a token.
 *
 * @param {string} accessToken - Token value placed after the scheme.
 * @param {string} [tokenType="Bearer"] - Authorization scheme.
 * @returns {{ Authorization: string }} Header object in the form "<tokenType> <accessToken>".
 */
function createTokenAuthHeaders(accessToken, tokenType = "Bearer") {
  const headerValue = `${tokenType} ${accessToken}`;
  return { Authorization: headerValue };
}
335
/**
 * Ensures an access token is a non-blank string.
 *
 * @param {string} accessToken - Token to validate.
 * @returns {void}
 * @throws {Error} when the token is missing or empty, or contains only whitespace.
 * @throws {TypeError} when a truthy non-string value is passed. Previously this
 *   surfaced as an opaque "accessToken.trim is not a function" TypeError.
 */
function validateAccessToken(accessToken) {
  if (!accessToken) {
    throw new Error("Access token is required but was not provided");
  }
  if (typeof accessToken !== "string") {
    throw new TypeError("Access token must be a string");
  }
  if (accessToken.trim().length === 0) {
    throw new Error("Access token cannot be empty");
  }
}
343
/**
 * Best-effort expiry check for JWT-shaped access tokens.
 *
 * The middle segment is base64url-decoded and its numeric `exp` claim
 * (seconds) is compared with the current time. No signature verification is
 * performed. Anything that is not a three-part token with a numeric `exp`,
 * or that fails to decode, is reported as not expired.
 *
 * @param {string} accessToken - Token to inspect.
 * @returns {boolean} true only when a numeric `exp` lies in the past.
 */
function isTokenExpired(accessToken) {
  try {
    const segments = accessToken.split(".");
    const encodedPayload = segments[1];
    if (segments.length !== 3 || !encodedPayload) {
      return false;
    }
    const json = Buffer.from(encodedPayload, "base64url").toString("utf-8");
    const { exp } = JSON.parse(json);
    return typeof exp === "number" ? exp * 1e3 < Date.now() : false;
  } catch {
    // Undecodable tokens are treated as opaque, i.e. not known to be expired.
    return false;
  }
}
364
/**
 * Tells whether an expiry timestamp falls within the safety buffer.
 *
 * @param {number|undefined} expiresAt - Absolute expiry in epoch milliseconds.
 * @param {number} [bufferMs=60000] - Lead time subtracted from the expiry.
 * @returns {boolean} false when `expiresAt` is undefined, otherwise whether
 *   `expiresAt - bufferMs` is already in the past.
 */
function isTokenExpiringSoon(expiresAt, bufferMs = 6e4) {
  return expiresAt === void 0 ? false : expiresAt - bufferMs < Date.now();
}
370
+
371
+ // src/auth/setupOAuth.ts
372
+ init_oauthClientProvider(); // make sure the oauthClientProvider module initialiser has run
373
+ var NAMESPACE = "mcp-server-tester"; // common prefix for the debug channels created below
374
+ var debugClient = createDebug(`${NAMESPACE}:client`);
375
+ var debugOAuth = createDebug(`${NAMESPACE}:oauth`); // logger used by the OAuth setup helpers below
376
+ createDebug(`${NAMESPACE}:eval`); // return value is discarded here
377
+
378
+ // src/auth/setupOAuth.ts
379
+ var DEFAULT_TIMEOUT_MS = 3e4; // performOAuthSetup's fallback when config.timeoutMs is not set
380
+ var DEFAULT_REDIRECT_URI = "http://localhost:3000/oauth/callback"; // performOAuthSetup's fallback when config.redirectUri is not set
381
/**
 * Runs a complete OAuth authorization-code flow in a Playwright-controlled
 * Chromium browser and saves the resulting tokens to `config.outputPath`.
 *
 * Steps: discover the authorization server metadata, start authorization,
 * drive the login form via completeLoginForm, wait for the redirect to
 * `redirectUri`, exchange the code for tokens, then persist the state.
 *
 * Fixes over the previous version:
 * - The redirect wait now also matches callbacks carrying `error`, so a
 *   denied or failed authorization is reported immediately with the server's
 *   error instead of surfacing as a navigation timeout.
 * - `config.scopes` may be omitted; no scope is then requested.
 *
 * The browser is headless unless the OAUTH_DEBUG environment variable is "true".
 *
 * @param {object} config - Reads `authServerUrl`, `outputPath`, `scopes`,
 *   `resource`, `clientId`, `clientSecret`, `redirectUri`, `timeoutMs`, plus
 *   the fields completeLoginForm needs (`loginSelectors`, `credentials`).
 * @returns {Promise<void>}
 * @throws {Error} when metadata discovery fails, the callback carries an
 *   `error`, or no authorization code is returned.
 */
async function performOAuthSetup(config) {
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const redirectUri = config.redirectUri ?? DEFAULT_REDIRECT_URI;
  const metadata = await discoverAuthorizationServerMetadata(
    config.authServerUrl
  );
  if (!metadata) {
    throw new Error(
      `Could not discover OAuth metadata at ${config.authServerUrl}`
    );
  }
  const clientInformation = {
    client_id: config.clientId ?? "mcp-server-tester-client",
    client_secret: config.clientSecret
  };
  const { authorizationUrl, codeVerifier } = await startAuthorization(
    config.authServerUrl,
    {
      metadata,
      clientInformation,
      redirectUrl: redirectUri,
      scope: config.scopes?.join(" "),
      resource: config.resource ? new URL(config.resource) : void 0
    }
  );
  const browser = await chromium.launch({
    headless: process.env.OAUTH_DEBUG !== "true"
  });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    page.setDefaultTimeout(timeoutMs);
    await page.goto(authorizationUrl.toString());
    await completeLoginForm(page, config);
    // Accept both outcomes of the authorization request: success (`code`) and failure (`error`).
    await page.waitForURL(
      (url) => url.href.startsWith(redirectUri) && (url.searchParams.has("code") || url.searchParams.has("error")),
      { timeout: timeoutMs }
    );
    const callbackUrl = new URL(page.url());
    const code = callbackUrl.searchParams.get("code");
    const error = callbackUrl.searchParams.get("error");
    if (error) {
      const errorDescription = callbackUrl.searchParams.get("error_description");
      throw new Error(
        `OAuth authorization failed: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`
      );
    }
    if (!code) {
      throw new Error("No authorization code in callback URL");
    }
    const tokens = await exchangeAuthorization(config.authServerUrl, {
      metadata,
      clientInformation,
      authorizationCode: code,
      codeVerifier,
      redirectUri,
      resource: config.resource ? new URL(config.resource) : void 0
    });
    const state = {
      tokens: {
        accessToken: tokens.access_token,
        tokenType: tokens.token_type,
        refreshToken: tokens.refresh_token,
        // expires_in is relative seconds; store an absolute millisecond timestamp.
        expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
      },
      // Client info is only stored when the caller supplied an explicit client id.
      clientInfo: config.clientId ? {
        clientId: config.clientId,
        clientSecret: config.clientSecret
      } : void 0,
      codeVerifier,
      savedAt: Date.now()
    };
    await saveOAuthState(config.outputPath, state);
    debugOAuth("Auth state saved to %s", config.outputPath);
  } finally {
    // Always release the browser, including on failure.
    await browser.close();
  }
}
459
/**
 * Fills in and submits the identity provider's login form, then clicks the
 * consent button when one is configured.
 *
 * Each selector is awaited until visible before it is used. The consent step
 * is best-effort: if the button does not show up within 5 seconds (or the
 * click fails) the flow continues. The reason is now written to the OAuth
 * debug log instead of being discarded silently.
 *
 * @param {object} page - Page-like object providing `waitForSelector`, `fill` and `click`.
 * @param {object} config - Reads `loginSelectors` (`usernameInput`,
 *   `passwordInput`, `submitButton`, optional `consentButton`) and
 *   `credentials` (`username`, `password`).
 * @returns {Promise<void>}
 */
async function completeLoginForm(page, config) {
  const { loginSelectors, credentials } = config;
  const waitUntilVisible = (selector, extra = {}) => page.waitForSelector(selector, { state: "visible", ...extra });

  await waitUntilVisible(loginSelectors.usernameInput);
  await page.fill(loginSelectors.usernameInput, credentials.username);
  await waitUntilVisible(loginSelectors.passwordInput);
  await page.fill(loginSelectors.passwordInput, credentials.password);
  await waitUntilVisible(loginSelectors.submitButton);
  await page.click(loginSelectors.submitButton);

  if (!loginSelectors.consentButton) {
    return;
  }
  try {
    await waitUntilVisible(loginSelectors.consentButton, { timeout: 5e3 });
    await page.click(loginSelectors.consentButton);
  } catch (error) {
    // Consent screens are often skipped for already-approved clients; keep going.
    debugOAuth(
      "Consent step skipped: %s",
      error instanceof Error ? error.message : String(error)
    );
  }
}
484
/**
 * Checks whether a saved OAuth state file holds a usable access token.
 *
 * A token counts as usable when it exists and either has no recorded expiry
 * or expires more than 60 seconds from now. Any failure while loading the
 * state is reported as "not valid".
 *
 * @param {string} storagePath - Path of the state file.
 * @returns {Promise<boolean>}
 */
async function hasValidOAuthState(storagePath) {
  const expiryBufferMs = 6e4;
  try {
    // Lazily initialised module reference, as emitted by the bundler.
    const { loadOAuthState: readState } = await Promise.resolve().then(() => (init_oauthClientProvider(), oauthClientProvider_exports));
    const state = await readState(storagePath);
    const tokens = state?.tokens;
    if (!tokens?.accessToken) {
      return false;
    }
    const { expiresAt } = tokens;
    return !(expiresAt && expiresAt - expiryBufferMs < Date.now());
  } catch {
    return false;
  }
}
502
/**
 * Runs performOAuthSetup only when `config.outputPath` does not already hold
 * a valid auth state (as judged by hasValidOAuthState).
 *
 * @param {object} config - Same configuration performOAuthSetup accepts.
 * @returns {Promise<void>}
 */
async function performOAuthSetupIfNeeded(config) {
  if (await hasValidOAuthState(config.outputPath)) {
    debugOAuth("Using existing auth state from %s", config.outputPath);
    return;
  }
  debugOAuth("No valid auth state found, performing OAuth flow...");
  await performOAuthSetup(config);
}
511
+ var MCP_PROTOCOL_VERSION = "2025-06-18"; // sent as the "MCP-Protocol-Version" header on the discovery requests below
512
/**
 * Discovers OAuth protected-resource metadata for an MCP server.
 *
 * For a URL with a path, the path-aware well-known location
 * (`<origin>/.well-known/oauth-protected-resource<path>`) is tried first and
 * the origin-level location is used as a fallback when the server answers 404.
 *
 * For a root URL (`https://host/`) the origin-level location is requested
 * directly. Previously the root slash was appended, producing
 * `.../oauth-protected-resource/`; RFC 9728 §3.1 requires the terminating
 * slash after the host to be removed before the well-known suffix is inserted.
 *
 * @param {string} mcpServerUrl - Absolute URL of the MCP server.
 * @returns {Promise<{metadata: object, discoveryUrl: string, usedPathAwareDiscovery: boolean}>}
 * @throws {DiscoveryError} from fetchProtectedResourceMetadata (any non-404
 *   failure of the path-aware request, or any failure of the fallback).
 */
async function discoverProtectedResource(mcpServerUrl) {
  const { origin, pathname } = new URL(mcpServerUrl);
  const baseUrl = `${origin}/.well-known/oauth-protected-resource`;
  const resourcePath = pathname === "/" ? "" : pathname;

  if (resourcePath) {
    const pathAwareUrl = `${baseUrl}${resourcePath}`;
    try {
      const metadata = await fetchProtectedResourceMetadata(pathAwareUrl);
      return {
        metadata,
        discoveryUrl: pathAwareUrl,
        usedPathAwareDiscovery: true
      };
    } catch (error) {
      // Only "not found" triggers the fallback; every other failure is real.
      if (!(error instanceof DiscoveryError && error.status === 404)) {
        throw error;
      }
    }
  }

  const metadata = await fetchProtectedResourceMetadata(baseUrl);
  return {
    metadata,
    discoveryUrl: baseUrl,
    usedPathAwareDiscovery: false
  };
}
537
/**
 * Error raised when protected-resource discovery fails.
 *
 * Carries the HTTP `status` of the failed response (undefined when the
 * failure was not an HTTP error) and the discovery `url` that was requested.
 */
var DiscoveryError = class extends Error {
  constructor(message, status, url) {
    super(message);
    Object.assign(this, { status, url, name: "DiscoveryError" });
  }
};
545
/**
 * Fetches and minimally validates protected-resource metadata.
 *
 * Sends a GET with `Accept: application/json` and the MCP protocol version
 * header, then requires the JSON body to be an object with a truthy
 * `resource` field.
 *
 * Every failure is now reported as a DiscoveryError. Previously a body that
 * was not valid JSON raised a bare SyntaxError and a JSON `null` body raised
 * a TypeError.
 *
 * @param {string} discoveryUrl - Well-known URL to request.
 * @returns {Promise<object>} The parsed metadata document.
 * @throws {DiscoveryError} with `status` set for non-2xx responses, and with
 *   `status` undefined for an unparsable body or a missing `resource`.
 */
async function fetchProtectedResourceMetadata(discoveryUrl) {
  const response = await fetch(discoveryUrl, {
    method: "GET",
    headers: {
      Accept: "application/json",
      "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
    }
  });
  if (!response.ok) {
    throw new DiscoveryError(
      `Protected resource discovery failed: ${response.status} ${response.statusText}`,
      response.status,
      discoveryUrl
    );
  }
  let metadata;
  try {
    metadata = await response.json();
  } catch (error) {
    const invalid = new DiscoveryError(
      "Invalid protected resource metadata: response body is not valid JSON",
      void 0,
      discoveryUrl
    );
    // Keep the parser's error available for debugging.
    invalid.cause = error;
    throw invalid;
  }
  if (metadata === null || typeof metadata !== "object" || !metadata.resource) {
    throw new DiscoveryError(
      'Invalid protected resource metadata: missing required "resource" field',
      void 0,
      discoveryUrl
    );
  }
  return metadata;
}
570
/**
 * Fetches authorization-server metadata through oauth4webapi's discovery
 * helpers, using the "oauth2" discovery algorithm and sending the MCP
 * protocol version header.
 *
 * @param {string} authServerUrl - Issuer URL of the authorization server.
 * @returns {Promise<{server: object, issuer: string}>} The processed metadata
 *   plus the issuer string exactly as it was passed in.
 */
async function discoverAuthorizationServer(authServerUrl) {
  const issuerUrl = new URL(authServerUrl);
  const requestHeaders = new Headers({
    "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
  });
  const discoveryResponse = await oauth2.discoveryRequest(issuerUrl, {
    algorithm: "oauth2",
    headers: requestHeaders
  });
  const server = await oauth2.processDiscoveryResponse(issuerUrl, discoveryResponse);
  return { server, issuer: authServerUrl };
}
584
+ var ENV_VAR_NAMES = { // environment variable names read by loadTokensFromEnv
585
+ accessToken: "MCP_ACCESS_TOKEN", // required: loadTokensFromEnv returns null without it
586
+ refreshToken: "MCP_REFRESH_TOKEN",
587
+ tokenType: "MCP_TOKEN_TYPE", // loadTokensFromEnv falls back to "Bearer" when unset
588
+ expiresAt: "MCP_TOKEN_EXPIRES_AT" // parsed as a base-10 integer by loadTokensFromEnv
589
+ };
590
+ var DEFAULT_EXPIRY_BUFFER_MS = 6e4; // default bufferMs for FileOAuthStorage.hasValidToken
591
/**
 * Derives a filesystem-safe key from a server URL.
 *
 * The key is the hostname, followed by the port and the path (when present),
 * joined with underscores. Leading and trailing slashes of the path are
 * dropped, inner slashes become underscores, and any character outside
 * `[a-zA-Z0-9_.-]` is finally replaced by an underscore. The protocol and
 * query string do not contribute to the key.
 *
 * @param {string} serverUrl - Absolute server URL.
 * @returns {string} Key usable as a directory name.
 * @throws {TypeError} when `serverUrl` is not a valid URL.
 */
function generateServerKey(serverUrl) {
  const { hostname, port, pathname } = new URL(serverUrl);
  const parts = [hostname];
  if (port) {
    parts.push(port);
  }
  if (pathname && pathname !== "/") {
    const cleanPath = pathname.replace(/^\/+|\/+$/g, "").replace(/\//g, "_");
    if (cleanPath) {
      parts.push(cleanPath);
    }
  }
  return parts.join("_").replace(/[^a-zA-Z0-9_.-]/g, "_");
}
605
/**
 * Resolves the directory where OAuth state for a server is stored.
 *
 * Resolution order:
 * 1. `<customDir>/<serverKey>` when a custom directory is given.
 * 2. Windows: `%LOCALAPPDATA%/mcp-tests/<serverKey>`, or
 *    `<home>/AppData/Local/mcp-tests/<serverKey>` when LOCALAPPDATA is unset.
 * 3. Linux with XDG_STATE_HOME set: `$XDG_STATE_HOME/mcp-tests/<serverKey>`.
 * 4. Otherwise: `<home>/.local/state/mcp-tests/<serverKey>`.
 *
 * @param {string} serverUrl - Server URL passed to generateServerKey.
 * @param {string} [customDir] - Optional base directory override.
 * @returns {string} Directory path; nothing is created on disk here.
 */
function getStateDir(serverUrl, customDir) {
  const serverKey = generateServerKey(serverUrl);
  if (customDir) {
    return path2.join(customDir, serverKey);
  }
  const appDir = "mcp-tests";
  switch (process.platform) {
    case "win32": {
      const localAppData = process.env.LOCALAPPDATA;
      return localAppData
        ? path2.join(localAppData, appDir, serverKey)
        : path2.join(homedir(), "AppData", "Local", appDir, serverKey);
    }
    case "linux": {
      const xdgStateHome = process.env.XDG_STATE_HOME;
      if (xdgStateHome) {
        return path2.join(xdgStateHome, appDir, serverKey);
      }
      break;
    }
  }
  return path2.join(homedir(), ".local", "state", appDir, serverKey);
}
622
/**
 * Reads OAuth tokens from the environment variables named in ENV_VAR_NAMES.
 *
 * @returns {{accessToken: string, refreshToken: (string|undefined), tokenType: string, expiresAt: (number|undefined)}|null}
 *   null when the access-token variable is unset or empty. `tokenType`
 *   defaults to "Bearer". `expiresAt` is the base-10 integer value of its
 *   variable, or undefined when that is unset, unparsable or zero.
 */
function loadTokensFromEnv() {
  const env = process.env;
  const accessToken = env[ENV_VAR_NAMES.accessToken];
  if (!accessToken) {
    return null;
  }
  const rawExpiry = env[ENV_VAR_NAMES.expiresAt];
  let expiresAt;
  if (rawExpiry) {
    const parsed = parseInt(rawExpiry, 10);
    // Keep only a usable number (not NaN, not 0).
    if (parsed && !isNaN(parsed)) {
      expiresAt = parsed;
    }
  }
  return {
    accessToken,
    refreshToken: env[ENV_VAR_NAMES.refreshToken],
    tokenType: env[ENV_VAR_NAMES.tokenType] ?? "Bearer",
    expiresAt
  };
}
636
/**
 * Writes tokens into the file storage for a server (e.g. to pre-seed credentials).
 *
 * @param {string} serverUrl - Server whose storage is targeted.
 * @param {object} tokens - Token record passed through to `saveTokens`.
 * @param {string} [stateDir] - Optional base directory override.
 * @returns {Promise<void>}
 */
async function injectTokens(serverUrl, tokens, stateDir) {
  await createFileOAuthStorage({ serverUrl, stateDir }).saveTokens(tokens);
}
640
/**
 * Loads the stored tokens for a server.
 *
 * @param {string} serverUrl - Server whose storage is read.
 * @param {string} [stateDir] - Optional base directory override.
 * @returns {Promise<object|null>} Whatever the storage's `loadTokens` yields.
 */
async function loadTokens(serverUrl, stateDir) {
  const storageConfig = { serverUrl, stateDir };
  return createFileOAuthStorage(storageConfig).loadTokens();
}
644
/**
 * Reports whether the storage for a server holds a currently valid token.
 *
 * @param {string} serverUrl - Server whose storage is checked.
 * @param {{stateDir?: string, bufferMs?: number}} [options] - Optional base
 *   directory override and expiry buffer forwarded to `hasValidToken`.
 * @returns {Promise<boolean>}
 */
async function hasValidTokens(serverUrl, options) {
  const { stateDir, bufferMs } = options ?? {};
  return createFileOAuthStorage({ serverUrl, stateDir }).hasValidToken(bufferMs);
}
651
/**
 * Factory for FileOAuthStorage.
 *
 * @param {{serverUrl: string, stateDir?: string}} config - Passed unchanged to the constructor.
 * @returns {FileOAuthStorage}
 */
function createFileOAuthStorage(config) {
  const storage = new FileOAuthStorage(config);
  return storage;
}
654
/**
 * File-based storage for per-server OAuth artefacts.
 *
 * Three JSON files are kept in the server's state directory (see getStateDir):
 * `server.json` (server metadata), `client.json` (client registration) and
 * `tokens.json` (tokens).
 */
var FileOAuthStorage = class {
  stateDir;
  /**
   * @param {{serverUrl: string, stateDir?: string}} config - Server URL and
   *   optional base directory, both forwarded to getStateDir.
   */
  constructor(config) {
    this.stateDir = getStateDir(config.serverUrl, config.stateDir);
  }
  get serverMetadataPath() {
    return path2.join(this.stateDir, "server.json");
  }
  get clientPath() {
    return path2.join(this.stateDir, "client.json");
  }
  get tokensPath() {
    return path2.join(this.stateDir, "tokens.json");
  }
  async loadServerMetadata() {
    return this.loadFile(this.serverMetadataPath);
  }
  async saveServerMetadata(metadata) {
    await this.atomicWrite(this.serverMetadataPath, metadata);
  }
  async loadClient() {
    return this.loadFile(this.clientPath);
  }
  async saveClient(client) {
    await this.atomicWrite(this.clientPath, client);
  }
  async loadTokens() {
    return this.loadFile(this.tokensPath);
  }
  async saveTokens(tokens) {
    await this.atomicWrite(this.tokensPath, tokens);
  }
  async deleteTokens() {
    await this.deleteFile(this.tokensPath);
  }
  /**
   * Tells whether a stored access token exists and is not about to expire.
   * Tokens without an `expiresAt` are treated as valid.
   *
   * @param {number} [bufferMs] - Minimum remaining lifetime in milliseconds.
   * @returns {Promise<boolean>}
   */
  async hasValidToken(bufferMs = DEFAULT_EXPIRY_BUFFER_MS) {
    const tokens = await this.loadTokens();
    if (!tokens?.accessToken) {
      return false;
    }
    if (!tokens.expiresAt) {
      return true;
    }
    return tokens.expiresAt > Date.now() + bufferMs;
  }
  /**
   * Load a JSON file, returning null if not found
   */
  async loadFile(filePath) {
    try {
      const content = await fs.readFile(filePath, "utf-8");
      return JSON.parse(content);
    } catch (error) {
      if (error.code === "ENOENT") {
        return null;
      }
      throw error;
    }
  }
  /**
   * Write data atomically: write to a temporary file, then rename.
   * Files are created with 0o600 permissions (user read/write only) inside a
   * 0o700 directory.
   *
   * The temporary name is unique per call, so concurrent writers (parallel
   * test workers, overlapping saves) cannot overwrite each other's temp file.
   * The temp file is removed again when the write or the rename fails.
   */
  async atomicWrite(filePath, data) {
    await fs.mkdir(this.stateDir, { recursive: true, mode: 0o700 });
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const tmpPath = `${filePath}.${uniqueSuffix}.tmp`;
    const content = JSON.stringify(data, null, 2);
    try {
      await fs.writeFile(tmpPath, content, { encoding: "utf-8", mode: 0o600 });
      await fs.rename(tmpPath, filePath);
    } catch (error) {
      // Best-effort cleanup; the original failure is what the caller needs to see.
      await fs.rm(tmpPath, { force: true }).catch(() => {
      });
      throw error;
    }
  }
  /**
   * Delete a file, ignoring errors if the file doesn't exist
   */
  async deleteFile(filePath) {
    try {
      await fs.unlink(filePath);
    } catch (error) {
      if (error.code !== "ENOENT") {
        throw error;
      }
    }
  }
};
737
/**
 * Creates a PKCE verifier/challenge pair using oauth4webapi.
 *
 * @returns {Promise<{codeVerifier: string, codeChallenge: string}>} A fresh
 *   random verifier and the challenge computed from it.
 */
async function generatePKCE() {
  const verifier = oauth2.generateRandomCodeVerifier();
  const challenge = await oauth2.calculatePKCECodeChallenge(verifier);
  return { codeVerifier: verifier, codeChallenge: challenge };
}
745
/**
 * Creates a random OAuth `state` value via oauth4webapi.
 *
 * @returns {string}
 */
function generateState() {
  const stateValue = oauth2.generateRandomState();
  return stateValue;
}
748
/**
 * Builds the authorization-request URL for an authorization-code + PKCE flow.
 *
 * @param {object} config - Reads `authServer.server.authorization_endpoint`,
 *   `clientId`, `redirectUri`, `scopes` (array, space-joined), `codeChallenge`,
 *   `state` and the optional `resource`.
 * @returns {URL} The endpoint URL with the query parameters set; the
 *   challenge method is always "S256".
 * @throws {Error} when the server metadata has no authorization endpoint.
 */
function buildAuthorizationUrl(config) {
  const endpoint = config.authServer.server.authorization_endpoint;
  if (!endpoint) {
    throw new Error(
      "Authorization server does not have an authorization_endpoint"
    );
  }
  const url = new URL(endpoint);
  const params = [
    ["client_id", config.clientId],
    ["redirect_uri", config.redirectUri],
    ["response_type", "code"],
    ["scope", config.scopes.join(" ")],
    ["code_challenge", config.codeChallenge],
    ["code_challenge_method", "S256"],
    ["state", config.state]
  ];
  // The resource indicator is only sent when one is configured.
  if (config.resource) {
    params.push(["resource", config.resource]);
  }
  for (const [key, value] of params) {
    url.searchParams.set(key, value);
  }
  return url;
}
768
/**
 * Exchanges an authorization code for tokens using oauth4webapi.
 *
 * A callback URL is reconstructed from `redirectUri`, `code` and `state` so
 * the library's validateAuthResponse can check it before the token request.
 * Client authentication is HTTP Basic when a client secret is configured,
 * otherwise none.
 *
 * @param {object} config - Reads `authServer.server`, `clientId`,
 *   `clientSecret`, `redirectUri`, `code`, `state`, `codeVerifier`.
 * @returns {Promise<{accessToken: string, tokenType: string, expiresIn: (number|undefined), refreshToken: (string|undefined), scope: (string|undefined)}>}
 */
async function exchangeCodeForTokens(config) {
  const { clientId, clientSecret, redirectUri, code, state, codeVerifier } = config;
  const client = {
    client_id: clientId,
    token_endpoint_auth_method: clientSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = clientSecret ? oauth2.ClientSecretBasic(clientSecret) : oauth2.None();

  const callbackUrl = new URL(redirectUri);
  callbackUrl.searchParams.set("code", code);
  callbackUrl.searchParams.set("state", state);

  const authorizationServer = config.authServer.server;
  const validatedParams = oauth2.validateAuthResponse(
    authorizationServer,
    client,
    callbackUrl,
    state
  );
  const response = await oauth2.authorizationCodeGrantRequest(
    authorizationServer,
    client,
    clientAuth,
    validatedParams,
    redirectUri,
    codeVerifier
  );
  const {
    access_token: accessToken,
    token_type: tokenType,
    expires_in: expiresIn,
    refresh_token: refreshToken,
    scope
  } = await oauth2.processAuthorizationCodeResponse(authorizationServer, client, response);
  return { accessToken, tokenType, expiresIn, refreshToken, scope };
}
804
/**
 * Obtains a new access token from a refresh token using oauth4webapi.
 *
 * On a non-OK response the error message is enriched on a best-effort basis:
 * a JSON body contributes its `error` / `error_description`, any other
 * non-empty body is appended as text. If reading the body fails, the generic
 * status-line message is used.
 *
 * @param {object} config - Reads `authServer.server`, `clientId`,
 *   `clientSecret`, `refreshToken`.
 * @returns {Promise<{accessToken: string, tokenType: string, expiresIn: (number|undefined), refreshToken: (string|undefined), scope: (string|undefined)}>}
 * @throws {Error} "Token refresh failed: ..." when the token endpoint responds with a non-OK status.
 */
async function refreshAccessToken(config) {
  const { clientId, clientSecret } = config;
  const client = {
    client_id: clientId,
    token_endpoint_auth_method: clientSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = clientSecret ? oauth2.ClientSecretBasic(clientSecret) : oauth2.None();
  const response = await oauth2.refreshTokenGrantRequest(
    config.authServer.server,
    client,
    clientAuth,
    config.refreshToken
  );

  if (!response.ok) {
    const contentType = response.headers.get("content-type") ?? "";
    let errorMessage = `Token refresh failed: ${response.status} ${response.statusText}`;
    try {
      // Bodies are read from a clone so the original response stays untouched.
      if (contentType.includes("application/json")) {
        const { error, error_description: description } = await response.clone().json();
        if (error) {
          errorMessage = description
            ? `Token refresh failed: ${error} - ${description}`
            : `Token refresh failed: ${error}`;
        }
      } else {
        const textBody = await response.clone().text();
        if (textBody) {
          errorMessage = `Token refresh failed: ${response.status} - ${textBody}`;
        }
      }
    } catch {
      // Unreadable body: fall back to the status-line message.
    }
    throw new Error(errorMessage);
  }

  const {
    access_token: accessToken,
    token_type: tokenType,
    expires_in: expiresIn,
    refresh_token: refreshToken,
    scope
  } = await oauth2.processRefreshTokenResponse(config.authServer.server, client, response);
  return { accessToken, tokenType, expiresIn, refreshToken, scope };
}
851
+
852
+ // src/auth/cli.ts
853
+ var debug = createDebug("mcp-server-tester:cli-oauth"); // debug logger used by CLIOAuthClient below
854
+ var DEFAULT_TIMEOUT_MS2 = 3e5; // 300 000; presumably the CLI flow's timeout in ms, with the "2" suffix added by the bundler to avoid clashing with DEFAULT_TIMEOUT_MS — usage is outside this view, TODO confirm
855
+ var DEFAULT_CLIENT_NAME = "@gleanwork/mcp-server-tester"; // same string PlaywrightOAuthClientProvider uses as client_name; usage is outside this view
856
+ var DEFAULT_METADATA_TTL_MS = 24 * 60 * 60 * 1e3; // 24 hours in milliseconds; usage is outside this view
857
+ var CLIOAuthClient = class {
858
+ config;
859
+ storage;
860
+ constructor(config) {
861
+ this.config = config;
862
+ this.storage = createFileOAuthStorage({
863
+ serverUrl: config.mcpServerUrl,
864
+ stateDir: config.stateDir
865
+ });
866
+ }
867
+ /**
868
+ * Get a valid access token, authenticating if necessary
869
+ *
870
+ * Token resolution priority:
871
+ * 1. Check environment variables (for CI/CD)
872
+ * 2. Check file storage for cached tokens
873
+ * 3. Try to refresh if expired but refresh token exists
874
+ * 4. Run full OAuth flow if needed
875
+ */
876
+ async getAccessToken() {
877
+ const envTokens = loadTokensFromEnv();
878
+ if (envTokens) {
879
+ debug("Using tokens from environment variables");
880
+ return {
881
+ accessToken: envTokens.accessToken,
882
+ tokenType: envTokens.tokenType,
883
+ expiresAt: envTokens.expiresAt,
884
+ refreshed: false,
885
+ fromEnv: true
886
+ };
887
+ }
888
+ const storedTokens = await this.storage.loadTokens();
889
+ if (storedTokens?.accessToken) {
890
+ const isValid = await this.storage.hasValidToken();
891
+ if (isValid) {
892
+ debug("Using cached tokens from storage");
893
+ return {
894
+ accessToken: storedTokens.accessToken,
895
+ tokenType: storedTokens.tokenType,
896
+ expiresAt: storedTokens.expiresAt,
897
+ refreshed: false,
898
+ fromEnv: false
899
+ };
900
+ }
901
+ if (storedTokens.refreshToken) {
902
+ debug("Token expired, attempting refresh");
903
+ try {
904
+ const refreshedTokens = await this.refreshStoredToken(storedTokens);
905
+ return {
906
+ accessToken: refreshedTokens.accessToken,
907
+ tokenType: refreshedTokens.tokenType,
908
+ expiresAt: refreshedTokens.expiresAt,
909
+ refreshed: true,
910
+ fromEnv: false
911
+ };
912
+ } catch (error) {
913
+ debug("Token refresh failed, will re-authenticate:", error);
914
+ }
915
+ }
916
+ }
917
+ debug("Performing full OAuth authentication");
918
+ return this.authenticate();
919
+ }
920
+ /**
921
+ * Try to get a valid access token without triggering browser auth
922
+ *
923
+ * Returns null if no valid token is available (no stored tokens,
924
+ * expired without refresh token, or refresh failed). Unlike getAccessToken(),
925
+ * this will NOT open a browser for authentication.
926
+ *
927
+ * Use this for CLI commands that should prompt the user to run `login`
928
+ * instead of automatically starting the OAuth flow.
929
+ */
930
+ async tryGetAccessToken() {
931
+ const envTokens = loadTokensFromEnv();
932
+ if (envTokens) {
933
+ debug("Using tokens from environment variables");
934
+ return {
935
+ accessToken: envTokens.accessToken,
936
+ tokenType: envTokens.tokenType,
937
+ expiresAt: envTokens.expiresAt,
938
+ refreshed: false,
939
+ fromEnv: true
940
+ };
941
+ }
942
+ const storedTokens = await this.storage.loadTokens();
943
+ if (storedTokens?.accessToken) {
944
+ const isValid = await this.storage.hasValidToken();
945
+ if (isValid) {
946
+ debug("Using cached tokens from storage");
947
+ return {
948
+ accessToken: storedTokens.accessToken,
949
+ tokenType: storedTokens.tokenType,
950
+ expiresAt: storedTokens.expiresAt,
951
+ refreshed: false,
952
+ fromEnv: false
953
+ };
954
+ }
955
+ if (storedTokens.refreshToken) {
956
+ debug("Token expired, attempting refresh");
957
+ try {
958
+ const refreshedTokens = await this.refreshStoredToken(storedTokens);
959
+ return {
960
+ accessToken: refreshedTokens.accessToken,
961
+ tokenType: refreshedTokens.tokenType,
962
+ expiresAt: refreshedTokens.expiresAt,
963
+ refreshed: true,
964
+ fromEnv: false
965
+ };
966
+ } catch (error) {
967
+ debug("Token refresh failed:", error);
968
+ return null;
969
+ }
970
+ }
971
+ }
972
+ debug("No valid token available");
973
+ return null;
974
+ }
975
+ /**
976
+ * Force a new authentication flow
977
+ */
978
+ async authenticate() {
979
+ const { protectedResource, authServer } = await this.discoverServers();
980
+ const client = await this.getOrRegisterClient(authServer);
981
+ const { tokens, requestedScopes } = await this.performOAuthFlow(
982
+ authServer,
983
+ client,
984
+ protectedResource
985
+ );
986
+ return {
987
+ accessToken: tokens.accessToken,
988
+ tokenType: tokens.tokenType,
989
+ expiresAt: tokens.expiresAt,
990
+ refreshed: false,
991
+ fromEnv: false,
992
+ requestedScopes
993
+ };
994
+ }
995
+ /**
996
+ * Check if stored credentials exist (may be expired)
997
+ */
998
+ async hasStoredCredentials() {
999
+ const tokens = await this.storage.loadTokens();
1000
+ return tokens?.accessToken !== void 0;
1001
+ }
1002
+ /**
1003
+ * Clear stored credentials
1004
+ */
1005
+ async clearCredentials() {
1006
+ await this.storage.deleteTokens();
1007
+ debug("Cleared stored credentials");
1008
+ }
1009
+ /**
1010
+ * Discover protected resource and authorization server
1011
+ */
1012
+ async discoverServers() {
1013
+ const cachedMetadata = await this.storage.loadServerMetadata();
1014
+ if (cachedMetadata) {
1015
+ const age = Date.now() - cachedMetadata.discoveredAt;
1016
+ if (age < DEFAULT_METADATA_TTL_MS) {
1017
+ debug("Using cached server metadata (age: %dms)", age);
1018
+ debug(
1019
+ "Cached protected resource scopes: %O",
1020
+ cachedMetadata.protectedResource.scopes_supported
1021
+ );
1022
+ debug(
1023
+ "Cached auth server scopes: %O",
1024
+ cachedMetadata.authServer.server.scopes_supported
1025
+ );
1026
+ return {
1027
+ protectedResource: cachedMetadata.protectedResource,
1028
+ authServer: cachedMetadata.authServer
1029
+ };
1030
+ }
1031
+ debug("Cached server metadata is stale (age: %dms), re-discovering", age);
1032
+ }
1033
+ debug("Discovering protected resource:", this.config.mcpServerUrl);
1034
+ const prResult = await discoverProtectedResource(this.config.mcpServerUrl);
1035
+ debug("Found protected resource:", prResult.metadata.resource);
1036
+ debug(
1037
+ "Protected resource scopes_supported: %O",
1038
+ prResult.metadata.scopes_supported
1039
+ );
1040
+ const authServerUrl = prResult.metadata.authorization_servers?.[0];
1041
+ if (!authServerUrl) {
1042
+ throw new Error(
1043
+ "No authorization servers found in protected resource metadata"
1044
+ );
1045
+ }
1046
+ debug("Discovering authorization server:", authServerUrl);
1047
+ const authServer = await discoverAuthorizationServer(authServerUrl);
1048
+ debug("Found authorization server:", authServer.issuer);
1049
+ debug(
1050
+ "Auth server scopes_supported: %O",
1051
+ authServer.server.scopes_supported
1052
+ );
1053
+ const metadata = {
1054
+ authServer,
1055
+ protectedResource: prResult.metadata,
1056
+ discoveredAt: Date.now()
1057
+ };
1058
+ await this.storage.saveServerMetadata(metadata);
1059
+ return {
1060
+ protectedResource: prResult.metadata,
1061
+ authServer
1062
+ };
1063
+ }
1064
+ /**
1065
+ * Get existing client or register new one via DCR
1066
+ */
1067
+ async getOrRegisterClient(authServer) {
1068
+ if (this.config.clientId) {
1069
+ debug("Using pre-configured client ID");
1070
+ return {
1071
+ clientId: this.config.clientId,
1072
+ clientSecret: this.config.clientSecret
1073
+ };
1074
+ }
1075
+ const cachedClient = await this.storage.loadClient();
1076
+ if (cachedClient?.clientId) {
1077
+ debug("Using cached client registration");
1078
+ return cachedClient;
1079
+ }
1080
+ debug("Registering new client via DCR");
1081
+ const client = await this.registerClient(authServer);
1082
+ await this.storage.saveClient(client);
1083
+ return client;
1084
+ }
1085
+ /**
1086
+ * Register a new client via Dynamic Client Registration
1087
+ */
1088
+ async registerClient(authServer) {
1089
+ const registrationEndpoint = authServer.server.registration_endpoint;
1090
+ if (!registrationEndpoint) {
1091
+ throw new Error(
1092
+ "Authorization server does not support Dynamic Client Registration. Please provide a clientId in the configuration."
1093
+ );
1094
+ }
1095
+ const redirectUri = "http://127.0.0.1:0/callback";
1096
+ const response = await fetch(registrationEndpoint, {
1097
+ method: "POST",
1098
+ headers: {
1099
+ "Content-Type": "application/json",
1100
+ "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
1101
+ },
1102
+ body: JSON.stringify({
1103
+ redirect_uris: [redirectUri],
1104
+ token_endpoint_auth_method: "none",
1105
+ grant_types: ["authorization_code", "refresh_token"],
1106
+ response_types: ["code"],
1107
+ client_name: this.config.clientName ?? DEFAULT_CLIENT_NAME
1108
+ })
1109
+ });
1110
+ if (!response.ok) {
1111
+ const errorText = await response.text();
1112
+ throw new Error(
1113
+ `Dynamic Client Registration failed: ${response.status} ${response.statusText}
1114
+ ${errorText}`
1115
+ );
1116
+ }
1117
+ const data = await response.json();
1118
+ debug("Client registered:", data.client_id);
1119
+ return {
1120
+ clientId: data.client_id,
1121
+ clientSecret: data.client_secret,
1122
+ clientIdIssuedAt: data.client_id_issued_at,
1123
+ clientSecretExpiresAt: data.client_secret_expires_at
1124
+ };
1125
+ }
1126
+ /**
1127
+ * Perform the full OAuth authorization flow
1128
+ */
1129
+ async performOAuthFlow(authServer, client, protectedResource) {
1130
+ const pkce = await generatePKCE();
1131
+ const state = generateState();
1132
+ const { port, codePromise, close } = await this.startCallbackServer(state);
1133
+ const redirectUri = `http://127.0.0.1:${port}/callback`;
1134
+ try {
1135
+ const requestedScopes = this.config.scopes ?? protectedResource.scopes_supported ?? authServer.server.scopes_supported ?? ["openid"];
1136
+ debug("Scope resolution:");
1137
+ debug(" - User config scopes: %O", this.config.scopes);
1138
+ debug(
1139
+ " - Protected resource scopes_supported: %O",
1140
+ protectedResource.scopes_supported
1141
+ );
1142
+ debug(
1143
+ " - Auth server scopes_supported: %O",
1144
+ authServer.server.scopes_supported
1145
+ );
1146
+ debug(" - Final requested scopes: %O", requestedScopes);
1147
+ const authUrl = buildAuthorizationUrl({
1148
+ authServer,
1149
+ clientId: client.clientId,
1150
+ redirectUri,
1151
+ scopes: requestedScopes,
1152
+ codeChallenge: pkce.codeChallenge,
1153
+ state,
1154
+ resource: protectedResource.resource
1155
+ });
1156
+ debug("Authorization URL: %s", authUrl.toString());
1157
+ debug("Authorization URL params:");
1158
+ debug(" - client_id: %s", authUrl.searchParams.get("client_id"));
1159
+ debug(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
1160
+ debug(" - scope: %s", authUrl.searchParams.get("scope"));
1161
+ debug(" - resource: %s", authUrl.searchParams.get("resource"));
1162
+ await this.openBrowserOrPrintUrl(authUrl);
1163
+ debug("Waiting for OAuth callback...");
1164
+ const code = await codePromise;
1165
+ debug("Received authorization code");
1166
+ const tokenResult = await exchangeCodeForTokens({
1167
+ authServer,
1168
+ clientId: client.clientId,
1169
+ clientSecret: client.clientSecret,
1170
+ code,
1171
+ state,
1172
+ codeVerifier: pkce.codeVerifier,
1173
+ redirectUri
1174
+ });
1175
+ const tokens = this.tokenResultToStoredTokens(
1176
+ tokenResult,
1177
+ client.clientId
1178
+ );
1179
+ await this.storage.saveTokens(tokens);
1180
+ return { tokens, requestedScopes };
1181
+ } finally {
1182
+ close();
1183
+ }
1184
+ }
1185
+ /**
1186
+ * Refresh an expired token
1187
+ *
1188
+ * Uses the clientId stored with the tokens (if available) to ensure
1189
+ * the refresh request uses the same client that obtained the original tokens.
1190
+ * This is important because refresh tokens are bound to the client_id.
1191
+ */
1192
+ async refreshStoredToken(storedTokens) {
1193
+ if (!storedTokens.refreshToken) {
1194
+ throw new Error("No refresh token available");
1195
+ }
1196
+ const metadata = await this.storage.loadServerMetadata();
1197
+ if (!metadata) {
1198
+ throw new Error("No cached server metadata for refresh");
1199
+ }
1200
+ let clientId;
1201
+ let clientSecret;
1202
+ if (storedTokens.clientId) {
1203
+ debug("Using clientId from stored tokens for refresh");
1204
+ clientId = storedTokens.clientId;
1205
+ const storedClient = await this.storage.loadClient();
1206
+ if (storedClient?.clientId === clientId) {
1207
+ clientSecret = storedClient.clientSecret;
1208
+ }
1209
+ } else {
1210
+ debug(
1211
+ "No clientId in stored tokens, falling back to stored client (legacy behavior)"
1212
+ );
1213
+ const client = await this.getOrRegisterClient(metadata.authServer);
1214
+ clientId = client.clientId;
1215
+ clientSecret = client.clientSecret;
1216
+ }
1217
+ const tokenResult = await refreshAccessToken({
1218
+ authServer: metadata.authServer,
1219
+ clientId,
1220
+ clientSecret,
1221
+ refreshToken: storedTokens.refreshToken
1222
+ });
1223
+ const tokens = this.tokenResultToStoredTokens(tokenResult, clientId);
1224
+ await this.storage.saveTokens(tokens);
1225
+ return tokens;
1226
+ }
1227
+ /**
1228
+ * Start local callback server
1229
+ */
1230
+ async startCallbackServer(expectedState) {
1231
+ const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS2;
1232
+ return new Promise((resolve, reject) => {
1233
+ const server = http.createServer();
1234
+ const connections = /* @__PURE__ */ new Set();
1235
+ server.on("connection", (socket) => {
1236
+ connections.add(socket);
1237
+ socket.on("close", () => connections.delete(socket));
1238
+ });
1239
+ const forceClose = () => {
1240
+ for (const socket of connections) {
1241
+ socket.destroy();
1242
+ }
1243
+ server.close();
1244
+ };
1245
+ let codeResolve;
1246
+ let codeReject;
1247
+ const codePromise = new Promise((res, rej) => {
1248
+ codeResolve = res;
1249
+ codeReject = rej;
1250
+ });
1251
+ const timeout = setTimeout(() => {
1252
+ forceClose();
1253
+ codeReject(new Error(`OAuth flow timed out after ${timeoutMs}ms`));
1254
+ }, timeoutMs);
1255
+ server.on("request", (req, res) => {
1256
+ const url = new URL(
1257
+ req.url ?? "/",
1258
+ `http://127.0.0.1:${server.address().port}`
1259
+ );
1260
+ if (url.pathname !== "/callback") {
1261
+ res.writeHead(404);
1262
+ res.end("Not Found");
1263
+ return;
1264
+ }
1265
+ const error = url.searchParams.get("error");
1266
+ if (error) {
1267
+ const errorDescription = url.searchParams.get("error_description");
1268
+ clearTimeout(timeout);
1269
+ res.writeHead(400, { "Content-Type": "text/html" });
1270
+ res.end(this.errorHtml(error, errorDescription ?? void 0));
1271
+ codeReject(
1272
+ new Error(
1273
+ `OAuth error: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`
1274
+ )
1275
+ );
1276
+ return;
1277
+ }
1278
+ const state = url.searchParams.get("state");
1279
+ if (state !== expectedState) {
1280
+ clearTimeout(timeout);
1281
+ res.writeHead(400, { "Content-Type": "text/html" });
1282
+ res.end(this.errorHtml("invalid_state", "State parameter mismatch"));
1283
+ codeReject(new Error("OAuth state mismatch - possible CSRF attack"));
1284
+ return;
1285
+ }
1286
+ const code = url.searchParams.get("code");
1287
+ if (!code) {
1288
+ clearTimeout(timeout);
1289
+ res.writeHead(400, { "Content-Type": "text/html" });
1290
+ res.end(
1291
+ this.errorHtml("missing_code", "No authorization code received")
1292
+ );
1293
+ codeReject(new Error("No authorization code in callback"));
1294
+ return;
1295
+ }
1296
+ clearTimeout(timeout);
1297
+ res.writeHead(200, { "Content-Type": "text/html" });
1298
+ res.end(this.successHtml());
1299
+ codeResolve(code);
1300
+ });
1301
+ const preferredPort = this.config.callbackPort ?? 0;
1302
+ server.listen(preferredPort, "127.0.0.1", () => {
1303
+ const address = server.address();
1304
+ debug("Callback server listening on port", address.port);
1305
+ resolve({ port: address.port, codePromise, close: forceClose });
1306
+ });
1307
+ server.on("error", (err) => {
1308
+ reject(err);
1309
+ });
1310
+ });
1311
+ }
1312
+ /**
1313
+ * Open browser or print URL for headless environments
1314
+ */
1315
+ async openBrowserOrPrintUrl(url) {
1316
+ if (isHeadless()) {
1317
+ console.log("\n" + "=".repeat(60));
1318
+ console.log(
1319
+ "Please open the following URL in your browser to authenticate:"
1320
+ );
1321
+ console.log("\n" + url.toString() + "\n");
1322
+ console.log("=".repeat(60) + "\n");
1323
+ return;
1324
+ }
1325
+ try {
1326
+ const open = await import('open');
1327
+ await open.default(url.toString());
1328
+ debug("Opened browser for authentication");
1329
+ } catch (error) {
1330
+ debug("Failed to open browser:", error);
1331
+ console.log("\nFailed to open browser automatically.");
1332
+ console.log("Please open the following URL manually:\n");
1333
+ console.log(url.toString() + "\n");
1334
+ }
1335
+ }
1336
+ /**
1337
+ * Convert TokenResult to StoredTokens
1338
+ *
1339
+ * @param result - Token result from exchange or refresh
1340
+ * @param clientId - Client ID that was used to obtain these tokens
1341
+ */
1342
+ tokenResultToStoredTokens(result, clientId) {
1343
+ return {
1344
+ accessToken: result.accessToken,
1345
+ tokenType: result.tokenType,
1346
+ refreshToken: result.refreshToken,
1347
+ expiresAt: result.expiresIn ? Date.now() + result.expiresIn * 1e3 : void 0,
1348
+ clientId
1349
+ };
1350
+ }
1351
+ /**
1352
+ * HTML page for successful authentication
1353
+ */
1354
+ successHtml() { // Returns a fixed HTML document; the template has no interpolated values.
1355
+ return `
1356
+ <!DOCTYPE html>
1357
+ <html>
1358
+ <head>
1359
+ <meta charset="UTF-8">
1360
+ <title>Authentication Successful</title>
1361
+ <style>
1362
+ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
1363
+ display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0;
1364
+ background: #f8fafc; }
1365
+ .container { text-align: center; background: white; padding: 48px 64px; border-radius: 8px;
1366
+ border: 1px solid #e2e8f0; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
1367
+ .icon { width: 48px; height: 48px; margin: 0 auto 24px; background: #dcfce7; border-radius: 50%;
1368
+ display: flex; align-items: center; justify-content: center; }
1369
+ .icon svg { width: 24px; height: 24px; color: #16a34a; }
1370
+ h1 { color: #0f172a; margin: 0 0 8px 0; font-size: 20px; font-weight: 600; }
1371
+ p { color: #64748b; margin: 0; font-size: 14px; }
1372
+ </style>
1373
+ </head>
1374
+ <body>
1375
+ <div class="container">
1376
+ <div class="icon">
1377
+ <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
1378
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"/>
1379
+ </svg>
1380
+ </div>
1381
+ <h1>Authentication Successful</h1>
1382
+ <p>You can close this window and return to the terminal.</p>
1383
+ </div>
1384
+ </body>
1385
+ </html>`;
1386
+ }
1387
+ /**
1388
+ * HTML page for authentication error
1389
+ */
1390
+ errorHtml(error, description) { // Both values pass through escapeHtml; the description paragraph is omitted when description is falsy.
1391
+ return `
1392
+ <!DOCTYPE html>
1393
+ <html>
1394
+ <head>
1395
+ <meta charset="UTF-8">
1396
+ <title>Authentication Failed</title>
1397
+ <style>
1398
+ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
1399
+ display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0;
1400
+ background: #f8fafc; }
1401
+ .container { text-align: center; background: white; padding: 48px 64px; border-radius: 8px;
1402
+ border: 1px solid #e2e8f0; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
1403
+ .icon { width: 48px; height: 48px; margin: 0 auto 24px; background: #fee2e2; border-radius: 50%;
1404
+ display: flex; align-items: center; justify-content: center; }
1405
+ .icon svg { width: 24px; height: 24px; color: #dc2626; }
1406
+ h1 { color: #0f172a; margin: 0 0 8px 0; font-size: 20px; font-weight: 600; }
1407
+ p { color: #64748b; margin: 0 0 8px 0; font-size: 14px; }
1408
+ code { background: #f1f5f9; padding: 2px 8px; border-radius: 4px; color: #dc2626; font-size: 13px; }
1409
+ </style>
1410
+ </head>
1411
+ <body>
1412
+ <div class="container">
1413
+ <div class="icon">
1414
+ <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
1415
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12"/>
1416
+ </svg>
1417
+ </div>
1418
+ <h1>Authentication Failed</h1>
1419
+ <p>Error: <code>${escapeHtml(error)}</code></p>
1420
+ ${description ? `<p>${escapeHtml(description)}</p>` : ""}
1421
+ </div>
1422
+ </body>
1423
+ </html>`;
1424
+ }
1425
+ };
1426
/**
 * Decide whether a browser can plausibly be opened from this process.
 *
 * @returns {boolean} true when running under CI, when stdin is not a TTY, or
 *   on Linux with neither DISPLAY nor WAYLAND_DISPLAY set.
 */
function isHeadless() {
  // CI is checked first so stdin is only inspected when it has to be.
  if (process.env.CI || !process.stdin.isTTY) {
    return true;
  }
  const onLinux = process.platform === "linux";
  const hasDisplay = Boolean(
    process.env.DISPLAY || process.env.WAYLAND_DISPLAY
  );
  return onLinux && !hasDisplay;
}
1438
/**
 * Escape the five HTML-significant characters in a string.
 *
 * @param {string} text - Raw text to embed in HTML.
 * @returns {string} Text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(text) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;"
  };
  // One pass over the input, so ampersands produced here are never re-escaped.
  return text.replace(/[&<>"']/g, (character) => entities[character]);
}
1441
/**
 * Build an MCP client for the given configuration and connect it.
 *
 * @param config - Raw configuration; passed through validateMCPConfig first.
 * @param options - Optional clientInfo ({ name, version }) and authProvider.
 * @returns The connected client.
 */
async function createMCPClientForConfig(config, options) {
  const validatedConfig = validateMCPConfig(config);
  const clientInfo = options?.clientInfo;
  const identity = {
    name: clientInfo?.name ?? "@gleanwork/mcp-server-tester",
    version: clientInfo?.version ?? "0.1.0"
  };
  const client = new Client(identity, {
    capabilities: validatedConfig.capabilities ?? {}
  });

  if (isStdioConfig(validatedConfig)) {
    const { command, args, cwd, quiet } = validatedConfig;
    const transportOptions = { command, args: args ?? [] };
    if (cwd) {
      transportOptions.cwd = cwd;
    }
    if (quiet) {
      // Suppress server stderr when quiet mode is enabled
      transportOptions.stderr = "ignore";
    }
    const transport = new StdioClientTransport(transportOptions);
    debugClient("Connecting via stdio: %O", { command, args, cwd });
    await client.connect(transport);
  } else if (isHttpConfig(validatedConfig)) {
    const authProvider = options?.authProvider;
    const headers = { ...validatedConfig.headers };
    // A static bearer token is only attached when no auth provider is given.
    const staticToken = validatedConfig.auth?.accessToken;
    if (staticToken && !authProvider) {
      headers.Authorization = `Bearer ${staticToken}`;
    }
    const headerNames = Object.keys(headers);
    const hasHeaders = headerNames.length > 0;
    const transport = new StreamableHTTPClientTransport(
      new URL(validatedConfig.serverUrl),
      {
        requestInit: hasHeaders ? { headers } : void 0,
        // Pass auth provider for OAuth flow - MCP SDK handles it automatically
        authProvider
      }
    );
    // Only header names are logged, never their values.
    debugClient("Connecting via HTTP: %O", {
      serverUrl: validatedConfig.serverUrl,
      headers: hasHeaders ? headerNames : void 0,
      hasAuthProvider: !!authProvider
    });
    await client.connect(transport);
  }

  debugClient("Connected successfully");
  const serverInfo = client.getServerVersion();
  if (serverInfo) {
    debugClient("Server info: %O", serverInfo);
  }
  return client;
}
1493
/**
 * Close an MCP client, logging any failure before propagating it.
 *
 * @param client - Object exposing an async close() method.
 * @returns {Promise<void>}
 * @throws Whatever client.close() throws, after it has been logged.
 */
async function closeMCPClient(client) {
  try {
    return void (await client.close());
  } catch (closeError) {
    console.error("[MCP] Error closing client:", closeError);
    throw closeError;
  }
}
1501
+
1502
+ // src/mcp/response.ts
1503
/**
 * Convert a raw tool-call result into a uniform shape.
 *
 * @param result - Raw result; may carry content, structuredContent, isError.
 * @returns {{text: string, raw: *, isError: boolean, contentBlocks: Array, structuredContent: *}}
 *   text is every string `text` field joined by newlines, falling back to the
 *   structured content when no block has text. structuredContent is null when
 *   the result has none.
 */
function normalizeToolResponse(result) {
  const rawBlocks = Array.isArray(result.content) ? result.content : [];

  // Keep only object entries and copy over the recognised fields.
  const contentBlocks = rawBlocks
    .filter((entry) => entry != null && typeof entry === "object")
    .map((entry) => {
      const normalized = {
        type: typeof entry.type === "string" ? entry.type : "unknown"
      };
      if (typeof entry.text === "string") {
        normalized.text = entry.text;
      }
      if (entry.data !== void 0) {
        normalized.data = entry.data;
      }
      if (typeof entry.mimeType === "string") {
        normalized.mimeType = entry.mimeType;
      }
      return normalized;
    });

  // `text` is only ever set on a normalized block when it was a string.
  const textParts = contentBlocks
    .filter((block) => "text" in block)
    .map((block) => block.text);

  const structured = result.structuredContent;
  const hasStructured = structured !== void 0;
  if (hasStructured && textParts.length === 0) {
    // No textual blocks: derive the text from the structured payload.
    if (typeof structured === "string") {
      textParts.push(structured);
    } else if (structured != null) {
      textParts.push(JSON.stringify(structured));
    }
  }

  return {
    text: textParts.join("\n"),
    raw: result,
    isError: result.isError ?? false,
    contentBlocks,
    structuredContent: hasStructured ? structured : null
  };
}
1549
/**
 * Pull a text representation out of many possible response shapes.
 *
 * @param {*} response - String, primitive, normalized response, raw tool
 *   result, content array, or arbitrary object.
 * @returns {string} Extracted text; "" for null/undefined and for types with
 *   no text form (functions, symbols).
 */
function extractText(response) {
  if (response == null) {
    return "";
  }
  switch (typeof response) {
    case "string":
      return response;
    case "number":
    case "boolean":
    case "bigint":
      return String(response);
    case "object":
      break;
    default:
      return "";
  }

  // Object shapes, most specific first.
  if (isNormalizedResponse(response)) {
    return response.text;
  }
  if (isCallToolResult(response)) {
    return normalizeToolResponse(response).text;
  }
  if (Array.isArray(response)) {
    return extractTextFromContentArray(response);
  }

  const record = response;
  if (Array.isArray(record.content)) {
    return extractTextFromContentArray(record.content);
  }
  if (typeof record.content === "string") {
    return record.content;
  }
  if (record.structuredContent !== void 0) {
    return typeof record.structuredContent === "string"
      ? record.structuredContent
      : JSON.stringify(record.structuredContent);
  }
  if (typeof record.text === "string") {
    return record.text;
  }
  // Nothing recognisable: fall back to the serialized object.
  return JSON.stringify(record);
}
1589
/**
 * Check whether a value has the shape produced by normalizeToolResponse.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} true when text is a string, isError a boolean,
 *   contentBlocks an array and raw is defined.
 */
function isNormalizedResponse(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const checks = [
    () => typeof value.text === "string",
    () => typeof value.isError === "boolean",
    () => Array.isArray(value.contentBlocks),
    () => value.raw !== void 0
  ];
  return checks.every((check) => check());
}
1596
/**
 * Loose check for a raw tool-call result.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} true for objects with an array `content` or a boolean
 *   `isError`.
 */
function isCallToolResult(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const hasContentArray = Array.isArray(value.content);
  return hasContentArray || typeof value.isError === "boolean";
}
1603
/**
 * Join the text of every `{ type: "text", text }` block in a content array.
 *
 * @param {Array} content - Content blocks; non-object entries are ignored.
 * @returns {string} Newline-joined text, or the JSON of the whole array when
 *   it holds no text block.
 */
function extractTextFromContentArray(content) {
  const texts = content
    .filter(
      (block) =>
        block != null &&
        typeof block === "object" &&
        block.type === "text" &&
        typeof block.text === "string"
    )
    .map((block) => block.text);
  return texts.length > 0 ? texts.join("\n") : JSON.stringify(content);
}
1619
+
1620
+ // src/assertions/validators/utils.ts
1621
+ var extractText2 = extractText; // Alias of extractText; the validator helpers below call it under this name.
1622
/**
 * Measure a response's size in UTF-8 bytes.
 *
 * Strings are measured as-is; other values are measured on their
 * pretty-printed (2-space) JSON form.
 *
 * @param {*} response - Value to measure.
 * @returns {number} Byte length; 0 for null or undefined.
 */
function getResponseSizeBytes(response) {
  if (response === null || response === void 0) {
    return 0;
  }
  if (typeof response === "string") {
    return Buffer.byteLength(response, "utf8");
  }
  // JSON.stringify throws on a BigInt and yields undefined for functions and
  // symbols; Buffer.byteLength(undefined) would then throw. Use the value's
  // String() form in those cases.
  const serialized =
    typeof response === "bigint"
      ? String(response)
      : JSON.stringify(response, null, 2) ?? String(response);
  return Buffer.byteLength(serialized, "utf8");
}
1632
/**
 * Turn any response into a string for comparison or display.
 *
 * @param {*} response - Value to stringify.
 * @returns {string} "" for null/undefined, the string itself for strings,
 *   pretty-printed (2-space) JSON otherwise.
 */
function stringifyResponse(response) {
  if (response === null || response === void 0) {
    return "";
  }
  if (typeof response === "string") {
    return response;
  }
  // JSON.stringify throws on a BigInt, so stringify it directly.
  if (typeof response === "bigint") {
    return String(response);
  }
  // JSON.stringify yields undefined for functions and symbols; fall back to
  // String() so callers always receive a string.
  return JSON.stringify(response, null, 2) ?? String(response);
}
1641
/**
 * Tell whether a response is flagged as an error.
 *
 * @param {*} response - Raw or normalized response.
 * @returns {boolean} true when `isError === true` on the response itself or
 *   on its object-valued `raw` property.
 */
function isErrorResponse(response) {
  if (typeof response !== "object" || response === null) {
    return false;
  }
  if (response.isError === true) {
    return true;
  }
  // Normalized responses carry the original payload under `raw`.
  const hasRawObject =
    "raw" in response &&
    typeof response.raw === "object" &&
    response.raw !== null;
  return hasRawObject && response.raw.isError === true;
}
1658
/**
 * Get the text of an error response.
 *
 * @param {*} response - Response to inspect.
 * @returns {string} Extracted text when the response is an error, else "".
 */
function extractErrorMessage(response) {
  return isErrorResponse(response) ? extractText2(response) : "";
}
1664
/**
 * Collapse every run of whitespace to one space and strip both ends.
 *
 * @param {string} text - Input text.
 * @returns {string} Text with normalized whitespace.
 */
function normalizeWhitespace(text) {
  const whitespaceRun = /\s+/g;
  return text.trim().replace(whitespaceRun, " ");
}
1667
+
1668
+ // src/assertions/validators/response.ts
1669
/**
 * Compare two responses by their stringified form.
 *
 * @param {*} actual - Response received.
 * @param {*} expected - Response expected.
 * @returns {{pass: boolean, message: string, details?: object}} On mismatch,
 *   details holds truncated previews of both sides.
 */
function validateResponse(actual, expected) {
  const [actualStr, expectedStr] = [actual, expected].map((value) =>
    stringifyResponse(value)
  );
  if (actualStr !== expectedStr) {
    return {
      pass: false,
      message: `Response does not match expected value`,
      details: {
        actual: truncateForDisplay(actualStr),
        expected: truncateForDisplay(expectedStr)
      }
    };
  }
  return {
    pass: true,
    message: "Response matches expected value"
  };
}
1687
/**
 * Shorten a string for inclusion in a failure report.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=500] - Longest length returned unmodified.
 * @returns {string} The input, or its first maxLength characters followed by
 *   "... (truncated)".
 */
function truncateForDisplay(str, maxLength = 500) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
1693
+
1694
+ // src/assertions/validators/schema.ts
1695
/**
 * Validate a response against a Zod schema.
 *
 * The value that gets validated is whatever getValidatableValue derives from
 * the response.
 *
 * @param {*} response - Response to validate.
 * @param schema - Object exposing a Zod-style `parse(value)` method.
 * @param {object} [options] - Accepted for interface compatibility; its
 *   `strict` flag has no effect in this function.
 * @returns {{pass: boolean, message: string, details?: {issues: Array}}}
 * @throws Any error from `schema.parse` that is not a validation error (one
 *   without an `issues` array).
 */
function validateSchema(response, schema, options = {}) {
  const valueToValidate = getValidatableValue(response);
  try {
    schema.parse(valueToValidate);
    return {
      pass: true,
      message: "Response matches schema"
    };
  } catch (error) {
    // Only validation errors carry an `issues` array. Anything else is a real
    // failure and must not be reported as a schema mismatch.
    if (!Array.isArray(error?.issues)) {
      throw error;
    }
    const issues = formatZodIssues(error);
    return {
      pass: false,
      message: `Response does not match schema: ${issues}`,
      details: {
        issues: error.issues
      }
    };
  }
}
1716
/**
 * Choose the value that schema validation should run against.
 *
 * @param {*} response - Raw or normalized response, string or other value.
 * @returns {*} null for null/undefined; structuredContent when defined;
 *   JSON parsed from the response text when it parses; otherwise the
 *   response itself.
 */
function getValidatableValue(response) {
  if (response === null || response === void 0) {
    return null;
  }
  if (typeof response === "string") {
    return tryParseJson(response) ?? response;
  }
  if (typeof response !== "object" || Array.isArray(response)) {
    return response;
  }

  // Structured content, when defined, always wins.
  if ("structuredContent" in response && response.structuredContent !== void 0) {
    return response.structuredContent;
  }

  // Normalized responses: try to read JSON from their text.
  const normalizedKeys = ["raw", "text", "isError", "contentBlocks"];
  if (normalizedKeys.every((key) => key in response)) {
    return tryParseJson(response.text) ?? response;
  }

  // Raw results with a content array: extract the text, then try JSON.
  if ("content" in response && Array.isArray(response.content)) {
    return tryParseJson(extractText2(response)) ?? response;
  }
  return response;
}
1743
/**
 * Parse text as JSON when it looks like an object or an array.
 *
 * @param {*} text - Candidate text.
 * @returns {*} The parsed value, or null for non-strings, empty strings, text
 *   not wrapped in {} / [] delimiters, and invalid JSON.
 */
function tryParseJson(text) {
  if (typeof text !== "string" || text === "") {
    return null;
  }
  const candidate = text.trim();
  // Cheap delimiter check so obvious non-JSON never reaches the parser.
  const opensLikeJson = /^[{[]/.test(candidate);
  const closesLikeJson = /[}\]]$/.test(candidate);
  if (!opensLikeJson || !closesLikeJson) {
    return null;
  }
  try {
    return JSON.parse(candidate);
  } catch {
    return null;
  }
}
1757
/**
 * Render validation issues as one line.
 *
 * @param {{issues: Array<{path: Array, message: string}>}} error
 * @returns {string} "path: message" entries joined by "; "; an empty path is
 *   shown as "root".
 */
function formatZodIssues(error) {
  const describe = ({ path: location, message }) => {
    const label = location.length > 0 ? location.join(".") : "root";
    return `${label}: ${message}`;
  };
  return error.issues.map((issue) => describe(issue)).join("; ");
}
1764
+
1765
+ // src/assertions/validators/text.ts
1766
/**
 * Check that the response text contains every expected substring.
 *
 * @param {*} response - Response whose text is inspected.
 * @param {string|string[]} expected - Substring(s) that must all be present.
 * @param {{caseSensitive?: boolean}} [options] - caseSensitive defaults to true.
 * @returns {{pass: boolean, message: string, details?: object}}
 */
function validateText(response, expected, options = {}) {
  const { caseSensitive = true } = options;
  const expectedStrings = Array.isArray(expected) ? expected : [expected];
  // Fold both sides to lower case only for case-insensitive matching.
  const fold = caseSensitive ? (value) => value : (value) => value.toLowerCase();

  const text = extractText2(response);
  const haystack = fold(text);
  const missing = expectedStrings.filter(
    (needle) => !haystack.includes(fold(needle))
  );

  if (missing.length > 0) {
    return {
      pass: false,
      message: missing.length === 1 ? `Response does not contain expected text: "${missing[0]}"` : `Response is missing ${missing.length} expected substrings: ${missing.map((s) => `"${s}"`).join(", ")}`,
      details: {
        missing,
        textLength: text.length,
        textPreview: truncateForDisplay2(text)
      }
    };
  }
  return {
    pass: true,
    message: expectedStrings.length === 1 ? `Response contains expected text` : `Response contains all ${expectedStrings.length} expected substrings`
  };
}
1794
/**
 * Shorten a string for a text-validation failure preview.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Longest length returned unmodified.
 * @returns {string} The input, or its first maxLength characters followed by
 *   "... (truncated)".
 */
function truncateForDisplay2(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
1800
+
1801
+ // src/assertions/validators/pattern.ts
1802
/**
 * Check that the response text matches every given pattern.
 *
 * @param {*} response - Response whose text is inspected.
 * @param {string|RegExp|Array<string|RegExp>} patterns - Pattern(s) to match.
 * @param {{caseSensitive?: boolean}} [options] - caseSensitive defaults to true.
 * @returns {{pass: boolean, message: string, details?: object}}
 */
function validatePattern(response, patterns, options = {}) {
  const { caseSensitive = true } = options;
  const ignoreCase = !caseSensitive;
  const patternList = Array.isArray(patterns) ? patterns : [patterns];
  const text = extractText2(response);

  // Keep the printable form of each pattern that fails to match.
  const unmatched = patternList
    .filter((candidate) => !toRegExp(candidate, ignoreCase).test(text))
    .map((candidate) => patternToString(candidate));

  if (unmatched.length > 0) {
    return {
      pass: false,
      message: unmatched.length === 1 ? `Response does not match pattern: ${unmatched[0]}` : `Response does not match ${unmatched.length} patterns: ${unmatched.join(", ")}`,
      details: {
        unmatched,
        textLength: text.length,
        textPreview: truncateForDisplay3(text)
      }
    };
  }
  return {
    pass: true,
    message: patternList.length === 1 ? `Response matches pattern` : `Response matches all ${patternList.length} patterns`
  };
}
1830
/**
 * Build a RegExp that is safe for a one-off `.test()` call.
 *
 * @param {string|RegExp} pattern - Pattern source or an existing RegExp.
 * @param {boolean} caseInsensitive - Adds the "i" flag when true.
 * @returns {RegExp} The original RegExp when it is stateless and already has
 *   the right flags; otherwise a new RegExp.
 */
function toRegExp(pattern, caseInsensitive) {
  if (!(pattern instanceof RegExp)) {
    return new RegExp(pattern, caseInsensitive ? "i" : "");
  }
  // A global or sticky RegExp keeps state in lastIndex, so reusing the
  // caller's instance makes repeated .test() calls alternate between match
  // and no match. "g" has no effect on a single test, so it is dropped; a
  // sticky pattern is cloned so it starts at index 0 and the caller's
  // instance is never mutated.
  let flags = pattern.flags.replace("g", "");
  if (caseInsensitive && !flags.includes("i")) {
    flags += "i";
  }
  if (flags === pattern.flags && !pattern.sticky) {
    return pattern;
  }
  return new RegExp(pattern.source, flags);
}
1840
/**
 * Produce a printable form of a pattern for failure messages.
 *
 * @param {string|RegExp} pattern - Pattern to describe.
 * @returns {string} RegExp#toString() for regex input, otherwise the source
 *   wrapped in slashes.
 */
function patternToString(pattern) {
  return pattern instanceof RegExp ? pattern.toString() : `/${pattern}/`;
}
1846
/**
 * Shorten a string for a pattern-validation failure preview.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Longest length returned unmodified.
 * @returns {string} The input, or its first maxLength characters followed by
 *   "... (truncated)".
 */
function truncateForDisplay3(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
1852
+
1853
+ // src/assertions/validators/error.ts
1854
/**
 * Validates whether a tool response is (or is not) an error, optionally checking
 * that the error message contains expected text.
 *
 * @param {unknown} response - Tool response to inspect.
 * @param {boolean|string|string[]} [expected=true] - `true`: must be an error;
 *   `false`: must not be an error; string / string[]: must be an error whose message
 *   contains at least one of the given texts (compared case-insensitively).
 * @returns {{pass: boolean, message: string, details?: object}} Validation outcome.
 */
function validateError(response, expected = true) {
  const actualIsError = isErrorResponse(response);
  const errorMessage = actualIsError ? extractErrorMessage(response) : "";

  // Boolean expectation: only the error/success status matters.
  if (expected === true) {
    return actualIsError
      ? { pass: true, message: "Response is an error as expected" }
      : {
          pass: false,
          message: "Expected an error response but got success",
          details: {
            textPreview: truncateForDisplay4(extractText2(response))
          }
        };
  }
  if (expected === false) {
    return actualIsError
      ? {
          pass: false,
          message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
          details: {
            errorMessage
          }
        }
      : { pass: true, message: "Response is not an error as expected" };
  }

  // Text expectation: the response must be an error containing one of the texts.
  const expectedMessages = Array.isArray(expected) ? expected : [expected];
  if (!actualIsError) {
    return {
      pass: false,
      message: `Expected an error containing "${expectedMessages[0]}" but got success`,
      details: {
        textPreview: truncateForDisplay4(extractText2(response))
      }
    };
  }

  const containsExpected = (msg) => errorMessage.toLowerCase().includes(msg.toLowerCase());
  if (expectedMessages.some(containsExpected)) {
    return {
      pass: true,
      message: "Error message contains expected text"
    };
  }

  const failure = expectedMessages.length === 1
    ? `Error message does not contain "${expectedMessages[0]}"`
    : `Error message does not contain any of: ${expectedMessages.map((m) => `"${m}"`).join(", ")}`;
  return {
    pass: false,
    message: failure,
    details: {
      actualErrorMessage: errorMessage,
      expectedToContain: expectedMessages
    }
  };
}
1916
/**
 * Caps a string at `maxLength` characters for display, marking any cut.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Longest length returned unchanged.
 * @returns {string} The original string, or its truncated form ending in "... (truncated)".
 */
function truncateForDisplay4(str, maxLength = 200) {
  if (str.length > maxLength) {
    const head = str.slice(0, maxLength);
    return head + "... (truncated)";
  }
  return str;
}
1922
+
1923
+ // src/assertions/validators/size.ts
1924
/**
 * Validates that a response's byte size lies within optional lower/upper bounds.
 *
 * @param {unknown} response - Tool response; measured with getResponseSizeBytes.
 * @param {{maxBytes?: number, minBytes?: number}} options - At least one bound must be given.
 * @returns {{pass: boolean, message: string, details?: object}} Validation outcome;
 *   fails immediately when neither bound is supplied.
 */
function validateSize(response, options) {
  const { maxBytes, minBytes } = options;
  const hasMin = minBytes !== void 0;
  const hasMax = maxBytes !== void 0;
  if (!hasMin && !hasMax) {
    return {
      pass: false,
      message: "Size validation requires at least one of maxBytes or minBytes"
    };
  }

  const actualSize = getResponseSizeBytes(response);
  const sizeLabel = formatBytes(actualSize);

  // Both bounds are checked so a single failure message can report every violation.
  const issues = [];
  if (hasMin && actualSize < minBytes) {
    issues.push(`Response size (${sizeLabel}) is below minimum (${formatBytes(minBytes)})`);
  }
  if (hasMax && actualSize > maxBytes) {
    issues.push(`Response size (${sizeLabel}) exceeds maximum (${formatBytes(maxBytes)})`);
  }

  if (issues.length > 0) {
    return {
      pass: false,
      message: issues.join("; "),
      details: {
        actualBytes: actualSize,
        minBytes,
        maxBytes
      }
    };
  }
  return {
    pass: true,
    message: `Response size (${sizeLabel}) is within bounds`,
    details: {
      actualBytes: actualSize
    }
  };
}
1963
/**
 * Formats a byte count for display using 1024-based units.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} "<n> bytes" below 1024, otherwise the value in KB or MB with one decimal.
 */
function formatBytes(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;
  if (bytes < KIB) {
    return `${bytes} bytes`;
  }
  const [divisor, unit] = bytes < MIB ? [KIB, "KB"] : [MIB, "MB"];
  return `${(bytes / divisor).toFixed(1)} ${unit}`;
}
1972
+
1973
+ // src/mcp/fixtures/mcpFixture.ts
1974
// Bound `test.step` from @playwright/test when it can be loaded, otherwise null.
// Callers check for null and run their work directly when no step API is available.
var testStep = null;
try {
  const playwrightTest = __require("@playwright/test")?.test;
  if (playwrightTest?.step) {
    testStep = playwrightTest.step.bind(playwrightTest);
  }
} catch {
  // Loading failed; leave testStep as null.
}
1982
/**
 * Builds the high-level MCP test API around a connected client.
 *
 * Without `testInfo` the returned methods are thin pass-throughs to the client.
 * With `testInfo`, each operation also attaches a JSON record of what happened to
 * the test, and `listTools` / `callTool` run inside `testStep` when it is available.
 *
 * @param {object} client - MCP client exposing `listTools`, `callTool` and `getServerVersion`.
 * @param {object} [testInfo] - Object with an `attach(name, options)` method; omit to disable tracking.
 * @param {{authType?: string, project?: string}} [options] - Metadata exposed on the
 *   fixture and included in `callTool` attachments; `authType` defaults to "none".
 * @returns {{client: object, authType: string, project: (string|undefined),
 *   listTools: Function, callTool: Function, getServerInfo: Function}}
 */
function createMCPFixture(client, testInfo, options) {
  const authType = options?.authType ?? "none";
  const project = options?.project;

  // Attach a pretty-printed JSON payload to the current test.
  const attachJson = (attachmentName, payload) => testInfo.attach(attachmentName, {
    contentType: "application/json",
    body: JSON.stringify(payload, null, 2)
  });
  // Run `work` as a named step when a step API was found, otherwise run it directly.
  const runAsStep = (title, work) => (testStep ? testStep(title, work) : work());
  // Reduce the client's server version record to name + version (null when unknown).
  const readServerInfo = () => {
    const serverVersion = client.getServerVersion();
    return serverVersion ? { name: serverVersion.name, version: serverVersion.version } : null;
  };

  if (!testInfo) {
    return {
      client,
      authType,
      project,
      async listTools() {
        const { tools } = await client.listTools();
        return tools;
      },
      async callTool(name, args) {
        return await client.callTool({ name, arguments: args });
      },
      getServerInfo() {
        return readServerInfo();
      }
    };
  }

  return {
    client,
    authType,
    project,
    async listTools() {
      return runAsStep("MCP: listTools()", async () => {
        const { tools } = await client.listTools();
        await attachJson("mcp-list-tools", {
          operation: "listTools",
          toolCount: tools.length,
          tools: tools.map((tool) => ({
            name: tool.name,
            description: tool.description
          }))
        });
        return tools;
      });
    },
    async callTool(name, args) {
      return runAsStep(`MCP: callTool("${name}")`, async () => {
        const startedAt = Date.now();
        const result = await client.callTool({ name, arguments: args });
        const durationMs = Date.now() - startedAt;
        await attachJson(`mcp-call-${name}`, {
          operation: "callTool",
          toolName: name,
          args,
          result,
          durationMs,
          isError: result.isError || false,
          authType,
          project
        });
        return result;
      });
    },
    getServerInfo() {
      const serverInfo = readServerInfo();
      // This method is synchronous, so the attachment is not awaited and a
      // rejected attach is ignored.
      attachJson("mcp-server-info", {
        operation: "getServerInfo",
        serverInfo
      }).catch(() => {
      });
      return serverInfo;
    }
  };
}
2091
+
2092
+ // src/assertions/matchers/toMatchToolResponse.ts
2093
/**
 * Matcher: passes when `validateResponse(received, expected)` passes.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {unknown} expected - Expected response handed to validateResponse.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toMatchToolResponse(received, expected) {
  const { pass, message } = validateResponse(received, expected);
  // Under `.not`, a passing validation is the failure case and needs its own wording.
  const describe = () => (this.isNot && pass ? "Expected response NOT to match, but it did" : message);
  return { pass, message: describe };
}
2105
+
2106
+ // src/assertions/matchers/toMatchToolSchema.ts
2107
/**
 * Matcher: passes when `validateSchema(received, schema, options)` passes.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {unknown} schema - Schema handed to validateSchema.
 * @param {object} [options] - Options forwarded to validateSchema.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toMatchToolSchema(received, schema, options = {}) {
  const outcome = validateSchema(received, schema, options);
  const describe = () => {
    const negatedAndMatched = this.isNot && outcome.pass;
    return negatedAndMatched ? "Expected response NOT to match schema, but it did" : outcome.message;
  };
  return { pass: outcome.pass, message: describe };
}
2119
+
2120
+ // src/assertions/matchers/toContainToolText.ts
2121
/**
 * Matcher: passes when `validateText(received, expected, options)` passes.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {string|string[]} expected - Text (or texts) handed to validateText.
 * @param {object} [options] - Options forwarded to validateText.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toContainToolText(received, expected, options = {}) {
  const outcome = validateText(received, expected, options);
  const describe = () => {
    if (!this.isNot) {
      return outcome.message;
    }
    // Quote each expected text for the negated failure message.
    const quoted = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
    return outcome.pass ? `Expected response NOT to contain ${quoted}, but it did` : outcome.message;
  };
  return { pass: outcome.pass, message: describe };
}
2134
+
2135
+ // src/assertions/matchers/toMatchToolPattern.ts
2136
/**
 * Matcher: passes when `validatePattern(received, patterns, options)` passes.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {string|RegExp|Array<string|RegExp>} patterns - Pattern(s) handed to validatePattern.
 * @param {object} [options] - Options forwarded to validatePattern.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toMatchToolPattern(received, patterns, options = {}) {
  const { pass, message } = validatePattern(received, patterns, options);
  return {
    pass,
    message: () => (this.isNot && pass ? "Expected response NOT to match pattern(s), but it did" : message)
  };
}
2148
// Named sanitizers: each entry pairs a global regex with the placeholder that
// replaces every match.
var BUILT_IN_PATTERNS = {
  // Standalone runs of 10 to 13 digits.
  timestamp: { pattern: /\b\d{10,13}\b/g, replacement: "[TIMESTAMP]" },
  // 8-4-4-4-12 hex groups with a version digit 1-5 and variant digit 8/9/a/b.
  uuid: {
    pattern: /\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/gi,
    replacement: "[UUID]"
  },
  // YYYY-MM-DD, optionally followed by a T-separated time, fraction and zone.
  "iso-date": {
    pattern: /\b\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,3})?(Z|[+-]\d{2}:?\d{2})?)?\b/g,
    replacement: "[ISO_DATE]"
  },
  // Standalone 24-character hex strings.
  objectId: { pattern: /\b[0-9a-f]{24}\b/gi, replacement: "[OBJECT_ID]" },
  // Three dot-separated base64url segments where the first two start with "eyJ".
  jwt: {
    pattern: /\beyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]+\b/g,
    replacement: "[JWT]"
  }
};
2170
/**
 * Tells whether a sanitizer is the regex form, i.e. a non-null object with a `pattern` key.
 *
 * @param {unknown} sanitizer - Sanitizer entry to classify.
 * @returns {boolean} True for objects that have `pattern` (own or inherited).
 */
function isRegexSanitizer(sanitizer) {
  if (sanitizer === null || typeof sanitizer !== "object") {
    return false;
  }
  return "pattern" in sanitizer;
}
2173
/**
 * Tells whether a sanitizer is the field-removal form, i.e. a non-null object with a `remove` key.
 *
 * @param {unknown} sanitizer - Sanitizer entry to classify.
 * @returns {boolean} True for objects that have `remove` (own or inherited).
 */
function isFieldRemovalSanitizer(sanitizer) {
  if (sanitizer === null || typeof sanitizer !== "object") {
    return false;
  }
  return "remove" in sanitizer;
}
2176
/**
 * Applies sanitizers to a string, in order, feeding each one the previous result.
 *
 * Supported forms:
 *  - a string naming an entry of BUILT_IN_PATTERNS (unknown names are skipped);
 *  - `{ pattern, replacement? }`: regex replacement; string patterns are compiled
 *    with the `g` flag, RegExp patterns are used as given; the replacement
 *    defaults to "[SANITIZED]";
 *  - `{ remove: string[] }`: parses the current text as JSON, removes the listed
 *    dotted paths and re-serialises with 2-space indentation.
 *
 * @param {string} value - Text to sanitize.
 * @param {Array<string|object>} sanitizers - Sanitizers to apply in sequence.
 * @returns {string} The sanitized text.
 */
function applySanitizers(value, sanitizers) {
  let sanitized = value;
  for (const sanitizer of sanitizers) {
    if (typeof sanitizer === "string") {
      const rule = BUILT_IN_PATTERNS[sanitizer];
      if (rule) {
        sanitized = sanitized.replace(rule.pattern, rule.replacement);
      }
    } else if (isRegexSanitizer(sanitizer)) {
      const { pattern } = sanitizer;
      const regex = pattern instanceof RegExp ? pattern : new RegExp(pattern, "g");
      sanitized = sanitized.replace(regex, sanitizer.replacement ?? "[SANITIZED]");
    } else if (isFieldRemovalSanitizer(sanitizer)) {
      try {
        const parsed = JSON.parse(sanitized);
        removeFields(parsed, sanitizer.remove);
        sanitized = JSON.stringify(parsed, null, 2);
      } catch {
        // Best effort: if this step throws (e.g. the text is not JSON), keep the text unchanged.
      }
    }
  }
  return sanitized;
}
2203
/**
 * Deletes the properties addressed by dotted paths from an object, in place.
 *
 * Each path is split on "."; every segment but the last selects a nested
 * container and the last names the property to delete. Paths that cannot be
 * resolved are skipped silently.
 *
 * Only own properties are followed while walking a path. Following inherited
 * ones would let a path such as "__proto__.x" or "constructor.prototype.x"
 * reach `Object.prototype` and delete properties shared by every object.
 *
 * @param {unknown} obj - Object (or array) to modify; non-objects are ignored.
 * @param {string[]} paths - Dotted property paths, e.g. "meta.requestId" or "items.0.id".
 * @returns {void}
 */
function removeFields(obj, paths) {
  if (typeof obj !== "object" || obj === null) {
    return;
  }
  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  for (const path3 of paths) {
    const parts = path3.split(".");
    // split() always yields at least one element, so lastKey is always a string.
    const lastKey = parts.pop();
    let current = obj;
    for (const key of parts) {
      const canDescend = typeof current === "object" && current !== null && hasOwn(current, key);
      if (!canDescend) {
        current = void 0;
        break;
      }
      current = current[key];
    }
    if (typeof current === "object" && current !== null) {
      delete current[lastKey];
    }
  }
}
2230
/**
 * Matcher: compares the (optionally sanitized) text of a tool response against a
 * named snapshot via `expect(...).toMatchSnapshot(name)`.
 *
 * `pass` always reports whether the snapshot matched. The expect framework
 * applies `.not` itself by failing a negated assertion when `pass` is true, so
 * the matcher must not pre-invert `pass`; doing so made `.not` succeed on a match
 * and fail on a mismatch.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response; its text is obtained via extractText2.
 * @param {string} name - Snapshot name.
 * @param {Array<string|object>} [sanitizers=[]] - Sanitizers applied to the text before comparison.
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toMatchToolSnapshot(received, name, sanitizers = []) {
  let content = extractText2(received);
  if (sanitizers.length > 0) {
    content = applySanitizers(content, sanitizers);
  }

  let matched = true;
  let mismatch;
  try {
    await expect$1(content).toMatchSnapshot(name);
  } catch (error) {
    matched = false;
    mismatch = error;
  }

  if (this.isNot) {
    return {
      pass: matched,
      message: () => matched ? `Expected response NOT to match snapshot "${name}", but it did` : `Response does not match snapshot "${name}" as expected`
    };
  }
  return {
    pass: matched,
    message: () => {
      if (matched) {
        return `Response matches snapshot "${name}"`;
      }
      return mismatch instanceof Error ? mismatch.message : `Response does not match snapshot "${name}"`;
    }
  };
}
2262
+
2263
+ // src/assertions/matchers/toBeToolError.ts
2264
/**
 * Matcher: asserts that a tool response is an error (optionally with expected text).
 *
 * Under `.not` the expectation handed to validateError is inverted (a boolean is
 * negated; a text expectation becomes "must not be an error"), and the validation
 * result is inverted again for `pass`. `pass` therefore still reports the positive
 * condition, which is what the expect framework needs to apply `.not`.
 *
 * The message is always validateError's own message for the effective expectation.
 * The negated assertion fails exactly when that validation fails, so its failure
 * text describes the real problem. The previous wording reported "Response is not
 * an error as expected" when `.not.toBeToolError()` failed on an error response.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {boolean|string|string[]} [expected=true] - See validateError.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toBeToolError(received, expected = true) {
  let effectiveExpected = expected;
  if (this.isNot) {
    effectiveExpected = typeof expected === "boolean" ? !expected : false;
  }
  const result = validateError(received, effectiveExpected);
  return {
    pass: this.isNot ? !result.pass : result.pass,
    message: () => result.message
  };
}
2281
/**
 * Creates an LLM judge backed by the Claude Agent SDK `query` function.
 *
 * @param {{model?: string, maxBudgetUsd?: number, maxToolOutputSize?: number}} config
 *   Judge settings; `model` defaults to "claude-sonnet-4-20250514" and
 *   `maxBudgetUsd` to 0.1. When `maxToolOutputSize` is set, larger candidates are
 *   rejected without calling the model.
 * @returns {{evaluate: (candidate: unknown, reference: unknown, rubric: string) => Promise<object>}}
 *   Judge whose `evaluate` resolves to `{ pass, score, reasoning, usage?,
 *   candidateSizeBytes, exceedsMaxToolOutputSize }` and rejects with an Error
 *   (carrying the original failure as `cause`) when the evaluation fails.
 */
function createClaudeAgentJudge(config) {
  const model = config.model ?? "claude-sonnet-4-20250514";
  const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
  const maxToolOutputSize = config.maxToolOutputSize;
  return {
    async evaluate(candidate, reference, rubric) {
      // JSON.stringify yields undefined for values such as undefined or functions;
      // fall back to String() so the size check below always receives a string.
      const candidateStr = typeof candidate === "string" ? candidate : (JSON.stringify(candidate, null, 2) ?? String(candidate));
      const candidateSizeBytes = Buffer.byteLength(candidateStr, "utf8");
      if (maxToolOutputSize !== void 0 && candidateSizeBytes > maxToolOutputSize) {
        return {
          pass: false,
          score: 0,
          reasoning: `Tool output size (${candidateSizeBytes} bytes) exceeds maximum allowed size (${maxToolOutputSize} bytes)`,
          candidateSizeBytes,
          exceedsMaxToolOutputSize: true
        };
      }
      const prompt = buildJudgePrompt(candidate, reference, rubric);
      try {
        let resultMessage;
        for await (const message of query({
          prompt,
          options: {
            model,
            maxBudgetUsd,
            // Use empty tools array for response-only mode
            tools: [],
            // Bypass permissions since we're not using any tools
            permissionMode: "bypassPermissions",
            allowDangerouslySkipPermissions: true,
            // Use a custom system prompt for JSON output
            systemPrompt: buildSystemPrompt(),
            // Limit to 1 turn since this is a simple evaluation
            maxTurns: 1
          }
        })) {
          // Keep the last "result" message of the stream.
          if (message.type === "result") {
            resultMessage = message;
          }
        }
        if (!resultMessage) {
          throw new Error("No result message received from Claude Agent SDK");
        }
        if (resultMessage.subtype !== "success" && resultMessage.errors?.length) {
          throw new Error(
            `Claude Agent SDK error: ${resultMessage.errors.join(", ")}`
          );
        }
        const responseText = resultMessage.result ?? "";
        const parsed = parseJudgeResponse(responseText);
        const usage = {
          inputTokens: resultMessage.usage?.input_tokens ?? 0,
          outputTokens: resultMessage.usage?.output_tokens ?? 0,
          totalCostUsd: resultMessage.total_cost_usd ?? 0,
          durationMs: resultMessage.duration_ms ?? 0,
          durationApiMs: resultMessage.duration_api_ms,
          cacheReadInputTokens: resultMessage.usage?.cache_read_input_tokens,
          cacheCreationInputTokens: resultMessage.usage?.cache_creation_input_tokens
        };
        return {
          pass: parsed.pass ?? false,
          score: parsed.score,
          reasoning: parsed.reasoning,
          usage,
          candidateSizeBytes,
          exceedsMaxToolOutputSize: false
        };
      } catch (error) {
        // Keep the original failure reachable through `cause`.
        throw new Error(
          `Claude Agent judge evaluation failed: ${error instanceof Error ? error.message : String(error)}`,
          { cause: error }
        );
      }
    }
  };
}
2356
/**
 * Builds the system prompt that instructs the judge model to answer with bare JSON.
 *
 * @returns {string} The fixed system prompt.
 */
function buildSystemPrompt() {
  const sentences = [
    "You are an expert evaluator.",
    "Evaluate the candidate response based on the rubric provided.",
    'Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}.',
    "Do not include any other text, markdown formatting, or code blocks."
  ];
  return sentences.join(" ");
}
2359
/**
 * Assembles the user prompt sent to the judge model.
 *
 * The prompt contains the rubric, the candidate response, the reference response
 * (only when it is neither null nor undefined) and closing instructions.
 * Non-string candidates and references are rendered as 2-space-indented JSON.
 *
 * @param {unknown} candidate - Response being evaluated.
 * @param {unknown} reference - Optional reference response.
 * @param {string} rubric - Evaluation criteria.
 * @returns {string} The complete prompt text.
 */
function buildJudgePrompt(candidate, reference, rubric) {
  const render = (value) => (typeof value === "string" ? value : JSON.stringify(value, null, 2));
  const hasReference = reference !== null && reference !== void 0;

  const sections = ["# Evaluation Task\n", rubric, "\n\n# Candidate Response\n", render(candidate)];
  if (hasReference) {
    sections.push("\n\n# Reference Response\n", render(reference));
  }
  sections.push(
    "\n\n# Instructions\nEvaluate the candidate response based on the rubric. " + (hasReference ? "Compare it against the reference response if helpful. " : "") + 'Respond with JSON containing "pass" (boolean), "score" (0-1), and "reasoning" (string).'
  );
  return sections.join("");
}
2378
/**
 * Parses the judge model's reply into an object.
 *
 * Leading/trailing Markdown code fences are stripped first. If the remaining text
 * is not valid JSON, the span from the first "{" to the last "}" that contains
 * `"pass"` is tried as a fallback.
 *
 * @param {string} text - Raw reply text from the judge.
 * @returns {any} The parsed JSON value.
 * @throws {Error} "Failed to parse judge response as JSON: ..." when neither the
 *   reply nor the fallback span parses. The fallback's own SyntaxError is not
 *   surfaced, so callers always receive this descriptive error.
 */
function parseJudgeResponse(text) {
  let jsonText = text.trim();
  if (jsonText.startsWith("```json")) {
    jsonText = jsonText.slice(7);
  }
  if (jsonText.startsWith("```")) {
    jsonText = jsonText.slice(3);
  }
  if (jsonText.endsWith("```")) {
    jsonText = jsonText.slice(0, -3);
  }
  jsonText = jsonText.trim();
  try {
    return JSON.parse(jsonText);
  } catch {
    const jsonMatch = jsonText.match(/\{[\s\S]*"pass"[\s\S]*\}/);
    if (jsonMatch) {
      try {
        return JSON.parse(jsonMatch[0]);
      } catch {
        // Fall through to the descriptive error below.
      }
    }
    throw new Error(`Failed to parse judge response as JSON: ${text}`);
  }
}
2400
+
2401
+ // src/judge/judgeClient.ts
2402
/**
 * Creates a judge for the configured provider.
 *
 * @param {{provider?: string}} [config] - Judge configuration; `provider`
 *   defaults to "claude". The whole object is forwarded to createClaudeAgentJudge.
 * @returns {object} The judge created by createClaudeAgentJudge for "claude" / "anthropic".
 * @throws {Error} For the retired "openai" and "custom-http" providers and for any other value.
 */
function createJudge(config = {}) {
  const provider = config.provider ?? "claude";
  if (provider === "claude" || provider === "anthropic") {
    return createClaudeAgentJudge(config);
  }
  if (provider === "openai") {
    throw new Error(
      'OpenAI provider is no longer supported. Please use createJudge() without specifying provider, or use provider: "claude". See migration guide at https://github.com/gleanwork/mcp-server-tester/blob/main/docs/migration-v0.11.md'
    );
  }
  if (provider === "custom-http") {
    throw new Error(
      "custom-http provider is no longer supported. Please use createJudge() without specifying provider."
    );
  }
  throw new Error(`Unsupported LLM provider: ${String(provider)}`);
}
2420
+
2421
+ // src/assertions/matchers/toPassToolJudge.ts
2422
// Minimum judge score that counts as passing when the caller gives no threshold.
var DEFAULT_PASSING_THRESHOLD = 0.7;
// Judge configuration used when the caller gives none.
var DEFAULT_JUDGE_CONFIG = {};
/**
 * Matcher: asks an LLM judge to grade a tool response against a rubric and passes
 * when the score reaches the threshold.
 *
 * `pass` always reports whether the score met the threshold. The expect framework
 * applies `.not` itself by failing a negated assertion when `pass` is true, so
 * pre-inverting `pass` made `.not.toPassToolJudge()` succeed for passing
 * evaluations. A judge error fails the assertion in both plain and negated form.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response to evaluate.
 * @param {string} rubric - Evaluation criteria given to the judge.
 * @param {{reference?: unknown, passingThreshold?: number, judgeConfig?: object}} [options]
 *   `reference` defaults to null, `passingThreshold` to DEFAULT_PASSING_THRESHOLD
 *   and `judgeConfig` to DEFAULT_JUDGE_CONFIG.
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toPassToolJudge(received, rubric, options = {}) {
  const {
    reference = null,
    passingThreshold = DEFAULT_PASSING_THRESHOLD,
    judgeConfig = DEFAULT_JUDGE_CONFIG
  } = options;
  const judge = createJudge(judgeConfig);
  try {
    const result = await judge.evaluate(received, reference, rubric);
    // A missing score falls back to the judge's boolean verdict.
    const score = result.score ?? (result.pass ? 1 : 0);
    const passes = score >= passingThreshold;
    return {
      pass: passes,
      message: () => {
        if (this.isNot) {
          return passes ? `Expected judge evaluation to fail, but it passed with score ${score.toFixed(2)}` : `Judge evaluation failed as expected with score ${score.toFixed(2)}`;
        }
        if (passes) {
          return `Judge evaluation passed with score ${score.toFixed(2)} (threshold: ${passingThreshold})`;
        }
        return `Judge evaluation failed with score ${score.toFixed(2)} (threshold: ${passingThreshold}). Reasoning: ${result.reasoning ?? "No reasoning provided"}`;
      }
    };
  } catch (error) {
    return {
      // `pass === isNot` fails the assertion whether or not it is negated, so an
      // evaluation error can never make `.not` pass silently.
      pass: Boolean(this.isNot),
      message: () => `Judge evaluation failed with error: ${error instanceof Error ? error.message : String(error)}`
    };
  }
}
2458
+
2459
+ // src/assertions/matchers/toHaveToolResponseSize.ts
2460
/**
 * Matcher: passes when `validateSize(received, options)` passes.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {{maxBytes?: number, minBytes?: number}} options - Bounds forwarded to validateSize.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toHaveToolResponseSize(received, options) {
  const outcome = validateSize(received, options);
  const describe = () => {
    if (this.isNot && outcome.pass) {
      return "Expected response size NOT to be within bounds, but it was";
    }
    return outcome.message;
  };
  return { pass: outcome.pass, message: describe };
}
2472
+
2473
+ // src/assertions/matchers/toSatisfyToolPredicate.ts
2474
/**
 * Normalises a predicate's return value to the `{ pass, message }` shape.
 *
 * @param {boolean|object} result - Raw predicate result.
 * @returns {object} A boolean is wrapped with a default message; any other value is returned unchanged.
 */
function normalizeResult(result) {
  if (typeof result !== "boolean") {
    return result;
  }
  const message = result ? "Predicate passed" : "Predicate returned false";
  return { pass: result, message };
}
2483
/**
 * Matcher: passes when a user-supplied predicate accepts the tool response.
 *
 * The predicate receives the response and its extracted text and may return a
 * boolean or a `{ pass, message? }` object, synchronously or as a promise.
 *
 * `pass` always reports the predicate's verdict. The expect framework applies
 * `.not` itself by failing a negated assertion when `pass` is true, so
 * pre-inverting `pass` made `.not.toSatisfyToolPredicate()` succeed when the
 * predicate passed. A predicate that throws fails the assertion in both plain
 * and negated form.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by `expect`.
 * @param {unknown} received - Tool response under test.
 * @param {(response: unknown, text: string) => (boolean|object|Promise<boolean|object>)} predicate
 *   Check to run against the response.
 * @param {string} [description] - Label used in messages; defaults to "custom predicate".
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toSatisfyToolPredicate(received, predicate, description) {
  const predicateDescription = description ?? "custom predicate";
  try {
    const text = extractText2(received);
    const rawResult = await predicate(received, text);
    const result = normalizeResult(rawResult);
    const passed = result.pass;
    return {
      pass: passed,
      message: () => {
        if (this.isNot) {
          return passed ? `Expected response NOT to satisfy ${predicateDescription}` : `Response does not satisfy ${predicateDescription} as expected`;
        }
        const fallback = passed ? `Response satisfies ${predicateDescription}` : `Expected response to satisfy ${predicateDescription}`;
        return result.message ?? fallback;
      }
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      // `pass === isNot` fails the assertion whether or not it is negated.
      pass: Boolean(this.isNot),
      message: () => `Predicate threw error: ${errorMessage}`
    };
  }
}
2508
+
2509
+ // src/assertions/matchers/index.ts
2510
// Playwright's `expect` extended with the MCP tool-response matchers defined above.
var expect = expect$1.extend({
  // Response content matchers
  toMatchToolResponse,
  toMatchToolSchema,
  toContainToolText,
  toMatchToolPattern,
  toMatchToolSnapshot,
  // Error, LLM judge, size and custom predicate matchers
  toBeToolError,
  toPassToolJudge,
  toHaveToolResponseSize,
  toSatisfyToolPredicate
});
2521
+
2522
+ // src/fixtures/mcp.ts
2523
init_oauthClientProvider();
var test = test$1.extend({
  /**
   * Internal per-test state: carries the auth type resolved by `mcpClient`
   * over to the `mcp` fixture.
   */
  _mcpFixtureState: [
    // eslint-disable-next-line no-empty-pattern
    async ({}, use) => {
      await use({ resolvedAuthType: "none" });
    },
    { scope: "test" }
  ],
  /**
   * mcpClient fixture: creates a connected MCP client and closes it after the test.
   *
   * Configuration is read from the project's `use.mcpConfig` in playwright.config.ts.
   *
   * Authentication resolution:
   * 1. `auth.oauth.authStatePath` set → a PlaywrightOAuthClientProvider is passed
   *    as authProvider (auth type "oauth")
   * 2. `auth.accessToken` set → auth type "api-token" (takes precedence over 1 for
   *    the reported auth type)
   * 3. HTTP config with neither of the above → tries a token stored by the CLI via
   *    CLIOAuthClient.tryGetAccessToken(); when one is found it is injected as
   *    `auth.accessToken` (auth type "oauth")
   */
  mcpClient: async ({ _mcpFixtureState }, use, testInfo) => {
    const { mcpConfig } = testInfo.project.use;
    if (!mcpConfig) {
      throw new Error(
        `Missing mcpConfig in project.use for project "${testInfo.project.name}". Please add mcpConfig to your project configuration in playwright.config.ts`
      );
    }

    const oauthSettings = mcpConfig.auth?.oauth;
    const staticToken = mcpConfig.auth?.accessToken;
    let authProvider;
    let resolvedAuthType = "none";
    let effectiveConfig = mcpConfig;

    if (oauthSettings?.authStatePath) {
      authProvider = new PlaywrightOAuthClientProvider({
        storagePath: oauthSettings.authStatePath,
        redirectUri: oauthSettings.redirectUri ?? "http://localhost:3000/oauth/callback",
        clientId: oauthSettings.clientId,
        clientSecret: oauthSettings.clientSecret
      });
      resolvedAuthType = "oauth";
    }
    if (staticToken) {
      resolvedAuthType = "api-token";
    }
    if (isHttpConfig(mcpConfig) && !staticToken && !oauthSettings?.authStatePath) {
      // No explicit credentials: look for a token stored for this server URL.
      const storedToken = await new CLIOAuthClient({
        mcpServerUrl: mcpConfig.serverUrl
      }).tryGetAccessToken();
      if (storedToken) {
        effectiveConfig = {
          ...mcpConfig,
          auth: {
            ...mcpConfig.auth,
            accessToken: storedToken.accessToken
          }
        };
        resolvedAuthType = "oauth";
      }
    }

    _mcpFixtureState.resolvedAuthType = resolvedAuthType;
    const client = await createMCPClientForConfig(effectiveConfig, {
      clientInfo: {
        name: "@gleanwork/mcp-server-tester",
        version: "0.1.0"
      },
      authProvider
    });
    try {
      await use(client);
    } finally {
      // Always close the client, even when the test body throws.
      await closeMCPClient(client);
    }
  },
  /**
   * mcp fixture: high-level test API built on the `mcpClient` fixture.
   *
   * Passes `testInfo` to createMCPFixture so MCP operations are attached to the
   * test, tagged with the resolved auth type and the project name.
   */
  mcp: async ({ mcpClient, _mcpFixtureState }, use, testInfo) => {
    const fixtureOptions = {
      authType: _mcpFixtureState.resolvedAuthType,
      project: testInfo.project.name
    };
    await use(createMCPFixture(mcpClient, testInfo, fixtureOptions));
  }
});
2615
// Settings for running an eval case through an LLM host.
var LLMHostConfigSchema = z.object({
  provider: z.enum(["openai", "anthropic"]),
  apiKeyEnvVar: z.string().optional(),
  model: z.string().optional(),
  maxTokens: z.number().optional(),
  temperature: z.number().optional(),
  maxToolCalls: z.number().optional()
});

// One snapshot sanitizer: a built-in name, a regex replacement, or a field removal.
var SnapshotSanitizerSchema = z.union([
  // Built-in sanitizers
  z.enum(["timestamp", "uuid", "iso-date", "objectId", "jwt"]),
  // Custom regex sanitizer
  z.object({ pattern: z.string(), replacement: z.string().optional() }),
  // Field removal sanitizer
  z.object({ remove: z.array(z.string()) })
]);

// Expectations that may be declared for an eval case; every key is optional.
var EvalExpectBlockSchema = z.object({
  response: z.unknown().optional(),
  schema: z.string().optional(),
  containsText: z.union([z.string(), z.array(z.string())]).optional(),
  matchesPattern: z.union([z.string(), z.array(z.string())]).optional(),
  snapshot: z.string().optional(),
  snapshotSanitizers: z.array(SnapshotSanitizerSchema).optional(),
  isError: z.union([z.boolean(), z.string(), z.array(z.string())]).optional(),
  passesJudge: z
    .object({
      rubric: z.string(),
      reference: z.unknown().optional(),
      threshold: z.number().min(0).max(1).optional(),
      configId: z.string().optional()
    })
    .optional(),
  responseSize: z
    .object({
      maxBytes: z.number().optional(),
      minBytes: z.number().optional()
    })
    .optional()
});

// A single eval case; only `id` is required.
var EvalCaseSchema = z.object({
  id: z.string().min(1, "id must not be empty"),
  description: z.string().optional(),
  mode: z.enum(["direct", "llm_host"]).optional(),
  toolName: z.string().min(1, "toolName must not be empty").optional(),
  args: z.record(z.unknown()).optional(),
  scenario: z.string().optional(),
  llmHostConfig: LLMHostConfigSchema.optional(),
  metadata: z.record(z.unknown()).optional(),
  expect: EvalExpectBlockSchema.optional()
});

// A named dataset holding at least one eval case.
var EvalDatasetSchema = z.object({
  name: z.string().min(1, "name must not be empty"),
  description: z.string().optional(),
  cases: z.array(EvalCaseSchema).min(1, "dataset must have at least one case"),
  metadata: z.record(z.unknown()).optional()
});
2672
/**
 * Validates a single eval case against EvalCaseSchema.
 *
 * @param {unknown} evalCase - Candidate eval case.
 * @returns {object} The value returned by `EvalCaseSchema.parse`.
 * @throws Whatever `EvalCaseSchema.parse` throws for invalid input.
 */
function validateEvalCase(evalCase) {
  const parsedCase = EvalCaseSchema.parse(evalCase);
  return parsedCase;
}
2675
/**
 * Validates a whole eval dataset against EvalDatasetSchema.
 *
 * @param {unknown} dataset - Candidate dataset.
 * @returns {object} The value returned by `EvalDatasetSchema.parse`.
 * @throws Whatever `EvalDatasetSchema.parse` throws for invalid input.
 */
function validateEvalDataset(dataset) {
  const parsedDataset = EvalDatasetSchema.parse(dataset);
  return parsedDataset;
}
2678
/**
 * Loads an eval dataset from a JSON file.
 *
 * @param {string} filePath - Path of the JSON file (read as UTF-8).
 * @param {{schemas?: object, validate?: boolean}} [options] - `schemas` is attached
 *   to the result as `schemas` (default `{}`); `validate` (default true) runs
 *   validateEvalDataset on the parsed data.
 * @returns {Promise<object>} The dataset with a `schemas` property added.
 * @throws {Error} "Failed to parse JSON from <path>: ..." when the file is not valid
 *   JSON; the original SyntaxError is available as `cause`. File-system and
 *   validation errors propagate unchanged.
 */
async function loadEvalDataset(filePath, options = {}) {
  const { schemas, validate = true } = options;
  const fileContents = await readFile(filePath, "utf-8");

  // Only the parse step is wrapped, so no other failure is reported as a JSON error.
  let rawData;
  try {
    rawData = JSON.parse(fileContents);
  } catch (error) {
    if (error instanceof SyntaxError) {
      throw new Error(
        `Failed to parse JSON from ${filePath}: ${error.message}`,
        { cause: error }
      );
    }
    throw error;
  }

  const serializedDataset = validate ? validateEvalDataset(rawData) : rawData;
  return {
    ...serializedDataset,
    schemas: schemas ?? {}
  };
}
2698
/**
 * Builds an eval dataset from an in-memory object.
 *
 * @param {unknown} data - Dataset data.
 * @param {{schemas?: object, validate?: boolean}} [options] - `schemas` is attached
 *   to the result as `schemas` (default `{}`); `validate` (default true) runs
 *   validateEvalDataset on `data`.
 * @returns {object} A new object: the dataset's properties plus `schemas`.
 */
function loadEvalDatasetFromObject(data, options = {}) {
  const { schemas: schemaRegistry, validate: shouldValidate = true } = options;
  const base = shouldValidate ? validateEvalDataset(data) : data;
  return { ...base, schemas: schemaRegistry ?? {} };
}
2707
+
2708
+ // src/evals/llmHost/adapter.ts
2709
// Registry of adapter factories, keyed by provider name.
var adapters = /* @__PURE__ */ new Map();

/**
 * Registers (or replaces) the adapter factory for a provider.
 *
 * @param {string} provider - Provider name used as the registry key.
 * @param {() => object} factory - Function that creates the adapter.
 * @returns {void}
 */
function registerAdapter(provider, factory) {
  adapters.set(provider, factory);
}

/**
 * Creates an adapter for a provider by calling its registered factory.
 *
 * @param {string} provider - Provider name.
 * @returns {object} Whatever the factory returns; the factory runs on every call.
 * @throws {Error} When no factory is registered; the message lists the registered providers.
 */
function getAdapter(provider) {
  const factory = adapters.get(provider);
  if (factory) {
    return factory();
  }
  const available = [...adapters.keys()].join(", ");
  throw new Error(`No adapter registered for provider: ${provider}. Available: ${available}`);
}

/**
 * Reports whether a provider has a registered adapter factory.
 *
 * @param {string} provider - Provider name.
 * @returns {boolean} True when a factory is registered under that name.
 */
function hasAdapter(provider) {
  return adapters.has(provider);
}
2725
+
2726
+ // src/evals/llmHost/retry.ts
2727
+ var DEFAULT_OPTIONS = {
2728
+ maxAttempts: 3,
2729
+ baseDelayMs: 1e3,
2730
+ maxDelayMs: 3e4,
2731
+ isRetryable: isRetryableError
2732
+ };
2733
+ async function withRetry(fn, options = {}) {
2734
+ const {
2735
+ maxAttempts = DEFAULT_OPTIONS.maxAttempts,
2736
+ baseDelayMs = DEFAULT_OPTIONS.baseDelayMs,
2737
+ maxDelayMs = DEFAULT_OPTIONS.maxDelayMs,
2738
+ isRetryable = DEFAULT_OPTIONS.isRetryable,
2739
+ onRetry
2740
+ } = options;
2741
+ let lastError;
2742
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
2743
+ try {
2744
+ return await fn();
2745
+ } catch (error) {
2746
+ lastError = error;
2747
+ if (attempt >= maxAttempts || !isRetryable(error)) {
2748
+ throw error;
2749
+ }
2750
+ const exponentialDelay = baseDelayMs * Math.pow(2, attempt - 1);
2751
+ const jitter = Math.random() * 0.1 * exponentialDelay;
2752
+ const delayMs = Math.min(exponentialDelay + jitter, maxDelayMs);
2753
+ if (onRetry) {
2754
+ onRetry(error, attempt, delayMs);
2755
+ }
2756
+ await sleep(delayMs);
2757
+ }
2758
+ }
2759
+ throw lastError;
2760
+ }
2761
+ function isRetryableError(error) {
2762
+ const statusCode = extractStatusCode(error);
2763
+ if (statusCode !== null) {
2764
+ return [429, 500, 502, 503, 504].includes(statusCode);
2765
+ }
2766
+ const message = extractErrorMessage2(error).toLowerCase();
2767
+ return message.includes("rate limit") || message.includes("429") || message.includes("too many requests") || message.includes("timeout") || message.includes("temporarily unavailable") || message.includes("service unavailable") || message.includes("internal server error");
2768
+ }
2769
/**
 * Pulls a numeric status code out of an error-like object.
 *
 * Candidate locations are probed in a fixed order and the first one holding a
 * number wins: `status`, `statusCode`, `response.status`, then `code`.
 *
 * @param {unknown} error - Value to inspect.
 * @returns {number | null} The first numeric code found, or `null` when the
 *   value is not an object or carries no numeric code.
 */
function extractStatusCode(error) {
  if (error === null || error === undefined || typeof error !== "object") {
    return null;
  }

  // Each reader is evaluated lazily so later fields are only touched when the
  // earlier ones did not yield a number.
  const readers = [
    (source) => source.status,
    (source) => source.statusCode,
    (source) => source.response && typeof source.response === "object" ? source.response.status : undefined,
    (source) => source.code
  ];

  for (const read of readers) {
    const candidate = read(error);
    if (typeof candidate === "number") {
      return candidate;
    }
  }
  return null;
}
2791
/**
 * Produces a human-readable message for an arbitrary thrown value.
 *
 * Resolution order: nullish -> "", strings as-is, `Error#message`, then for
 * other objects a string `message` or `error` property, then a JSON
 * rendering. Numbers and booleans are stringified; anything else yields
 * "Unknown error".
 *
 * This function never throws: it is called while classifying a failure, so a
 * serialization problem here must not replace the error being inspected.
 *
 * @param {unknown} error - Value to describe.
 * @returns {string} Best-effort message; always a string.
 */
function extractErrorMessage2(error) {
  if (error == null) {
    return "";
  }
  if (typeof error === "string") {
    return error;
  }
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "object") {
    if (typeof error.message === "string") {
      return error.message;
    }
    if (typeof error.error === "string") {
      return error.error;
    }
    try {
      // JSON.stringify throws on circular references and BigInt values, and
      // returns undefined when e.g. toJSON() yields undefined.
      const serialized = JSON.stringify(error);
      if (typeof serialized === "string") {
        return serialized;
      }
    } catch {
      // Not serializable; fall through to the generic message below.
    }
    return "Unknown error";
  }
  if (typeof error === "number" || typeof error === "boolean") {
    return String(error);
  }
  return "Unknown error";
}
2816
/**
 * Returns a promise that settles after a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Promise resolved by the timer callback.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
2819
+
2820
+ // src/evals/llmHost/orchestrator.ts
2821
/**
 * Drives a multi-turn conversation between an LLM adapter and an MCP server.
 *
 * The scenario is sent as the first user message. On every turn the adapter's
 * `chat` is invoked (through `withRetry`); if the model asks for tool calls,
 * each one is executed via `mcp.callTool` and its text result is fed back,
 * otherwise the model's text is taken as the final answer and the loop ends.
 * At most `config.maxToolCalls` turns (default 10) are performed.
 *
 * Failures are not thrown: any error is reported through `success: false`.
 *
 * @param {object} adapter - Provider adapter (createClient, formatTools, chat, message builders).
 * @param {object} mcp - MCP fixture exposing `listTools` and `callTool`.
 * @param {string} scenario - Prompt used as the initial user message.
 * @param {object} config - LLM host configuration; `maxToolCalls` caps the turns.
 * @param {object} [options] - `retry` holds the options forwarded to `withRetry`.
 * @returns {Promise<object>} `{ success, toolCalls, response | error, conversationHistory }`.
 */
async function runSimulation(adapter, mcp, scenario, config, options = {}) {
  const turnLimit = config.maxToolCalls || 10;
  const retrySettings = options.retry || {};
  const recordedCalls = [];
  const transcript = [];

  try {
    const llmClient = await adapter.createClient(config);
    const availableTools = await mcp.listTools();
    const toolsForModel = adapter.formatTools(availableTools);
    const messages = [adapter.createUserMessage(scenario)];
    transcript.push({ role: "user", content: scenario });

    let answer = "";
    let turn = 0;
    while (turn < turnLimit) {
      turn += 1;
      const reply = await withRetry(
        () => adapter.chat(llmClient, messages, toolsForModel, config),
        retrySettings
      );

      const requestsTools = reply.wantsToolCalls && reply.toolCalls.length > 0;
      if (!requestsTools) {
        answer = reply.textContent || "";
        transcript.push({ role: "assistant", content: answer });
        break;
      }

      messages.push(adapter.createAssistantMessage(reply));
      const toolOutputs = [];
      for (const call of reply.toolCalls) {
        recordedCalls.push(call);
        const rawResult = await mcp.callTool(call.name, call.arguments);
        const outputText = extractText(rawResult);
        toolOutputs.push(adapter.createToolResultMessage(call, outputText));
        transcript.push({ role: "tool", content: outputText });
      }

      // Anthropic takes all tool results bundled in a single user message;
      // every other provider gets one message per tool result.
      if (adapter.provider === "anthropic") {
        messages.push({ role: "user", content: toolOutputs });
      } else {
        toolOutputs.forEach((output) => {
          messages.push(output);
        });
      }
    }

    return {
      success: true,
      toolCalls: recordedCalls,
      response: answer,
      conversationHistory: transcript
    };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      toolCalls: recordedCalls,
      error: reason,
      conversationHistory: transcript
    };
  }
}
2886
+
2887
+ // src/evals/llmHost/adapters/openai.ts
2888
/**
 * Builds the LLM-host adapter for OpenAI's Chat Completions API.
 *
 * The returned object exposes the adapter contract used by the orchestrator:
 * `createClient`, `formatTools`, `chat`, and the three message builders.
 *
 * @returns {object} Adapter whose `provider` is "openai".
 */
function createOpenAIAdapter() {
  /**
   * Decodes the JSON argument string of one tool call.
   * A missing or blank string is treated as "no arguments" ({}); malformed
   * JSON is reported with the tool name so the failure is diagnosable.
   */
  const parseToolArguments = (toolName, rawArguments) => {
    if (rawArguments == null || (typeof rawArguments === "string" && rawArguments.trim() === "")) {
      return {};
    }
    try {
      return JSON.parse(rawArguments);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      throw new SyntaxError(
        `OpenAI returned invalid JSON arguments for tool "${toolName}": ${reason}`,
        { cause }
      );
    }
  };

  return {
    provider: "openai",

    /**
     * Lazily loads the optional `openai` package and instantiates a client.
     * The API key is read from `config.apiKeyEnvVar` (default OPENAI_API_KEY).
     * @throws {Error} When the SDK cannot be imported or the key is unset.
     */
    async createClient(config) {
      let OpenAI;
      try {
        const module = await import('openai');
        OpenAI = module.OpenAI;
      } catch {
        throw new Error(
          "OpenAI SDK is not installed. Install it with: npm install openai"
        );
      }
      const apiKeyEnvVar = config.apiKeyEnvVar || "OPENAI_API_KEY";
      const apiKey = process.env[apiKeyEnvVar];
      if (!apiKey) {
        throw new Error(
          `OpenAI API key not found in environment variable ${apiKeyEnvVar}`
        );
      }
      return new OpenAI({ apiKey });
    },

    /** Converts MCP tool descriptors into OpenAI function-tool definitions. */
    formatTools(tools) {
      return tools.map((tool) => ({
        type: "function",
        function: {
          name: tool.name,
          description: tool.description || "",
          parameters: tool.inputSchema || {}
        }
      }));
    },

    /**
     * Sends one chat-completion request and normalizes the first choice.
     * @returns {Promise<object>} `{ wantsToolCalls, toolCalls, textContent, rawResponse }`.
     * @throws {Error} When the response has no message.
     * @throws {SyntaxError} When a tool call carries malformed JSON arguments.
     */
    async chat(client, messages, tools, config) {
      const request = {
        model: config.model || "gpt-4o",
        messages,
        temperature: config.temperature ?? 0,
        max_tokens: config.maxTokens
      };
      // The API rejects an empty `tools` array, so the field is only sent
      // when there is at least one tool to offer.
      if (tools?.length > 0) {
        request.tools = tools;
      }

      const response = await client.chat.completions.create(request);
      const message = response.choices[0]?.message;
      if (!message) {
        throw new Error("No response from OpenAI");
      }

      if (message.tool_calls && message.tool_calls.length > 0) {
        const toolCalls = message.tool_calls.map((tc) => ({
          name: tc.function.name,
          arguments: parseToolArguments(tc.function.name, tc.function.arguments),
          id: tc.id
        }));
        return {
          wantsToolCalls: true,
          toolCalls,
          textContent: message.content,
          rawResponse: response
        };
      }
      return {
        wantsToolCalls: false,
        toolCalls: [],
        textContent: message.content,
        rawResponse: response
      };
    },

    /** Wraps the scenario text as a user message. */
    createUserMessage(scenario) {
      return {
        role: "user",
        content: scenario
      };
    },

    /** Rebuilds the assistant turn, echoing the raw tool calls back to the API. */
    createAssistantMessage(chatResult) {
      const rawResponse = chatResult.rawResponse;
      return {
        role: "assistant",
        content: chatResult.textContent,
        tool_calls: rawResponse.choices[0]?.message?.tool_calls
      };
    },

    /** Builds the `tool` role message answering a specific tool call id. */
    createToolResultMessage(toolCall, result) {
      return {
        role: "tool",
        tool_call_id: toolCall.id,
        content: result
      };
    }
  };
}
2977
+
2978
+ // src/evals/llmHost/adapters/anthropic.ts
2979
/**
 * Builds the LLM-host adapter for Anthropic's Messages API.
 *
 * The returned object implements the adapter contract used by the
 * orchestrator: `createClient`, `formatTools`, `chat`, and the message
 * builders. Tool results are emitted as `tool_result` content blocks.
 *
 * @returns {object} Adapter whose `provider` is "anthropic".
 */
function createAnthropicAdapter() {
  const loadSdk = async () => {
    try {
      const module = await import('@anthropic-ai/sdk');
      return module.default;
    } catch {
      throw new Error(
        "Anthropic SDK is not installed. Install it with: npm install @anthropic-ai/sdk"
      );
    }
  };

  const toToolCall = (block) => ({
    name: block.name,
    arguments: block.input,
    id: block.id
  });

  return {
    provider: "anthropic",

    /**
     * Loads the optional SDK and creates a client using the key stored in
     * `config.apiKeyEnvVar` (default ANTHROPIC_API_KEY).
     */
    async createClient(config) {
      const Anthropic = await loadSdk();
      const apiKeyEnvVar = config.apiKeyEnvVar || "ANTHROPIC_API_KEY";
      const apiKey = process.env[apiKeyEnvVar];
      if (apiKey) {
        return new Anthropic({ apiKey });
      }
      throw new Error(
        `Anthropic API key not found in environment variable ${apiKeyEnvVar}`
      );
    },

    /** Maps MCP tool descriptors to Anthropic tool definitions. */
    formatTools(tools) {
      return tools.map(({ name, description, inputSchema }) => ({
        name,
        description: description || "",
        input_schema: inputSchema || {}
      }));
    },

    /**
     * Sends one Messages request and normalizes the reply.
     * The first text block (if non-empty) becomes `textContent`; a
     * `tool_use` stop reason yields the requested tool calls.
     * @throws {Error} When the reply stopped because of `max_tokens`.
     */
    async chat(client, messages, tools, config) {
      const response = await client.messages.create({
        model: config.model || "claude-3-5-sonnet-20241022",
        max_tokens: config.maxTokens || 4096,
        temperature: config.temperature ?? 0,
        messages,
        tools
      });

      const firstText = response.content.find((block) => block.type === "text");
      const textContent = firstText?.text || null;
      const stopReason = response.stop_reason;

      if (stopReason !== "tool_use" && stopReason === "max_tokens") {
        throw new Error("Response exceeded max tokens");
      }

      const wantsToolCalls = stopReason === "tool_use";
      const toolCalls = wantsToolCalls
        ? response.content.filter((block) => block.type === "tool_use").map(toToolCall)
        : [];
      return {
        wantsToolCalls,
        toolCalls,
        textContent,
        rawResponse: response
      };
    },

    /** Wraps the scenario text as a user message. */
    createUserMessage(scenario) {
      return { role: "user", content: scenario };
    },

    /** Replays the raw content blocks of the previous reply as the assistant turn. */
    createAssistantMessage(chatResult) {
      const { content } = chatResult.rawResponse;
      return { role: "assistant", content };
    },

    /** Builds a `tool_result` block answering the given tool-use id. */
    createToolResultMessage(toolCall, result) {
      return {
        type: "tool_result",
        tool_use_id: toolCall.id,
        content: result
      };
    }
  };
}
3066
+
3067
+ // src/evals/llmHost/llmHostSimulation.ts
3068
// Register the built-in adapter factories under their provider names.
for (const [providerName, adapterFactory] of [
  ["openai", createOpenAIAdapter],
  ["anthropic", createAnthropicAdapter]
]) {
  registerAdapter(providerName, adapterFactory);
}
3070
/**
 * Runs an LLM-host simulation for the provider named in `config.provider`.
 *
 * The adapter is looked up with `getAdapter` and the run is delegated to
 * `runSimulation` with a fixed retry policy: 3 attempts, 1000 ms base delay,
 * 30000 ms maximum delay.
 *
 * @param {object} mcp - MCP fixture passed through to the simulation.
 * @param {string} scenario - Prompt for the simulated conversation.
 * @param {object} config - LLM host configuration, including `provider`.
 * @returns {Promise<object>} The result produced by `runSimulation`.
 */
async function simulateLLMHost(mcp, scenario, config) {
  const adapter = getAdapter(config.provider);
  const retry = {
    maxAttempts: 3,
    baseDelayMs: 1e3,
    maxDelayMs: 3e4
  };
  return runSimulation(adapter, mcp, scenario, config, { retry });
}
3080
/**
 * Reports whether an adapter is registered for the given provider name.
 *
 * @param {string} provider - Provider identifier to look up.
 * @returns {*} Whatever `hasAdapter(provider)` returns.
 */
function isProviderAvailable(provider) {
  const registered = hasAdapter(provider);
  return registered;
}
3083
/**
 * Returns the install hint shown when a provider's SDK is missing.
 *
 * @param {string} provider - Provider identifier ("openai" or "anthropic").
 * @returns {string} Install instructions, or an "Unknown provider" message
 *   for any other value.
 */
function getMissingDependencyMessage(provider) {
  // A Map avoids accidental hits on inherited object keys such as "toString".
  const installHints = new Map([
    ["openai", "OpenAI SDK is not installed. Install it with: npm install openai"],
    ["anthropic", "Anthropic SDK is not installed. Install it with: npm install @anthropic-ai/sdk"]
  ]);
  if (installHints.has(provider)) {
    return installHints.get(provider);
  }
  return `Unknown provider: ${String(provider)}`;
}
3093
+
3094
+ // src/evals/evalRunner.ts
3095
/**
 * Executes one eval case and returns its raw outcome without throwing.
 *
 * - `llm_host` mode: requires `scenario` and `llmHostConfig`, runs the LLM
 *   host simulation, and returns the whole simulation result.
 * - `direct` mode (default): requires `toolName` and `args` and calls the tool
 *   via `mcp.callTool`. The full tool result is returned when the case sets
 *   `expect.isError`; otherwise `structuredContent` is preferred over `content`.
 *
 * @param {object} evalCase - Case definition (id, mode, toolName, args, ...).
 * @param {object} mcp - MCP fixture exposing `callTool`.
 * @returns {Promise<{response: unknown, error?: string}>} On failure
 *   `response` is `undefined` and `error` holds the message.
 */
async function executeToolCall(evalCase, mcp) {
  const mode = evalCase.mode || "direct";

  const runThroughLLMHost = async () => {
    const { scenario, llmHostConfig } = evalCase;
    if (!scenario) {
      throw new Error(
        `Eval case ${evalCase.id}: scenario is required for llm_host mode`
      );
    }
    if (!llmHostConfig) {
      throw new Error(
        `Eval case ${evalCase.id}: llmHostConfig is required for llm_host mode`
      );
    }
    const simulation = await simulateLLMHost(mcp, scenario, llmHostConfig);
    if (simulation.success) {
      return { response: simulation };
    }
    throw new Error(simulation.error || "LLM host simulation failed");
  };

  const runDirectly = async () => {
    const { toolName, args } = evalCase;
    if (!toolName) {
      throw new Error(
        `Eval case ${evalCase.id}: toolName is required for direct mode`
      );
    }
    if (!args) {
      throw new Error(
        `Eval case ${evalCase.id}: args is required for direct mode`
      );
    }
    const toolResult = await mcp.callTool(toolName, args);
    // Error expectations need the full result, including its isError flag.
    const wantsFullResult = evalCase.expect?.isError !== undefined;
    return {
      response: wantsFullResult ? toolResult : toolResult.structuredContent ?? toolResult.content
    };
  };

  try {
    return mode === "llm_host" ? await runThroughLLMHost() : await runDirectly();
  } catch (err) {
    return {
      response: undefined,
      error: err instanceof Error ? err.message : String(err)
    };
  }
}
3142
/**
 * Determines whether an eval case passed overall.
 *
 * A case passes when no error was recorded and every expectation entry is
 * either `undefined` (not evaluated) or has a truthy `pass`.
 *
 * @param {string | undefined} error - Execution error, if any.
 * @param {object} expectations - Map of expectation name to its result.
 * @returns {boolean} `true` when the case passed.
 */
function didCasePass(error, expectations) {
  if (error) {
    return false;
  }
  for (const outcome of Object.values(expectations)) {
    if (outcome !== undefined && !outcome.pass) {
      return false;
    }
  }
  return true;
}
3147
/**
 * Evaluates every expectation present in an eval case's `expect` block.
 *
 * Each expectation that is set (not `undefined`) contributes one entry to the
 * returned map, in this order: `exact`, `schema`, `textContains`, `regex`,
 * `error`, `size`, `judge`, `snapshot`. Judge and snapshot failures are
 * captured as failed entries rather than thrown.
 *
 * @param {object} expectBlock - The case's expectations.
 * @param {unknown} response - Response produced by the tool call or simulation.
 * @param {object} config - `schemas`, `judgeConfigs`, and `playwrightExpect`.
 * @returns {Promise<object>} Map of expectation name to `{ pass, details }`.
 */
async function runExpectBlockValidations(expectBlock, response, config) {
  const results = {};
  const describeError = (err) => err instanceof Error ? err.message : String(err);
  const asOutcome = (validation) => ({
    pass: validation.pass,
    details: validation.message
  });

  // Scores the response with an LLM judge and compares against the threshold.
  const judgeOutcome = async ({ rubric, reference, threshold = 0.7, configId }) => {
    const judgeConfig = configId ? config.judgeConfigs?.[configId] ?? {} : {};
    try {
      const judge = createJudge(judgeConfig);
      const verdict = await judge.evaluate(response, reference ?? null, rubric);
      // Judges that only report pass/fail are mapped onto a 1/0 score.
      const score = verdict.score ?? (verdict.pass ? 1 : 0);
      if (score >= threshold) {
        return { pass: true, details: `Judge passed with score ${score.toFixed(2)}` };
      }
      return {
        pass: false,
        details: `Judge failed with score ${score.toFixed(2)} (threshold: ${threshold}). ${verdict.reasoning ?? ""}`
      };
    } catch (err) {
      return { pass: false, details: `Judge evaluation error: ${describeError(err)}` };
    }
  };

  // Compares the response with a stored snapshot through the Playwright matcher.
  const snapshotOutcome = async () => {
    if (!config.playwrightExpect) {
      return { pass: false, details: "Snapshot testing requires expect in context" };
    }
    try {
      const sanitizers = expectBlock.snapshotSanitizers ?? [];
      await config.playwrightExpect(response).toMatchToolSnapshot(expectBlock.snapshot, sanitizers);
      return { pass: true, details: `Matches snapshot "${expectBlock.snapshot}"` };
    } catch (err) {
      return { pass: false, details: describeError(err) };
    }
  };

  if (expectBlock.response !== undefined) {
    results.exact = asOutcome(validateResponse(response, expectBlock.response));
  }
  if (expectBlock.schema !== undefined) {
    const schema = config.schemas?.[expectBlock.schema];
    results.schema = schema
      ? asOutcome(validateSchema(response, schema))
      : { pass: false, details: `Schema "${expectBlock.schema}" not found in schemas registry` };
  }
  if (expectBlock.containsText !== undefined) {
    results.textContains = asOutcome(validateText(response, expectBlock.containsText));
  }
  if (expectBlock.matchesPattern !== undefined) {
    results.regex = asOutcome(validatePattern(response, expectBlock.matchesPattern));
  }
  if (expectBlock.isError !== undefined) {
    results.error = asOutcome(validateError(response, expectBlock.isError));
  }
  if (expectBlock.responseSize !== undefined) {
    results.size = asOutcome(validateSize(response, expectBlock.responseSize));
  }
  if (expectBlock.passesJudge !== undefined) {
    results.judge = await judgeOutcome(expectBlock.passesJudge);
  }
  if (expectBlock.snapshot !== undefined) {
    results.snapshot = await snapshotOutcome();
  }
  return results;
}
3254
/**
 * Runs a single eval case end to end and builds its result record.
 *
 * The case is executed with `executeToolCall`; when that succeeds and the case
 * has an `expect` block, the expectations are validated. Timing covers both
 * steps.
 *
 * @param {object} evalCase - Case definition.
 * @param {object} context - Provides `mcp` and, optionally, Playwright's `expect`.
 * @param {object} [options] - `datasetName`, `schemas`, `judgeConfigs`.
 * @returns {Promise<object>} Result record (id, pass, response, error, expectations, durationMs, ...).
 */
async function runEvalCase(evalCase, context, options = {}) {
  const startedAt = Date.now();
  const mode = evalCase.mode || "direct";
  const { response, error } = await executeToolCall(evalCase, context.mcp);

  // Expectations are only meaningful when the call itself did not fail.
  const shouldValidate = !error && evalCase.expect;
  const expectations = shouldValidate
    ? await runExpectBlockValidations(evalCase.expect, response, {
        schemas: options.schemas,
        judgeConfigs: options.judgeConfigs,
        playwrightExpect: context.expect
      })
    : {};

  return {
    id: evalCase.id,
    datasetName: options.datasetName ?? "single-case",
    toolName: evalCase.toolName ?? evalCase.scenario ?? "unknown",
    mode,
    source: "eval",
    pass: didCasePass(error, expectations),
    response,
    error,
    expectations,
    authType: context.mcp.authType,
    project: context.mcp.project,
    durationMs: Date.now() - startedAt
  };
}
3285
/**
 * Runs every case of a dataset sequentially and summarizes the outcome.
 *
 * Schemas supplied in `options.schemas` override same-named dataset schemas.
 * After each case `onCaseComplete` (if given) is awaited; with
 * `stopOnFailure` the run ends after the first failing case. When
 * `context.testInfo` is present, the case results are attached to it as JSON
 * under the name "mcp-test-results".
 *
 * @param {object} options - `dataset`, `schemas`, `judgeConfigs`, `stopOnFailure`, `onCaseComplete`.
 * @param {object} context - Eval context forwarded to each case; may carry `testInfo`.
 * @returns {Promise<object>} `{ total, passed, failed, caseResults, durationMs }`.
 */
async function runEvalDataset(options, context) {
  const {
    dataset,
    schemas,
    judgeConfigs,
    stopOnFailure = false,
    onCaseComplete
  } = options;
  const startedAt = Date.now();
  const mergedSchemas = { ...dataset.schemas, ...schemas };
  const caseResults = [];

  for (const evalCase of dataset.cases) {
    const outcome = await runEvalCase(evalCase, context, {
      datasetName: dataset.name,
      schemas: mergedSchemas,
      judgeConfigs
    });
    caseResults.push(outcome);
    if (onCaseComplete) {
      await onCaseComplete(outcome);
    }
    if (stopOnFailure && !outcome.pass) {
      break;
    }
  }

  let passed = 0;
  for (const outcome of caseResults) {
    if (outcome.pass) {
      passed += 1;
    }
  }
  const total = caseResults.length;
  const summary = {
    total,
    passed,
    failed: total - passed,
    caseResults,
    durationMs: Date.now() - startedAt
  };

  if (context.testInfo) {
    await context.testInfo.attach("mcp-test-results", {
      contentType: "application/json",
      body: Buffer.from(JSON.stringify({ caseResults }))
    });
  }
  return summary;
}
3331
+
3332
+ // src/evals/llmHost/toolCallExpectation.ts
3333
/**
 * Checks that every expected argument is present in the actual arguments with
 * an equal value. Extra keys in `actual` are ignored (subset match).
 *
 * Values are compared through their JSON rendering after sorting the keys of
 * plain objects, so nested objects that differ only in key order are treated
 * as equal. Array order still matters.
 *
 * @param {object | null | undefined} actual - Arguments the model actually sent.
 * @param {object} expected - Arguments that must be present.
 * @returns {boolean} `true` when all expected key/value pairs are matched.
 */
function argumentsMatch(actual, expected) {
  const expectedKeys = Object.keys(expected);
  if (expectedKeys.length === 0) {
    return true;
  }
  // A call recorded without an arguments object cannot satisfy any expected
  // key; report a mismatch instead of letting the `in` operator throw.
  const supportsIn = actual !== null && (typeof actual === "object" || typeof actual === "function");
  if (!supportsIn) {
    return false;
  }

  const isPlainObject = (value) => {
    if (value === null || typeof value !== "object" || typeof value.toJSON === "function") {
      return false;
    }
    const proto = Object.getPrototypeOf(value);
    return proto === Object.prototype || proto === null;
  };

  // Rebuilds plain objects with sorted keys so serialization is order-independent.
  const canonicalize = (value) => {
    if (Array.isArray(value)) {
      return value.map((item) => canonicalize(item));
    }
    if (isPlainObject(value)) {
      const sortedEntries = Object.keys(value).sort().map((key) => [key, canonicalize(value[key])]);
      return Object.fromEntries(sortedEntries);
    }
    return value;
  };
  const fingerprint = (value) => JSON.stringify(canonicalize(value));

  return expectedKeys.every(
    (key) => key in actual && fingerprint(actual[key]) === fingerprint(expected[key])
  );
}
3346
/**
 * Finds the first actual tool call that satisfies an expected call.
 *
 * Names must be identical. When the expectation lists no arguments, the name
 * alone decides; otherwise the actual arguments must match via `argumentsMatch`.
 *
 * @param {object} expected - Expected call (`name`, optional `arguments`).
 * @param {Iterable<object>} actualCalls - Calls the model made, in order.
 * @returns {object | null} The first matching call, or `null` when none matches.
 */
function findMatchingCall(expected, actualCalls) {
  for (const candidate of actualCalls) {
    const sameName = candidate.name === expected.name;
    if (
      sameName &&
      (!expected.arguments || argumentsMatch(candidate.arguments, expected.arguments))
    ) {
      return candidate;
    }
  }
  return null;
}
3360
/**
 * Creates a validator that checks an LLM-host response against the tool calls
 * listed in `evalCase.metadata.expectedToolCalls`.
 *
 * An expected call counts as required unless its `required` is exactly
 * `false`. The validator fails when a required call has no match among the
 * response's `toolCalls`.
 *
 * @returns {(evalCase: object, response: unknown) => Promise<{pass: boolean, details: string}>}
 *   Async validator function.
 */
function createToolCallValidator() {
  const isRequired = (call) => call.required !== false;
  const describeCall = (call) => `${call.name}(${JSON.stringify(call.arguments || {})})`;

  return async (evalCase, response) => {
    const expectedCalls = evalCase.metadata?.expectedToolCalls;
    if (!expectedCalls || expectedCalls.length === 0) {
      return { pass: true, details: "No expected tool calls specified" };
    }

    const actualCalls = response?.toolCalls;
    if (!actualCalls || actualCalls.length === 0) {
      const requiredCount = expectedCalls.filter(isRequired).length;
      return requiredCount > 0
        ? {
            pass: false,
            details: `Expected ${requiredCount} tool call(s), but LLM made no tool calls`
          }
        : { pass: true, details: "No tool calls expected or made" };
    }

    // Optional expectations are looked up too, but only required ones can fail.
    const missingCalls = expectedCalls.filter(
      (expectedCall) => !findMatchingCall(expectedCall, actualCalls) && isRequired(expectedCall)
    );
    if (missingCalls.length === 0) {
      return {
        pass: true,
        details: `All ${expectedCalls.length} expected tool call(s) were made correctly`
      };
    }

    const missingDetails = missingCalls.map(describeCall).join(", ");
    return {
      pass: false,
      details: `Missing required tool call(s): ${missingDetails}. Actual calls: ${actualCalls.map((c) => c.name).join(", ")}`
    };
  };
}
3408
+
3409
+ // src/spec/conformanceChecks.ts
3410
/**
 * Runs a set of MCP conformance checks against a connected server.
 *
 * Checks performed, in order: server info present (optional), capabilities
 * available, `listTools` succeeds, required tools present, tool input schemas
 * valid, `listResources` / `listPrompts` succeed (only when the server
 * declares those capabilities), and calling a nonexistent tool yields an
 * error. If `listTools` fails, the remaining checks are skipped.
 *
 * When `testInfo` is supplied, a JSON summary is attached under the name
 * "mcp-conformance-checks" on every exit path, including the early exit taken
 * when `listTools` fails.
 *
 * @param {object} mcp - MCP fixture (`getServerInfo`, `client`, `listTools`, `callTool`, `authType`, `project`).
 * @param {object} [options] - `requiredTools`, `validateSchemas`, `checkServerInfo`, `checkResources`, `checkPrompts`.
 * @param {object} [testInfo] - Playwright test info used for the attachment.
 * @returns {Promise<{pass: boolean, checks: object[], raw: object}>} Overall
 *   verdict, the individual checks, and the raw data gathered.
 */
async function runConformanceChecks(mcp, options = {}, testInfo) {
  const {
    requiredTools = [],
    validateSchemas = true,
    checkServerInfo = true,
    checkResources = true,
    checkPrompts = true
  } = options;
  const checks = [];
  const raw = {
    serverInfo: null,
    capabilities: null,
    tools: [],
    resources: null,
    prompts: null
  };
  const describeError = (error) => error instanceof Error ? error.message : String(error);

  // Computes the verdict, attaches the report when possible, returns the result.
  const finish = async () => {
    const pass = checks.every((check) => check.pass);
    const result = { pass, checks, raw };
    if (testInfo) {
      await testInfo.attach("mcp-conformance-checks", {
        contentType: "application/json",
        body: JSON.stringify(
          {
            operation: "conformanceChecks",
            pass,
            checks,
            serverInfo: raw.serverInfo,
            capabilities: raw.capabilities,
            toolCount: raw.tools.length,
            authType: mcp.authType,
            project: mcp.project
          },
          null,
          2
        )
      });
    }
    return result;
  };

  const serverInfo = mcp.getServerInfo();
  // The flag and the message are derived from the same truthiness test so a
  // nullish value can never be reported as both "passing" and "missing".
  const hasServerInfo = Boolean(serverInfo);
  if (hasServerInfo) {
    raw.serverInfo = serverInfo;
  }
  if (checkServerInfo) {
    checks.push({
      name: "server_info_present",
      pass: hasServerInfo,
      message: hasServerInfo ? `Server info: ${serverInfo.name ?? "unknown"} v${serverInfo.version ?? "unknown"}` : "Server info is missing"
    });
  }

  const capabilities = mcp.client.getServerCapabilities();
  const hasCapabilities = Boolean(capabilities);
  if (hasCapabilities) {
    raw.capabilities = capabilities;
  }
  checks.push({
    name: "capabilities_valid",
    pass: hasCapabilities,
    message: hasCapabilities ? `Server capabilities: ${formatCapabilities(capabilities)}` : "Server capabilities not available"
  });

  let tools = [];
  try {
    tools = await mcp.listTools();
    raw.tools = tools;
    checks.push({
      name: "list_tools_succeeds",
      pass: true,
      message: `listTools returned ${tools.length} tools`
    });
  } catch (error) {
    checks.push({
      name: "list_tools_succeeds",
      pass: false,
      message: `listTools failed: ${describeError(error)}`
    });
    // Without a tool list the remaining checks cannot run.
    return finish();
  }

  if (requiredTools.length > 0) {
    const toolNames = new Set(tools.map((t) => t.name));
    const missingTools = requiredTools.filter((name) => !toolNames.has(name));
    checks.push({
      name: "required_tools_present",
      pass: missingTools.length === 0,
      message: missingTools.length === 0 ? `All ${requiredTools.length} required tools are present` : `Missing required tools: ${missingTools.join(", ")}`
    });
  }

  if (validateSchemas && tools.length > 0) {
    const invalidTools = [];
    for (const tool of tools) {
      if (!tool.name) {
        invalidTools.push(`(unnamed tool): missing name`);
        continue;
      }
      if (!tool.inputSchema) {
        invalidTools.push(`${tool.name}: missing inputSchema`);
        continue;
      }
      if (tool.inputSchema.type !== "object") {
        invalidTools.push(
          `${tool.name}: inputSchema.type must be "object", got "${String(tool.inputSchema.type)}"`
        );
      }
    }
    checks.push({
      name: "tool_schemas_valid",
      pass: invalidTools.length === 0,
      message: invalidTools.length === 0 ? `All ${tools.length} tools have valid schemas` : `Invalid tool schemas:\n ${invalidTools.join("\n ")}`
    });
  }

  if (checkResources && capabilities?.resources) {
    try {
      const resourcesResult = await mcp.client.listResources();
      raw.resources = resourcesResult.resources;
      checks.push({
        name: "list_resources_succeeds",
        pass: true,
        message: `listResources returned ${resourcesResult.resources.length} resources`
      });
    } catch (error) {
      checks.push({
        name: "list_resources_succeeds",
        pass: false,
        message: `listResources failed: ${describeError(error)}`
      });
    }
  }

  if (checkPrompts && capabilities?.prompts) {
    try {
      const promptsResult = await mcp.client.listPrompts();
      raw.prompts = promptsResult.prompts;
      checks.push({
        name: "list_prompts_succeeds",
        pass: true,
        message: `listPrompts returned ${promptsResult.prompts.length} prompts`
      });
    } catch (error) {
      checks.push({
        name: "list_prompts_succeeds",
        pass: false,
        message: `listPrompts failed: ${describeError(error)}`
      });
    }
  }

  // Either an `isError` result or a thrown error is accepted as the correct
  // reaction to an unknown tool name.
  try {
    const probe = await mcp.callTool("__nonexistent_tool__", {});
    const hasError = probe.isError === true;
    checks.push({
      name: "invalid_tool_returns_error",
      pass: hasError,
      message: hasError ? "Nonexistent tool correctly returned an error" : "Calling nonexistent tool should have returned an error"
    });
  } catch {
    checks.push({
      name: "invalid_tool_returns_error",
      pass: true,
      message: "Nonexistent tool correctly threw an error"
    });
  }

  return finish();
}
3569
/**
 * Renders the declared server capabilities as a comma-separated list.
 *
 * Only the known capability keys are considered, always in the same order:
 * tools, resources, prompts, logging, completions, experimental.
 *
 * @param {object} capabilities - Capabilities object reported by the server.
 * @returns {string} The truthy capability names joined by ", ", or
 *   "none declared" when none is set.
 */
function formatCapabilities(capabilities) {
  const knownCapabilities = [
    "tools",
    "resources",
    "prompts",
    "logging",
    "completions",
    "experimental"
  ];
  const declared = knownCapabilities.filter((key) => capabilities[key]);
  if (declared.length === 0) {
    return "none declared";
  }
  return declared.join(", ");
}
3579
+
3580
+ export { CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, createToolCallValidator, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, extractText as extractTextFromResponse, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, normalizeToolResponse, normalizeWhitespace, performOAuthSetup, performOAuthSetupIfNeeded, runConformanceChecks, runEvalCase, runEvalDataset, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText };
3581
+ //# sourceMappingURL=index.js.map
3582
+ //# sourceMappingURL=index.js.map