@gleanwork/mcp-server-tester 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,3658 @@
1
+ 'use strict';
2
+
3
+ var fs = require('fs/promises');
4
+ var path2 = require('path');
5
+ var zod = require('zod');
6
+ var test$1 = require('@playwright/test');
7
+ var auth_js = require('@modelcontextprotocol/sdk/client/auth.js');
8
+ var createDebug = require('debug');
9
+ var oauth2 = require('oauth4webapi');
10
+ var os = require('os');
11
+ var http = require('http');
12
+ var index_js = require('@modelcontextprotocol/sdk/client/index.js');
13
+ var stdio_js = require('@modelcontextprotocol/sdk/client/stdio.js');
14
+ var streamableHttp_js = require('@modelcontextprotocol/sdk/client/streamableHttp.js');
15
+ var claudeAgentSdk = require('@anthropic-ai/claude-agent-sdk');
16
+
17
/**
 * Normalizes a required module so callers can always read `.default`.
 *
 * @param {*} e - The value returned by `require`.
 * @returns {*} `e` itself when it is truthy and flagged `__esModule`;
 *   otherwise a fresh `{ default: e }` wrapper.
 */
function _interopDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
18
+
19
/**
 * Builds a frozen, prototype-less namespace object for a required module.
 *
 * Modules already flagged `__esModule` are returned untouched. For anything
 * else, every own enumerable key except `default` is exposed through a getter
 * (reusing the source's accessor descriptor when it has one), so reads reflect
 * later changes on the source, and `default` points at the module itself.
 *
 * @param {*} e - The value returned by `require`.
 * @returns {*} `e` or the frozen namespace wrapper.
 */
function _interopNamespace(e) {
  if (e && e.__esModule) {
    return e;
  }
  const namespace = Object.create(null);
  if (e) {
    for (const key of Object.keys(e)) {
      if (key === 'default') {
        continue;
      }
      const descriptor = Object.getOwnPropertyDescriptor(e, key);
      const accessor = descriptor.get
        ? descriptor
        : { enumerable: true, get: () => e[key] };
      Object.defineProperty(namespace, key, accessor);
    }
  }
  namespace.default = e;
  return Object.freeze(namespace);
}
36
+
37
+ var fs__namespace = /*#__PURE__*/_interopNamespace(fs);
38
+ var path2__namespace = /*#__PURE__*/_interopNamespace(path2);
39
+ var createDebug__default = /*#__PURE__*/_interopDefault(createDebug);
40
+ var oauth2__namespace = /*#__PURE__*/_interopNamespace(oauth2);
41
+ var http__namespace = /*#__PURE__*/_interopNamespace(http);
42
+
43
// Bundler runtime helpers. Local aliases for the reflection functions used below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// Resolves to the real `require` when one is defined. Otherwise, if Proxy is
// available, a Proxy that looks `require` up again on every property read;
// failing that, the stub function itself, which throws when called without a
// `require` in scope.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
  if (typeof require !== "undefined") return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});
// Lazy run-once initializer. `fn` is an object whose first own property is the
// module body; the returned `__init` calls that body on first use, clears `fn`
// to 0 so it never runs again, and returns the cached result thereafter.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines an enumerable getter on `target` for every key of `all`, so each
// exported value is read at access time rather than copied at definition time.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
58
+
59
+ // src/auth/oauthClientProvider.ts
60
// Export table for the oauthClientProvider module. Entries are installed as
// getters by __export, so `PlaywrightOAuthClientProvider` is read from
// `exports` at access time (it is assigned when init_oauthClientProvider runs).
var oauthClientProvider_exports = {};
__export(oauthClientProvider_exports, {
  PlaywrightOAuthClientProvider: () => exports.PlaywrightOAuthClientProvider,
  loadOAuthState: () => loadOAuthState,
  saveOAuthState: () => saveOAuthState
});
66
/**
 * Reads a JSON auth-state file from disk.
 *
 * @param {string} storagePath - Path of the state file.
 * @returns {Promise<object|null>} The parsed state, or `null` when the file
 *   does not exist.
 * @throws Any read error other than ENOENT, and JSON parse errors.
 */
async function loadOAuthState(storagePath) {
  let raw;
  try {
    raw = await fs__namespace.readFile(storagePath, "utf-8");
  } catch (error) {
    if (error.code === "ENOENT") {
      return null;
    }
    throw error;
  }
  return JSON.parse(raw);
}
77
/**
 * Persists OAuth state as pretty-printed JSON, creating parent directories.
 *
 * The state holds tokens and possibly a client secret, so the file is created
 * with 0o600 permissions (owner read/write only), matching FileOAuthStorage.
 * The mode only applies when the file is newly created; an existing file
 * keeps whatever permissions it already has.
 *
 * @param {string} storagePath - Destination file path.
 * @param {object} state - State to persist. Its `savedAt` field is overwritten
 *   with the current time (the caller's object is mutated).
 * @returns {Promise<void>}
 */
async function saveOAuthState(storagePath, state) {
  state.savedAt = Date.now();
  const dir = path2__namespace.dirname(storagePath);
  await fs__namespace.mkdir(dir, { recursive: true });
  await fs__namespace.writeFile(storagePath, JSON.stringify(state, null, 2), {
    encoding: "utf-8",
    mode: 0o600
  });
}
83
+ exports.PlaywrightOAuthClientProvider = void 0;
84
+ var init_oauthClientProvider = __esm({
85
+ "src/auth/oauthClientProvider.ts"() {
86
+ exports.PlaywrightOAuthClientProvider = class {
87
+ config;
88
+ cachedState = null;
89
+ stateParam = null;
90
+ constructor(config) {
91
+ this.config = config;
92
+ }
93
+ /**
94
+ * The URL to redirect the user agent to after authorization
95
+ */
96
+ get redirectUrl() {
97
+ return this.config.redirectUri;
98
+ }
99
+ /**
100
+ * Metadata about this OAuth client
101
+ */
102
+ get clientMetadata() {
103
+ return {
104
+ redirect_uris: [this.config.redirectUri],
105
+ token_endpoint_auth_method: this.config.clientSecret ? "client_secret_basic" : "none",
106
+ grant_types: ["authorization_code", "refresh_token"],
107
+ response_types: ["code"],
108
+ client_name: "@gleanwork/mcp-server-tester",
109
+ ...this.config.clientMetadata
110
+ };
111
+ }
112
+ /**
113
+ * Returns an OAuth2 state parameter
114
+ */
115
+ state() {
116
+ if (!this.stateParam) {
117
+ this.stateParam = this.generateRandomString(32);
118
+ }
119
+ return this.stateParam;
120
+ }
121
+ /**
122
+ * Loads information about this OAuth client
123
+ */
124
+ async clientInformation() {
125
+ if (this.config.clientId) {
126
+ return {
127
+ client_id: this.config.clientId,
128
+ client_secret: this.config.clientSecret,
129
+ redirect_uris: [this.config.redirectUri]
130
+ };
131
+ }
132
+ const state = await this.loadState();
133
+ if (state?.clientInfo) {
134
+ return {
135
+ client_id: state.clientInfo.clientId,
136
+ client_secret: state.clientInfo.clientSecret,
137
+ client_id_issued_at: state.clientInfo.clientIdIssuedAt,
138
+ client_secret_expires_at: state.clientInfo.clientSecretExpiresAt,
139
+ redirect_uris: [this.config.redirectUri]
140
+ };
141
+ }
142
+ return void 0;
143
+ }
144
+ /**
145
+ * Saves client information from Dynamic Client Registration
146
+ */
147
+ async saveClientInformation(clientInformation) {
148
+ const state = await this.loadState() ?? this.createEmptyState();
149
+ state.clientInfo = {
150
+ clientId: clientInformation.client_id,
151
+ clientSecret: clientInformation.client_secret,
152
+ clientIdIssuedAt: clientInformation.client_id_issued_at,
153
+ clientSecretExpiresAt: clientInformation.client_secret_expires_at
154
+ };
155
+ await this.saveState(state);
156
+ }
157
+ /**
158
+ * Loads any existing OAuth tokens for the current session
159
+ */
160
+ async tokens() {
161
+ const state = await this.loadState();
162
+ if (state?.tokens) {
163
+ return {
164
+ access_token: state.tokens.accessToken,
165
+ token_type: state.tokens.tokenType,
166
+ refresh_token: state.tokens.refreshToken,
167
+ expires_in: state.tokens.expiresAt ? Math.floor((state.tokens.expiresAt - Date.now()) / 1e3) : void 0
168
+ };
169
+ }
170
+ return void 0;
171
+ }
172
+ /**
173
+ * Stores new OAuth tokens for the current session
174
+ */
175
+ async saveTokens(tokens) {
176
+ const state = await this.loadState() ?? this.createEmptyState();
177
+ state.tokens = {
178
+ accessToken: tokens.access_token,
179
+ tokenType: tokens.token_type,
180
+ refreshToken: tokens.refresh_token,
181
+ expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
182
+ };
183
+ await this.saveState(state);
184
+ }
185
+ /**
186
+ * Invoked to redirect the user agent to the given URL
187
+ *
188
+ * In a testing context, this is typically handled by Playwright automation.
189
+ * This implementation throws an error to signal that the caller needs to
190
+ * handle the redirect externally.
191
+ */
192
+ async redirectToAuthorization(authorizationUrl) {
193
+ throw new Error(
194
+ `OAuth authorization required. Redirect to: ${authorizationUrl.toString()}
195
+ In a testing context, use performOAuthSetup() in your Playwright globalSetup to complete the OAuth flow before running tests.`
196
+ );
197
+ }
198
+ /**
199
+ * Saves a PKCE code verifier for the current session
200
+ */
201
+ async saveCodeVerifier(codeVerifier) {
202
+ const state = await this.loadState() ?? this.createEmptyState();
203
+ state.codeVerifier = codeVerifier;
204
+ await this.saveState(state);
205
+ }
206
+ /**
207
+ * Loads the PKCE code verifier for the current session
208
+ */
209
+ async codeVerifier() {
210
+ const state = await this.loadState();
211
+ if (!state?.codeVerifier) {
212
+ throw new Error("No code verifier found in auth state");
213
+ }
214
+ return state.codeVerifier;
215
+ }
216
+ /**
217
+ * Invalidates the specified credentials
218
+ */
219
+ async invalidateCredentials(scope) {
220
+ const state = await this.loadState();
221
+ if (!state) {
222
+ return;
223
+ }
224
+ switch (scope) {
225
+ case "all":
226
+ await this.deleteState();
227
+ break;
228
+ case "client":
229
+ delete state.clientInfo;
230
+ await this.saveState(state);
231
+ break;
232
+ case "tokens":
233
+ delete state.tokens;
234
+ await this.saveState(state);
235
+ break;
236
+ case "verifier":
237
+ delete state.codeVerifier;
238
+ await this.saveState(state);
239
+ break;
240
+ }
241
+ }
242
+ // ---- Private helper methods ----
243
+ async loadState() {
244
+ if (this.cachedState) {
245
+ return this.cachedState;
246
+ }
247
+ try {
248
+ const content = await fs__namespace.readFile(this.config.storagePath, "utf-8");
249
+ this.cachedState = JSON.parse(content);
250
+ return this.cachedState;
251
+ } catch (error) {
252
+ if (error.code === "ENOENT") {
253
+ return null;
254
+ }
255
+ throw error;
256
+ }
257
+ }
258
+ async saveState(state) {
259
+ state.savedAt = Date.now();
260
+ this.cachedState = state;
261
+ const dir = path2__namespace.dirname(this.config.storagePath);
262
+ await fs__namespace.mkdir(dir, { recursive: true });
263
+ await fs__namespace.writeFile(
264
+ this.config.storagePath,
265
+ JSON.stringify(state, null, 2),
266
+ "utf-8"
267
+ );
268
+ }
269
+ async deleteState() {
270
+ this.cachedState = null;
271
+ try {
272
+ await fs__namespace.unlink(this.config.storagePath);
273
+ } catch (error) {
274
+ if (error.code !== "ENOENT") {
275
+ throw error;
276
+ }
277
+ }
278
+ }
279
+ createEmptyState() {
280
+ return {
281
+ savedAt: Date.now()
282
+ };
283
+ }
284
+ generateRandomString(length) {
285
+ const chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
286
+ let result = "";
287
+ const randomValues = new Uint8Array(length);
288
+ crypto.getRandomValues(randomValues);
289
+ for (let i = 0; i < length; i++) {
290
+ const randomValue = randomValues[i] ?? 0;
291
+ result += chars[randomValue % chars.length];
292
+ }
293
+ return result;
294
+ }
295
+ };
296
+ }
297
+ });
298
// Shape of the optional `capabilities` field accepted by both transport
// configs: free-form `sampling` settings and a `roots.listChanged` flag.
var MCPHostCapabilitiesSchema = zod.z.object({
  sampling: zod.z.record(zod.z.unknown()).optional(),
  roots: zod.z.object({
    listChanged: zod.z.boolean()
  }).optional()
});
// OAuth settings for an HTTP server. Only `serverUrl` is required; the URL
// fields (`serverUrl`, `resource`, `redirectUri`) must parse as URLs.
var MCPOAuthConfigSchema = zod.z.object({
  serverUrl: zod.z.string().url("serverUrl must be a valid URL"),
  scopes: zod.z.array(zod.z.string()).optional(),
  resource: zod.z.string().url().optional(),
  authStatePath: zod.z.string().optional(),
  clientId: zod.z.string().optional(),
  clientSecret: zod.z.string().optional(),
  redirectUri: zod.z.string().url().optional()
});
// Authentication settings: either a static access token or an OAuth config.
// The refinement rejects configs that supply both.
var MCPAuthConfigSchema = zod.z.object({
  accessToken: zod.z.string().optional(),
  oauth: MCPOAuthConfigSchema.optional()
}).refine(
  (data) => !(data.accessToken && data.oauth),
  "Cannot specify both accessToken and oauth configuration"
);
// Config for the "stdio" transport: a non-empty `command` plus optional
// arguments, working directory, capabilities, timeouts and `quiet` flag.
var StdioConfigSchema = zod.z.object({
  transport: zod.z.literal("stdio"),
  command: zod.z.string().min(1, "command is required for stdio transport"),
  args: zod.z.array(zod.z.string()).optional(),
  cwd: zod.z.string().optional(),
  capabilities: MCPHostCapabilitiesSchema.optional(),
  connectTimeoutMs: zod.z.number().positive().optional(),
  requestTimeoutMs: zod.z.number().positive().optional(),
  quiet: zod.z.boolean().optional()
});
// Config for the "http" transport: a valid `serverUrl` plus optional headers,
// capabilities, timeouts and auth settings.
var HttpConfigSchema = zod.z.object({
  transport: zod.z.literal("http"),
  serverUrl: zod.z.string().url("serverUrl must be a valid URL"),
  headers: zod.z.record(zod.z.string()).optional(),
  capabilities: MCPHostCapabilitiesSchema.optional(),
  connectTimeoutMs: zod.z.number().positive().optional(),
  requestTimeoutMs: zod.z.number().positive().optional(),
  auth: MCPAuthConfigSchema.optional()
});
// Top-level config schema; the `transport` literal selects which of the two
// transport schemas is applied.
var MCPConfigSchema = zod.z.discriminatedUnion("transport", [
  StdioConfigSchema,
  HttpConfigSchema
]);
343
/**
 * Validates a raw config object against MCPConfigSchema.
 *
 * @param {unknown} config - Candidate configuration.
 * @returns {object} Whatever `MCPConfigSchema.parse` returns for the input.
 * @throws Whatever `MCPConfigSchema.parse` throws for invalid input.
 */
function validateMCPConfig(config) {
  const validated = MCPConfigSchema.parse(config);
  return validated;
}
346
/**
 * Tells whether a config describes the stdio transport.
 *
 * @param {object} config - Config to inspect.
 * @returns {boolean} True when `transport` is "stdio" and `command` is a string.
 */
function isStdioConfig(config) {
  if (config.transport !== "stdio") {
    return false;
  }
  return typeof config.command === "string";
}
349
/**
 * Tells whether a config describes the HTTP transport.
 *
 * @param {object} config - Config to inspect.
 * @returns {boolean} True when `transport` is "http" and `serverUrl` is a string.
 */
function isHttpConfig(config) {
  if (config.transport !== "http") {
    return false;
  }
  return typeof config.serverUrl === "string";
}
352
+
353
+ // src/index.ts
354
+ init_oauthClientProvider();
355
+
356
+ // src/auth/tokenAuth.ts
357
/**
 * Builds the Authorization header for a static access token.
 *
 * @param {string} accessToken - Token value.
 * @param {string} [tokenType="Bearer"] - Scheme placed before the token.
 * @returns {{Authorization: string}} Header map with a single entry.
 */
function createTokenAuthHeaders(accessToken, tokenType = "Bearer") {
  const authorization = `${tokenType} ${accessToken}`;
  return { Authorization: authorization };
}
362
/**
 * Ensures an access token was supplied and is not blank.
 *
 * @param {string} accessToken - Token to check.
 * @returns {void}
 * @throws {Error} When the token is falsy, or contains only whitespace.
 */
function validateAccessToken(accessToken) {
  if (!accessToken) {
    throw new Error("Access token is required but was not provided");
  }
  const isBlank = accessToken.trim().length === 0;
  if (isBlank) {
    throw new Error("Access token cannot be empty");
  }
}
370
/**
 * Checks whether a three-segment, dot-separated token carries a numeric `exp`
 * claim that lies in the past.
 *
 * The middle segment is base64url-decoded and parsed as JSON; `exp` is read
 * as seconds and compared against the current time. Anything that cannot be
 * decoded this way (wrong segment count, empty or invalid payload, no numeric
 * `exp`, non-string input) is reported as not expired.
 *
 * @param {string} accessToken - Token to inspect.
 * @returns {boolean} True only when a numeric `exp` is present and has passed.
 */
function isTokenExpired(accessToken) {
  try {
    const segments = accessToken.split(".");
    const encodedPayload = segments[1];
    if (segments.length !== 3 || !encodedPayload) {
      return false;
    }
    const json = Buffer.from(encodedPayload, "base64url").toString("utf-8");
    const claims = JSON.parse(json);
    return typeof claims.exp === "number" ? claims.exp * 1e3 < Date.now() : false;
  } catch {
    return false;
  }
}
391
/**
 * Tells whether an expiry timestamp falls within a safety buffer of now.
 *
 * @param {number|undefined} expiresAt - Absolute expiry in epoch milliseconds.
 * @param {number} [bufferMs=60000] - Safety margin in milliseconds.
 * @returns {boolean} False when `expiresAt` is undefined; otherwise whether
 *   `expiresAt - bufferMs` is already in the past.
 */
function isTokenExpiringSoon(expiresAt, bufferMs = 6e4) {
  return expiresAt === void 0 ? false : expiresAt - bufferMs < Date.now();
}
397
+
398
+ // src/auth/setupOAuth.ts
399
// Run the lazy oauthClientProvider initializer (a no-op after its first call).
init_oauthClientProvider();
// Prefix shared by every debug logger namespace created below.
var NAMESPACE = "mcp-server-tester";
var debugClient = createDebug__default.default(`${NAMESPACE}:client`);
var debugOAuth = createDebug__default.default(`${NAMESPACE}:oauth`);
// The ":eval" logger is created but its return value is not kept here.
createDebug__default.default(`${NAMESPACE}:eval`);

// src/auth/setupOAuth.ts
// Fallbacks used by performOAuthSetup when the config omits `timeoutMs`
// (30 000 ms) or `redirectUri`.
var DEFAULT_TIMEOUT_MS = 3e4;
var DEFAULT_REDIRECT_URI = "http://localhost:3000/oauth/callback";
408
/**
 * Runs the OAuth authorization-code flow in a Playwright-driven Chromium
 * browser and saves the resulting tokens to `config.outputPath`.
 *
 * Steps: discover the authorization server metadata, start authorization,
 * open the authorization URL, fill in the login form, wait for the redirect
 * back to the redirect URI, exchange the code for tokens and persist them.
 * The browser is always closed, whether the flow succeeds or fails.
 * Set the environment variable OAUTH_DEBUG=true to run the browser headed.
 *
 * @param {object} config - Reads `authServerUrl`, `scopes`, `outputPath`,
 *   `loginSelectors`, `credentials`, and the optional `clientId`,
 *   `clientSecret`, `redirectUri`, `resource` and `timeoutMs`.
 * @returns {Promise<void>}
 * @throws {Error} When metadata cannot be discovered, when the authorization
 *   server redirects back with an `error` parameter, or when the callback
 *   carries no `code`.
 */
async function performOAuthSetup(config) {
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const redirectUri = config.redirectUri ?? DEFAULT_REDIRECT_URI;
  const metadata = await auth_js.discoverAuthorizationServerMetadata(
    config.authServerUrl
  );
  if (!metadata) {
    throw new Error(
      `Could not discover OAuth metadata at ${config.authServerUrl}`
    );
  }
  const clientInformation = {
    client_id: config.clientId ?? "mcp-server-tester-client",
    client_secret: config.clientSecret
  };
  const { authorizationUrl, codeVerifier } = await auth_js.startAuthorization(
    config.authServerUrl,
    {
      metadata,
      clientInformation,
      redirectUrl: redirectUri,
      scope: config.scopes.join(" "),
      resource: config.resource ? new URL(config.resource) : void 0
    }
  );
  const browser = await test$1.chromium.launch({
    headless: process.env.OAUTH_DEBUG !== "true"
  });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    page.setDefaultTimeout(timeoutMs);
    await page.goto(authorizationUrl.toString());
    await completeLoginForm(page, config);
    // Accept an error redirect as well as a success redirect. Waiting only for
    // `code` would turn a denied authorization into a timeout and hide the
    // server's `error` / `error_description` reported below.
    await page.waitForURL(
      (url) => url.href.startsWith(redirectUri) && (url.searchParams.has("code") || url.searchParams.has("error")),
      { timeout: timeoutMs }
    );
    const callbackUrl = new URL(page.url());
    const code = callbackUrl.searchParams.get("code");
    const error = callbackUrl.searchParams.get("error");
    if (error) {
      const errorDescription = callbackUrl.searchParams.get("error_description");
      throw new Error(
        `OAuth authorization failed: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`
      );
    }
    if (!code) {
      throw new Error("No authorization code in callback URL");
    }
    const tokens = await auth_js.exchangeAuthorization(config.authServerUrl, {
      metadata,
      clientInformation,
      authorizationCode: code,
      codeVerifier,
      redirectUri,
      resource: config.resource ? new URL(config.resource) : void 0
    });
    const state = {
      tokens: {
        accessToken: tokens.access_token,
        tokenType: tokens.token_type,
        refreshToken: tokens.refresh_token,
        // `expires_in` is in seconds; store an absolute millisecond timestamp.
        expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
      },
      // Client info is only recorded when the caller supplied a client id.
      clientInfo: config.clientId ? {
        clientId: config.clientId,
        clientSecret: config.clientSecret
      } : void 0,
      codeVerifier,
      savedAt: Date.now()
    };
    await saveOAuthState(config.outputPath, state);
    debugOAuth("Auth state saved to %s", config.outputPath);
  } finally {
    await browser.close();
  }
}
486
/**
 * Fills in and submits the login form on the given page.
 *
 * Each control is awaited until visible before it is used: username, then
 * password, then the submit button. When a consent selector is configured the
 * consent button is awaited for up to 5 seconds and clicked; any failure in
 * that step is deliberately ignored (best effort).
 *
 * @param {object} page - Object exposing `waitForSelector`, `fill` and `click`.
 * @param {object} config - Provides `loginSelectors` and `credentials`.
 * @returns {Promise<void>}
 */
async function completeLoginForm(page, config) {
  const { loginSelectors, credentials } = config;
  const untilVisible = (selector, extra = {}) => page.waitForSelector(selector, { state: "visible", ...extra });

  await untilVisible(loginSelectors.usernameInput);
  await page.fill(loginSelectors.usernameInput, credentials.username);

  await untilVisible(loginSelectors.passwordInput);
  await page.fill(loginSelectors.passwordInput, credentials.password);

  await untilVisible(loginSelectors.submitButton);
  await page.click(loginSelectors.submitButton);

  if (!loginSelectors.consentButton) {
    return;
  }
  try {
    await untilVisible(loginSelectors.consentButton, { timeout: 5e3 });
    await page.click(loginSelectors.consentButton);
  } catch {
  }
}
511
/**
 * Reports whether a stored auth-state file holds a usable access token.
 *
 * The state counts as valid when it has an access token and either no expiry
 * or an expiry more than 60 seconds in the future. Any error while loading
 * the state is treated as "not valid".
 *
 * @param {string} storagePath - Path of the auth-state file.
 * @returns {Promise<boolean>}
 */
async function hasValidOAuthState(storagePath) {
  try {
    const { loadOAuthState: readState } = await Promise.resolve().then(() => (init_oauthClientProvider(), oauthClientProvider_exports));
    const state = await readState(storagePath);
    if (!state?.tokens?.accessToken) {
      return false;
    }
    const { expiresAt } = state.tokens;
    const bufferMs = 6e4;
    const expiringSoon = Boolean(expiresAt) && expiresAt - bufferMs < Date.now();
    return !expiringSoon;
  } catch {
    return false;
  }
}
529
/**
 * Runs performOAuthSetup only when `config.outputPath` does not already hold
 * valid auth state (as judged by hasValidOAuthState).
 *
 * @param {object} config - Same config performOAuthSetup accepts.
 * @returns {Promise<void>}
 */
async function performOAuthSetupIfNeeded(config) {
  if (await hasValidOAuthState(config.outputPath)) {
    debugOAuth("Using existing auth state from %s", config.outputPath);
    return;
  }
  debugOAuth("No valid auth state found, performing OAuth flow...");
  await performOAuthSetup(config);
}
538
// Value sent in the "MCP-Protocol-Version" request header by the discovery
// helpers in this file.
var MCP_PROTOCOL_VERSION = "2025-06-18";
539
/**
 * Discovers protected-resource metadata for an MCP server.
 *
 * The path-aware well-known URL (well-known prefix followed by the server
 * URL's path) is tried first. Only when that request fails with a
 * DiscoveryError whose status is 404 is the origin-level well-known URL tried
 * instead. Every other failure is rethrown.
 *
 * @param {string} mcpServerUrl - URL of the MCP server.
 * @returns {Promise<{metadata: object, discoveryUrl: string, usedPathAwareDiscovery: boolean}>}
 */
async function discoverProtectedResource(mcpServerUrl) {
  const { origin, pathname } = new URL(mcpServerUrl);
  const rootUrl = `${origin}/.well-known/oauth-protected-resource`;
  const pathAwareUrl = `${rootUrl}${pathname}`;
  let metadata;
  try {
    metadata = await fetchProtectedResourceMetadata(pathAwareUrl);
  } catch (error) {
    const notFound = error instanceof DiscoveryError && error.status === 404;
    if (!notFound) {
      throw error;
    }
    const fallbackMetadata = await fetchProtectedResourceMetadata(rootUrl);
    return {
      metadata: fallbackMetadata,
      discoveryUrl: rootUrl,
      usedPathAwareDiscovery: false
    };
  }
  return {
    metadata,
    discoveryUrl: pathAwareUrl,
    usedPathAwareDiscovery: true
  };
}
564
/**
 * Error raised by protected-resource discovery. Carries the HTTP status (when
 * one applies) and the discovery URL that was requested.
 */
var DiscoveryError = class extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {number|undefined} status - HTTP status, or undefined.
   * @param {string} url - Discovery URL the error relates to.
   */
  constructor(message, status, url) {
    super(message);
    Object.assign(this, { status, url, name: "DiscoveryError" });
  }
};
572
/**
 * Fetches and validates protected-resource metadata from a discovery URL.
 *
 * @param {string} discoveryUrl - Well-known metadata URL to request.
 * @returns {Promise<object>} The parsed metadata; guaranteed to have a truthy
 *   `resource` field.
 * @throws {DiscoveryError} With the HTTP status when the response is not OK;
 *   with an undefined status when the body is not valid JSON (the parse error
 *   is attached as `cause`) or lacks the required `resource` field.
 */
async function fetchProtectedResourceMetadata(discoveryUrl) {
  const response = await fetch(discoveryUrl, {
    method: "GET",
    headers: {
      Accept: "application/json",
      "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
    }
  });
  if (!response.ok) {
    throw new DiscoveryError(
      `Protected resource discovery failed: ${response.status} ${response.statusText}`,
      response.status,
      discoveryUrl
    );
  }
  let metadata;
  try {
    metadata = await response.json();
  } catch (cause) {
    // Report a non-JSON body as a discovery failure instead of leaking a raw
    // parse error, keeping the original error available for diagnosis.
    const parseError = new DiscoveryError(
      "Invalid protected resource metadata: response body is not valid JSON",
      void 0,
      discoveryUrl
    );
    parseError.cause = cause;
    throw parseError;
  }
  // Optional chaining covers a literal `null` JSON body, which would otherwise
  // raise a TypeError on the property read.
  if (!metadata?.resource) {
    throw new DiscoveryError(
      'Invalid protected resource metadata: missing required "resource" field',
      void 0,
      discoveryUrl
    );
  }
  return metadata;
}
597
/**
 * Fetches authorization-server metadata through oauth4webapi's discovery
 * request (algorithm "oauth2"), sending the MCP protocol version header.
 *
 * @param {string} authServerUrl - Issuer URL of the authorization server.
 * @returns {Promise<{server: object, issuer: string}>} The processed metadata
 *   together with the issuer string exactly as it was passed in.
 */
async function discoverAuthorizationServer(authServerUrl) {
  const issuer = new URL(authServerUrl);
  const headers = new Headers({
    "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
  });
  const response = await oauth2__namespace.discoveryRequest(issuer, {
    algorithm: "oauth2",
    headers
  });
  const server = await oauth2__namespace.processDiscoveryResponse(issuer, response);
  return { server, issuer: authServerUrl };
}
611
// Names of the environment variables that loadTokensFromEnv reads tokens from.
var ENV_VAR_NAMES = {
  accessToken: "MCP_ACCESS_TOKEN",
  refreshToken: "MCP_REFRESH_TOKEN",
  tokenType: "MCP_TOKEN_TYPE",
  expiresAt: "MCP_TOKEN_EXPIRES_AT"
};
// Default safety margin (60 000 ms) used by FileOAuthStorage.hasValidToken
// when no buffer is passed.
var DEFAULT_EXPIRY_BUFFER_MS = 6e4;
618
/**
 * Derives a filesystem-friendly key from a server URL.
 *
 * The key is the hostname, followed by the port (when the URL has one) and
 * the path with leading/trailing slashes removed and inner slashes turned
 * into underscores, all joined by "_". Any character outside
 * `[a-zA-Z0-9_.-]` is then replaced with "_".
 *
 * @param {string} serverUrl - Absolute server URL.
 * @returns {string} The derived key.
 * @throws {TypeError} When `serverUrl` is not a valid URL.
 */
function generateServerKey(serverUrl) {
  const { hostname, port, pathname } = new URL(serverUrl);
  const segments = [hostname];
  if (port) {
    segments.push(port);
  }
  const cleanPath = pathname.replace(/^\/+|\/+$/g, "").replace(/\//g, "_");
  if (cleanPath) {
    segments.push(cleanPath);
  }
  return segments.join("_").replace(/[^a-zA-Z0-9_.-]/g, "_");
}
632
/**
 * Resolves the directory that holds persisted state for a server.
 *
 * Resolution order:
 *  1. `<customDir>/<serverKey>` when `customDir` is given.
 *  2. On win32: `%LOCALAPPDATA%/mcp-tests/<serverKey>`, or
 *     `<home>/AppData/Local/mcp-tests/<serverKey>` when LOCALAPPDATA is unset.
 *  3. On linux with XDG_STATE_HOME set: `$XDG_STATE_HOME/mcp-tests/<serverKey>`.
 *  4. Otherwise `<home>/.local/state/mcp-tests/<serverKey>`.
 *
 * @param {string} serverUrl - Server URL the key is derived from.
 * @param {string} [customDir] - Optional base directory override.
 * @returns {string} The state directory path.
 */
function getStateDir(serverUrl, customDir) {
  const serverKey = generateServerKey(serverUrl);
  if (customDir) {
    return path2__namespace.join(customDir, serverKey);
  }
  const underBase = (...base) => path2__namespace.join(...base, "mcp-tests", serverKey);
  if (process.platform === "win32") {
    const localAppData = process.env.LOCALAPPDATA;
    return localAppData
      ? underBase(localAppData)
      : underBase(os.homedir(), "AppData", "Local");
  }
  const useXdg = process.platform === "linux" && process.env.XDG_STATE_HOME;
  return useXdg
    ? underBase(process.env.XDG_STATE_HOME)
    : underBase(os.homedir(), ".local", "state");
}
649
/**
 * Reads OAuth tokens from the environment variables named in ENV_VAR_NAMES.
 *
 * @returns {object|null} `null` when the access-token variable is unset or
 *   empty. Otherwise `{ accessToken, refreshToken, tokenType, expiresAt }`,
 *   where `tokenType` falls back to "Bearer" when its variable is unset and
 *   `expiresAt` is the base-10 integer parsed from its variable, or undefined
 *   when that variable is missing, unparsable or parses to 0.
 */
function loadTokensFromEnv() {
  const env = process.env;
  const accessToken = env[ENV_VAR_NAMES.accessToken];
  if (!accessToken) {
    return null;
  }
  const rawExpiry = env[ENV_VAR_NAMES.expiresAt];
  let expiresAt;
  if (rawExpiry) {
    const parsed = parseInt(rawExpiry, 10);
    if (parsed && !isNaN(parsed)) {
      expiresAt = parsed;
    }
  }
  return {
    accessToken,
    refreshToken: env[ENV_VAR_NAMES.refreshToken],
    tokenType: env[ENV_VAR_NAMES.tokenType] ?? "Bearer",
    expiresAt
  };
}
663
/**
 * Saves tokens into the file storage associated with a server.
 *
 * @param {string} serverUrl - Server the tokens belong to.
 * @param {object} tokens - Tokens to persist.
 * @param {string} [stateDir] - Optional base directory override.
 * @returns {Promise<void>}
 */
async function injectTokens(serverUrl, tokens, stateDir) {
  await createFileOAuthStorage({ serverUrl, stateDir }).saveTokens(tokens);
}
667
/**
 * Loads the tokens held in the file storage associated with a server.
 *
 * @param {string} serverUrl - Server the tokens belong to.
 * @param {string} [stateDir] - Optional base directory override.
 * @returns {Promise<object|null>} Whatever the storage's `loadTokens` yields.
 */
async function loadTokens(serverUrl, stateDir) {
  return createFileOAuthStorage({ serverUrl, stateDir }).loadTokens();
}
671
/**
 * Reports whether the file storage for a server holds a valid token.
 *
 * @param {string} serverUrl - Server the tokens belong to.
 * @param {{stateDir?: string, bufferMs?: number}} [options] - Optional base
 *   directory override and expiry buffer forwarded to the storage.
 * @returns {Promise<boolean>} Whatever the storage's `hasValidToken` yields.
 */
async function hasValidTokens(serverUrl, options) {
  const { stateDir, bufferMs } = options ?? {};
  return createFileOAuthStorage({ serverUrl, stateDir }).hasValidToken(bufferMs);
}
678
/**
 * Factory for FileOAuthStorage.
 *
 * @param {{serverUrl: string, stateDir?: string}} config - Storage settings.
 * @returns {object} A new FileOAuthStorage built from `config`.
 */
function createFileOAuthStorage(config) {
  const storage = new FileOAuthStorage(config);
  return storage;
}
681
/**
 * File-backed storage for OAuth server metadata, client registration and
 * tokens. Each kind of record lives in its own JSON file inside a
 * per-server state directory resolved by getStateDir.
 */
var FileOAuthStorage = class {
  stateDir;
  /**
   * @param {{serverUrl: string, stateDir?: string}} config - Server URL and
   *   optional base directory, both forwarded to getStateDir.
   */
  constructor(config) {
    this.stateDir = getStateDir(config.serverUrl, config.stateDir);
  }
  get serverMetadataPath() {
    return path2__namespace.join(this.stateDir, "server.json");
  }
  get clientPath() {
    return path2__namespace.join(this.stateDir, "client.json");
  }
  get tokensPath() {
    return path2__namespace.join(this.stateDir, "tokens.json");
  }
  async loadServerMetadata() {
    return this.loadFile(this.serverMetadataPath);
  }
  async saveServerMetadata(metadata) {
    await this.atomicWrite(this.serverMetadataPath, metadata);
  }
  async loadClient() {
    return this.loadFile(this.clientPath);
  }
  async saveClient(client) {
    await this.atomicWrite(this.clientPath, client);
  }
  async loadTokens() {
    return this.loadFile(this.tokensPath);
  }
  async saveTokens(tokens) {
    await this.atomicWrite(this.tokensPath, tokens);
  }
  async deleteTokens() {
    await this.deleteFile(this.tokensPath);
  }
  /**
   * Reports whether stored tokens exist and are not about to expire.
   *
   * @param {number} [bufferMs=DEFAULT_EXPIRY_BUFFER_MS] - Safety margin in ms.
   * @returns {Promise<boolean>} False without an access token; true when the
   *   tokens carry no expiry; otherwise whether the expiry lies more than
   *   `bufferMs` in the future.
   */
  async hasValidToken(bufferMs = DEFAULT_EXPIRY_BUFFER_MS) {
    const tokens = await this.loadTokens();
    if (!tokens?.accessToken) {
      return false;
    }
    if (!tokens.expiresAt) {
      return true;
    }
    return tokens.expiresAt > Date.now() + bufferMs;
  }
  /**
   * Load a JSON file, returning null if not found
   */
  async loadFile(filePath) {
    try {
      const content = await fs__namespace.readFile(filePath, "utf-8");
      return JSON.parse(content);
    } catch (error) {
      if (error.code === "ENOENT") {
        return null;
      }
      throw error;
    }
  }
  /**
   * Write data atomically: write to a temporary file, then rename
   * Files are created with 0o600 permissions (user read/write only)
   *
   * The temporary name is unique per call, so overlapping writes to the same
   * target do not share (and clobber or steal) one temp file. If the write or
   * rename fails, the temporary file is removed before the error is rethrown.
   */
  async atomicWrite(filePath, data) {
    await fs__namespace.mkdir(this.stateDir, { recursive: true, mode: 448 });
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const tmpPath = `${filePath}.${uniqueSuffix}.tmp`;
    const content = JSON.stringify(data, null, 2);
    try {
      await fs__namespace.writeFile(tmpPath, content, { encoding: "utf-8", mode: 384 });
      await fs__namespace.rename(tmpPath, filePath);
    } catch (error) {
      // Best-effort cleanup; the original failure is the one worth reporting.
      await fs__namespace.unlink(tmpPath).catch(() => {
      });
      throw error;
    }
  }
  /**
   * Delete a file, ignoring errors if the file doesn't exist
   */
  async deleteFile(filePath) {
    try {
      await fs__namespace.unlink(filePath);
    } catch (error) {
      if (error.code !== "ENOENT") {
        throw error;
      }
    }
  }
};
764
/**
 * Creates a PKCE verifier/challenge pair using oauth4webapi.
 *
 * @returns {Promise<{codeVerifier: string, codeChallenge: string}>} A random
 *   code verifier and the challenge calculated from it.
 */
async function generatePKCE() {
  const verifier = oauth2__namespace.generateRandomCodeVerifier();
  const challenge = await oauth2__namespace.calculatePKCECodeChallenge(verifier);
  return { codeVerifier: verifier, codeChallenge: challenge };
}
772
/**
 * Creates a random OAuth `state` value using oauth4webapi.
 *
 * @returns {string} The generated state string.
 */
function generateState() {
  const state = oauth2__namespace.generateRandomState();
  return state;
}
775
/**
 * Builds the authorization request URL for the PKCE authorization-code flow.
 *
 * Sets `client_id`, `redirect_uri`, `response_type=code`, `code_challenge`,
 * `code_challenge_method=S256` and `state` on the server's authorization
 * endpoint. `scope` is added only when at least one scope is configured, and
 * `resource` only when one is configured, so a missing or empty `scopes` list
 * neither throws nor sends an empty `scope=` parameter.
 *
 * @param {object} config - Reads `authServer.server.authorization_endpoint`,
 *   `clientId`, `redirectUri`, `codeChallenge`, `state`, and the optional
 *   `scopes` (array of strings) and `resource`.
 * @returns {URL} The authorization URL.
 * @throws {Error} When the server metadata has no authorization endpoint.
 */
function buildAuthorizationUrl(config) {
  const authorizationEndpoint = config.authServer.server.authorization_endpoint;
  if (!authorizationEndpoint) {
    throw new Error(
      "Authorization server does not have an authorization_endpoint"
    );
  }
  const authorizationUrl = new URL(authorizationEndpoint);
  authorizationUrl.searchParams.set("client_id", config.clientId);
  authorizationUrl.searchParams.set("redirect_uri", config.redirectUri);
  authorizationUrl.searchParams.set("response_type", "code");
  const scope = (config.scopes ?? []).join(" ");
  if (scope) {
    authorizationUrl.searchParams.set("scope", scope);
  }
  authorizationUrl.searchParams.set("code_challenge", config.codeChallenge);
  authorizationUrl.searchParams.set("code_challenge_method", "S256");
  authorizationUrl.searchParams.set("state", config.state);
  if (config.resource) {
    authorizationUrl.searchParams.set("resource", config.resource);
  }
  return authorizationUrl;
}
795
/**
 * Exchanges an authorization code for tokens through oauth4webapi.
 *
 * A callback URL is rebuilt from `redirectUri`, `code` and `state`, passed
 * through `validateAuthResponse`, and the validated parameters are sent in an
 * authorization-code grant request together with the PKCE verifier.
 * NOTE(review): the callback URL and the expected state are both built from
 * `config.state`, so any comparison `validateAuthResponse` makes on state can
 * only pass. Confirm the caller checks the state the server actually returned.
 *
 * @param {object} config - Reads `authServer.server`, `clientId`, optional
 *   `clientSecret`, `redirectUri`, `code`, `state` and `codeVerifier`.
 * @returns {Promise<{accessToken: *, tokenType: *, expiresIn: *, refreshToken: *, scope: *}>}
 *   The token response fields renamed to camelCase.
 */
async function exchangeCodeForTokens(config) {
  const usesSecret = Boolean(config.clientSecret);
  const client = {
    client_id: config.clientId,
    token_endpoint_auth_method: usesSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = usesSecret
    ? oauth2__namespace.ClientSecretBasic(config.clientSecret)
    : oauth2__namespace.None();

  const callbackUrl = new URL(config.redirectUri);
  callbackUrl.searchParams.set("code", config.code);
  callbackUrl.searchParams.set("state", config.state);

  const authorizationServer = config.authServer.server;
  const validatedParams = oauth2__namespace.validateAuthResponse(
    authorizationServer,
    client,
    callbackUrl,
    config.state
  );
  const response = await oauth2__namespace.authorizationCodeGrantRequest(
    authorizationServer,
    client,
    clientAuth,
    validatedParams,
    config.redirectUri,
    config.codeVerifier
  );
  const {
    access_token: accessToken,
    token_type: tokenType,
    expires_in: expiresIn,
    refresh_token: refreshToken,
    scope
  } = await oauth2__namespace.processAuthorizationCodeResponse(
    authorizationServer,
    client,
    response
  );
  return { accessToken, tokenType, expiresIn, refreshToken, scope };
}
831
/**
 * Obtains a new access token from a refresh token through oauth4webapi.
 *
 * When the token endpoint answers with a non-OK status, an Error is thrown
 * whose message is derived from the response: the OAuth `error` (and
 * `error_description`) for JSON bodies, the raw text for other bodies, or the
 * HTTP status line when the body cannot be read or carries no detail.
 *
 * @param {object} config - Reads `authServer.server`, `clientId`, optional
 *   `clientSecret` and `refreshToken`.
 * @returns {Promise<{accessToken: *, tokenType: *, expiresIn: *, refreshToken: *, scope: *}>}
 *   The token response fields renamed to camelCase.
 * @throws {Error} When the refresh request is rejected by the server.
 */
async function refreshAccessToken(config) {
  const usesSecret = Boolean(config.clientSecret);
  const client = {
    client_id: config.clientId,
    token_endpoint_auth_method: usesSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = usesSecret
    ? oauth2__namespace.ClientSecretBasic(config.clientSecret)
    : oauth2__namespace.None();
  const response = await oauth2__namespace.refreshTokenGrantRequest(
    config.authServer.server,
    client,
    clientAuth,
    config.refreshToken
  );

  // Turns a failed response into the most specific message available. Bodies
  // are read from clones, and read/parse failures fall back to the status line.
  const describeFailure = async () => {
    const contentType = response.headers.get("content-type") ?? "";
    const statusLine = `Token refresh failed: ${response.status} ${response.statusText}`;
    try {
      if (contentType.includes("application/json")) {
        const errorBody = await response.clone().json();
        if (!errorBody.error) {
          return statusLine;
        }
        const summary = `Token refresh failed: ${errorBody.error}`;
        return errorBody.error_description
          ? `${summary} - ${errorBody.error_description}`
          : summary;
      }
      const textBody = await response.clone().text();
      return textBody
        ? `Token refresh failed: ${response.status} - ${textBody}`
        : statusLine;
    } catch {
      return statusLine;
    }
  };

  if (!response.ok) {
    throw new Error(await describeFailure());
  }

  const {
    access_token: accessToken,
    token_type: tokenType,
    expires_in: expiresIn,
    refresh_token: refreshToken,
    scope
  } = await oauth2__namespace.processRefreshTokenResponse(
    config.authServer.server,
    client,
    response
  );
  return { accessToken, tokenType, expiresIn, refreshToken, scope };
}
878
+
879
+ // src/auth/cli.ts
880
// Debug logger used by the CLI OAuth client below.
var debug = createDebug__default.default("mcp-server-tester:cli-oauth");
// 300 000 ms (5 minutes). NOTE(review): presumably the CLI flow's default
// timeout; its use is not visible in this part of the file, so confirm.
var DEFAULT_TIMEOUT_MS2 = 3e5;
// Same string PlaywrightOAuthClientProvider reports as `client_name`.
var DEFAULT_CLIENT_NAME = "@gleanwork/mcp-server-tester";
// 24 hours expressed in milliseconds. NOTE(review): presumably how long cached
// server metadata is kept; confirm against its use further down.
var DEFAULT_METADATA_TTL_MS = 24 * 60 * 60 * 1e3;
884
/**
 * OAuth client for command-line use.
 *
 * Resolves an access token from (in order) environment variables, the file
 * storage created in the constructor, a refresh-token grant, and finally a
 * full browser-based authorization-code flow that is completed through a
 * temporary HTTP server bound to 127.0.0.1.
 */
var CLIOAuthClient = class {
  config;
  storage;
  /**
   * @param config - Client configuration. This class reads `mcpServerUrl`,
   *   `stateDir`, `clientId`, `clientSecret`, `clientName`, `scopes`,
   *   `timeoutMs` and `callbackPort` from it.
   */
  constructor(config) {
    this.config = config;
    this.storage = createFileOAuthStorage({
      serverUrl: config.mcpServerUrl,
      stateDir: config.stateDir
    });
  }
  /**
   * Get a valid access token, authenticating if necessary
   *
   * Token resolution priority:
   * 1. Check environment variables (for CI/CD)
   * 2. Check file storage for cached tokens
   * 3. Try to refresh if expired but refresh token exists
   * 4. Run full OAuth flow if needed
   *
   * @returns Token info (`accessToken`, `tokenType`, `expiresAt`, `refreshed`, `fromEnv`).
   */
  async getAccessToken() {
    const reusable = await resolveReusableCliToken(
      this,
      "Token refresh failed, will re-authenticate:"
    );
    if (reusable) {
      return reusable;
    }
    debug("Performing full OAuth authentication");
    return this.authenticate();
  }
  /**
   * Try to get a valid access token without triggering browser auth
   *
   * Returns null if no valid token is available (no stored tokens,
   * expired without refresh token, or refresh failed). Unlike getAccessToken(),
   * this will NOT open a browser for authentication.
   *
   * Use this for CLI commands that should prompt the user to run `login`
   * instead of automatically starting the OAuth flow.
   */
  async tryGetAccessToken() {
    const reusable = await resolveReusableCliToken(this, "Token refresh failed:");
    if (reusable) {
      return reusable;
    }
    debug("No valid token available");
    return null;
  }
  /**
   * Force a new authentication flow: discover servers, obtain a client
   * registration, then run the interactive authorization-code flow.
   *
   * @returns Token info including the `requestedScopes` used for the flow.
   */
  async authenticate() {
    const { protectedResource, authServer } = await this.discoverServers();
    const client = await this.getOrRegisterClient(authServer);
    const { tokens, requestedScopes } = await this.performOAuthFlow(
      authServer,
      client,
      protectedResource
    );
    return {
      accessToken: tokens.accessToken,
      tokenType: tokens.tokenType,
      expiresAt: tokens.expiresAt,
      refreshed: false,
      fromEnv: false,
      requestedScopes
    };
  }
  /**
   * Check if stored credentials exist (may be expired)
   */
  async hasStoredCredentials() {
    const tokens = await this.storage.loadTokens();
    return tokens?.accessToken !== void 0;
  }
  /**
   * Clear stored credentials
   */
  async clearCredentials() {
    await this.storage.deleteTokens();
    debug("Cleared stored credentials");
  }
  /**
   * Discover protected resource and authorization server
   *
   * Cached metadata younger than DEFAULT_METADATA_TTL_MS is returned as-is;
   * otherwise discovery runs again and the result is written back to storage.
   *
   * @throws {Error} If the protected resource lists no authorization server.
   */
  async discoverServers() {
    const cachedMetadata = await this.storage.loadServerMetadata();
    if (cachedMetadata) {
      const age = Date.now() - cachedMetadata.discoveredAt;
      if (age < DEFAULT_METADATA_TTL_MS) {
        debug("Using cached server metadata (age: %dms)", age);
        debug(
          "Cached protected resource scopes: %O",
          cachedMetadata.protectedResource.scopes_supported
        );
        debug(
          "Cached auth server scopes: %O",
          cachedMetadata.authServer.server.scopes_supported
        );
        return {
          protectedResource: cachedMetadata.protectedResource,
          authServer: cachedMetadata.authServer
        };
      }
      debug("Cached server metadata is stale (age: %dms), re-discovering", age);
    }
    debug("Discovering protected resource:", this.config.mcpServerUrl);
    const prResult = await discoverProtectedResource(this.config.mcpServerUrl);
    debug("Found protected resource:", prResult.metadata.resource);
    debug(
      "Protected resource scopes_supported: %O",
      prResult.metadata.scopes_supported
    );
    // Only the first advertised authorization server is used.
    const authServerUrl = prResult.metadata.authorization_servers?.[0];
    if (!authServerUrl) {
      throw new Error(
        "No authorization servers found in protected resource metadata"
      );
    }
    debug("Discovering authorization server:", authServerUrl);
    const authServer = await discoverAuthorizationServer(authServerUrl);
    debug("Found authorization server:", authServer.issuer);
    debug(
      "Auth server scopes_supported: %O",
      authServer.server.scopes_supported
    );
    const metadata = {
      authServer,
      protectedResource: prResult.metadata,
      discoveredAt: Date.now()
    };
    await this.storage.saveServerMetadata(metadata);
    return {
      protectedResource: prResult.metadata,
      authServer
    };
  }
  /**
   * Get existing client or register new one via DCR
   *
   * Priority: client ID from config, then cached registration, then a new
   * Dynamic Client Registration (which is saved to storage).
   */
  async getOrRegisterClient(authServer) {
    if (this.config.clientId) {
      debug("Using pre-configured client ID");
      return {
        clientId: this.config.clientId,
        clientSecret: this.config.clientSecret
      };
    }
    const cachedClient = await this.storage.loadClient();
    if (cachedClient?.clientId) {
      debug("Using cached client registration");
      return cachedClient;
    }
    debug("Registering new client via DCR");
    const client = await this.registerClient(authServer);
    await this.storage.saveClient(client);
    return client;
  }
  /**
   * Register a new client via Dynamic Client Registration
   *
   * @throws {Error} If the server has no registration endpoint, the request
   *   fails, or the response carries no `client_id`.
   */
  async registerClient(authServer) {
    const registrationEndpoint = authServer.server.registration_endpoint;
    if (!registrationEndpoint) {
      throw new Error(
        "Authorization server does not support Dynamic Client Registration. Please provide a clientId in the configuration."
      );
    }
    const redirectUri = "http://127.0.0.1:0/callback";
    const response = await fetch(registrationEndpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
      },
      body: JSON.stringify({
        redirect_uris: [redirectUri],
        token_endpoint_auth_method: "none",
        grant_types: ["authorization_code", "refresh_token"],
        response_types: ["code"],
        client_name: this.config.clientName ?? DEFAULT_CLIENT_NAME
      })
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `Dynamic Client Registration failed: ${response.status} ${response.statusText}
${errorText}`
      );
    }
    const data = await response.json();
    // Without a client_id the registration is unusable; fail here instead of
    // caching a broken client record that every later flow would reuse.
    if (!data?.client_id) {
      throw new Error(
        "Dynamic Client Registration response did not include a client_id"
      );
    }
    debug("Client registered:", data.client_id);
    return {
      clientId: data.client_id,
      clientSecret: data.client_secret,
      clientIdIssuedAt: data.client_id_issued_at,
      clientSecretExpiresAt: data.client_secret_expires_at
    };
  }
  /**
   * Perform the full OAuth authorization flow
   *
   * Starts the local callback server, sends the user to the authorization
   * URL, waits for the code, exchanges it for tokens and saves them.
   * The callback server is always closed, even on failure.
   *
   * @returns `{ tokens, requestedScopes }`
   */
  async performOAuthFlow(authServer, client, protectedResource) {
    const pkce = await generatePKCE();
    const state = generateState();
    const { port, codePromise, close } = await this.startCallbackServer(state);
    const redirectUri = `http://127.0.0.1:${port}/callback`;
    try {
      // Scope precedence: user config, then protected resource, then auth server.
      const requestedScopes = this.config.scopes ?? protectedResource.scopes_supported ?? authServer.server.scopes_supported ?? ["openid"];
      debug("Scope resolution:");
      debug(" - User config scopes: %O", this.config.scopes);
      debug(
        " - Protected resource scopes_supported: %O",
        protectedResource.scopes_supported
      );
      debug(
        " - Auth server scopes_supported: %O",
        authServer.server.scopes_supported
      );
      debug(" - Final requested scopes: %O", requestedScopes);
      const authUrl = buildAuthorizationUrl({
        authServer,
        clientId: client.clientId,
        redirectUri,
        scopes: requestedScopes,
        codeChallenge: pkce.codeChallenge,
        state,
        resource: protectedResource.resource
      });
      debug("Authorization URL: %s", authUrl.toString());
      debug("Authorization URL params:");
      debug(" - client_id: %s", authUrl.searchParams.get("client_id"));
      debug(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
      debug(" - scope: %s", authUrl.searchParams.get("scope"));
      debug(" - resource: %s", authUrl.searchParams.get("resource"));
      await this.openBrowserOrPrintUrl(authUrl);
      debug("Waiting for OAuth callback...");
      const code = await codePromise;
      debug("Received authorization code");
      const tokenResult = await exchangeCodeForTokens({
        authServer,
        clientId: client.clientId,
        clientSecret: client.clientSecret,
        code,
        state,
        codeVerifier: pkce.codeVerifier,
        redirectUri
      });
      const tokens = this.tokenResultToStoredTokens(
        tokenResult,
        client.clientId
      );
      await this.storage.saveTokens(tokens);
      return { tokens, requestedScopes };
    } finally {
      close();
    }
  }
  /**
   * Refresh an expired token
   *
   * Uses the clientId stored with the tokens (if available) to ensure
   * the refresh request uses the same client that obtained the original tokens.
   * This is important because refresh tokens are bound to the client_id.
   *
   * @throws {Error} If there is no refresh token or no cached server metadata.
   */
  async refreshStoredToken(storedTokens) {
    if (!storedTokens.refreshToken) {
      throw new Error("No refresh token available");
    }
    const metadata = await this.storage.loadServerMetadata();
    if (!metadata) {
      throw new Error("No cached server metadata for refresh");
    }
    let clientId;
    let clientSecret;
    if (storedTokens.clientId) {
      debug("Using clientId from stored tokens for refresh");
      clientId = storedTokens.clientId;
      const storedClient = await this.storage.loadClient();
      // Only reuse the cached secret when it belongs to the same client.
      if (storedClient?.clientId === clientId) {
        clientSecret = storedClient.clientSecret;
      }
    } else {
      debug(
        "No clientId in stored tokens, falling back to stored client (legacy behavior)"
      );
      const client = await this.getOrRegisterClient(metadata.authServer);
      clientId = client.clientId;
      clientSecret = client.clientSecret;
    }
    const tokenResult = await refreshAccessToken({
      authServer: metadata.authServer,
      clientId,
      clientSecret,
      refreshToken: storedTokens.refreshToken
    });
    const tokens = this.tokenResultToStoredTokens(tokenResult, clientId);
    // A refresh response is allowed to omit refresh_token (RFC 6749 section 6);
    // the existing one then stays in force, so keep it instead of storing
    // `undefined` and losing the ability to refresh next time.
    if (!tokens.refreshToken) {
      tokens.refreshToken = storedTokens.refreshToken;
    }
    await this.storage.saveTokens(tokens);
    return tokens;
  }
  /**
   * Start local callback server
   *
   * @param expectedState - `state` value the callback request must echo back.
   * @returns Promise of `{ port, codePromise, close }`: the bound port, a
   *   promise settling with the authorization code (or rejecting on error,
   *   state mismatch, missing code or timeout), and a function that tears
   *   the server down.
   */
  async startCallbackServer(expectedState) {
    const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS2;
    return new Promise((resolve, reject) => {
      const server = http__namespace.createServer();
      // Track open sockets so shutdown does not wait for keep-alive connections.
      const connections = new Set();
      server.on("connection", (socket) => {
        connections.add(socket);
        socket.on("close", () => connections.delete(socket));
      });
      let timeout;
      const forceClose = () => {
        // Cancel the pending timeout as well: otherwise a flow that is
        // abandoned before the callback arrives leaves a timer that keeps the
        // process alive and later rejects a promise nobody is awaiting.
        clearTimeout(timeout);
        for (const socket of connections) {
          socket.destroy();
        }
        server.close();
      };
      let codeResolve;
      let codeReject;
      const codePromise = new Promise((res, rej) => {
        codeResolve = res;
        codeReject = rej;
      });
      // The consumer may not be awaiting yet when a rejection happens; mark the
      // promise as observed so Node does not report an unhandled rejection.
      // Awaiting `codePromise` still receives the rejection.
      codePromise.catch(() => {
      });
      timeout = setTimeout(() => {
        forceClose();
        codeReject(new Error(`OAuth flow timed out after ${timeoutMs}ms`));
      }, timeoutMs);
      server.on("request", (req, res) => {
        const url = new URL(
          req.url ?? "/",
          `http://127.0.0.1:${server.address().port}`
        );
        if (url.pathname !== "/callback") {
          res.writeHead(404);
          res.end("Not Found");
          return;
        }
        const error = url.searchParams.get("error");
        if (error) {
          const errorDescription = url.searchParams.get("error_description");
          clearTimeout(timeout);
          res.writeHead(400, { "Content-Type": "text/html" });
          res.end(this.errorHtml(error, errorDescription ?? void 0));
          codeReject(
            new Error(
              `OAuth error: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`
            )
          );
          return;
        }
        const state = url.searchParams.get("state");
        if (state !== expectedState) {
          clearTimeout(timeout);
          res.writeHead(400, { "Content-Type": "text/html" });
          res.end(this.errorHtml("invalid_state", "State parameter mismatch"));
          codeReject(new Error("OAuth state mismatch - possible CSRF attack"));
          return;
        }
        const code = url.searchParams.get("code");
        if (!code) {
          clearTimeout(timeout);
          res.writeHead(400, { "Content-Type": "text/html" });
          res.end(
            this.errorHtml("missing_code", "No authorization code received")
          );
          codeReject(new Error("No authorization code in callback"));
          return;
        }
        clearTimeout(timeout);
        res.writeHead(200, { "Content-Type": "text/html" });
        res.end(this.successHtml());
        codeResolve(code);
      });
      // Port 0 lets the OS pick a free port.
      const preferredPort = this.config.callbackPort ?? 0;
      server.listen(preferredPort, "127.0.0.1", () => {
        const address = server.address();
        debug("Callback server listening on port", address.port);
        resolve({ port: address.port, codePromise, close: forceClose });
      });
      server.on("error", (err) => {
        // If listening failed the caller never receives `close`, so the
        // timeout has to be cancelled here.
        clearTimeout(timeout);
        reject(err);
      });
    });
  }
  /**
   * Open browser or print URL for headless environments
   *
   * Falls back to printing the URL when the browser cannot be opened.
   */
  async openBrowserOrPrintUrl(url) {
    if (isHeadless()) {
      console.log("\n" + "=".repeat(60));
      console.log(
        "Please open the following URL in your browser to authenticate:"
      );
      console.log("\n" + url.toString() + "\n");
      console.log("=".repeat(60) + "\n");
      return;
    }
    try {
      const open = await import('open');
      await open.default(url.toString());
      debug("Opened browser for authentication");
    } catch (error) {
      debug("Failed to open browser:", error);
      console.log("\nFailed to open browser automatically.");
      console.log("Please open the following URL manually:\n");
      console.log(url.toString() + "\n");
    }
  }
  /**
   * Convert TokenResult to StoredTokens
   *
   * @param result - Token result from exchange or refresh
   * @param clientId - Client ID that was used to obtain these tokens
   * @returns Stored-token record; `expiresAt` is an epoch-millisecond
   *   timestamp, or undefined when the result carries no `expiresIn`.
   */
  tokenResultToStoredTokens(result, clientId) {
    // Compare against null/undefined rather than truthiness so that an
    // explicit `expiresIn: 0` yields an already-expired timestamp.
    const hasExpiry = result.expiresIn != null;
    return {
      accessToken: result.accessToken,
      tokenType: result.tokenType,
      refreshToken: result.refreshToken,
      expiresAt: hasExpiry ? Date.now() + result.expiresIn * 1e3 : void 0,
      clientId
    };
  }
  /**
   * HTML page for successful authentication
   */
  successHtml() {
    return `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Authentication Successful</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0;
background: #f8fafc; }
.container { text-align: center; background: white; padding: 48px 64px; border-radius: 8px;
border: 1px solid #e2e8f0; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.icon { width: 48px; height: 48px; margin: 0 auto 24px; background: #dcfce7; border-radius: 50%;
display: flex; align-items: center; justify-content: center; }
.icon svg { width: 24px; height: 24px; color: #16a34a; }
h1 { color: #0f172a; margin: 0 0 8px 0; font-size: 20px; font-weight: 600; }
p { color: #64748b; margin: 0; font-size: 14px; }
</style>
</head>
<body>
<div class="container">
<div class="icon">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"/>
</svg>
</div>
<h1>Authentication Successful</h1>
<p>You can close this window and return to the terminal.</p>
</div>
</body>
</html>`;
  }
  /**
   * HTML page for authentication error
   *
   * @param error - Error code; HTML-escaped before insertion.
   * @param description - Optional description; HTML-escaped before insertion.
   */
  errorHtml(error, description) {
    return `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Authentication Failed</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0;
background: #f8fafc; }
.container { text-align: center; background: white; padding: 48px 64px; border-radius: 8px;
border: 1px solid #e2e8f0; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.icon { width: 48px; height: 48px; margin: 0 auto 24px; background: #fee2e2; border-radius: 50%;
display: flex; align-items: center; justify-content: center; }
.icon svg { width: 24px; height: 24px; color: #dc2626; }
h1 { color: #0f172a; margin: 0 0 8px 0; font-size: 20px; font-weight: 600; }
p { color: #64748b; margin: 0 0 8px 0; font-size: 14px; }
code { background: #f1f5f9; padding: 2px 8px; border-radius: 4px; color: #dc2626; font-size: 13px; }
</style>
</head>
<body>
<div class="container">
<div class="icon">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12"/>
</svg>
</div>
<h1>Authentication Failed</h1>
<p>Error: <code>${escapeHtml(error)}</code></p>
${description ? `<p>${escapeHtml(description)}</p>` : ""}
</div>
</body>
</html>`;
  }
};
/**
 * Shared token lookup for CLIOAuthClient#getAccessToken and #tryGetAccessToken:
 * environment variables, then a still-valid stored token, then a refresh.
 *
 * @param client - The CLIOAuthClient instance whose storage is consulted.
 * @param refreshFailureLabel - Debug message logged when the refresh throws.
 * @returns Token info, or null when nothing reusable exists.
 */
async function resolveReusableCliToken(client, refreshFailureLabel) {
  const envTokens = loadTokensFromEnv();
  if (envTokens) {
    debug("Using tokens from environment variables");
    return {
      accessToken: envTokens.accessToken,
      tokenType: envTokens.tokenType,
      expiresAt: envTokens.expiresAt,
      refreshed: false,
      fromEnv: true
    };
  }
  const storedTokens = await client.storage.loadTokens();
  if (!storedTokens?.accessToken) {
    return null;
  }
  const isValid = await client.storage.hasValidToken();
  if (isValid) {
    debug("Using cached tokens from storage");
    return {
      accessToken: storedTokens.accessToken,
      tokenType: storedTokens.tokenType,
      expiresAt: storedTokens.expiresAt,
      refreshed: false,
      fromEnv: false
    };
  }
  if (!storedTokens.refreshToken) {
    return null;
  }
  debug("Token expired, attempting refresh");
  try {
    const refreshedTokens = await client.refreshStoredToken(storedTokens);
    return {
      accessToken: refreshedTokens.accessToken,
      tokenType: refreshedTokens.tokenType,
      expiresAt: refreshedTokens.expiresAt,
      refreshed: true,
      fromEnv: false
    };
  } catch (error) {
    // Best effort: the caller decides whether to re-authenticate or give up.
    debug(refreshFailureLabel, error);
    return null;
  }
}
1453
/**
 * Decide whether a browser can plausibly be opened from this process.
 *
 * Treated as headless when the CI environment variable is set, when stdin
 * is not a TTY, or when running on Linux with neither DISPLAY nor
 * WAYLAND_DISPLAY set.
 *
 * @returns {boolean} true when the environment looks headless.
 */
function isHeadless() {
  const runningInCi = Boolean(process.env.CI);
  // Short-circuit so stdin and the display variables are only consulted
  // when the earlier checks did not already decide the answer.
  return runningInCi || !process.stdin.isTTY || process.platform === "linux" && !process.env.DISPLAY && !process.env.WAYLAND_DISPLAY;
}
1465
/**
 * Escape the five HTML-significant characters (& < > " ') in a string.
 *
 * @param {string} text - Raw text to embed in HTML.
 * @returns {string} Text with those characters replaced by entities.
 */
function escapeHtml(text) {
  const entityFor = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;"
  };
  // A single pass over the input; each special character maps to its entity.
  return text.replace(/[&<>"']/g, (character) => entityFor[character]);
}
1468
/**
 * Build an MCP client for the given configuration and connect it.
 *
 * The config is validated first. A stdio config spawns the server command
 * through a stdio transport; an HTTP config connects through the streamable
 * HTTP transport, adding a Bearer Authorization header from
 * `auth.accessToken` unless an `authProvider` was supplied.
 *
 * @param config - Raw MCP configuration (validated via validateMCPConfig).
 * @param options - Optional `clientInfo` ({ name, version }) and `authProvider`.
 * @returns The client instance after the connection attempt.
 */
async function createMCPClientForConfig(config, options) {
  const validatedConfig = validateMCPConfig(config);
  const clientInfo = {
    name: options?.clientInfo?.name ?? "@gleanwork/mcp-server-tester",
    version: options?.clientInfo?.version ?? "0.1.0"
  };
  const clientOptions = { capabilities: validatedConfig.capabilities ?? {} };
  const client = new index_js.Client(clientInfo, clientOptions);
  if (isStdioConfig(validatedConfig)) {
    const transportOptions = {
      command: validatedConfig.command,
      args: validatedConfig.args ?? []
    };
    if (validatedConfig.cwd) {
      transportOptions.cwd = validatedConfig.cwd;
    }
    // Suppress server stderr when quiet mode is enabled
    if (validatedConfig.quiet) {
      transportOptions.stderr = "ignore";
    }
    const transport = new stdio_js.StdioClientTransport(transportOptions);
    debugClient("Connecting via stdio: %O", {
      command: validatedConfig.command,
      args: validatedConfig.args,
      cwd: validatedConfig.cwd
    });
    await client.connect(transport);
  } else if (isHttpConfig(validatedConfig)) {
    const headers = { ...validatedConfig.headers };
    const useStaticToken = validatedConfig.auth?.accessToken && !options?.authProvider;
    if (useStaticToken) {
      headers.Authorization = `Bearer ${validatedConfig.auth.accessToken}`;
    }
    const headerNames = Object.keys(headers);
    const hasHeaders = headerNames.length > 0;
    const transport = new streamableHttp_js.StreamableHTTPClientTransport(
      new URL(validatedConfig.serverUrl),
      {
        requestInit: hasHeaders ? { headers } : void 0,
        // Pass auth provider for OAuth flow - MCP SDK handles it automatically
        authProvider: options?.authProvider
      }
    );
    // Only header names are logged, never their values.
    debugClient("Connecting via HTTP: %O", {
      serverUrl: validatedConfig.serverUrl,
      headers: hasHeaders ? headerNames : void 0,
      hasAuthProvider: !!options?.authProvider
    });
    await client.connect(transport);
  }
  debugClient("Connected successfully");
  const serverInfo = client.getServerVersion();
  if (serverInfo) {
    debugClient("Server info: %O", serverInfo);
  }
  return client;
}
1520
/**
 * Close an MCP client, logging any failure before propagating it.
 *
 * @param client - Object exposing a `close()` method.
 * @throws Whatever `client.close()` throws, after it has been logged.
 */
async function closeMCPClient(client) {
  try {
    await client.close();
  } catch (closeFailure) {
    // Log for visibility, then rethrow the original error unchanged.
    console.error("[MCP] Error closing client:", closeFailure);
    throw closeFailure;
  }
}
1528
+
1529
+ // src/mcp/response.ts
1530
/**
 * Convert a raw tool-call result into a uniform shape.
 *
 * @param result - Raw result; `content`, `isError` and `structuredContent` are read.
 * @returns `{ text, raw, isError, contentBlocks, structuredContent }` where
 *   `text` joins every string `text` field of the content blocks with "\n".
 *   When no block carries text, `text` falls back to `structuredContent`
 *   (as-is if it is a string, JSON-serialized otherwise).
 */
function normalizeToolResponse(result) {
  const contentBlocks = [];
  const textParts = [];
  const rawBlocks = Array.isArray(result.content) ? result.content : [];
  for (const entry of rawBlocks) {
    // Skip anything that is not a non-null object.
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const normalized = {
      type: typeof entry.type === "string" ? entry.type : "unknown"
    };
    if (typeof entry.text === "string") {
      normalized.text = entry.text;
      textParts.push(entry.text);
    }
    if (entry.data !== void 0) {
      normalized.data = entry.data;
    }
    if (typeof entry.mimeType === "string") {
      normalized.mimeType = entry.mimeType;
    }
    contentBlocks.push(normalized);
  }
  const hasStructured = result.structuredContent !== void 0;
  const structuredContent = hasStructured ? result.structuredContent : null;
  // Structured content only supplies the text when no block provided any.
  if (hasStructured && textParts.length === 0 && structuredContent != null) {
    textParts.push(
      typeof structuredContent === "string" ? structuredContent : JSON.stringify(structuredContent)
    );
  }
  return {
    text: textParts.join("\n"),
    raw: result,
    isError: result.isError ?? false,
    contentBlocks,
    structuredContent
  };
}
1576
/**
 * Produce a text representation of a response of any supported shape.
 *
 * Strings are returned as-is; numbers, booleans and bigints are converted
 * with String(); null, undefined and other non-object values yield "".
 * Objects are tried in order: normalized response, tool-call result, content
 * array, then an object with `content`, `structuredContent` or `text`, and
 * finally a JSON dump of the object.
 *
 * @param response - Value to extract text from.
 * @returns The extracted text.
 */
function extractText(response) {
  if (response === null || response === void 0) {
    return "";
  }
  switch (typeof response) {
    case "string":
      return response;
    case "number":
    case "boolean":
    case "bigint":
      return String(response);
    case "object":
      break;
    default:
      return "";
  }
  if (isNormalizedResponse(response)) {
    return response.text;
  }
  if (isCallToolResult(response)) {
    return normalizeToolResponse(response).text;
  }
  if (Array.isArray(response)) {
    return extractTextFromContentArray(response);
  }
  if (Array.isArray(response.content)) {
    return extractTextFromContentArray(response.content);
  }
  if (typeof response.content === "string") {
    return response.content;
  }
  if (response.structuredContent !== void 0) {
    const structured = response.structuredContent;
    return typeof structured === "string" ? structured : JSON.stringify(structured);
  }
  return typeof response.text === "string" ? response.text : JSON.stringify(response);
}
1616
/**
 * Structural check for the object shape returned by normalizeToolResponse:
 * string `text`, boolean `isError`, array `contentBlocks`, defined `raw`.
 *
 * @param value - Candidate value.
 * @returns {boolean} true when all four fields have the expected types.
 */
function isNormalizedResponse(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const hasText = typeof value.text === "string";
  const hasErrorFlag = typeof value.isError === "boolean";
  return hasText && hasErrorFlag && Array.isArray(value.contentBlocks) && value.raw !== void 0;
}
1623
/**
 * Loose structural check for a raw tool-call result: an object that has an
 * array `content` or a boolean `isError`.
 *
 * @param value - Candidate value.
 * @returns {boolean} true when either marker field is present.
 */
function isCallToolResult(value) {
  const isObject = typeof value === "object" && value !== null;
  if (!isObject) {
    return false;
  }
  if (Array.isArray(value.content)) {
    return true;
  }
  return typeof value.isError === "boolean";
}
1630
/**
 * Join the text of every `{ type: "text", text: string }` block with "\n".
 *
 * @param content - Array of content blocks.
 * @returns {string} The joined text, or the JSON serialization of the whole
 *   array when it contains no text block.
 */
function extractTextFromContentArray(content) {
  const isTextBlock = (entry) => typeof entry === "object" && entry !== null && entry.type === "text" && typeof entry.text === "string";
  const texts = content.filter(isTextBlock).map((entry) => entry.text);
  return texts.length > 0 ? texts.join("\n") : JSON.stringify(content);
}
1646
+
1647
+ // src/assertions/validators/utils.ts
1648
// Alias of extractText under the name the validator functions call.
var extractText2 = extractText;
1649
/**
 * Measure a response in UTF-8 bytes.
 *
 * @param response - Any value; null/undefined count as 0 bytes.
 * @returns {number} Byte length of the string itself, or of the value's
 *   2-space-indented JSON serialization for non-strings.
 */
function getResponseSizeBytes(response) {
  if (response == null) {
    return 0;
  }
  const payload = typeof response === "string" ? response : JSON.stringify(response, null, 2);
  return Buffer.byteLength(payload, "utf8");
}
1659
/**
 * Render a response as a string for comparison or display.
 *
 * @param response - Any value.
 * @returns "" for null/undefined, the string itself for strings, otherwise
 *   the 2-space-indented JSON serialization.
 */
function stringifyResponse(response) {
  if (response == null) {
    return "";
  }
  return typeof response === "string" ? response : JSON.stringify(response, null, 2);
}
1668
/**
 * Tell whether a response is flagged as an error.
 *
 * @param response - Any value.
 * @returns {boolean} true when `response.isError === true`, or when
 *   `response.raw` is an object whose `isError === true`.
 */
function isErrorResponse(response) {
  if (typeof response !== "object" || response === null) {
    return false;
  }
  if (response.isError === true) {
    return true;
  }
  // Fall back to the wrapped raw result, if one is present.
  const wrapped = "raw" in response ? response.raw : void 0;
  return typeof wrapped === "object" && wrapped !== null && wrapped.isError === true;
}
1685
/**
 * Get the text of an error response.
 *
 * @param response - Any value.
 * @returns {string} The extracted text when the response is an error
 *   (per isErrorResponse), otherwise "".
 */
function extractErrorMessage(response) {
  return isErrorResponse(response) ? extractText2(response) : "";
}
1691
/**
 * Collapse every run of whitespace into one space and strip both ends.
 *
 * @param {string} text - Input text.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(text) {
  // Splitting on whitespace runs leaves empty strings at the edges; drop them.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
1694
+
1695
+ // src/assertions/validators/response.ts
1696
/**
 * Compare two responses by their string form (see stringifyResponse).
 *
 * @param actual - Observed response.
 * @param expected - Expected response.
 * @returns `{ pass, message }`; on mismatch also `details` holding truncated
 *   `actual` and `expected` strings.
 */
function validateResponse(actual, expected) {
  const actualStr = stringifyResponse(actual);
  const expectedStr = stringifyResponse(expected);
  if (actualStr !== expectedStr) {
    return {
      pass: false,
      message: `Response does not match expected value`,
      details: {
        actual: truncateForDisplay(actualStr),
        expected: truncateForDisplay(expectedStr)
      }
    };
  }
  return {
    pass: true,
    message: "Response matches expected value"
  };
}
1714
/**
 * Shorten a string for display in failure details.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=500] - Maximum number of characters kept.
 * @returns {string} `str` unchanged when short enough, otherwise its first
 *   `maxLength` characters followed by "... (truncated)".
 */
function truncateForDisplay(str, maxLength = 500) {
  const fits = str.length <= maxLength;
  return fits ? str : str.slice(0, maxLength) + "... (truncated)";
}
1720
+
1721
+ // src/assertions/validators/schema.ts
1722
/**
 * Validate a response against a schema object exposing `parse(value)`
 * (e.g. a Zod schema).
 *
 * The value handed to the schema is derived by getValidatableValue.
 *
 * @param response - Response to validate.
 * @param schema - Schema whose `parse` throws on invalid input.
 * @param options - Accepted for interface compatibility; `options.strict`
 *   currently has no effect on validation.
 * @returns `{ pass, message }`; on failure also `details.issues`, which holds
 *   the schema's issue list (empty when the failure was not a validation
 *   error carrying an `issues` array).
 */
function validateSchema(response, schema, options = {}) {
  const valueToValidate = getValidatableValue(response);
  try {
    schema.parse(valueToValidate);
    return {
      pass: true,
      message: "Response matches schema"
    };
  } catch (error) {
    // Errors without an `issues` array (a throwing refinement, an invalid
    // schema object, ...) cannot be formatted as schema issues. Report them
    // as a failed validation instead of crashing inside this handler.
    if (!Array.isArray(error?.issues)) {
      const reason = error instanceof Error ? error.message : String(error);
      return {
        pass: false,
        message: `Response does not match schema: ${reason}`,
        details: {
          issues: []
        }
      };
    }
    const issues = formatZodIssues(error);
    return {
      pass: false,
      message: `Response does not match schema: ${issues}`,
      details: {
        issues: error.issues
      }
    };
  }
}
1743
/**
 * Pick the value that schema validation should run against.
 *
 * - null/undefined yield null.
 * - A string is JSON-parsed when it looks like JSON, else returned as-is.
 * - A non-array object with a defined `structuredContent` yields that.
 * - An object with `raw`, `text`, `isError` and `contentBlocks` yields its
 *   `text` parsed as JSON, or the object itself when parsing fails.
 * - An object with an array `content` yields its extracted text parsed as
 *   JSON, or the object itself when parsing fails.
 * - Anything else is returned unchanged.
 */
function getValidatableValue(response) {
  if (response == null) {
    return null;
  }
  if (typeof response === "string") {
    return tryParseJson(response) ?? response;
  }
  if (typeof response !== "object" || Array.isArray(response)) {
    return response;
  }
  if ("structuredContent" in response && response.structuredContent !== void 0) {
    return response.structuredContent;
  }
  const normalizedKeys = ["raw", "text", "isError", "contentBlocks"];
  if (normalizedKeys.every((key) => key in response)) {
    return tryParseJson(response.text) ?? response;
  }
  if ("content" in response && Array.isArray(response.content)) {
    return tryParseJson(extractText2(response)) ?? response;
  }
  return response;
}
1770
/**
 * Parse text as JSON when it looks like a JSON object or array.
 *
 * @param text - Candidate text.
 * @returns The parsed value, or null when `text` is not a non-empty string,
 *   is not delimited by {/[ and }/] after trimming, or fails to parse.
 */
function tryParseJson(text) {
  if (typeof text !== "string" || text === "") {
    return null;
  }
  const candidate = text.trim();
  const first = candidate[0];
  const last = candidate[candidate.length - 1];
  const opensLikeJson = first === "{" || first === "[";
  const closesLikeJson = last === "}" || last === "]";
  if (!opensLikeJson || !closesLikeJson) {
    return null;
  }
  try {
    return JSON.parse(candidate);
  } catch {
    return null;
  }
}
1784
/**
 * Render validation issues as a single line of the form "path: message; ...".
 *
 * @param error - Object with an `issues` array of `{ path, message }`.
 * @returns {string} Issues joined by "; "; an empty path is shown as "root".
 */
function formatZodIssues(error) {
  const lines = [];
  for (const { path: segments, message } of error.issues) {
    const location = segments.length > 0 ? segments.join(".") : "root";
    lines.push(`${location}: ${message}`);
  }
  return lines.join("; ");
}
1791
+
1792
+ // src/assertions/validators/text.ts
1793
/**
 * Check that the response text contains every expected substring.
 *
 * @param response - Response whose text is extracted via extractText2.
 * @param expected - One substring or an array of substrings.
 * @param options - `caseSensitive` (default true).
 * @returns `{ pass, message }`; on failure also `details` with the `missing`
 *   substrings, the `textLength` and a truncated `textPreview`.
 */
function validateText(response, expected, options = {}) {
  const { caseSensitive = true } = options;
  const expectedStrings = Array.isArray(expected) ? expected : [expected];
  const text = extractText2(response);
  // Lower-case both sides when the comparison is case-insensitive.
  const fold = (value) => caseSensitive ? value : value.toLowerCase();
  const haystack = fold(text);
  const missing = expectedStrings.filter((needle) => !haystack.includes(fold(needle)));
  if (missing.length > 0) {
    return {
      pass: false,
      message: missing.length === 1 ? `Response does not contain expected text: "${missing[0]}"` : `Response is missing ${missing.length} expected substrings: ${missing.map((s) => `"${s}"`).join(", ")}`,
      details: {
        missing,
        textLength: text.length,
        textPreview: truncateForDisplay2(text)
      }
    };
  }
  return {
    pass: true,
    message: expectedStrings.length === 1 ? `Response contains expected text` : `Response contains all ${expectedStrings.length} expected substrings`
  };
}
1821
/**
 * Shorten a string for the text-validator preview.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string} `str` unchanged when short enough, otherwise its first
 *   `maxLength` characters followed by "... (truncated)".
 */
function truncateForDisplay2(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : str.slice(0, maxLength) + "... (truncated)";
}
1827
+
1828
+ // src/assertions/validators/pattern.ts
1829
/**
 * Check that the response text matches every given pattern.
 *
 * @param response - Response whose text is extracted via extractText2.
 * @param patterns - One pattern or an array; each a RegExp or regex source string.
 * @param options - `caseSensitive` (default true).
 * @returns `{ pass, message }`; on failure also `details` with the
 *   `unmatched` patterns (as strings), the `textLength` and a truncated
 *   `textPreview`.
 */
function validatePattern(response, patterns, options = {}) {
  const { caseSensitive = true } = options;
  const ignoreCase = !caseSensitive;
  const patternList = Array.isArray(patterns) ? patterns : [patterns];
  const text = extractText2(response);
  const unmatched = patternList.filter((candidate) => !toRegExp(candidate, ignoreCase).test(text)).map((candidate) => patternToString(candidate));
  if (unmatched.length > 0) {
    return {
      pass: false,
      message: unmatched.length === 1 ? `Response does not match pattern: ${unmatched[0]}` : `Response does not match ${unmatched.length} patterns: ${unmatched.join(", ")}`,
      details: {
        unmatched,
        textLength: text.length,
        textPreview: truncateForDisplay3(text)
      }
    };
  }
  return {
    pass: true,
    message: patternList.length === 1 ? `Response matches pattern` : `Response matches all ${patternList.length} patterns`
  };
}
1857
/**
 * Normalises a pattern into a RegExp that is safe to use with `.test()`.
 *
 * A fresh RegExp is always returned, even when the caller passed a RegExp
 * instance. Regexes with the `g` or `y` flag keep a mutable `lastIndex`
 * between `.test()` calls; handing back the caller's own instance would make
 * matching depend on how that instance had been used before. A clone starts
 * at `lastIndex === 0` and leaves the caller's object untouched.
 *
 * @param {string|RegExp} pattern - Pattern source or RegExp instance.
 * @param {boolean} caseInsensitive - When true, the `i` flag is ensured.
 * @returns {RegExp} A new RegExp with the original flags (plus `i` if requested).
 */
function toRegExp(pattern, caseInsensitive) {
  if (pattern instanceof RegExp) {
    const needsIgnoreCase = caseInsensitive && !pattern.flags.includes("i");
    const flags = needsIgnoreCase ? `${pattern.flags}i` : pattern.flags;
    return new RegExp(pattern.source, flags);
  }
  return new RegExp(pattern, caseInsensitive ? "i" : "");
}
1867
/**
 * Renders a pattern for use in failure messages.
 *
 * @param {string|RegExp} pattern - Pattern to print.
 * @returns {string} The RegExp's own string form, or the string wrapped in slashes.
 */
function patternToString(pattern) {
  return pattern instanceof RegExp ? pattern.toString() : `/${pattern}/`;
}
1873
/**
 * Shortens a string for display in failure details.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string} `str` unchanged when short enough, otherwise its first
 *   `maxLength` characters followed by a truncation marker.
 */
function truncateForDisplay3(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
1879
+
1880
+ // src/assertions/validators/error.ts
1881
/**
 * Validates whether a tool response is (or is not) an error, optionally
 * checking that the error message contains expected text.
 *
 * @param {unknown} response - Tool response to inspect.
 * @param {boolean|string|string[]} [expected=true]
 *   - `true`: the response must be an error.
 *   - `false`: the response must not be an error.
 *   - string / string[]: the response must be an error whose message contains
 *     at least one of the given strings (compared case-insensitively).
 * @returns {{pass: boolean, message: string, details?: object}} Validation outcome.
 */
function validateError(response, expected = true) {
  const gotError = isErrorResponse(response);
  const errorMessage = gotError ? extractErrorMessage(response) : "";

  if (expected === true) {
    return gotError
      ? { pass: true, message: "Response is an error as expected" }
      : {
          pass: false,
          message: "Expected an error response but got success",
          details: {
            textPreview: truncateForDisplay4(extractText2(response))
          }
        };
  }

  if (expected === false) {
    return gotError
      ? {
          pass: false,
          message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
          details: {
            errorMessage
          }
        }
      : { pass: true, message: "Response is not an error as expected" };
  }

  // From here on `expected` names text the error message should contain.
  const expectedMessages = Array.isArray(expected) ? expected : [expected];

  if (!gotError) {
    return {
      pass: false,
      message: `Expected an error containing "${expectedMessages[0]}" but got success`,
      details: {
        textPreview: truncateForDisplay4(extractText2(response))
      }
    };
  }

  const matchIndex = expectedMessages.findIndex(
    (candidate) => errorMessage.toLowerCase().includes(candidate.toLowerCase())
  );
  if (matchIndex !== -1) {
    return {
      pass: true,
      message: "Error message contains expected text"
    };
  }

  const failureMessage =
    expectedMessages.length === 1
      ? `Error message does not contain "${expectedMessages[0]}"`
      : `Error message does not contain any of: ${expectedMessages.map((m) => `"${m}"`).join(", ")}`;
  return {
    pass: false,
    message: failureMessage,
    details: {
      actualErrorMessage: errorMessage,
      expectedToContain: expectedMessages
    }
  };
}
1943
/**
 * Shortens a string for display in failure details.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string} `str` unchanged when short enough, otherwise its first
 *   `maxLength` characters followed by a truncation marker.
 */
function truncateForDisplay4(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
1949
+
1950
+ // src/assertions/validators/size.ts
1951
/**
 * Validates that a tool response's byte size lies within optional bounds.
 *
 * @param {unknown} response - Tool response measured via `getResponseSizeBytes`.
 * @param {{maxBytes?: number, minBytes?: number}} options - At least one bound
 *   must be supplied, otherwise the validation fails immediately.
 * @returns {{pass: boolean, message: string, details?: object}} Validation outcome.
 */
function validateSize(response, options) {
  const { maxBytes, minBytes } = options;
  const hasMin = minBytes !== void 0;
  const hasMax = maxBytes !== void 0;

  if (!hasMin && !hasMax) {
    return {
      pass: false,
      message: "Size validation requires at least one of maxBytes or minBytes"
    };
  }

  const actualBytes = getResponseSizeBytes(response);
  const problems = [];
  if (hasMin && actualBytes < minBytes) {
    problems.push(
      `Response size (${formatBytes(actualBytes)}) is below minimum (${formatBytes(minBytes)})`
    );
  }
  if (hasMax && actualBytes > maxBytes) {
    problems.push(
      `Response size (${formatBytes(actualBytes)}) exceeds maximum (${formatBytes(maxBytes)})`
    );
  }

  if (problems.length > 0) {
    return {
      pass: false,
      message: problems.join("; "),
      details: {
        actualBytes,
        minBytes,
        maxBytes
      }
    };
  }

  return {
    pass: true,
    message: `Response size (${formatBytes(actualBytes)}) is within bounds`,
    details: {
      actualBytes
    }
  };
}
1990
/**
 * Formats a byte count for messages, using binary (1024-based) units.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} e.g. `"512 bytes"`, `"1.5 KB"`, `"2.0 MB"`.
 */
function formatBytes(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;

  if (bytes >= MIB) {
    return `${(bytes / MIB).toFixed(1)} MB`;
  }
  if (bytes >= KIB) {
    return `${(bytes / KIB).toFixed(1)} KB`;
  }
  return `${bytes} bytes`;
}
1999
+
2000
+ // src/mcp/fixtures/mcpFixture.ts
2001
// Optional handle on Playwright's `test.step`, bound to `test`. Stays `null`
// when "@playwright/test" cannot be loaded or exposes no `test.step`, so
// callers can fall back to running their work without a step wrapper.
var testStep = null;
try {
  const playwrightTest = __require("@playwright/test")?.test;
  if (playwrightTest && playwrightTest.step) {
    testStep = playwrightTest.step.bind(playwrightTest);
  }
} catch {
  // Best effort only: any failure to load Playwright leaves testStep as null.
}
2009
/**
 * Builds the high-level MCP test API around a connected client.
 *
 * Without `testInfo` the API is a thin pass-through to the client. With
 * `testInfo`, `listTools` and `callTool` additionally attach a JSON record of
 * the operation to the test and run inside a Playwright step when `testStep`
 * is available; `getServerInfo` attaches its record without awaiting it.
 *
 * @param {object} client - MCP client exposing `listTools`, `callTool`, `getServerVersion`.
 * @param {object} [testInfo] - Playwright test info used for attachments.
 * @param {{authType?: string, project?: string}} [options] - Metadata echoed on the API
 *   and in call attachments; `authType` defaults to `"none"`.
 * @returns {object} `{ client, authType, project, listTools, callTool, getServerInfo }`.
 */
function createMCPFixture(client, testInfo, options) {
  const authType = options?.authType ?? "none";
  const project = options?.project;

  // Primitives shared by the plain and the instrumented API.
  const fetchTools = async () => (await client.listTools()).tools;
  const invokeTool = (name, args) => client.callTool({ name, arguments: args });
  const readServerInfo = () => {
    const info = client.getServerVersion();
    return info ? { name: info.name, version: info.version } : null;
  };

  if (!testInfo) {
    return {
      client,
      authType,
      project,
      async listTools() {
        return fetchTools();
      },
      async callTool(name, args) {
        return invokeTool(name, args);
      },
      getServerInfo() {
        return readServerInfo();
      }
    };
  }

  const attachJson = (attachmentName, payload) =>
    testInfo.attach(attachmentName, {
      contentType: "application/json",
      body: JSON.stringify(payload, null, 2)
    });
  // Wrap in a Playwright step when one is available, otherwise just run.
  const runAsStep = (title, body) => (testStep ? testStep(title, body) : body());

  return {
    client,
    authType,
    project,
    async listTools() {
      return runAsStep("MCP: listTools()", async () => {
        const tools = await fetchTools();
        await attachJson("mcp-list-tools", {
          operation: "listTools",
          toolCount: tools.length,
          tools: tools.map((tool) => ({
            name: tool.name,
            description: tool.description
          }))
        });
        return tools;
      });
    },
    async callTool(name, args) {
      return runAsStep(`MCP: callTool("${name}")`, async () => {
        const startedAt = Date.now();
        const result = await invokeTool(name, args);
        const durationMs = Date.now() - startedAt;
        await attachJson(`mcp-call-${name}`, {
          operation: "callTool",
          toolName: name,
          args,
          result,
          durationMs,
          isError: result.isError || false,
          authType,
          project
        });
        return result;
      });
    },
    getServerInfo() {
      const serverInfo = readServerInfo();
      // Fire-and-forget: this method is synchronous, so attachment failures are ignored.
      attachJson("mcp-server-info", {
        operation: "getServerInfo",
        serverInfo
      }).catch(() => {
      });
      return serverInfo;
    }
  };
}
2118
+
2119
+ // src/assertions/matchers/toMatchToolResponse.ts
2120
/**
 * Matcher: asserts that a tool response matches an expected response, as
 * decided by `validateResponse`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {unknown} expected - Expected response.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toMatchToolResponse(received, expected) {
  const result = validateResponse(received, expected);
  const describe = () =>
    this.isNot && result.pass
      ? "Expected response NOT to match, but it did"
      : result.message;
  return { pass: result.pass, message: describe };
}
2132
+
2133
+ // src/assertions/matchers/toMatchToolSchema.ts
2134
/**
 * Matcher: asserts that a tool response conforms to a schema, as decided by
 * `validateSchema`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {unknown} schema - Schema forwarded to `validateSchema`.
 * @param {object} [options] - Options forwarded to `validateSchema`.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toMatchToolSchema(received, schema, options = {}) {
  const result = validateSchema(received, schema, options);
  const describe = () =>
    this.isNot && result.pass
      ? "Expected response NOT to match schema, but it did"
      : result.message;
  return { pass: result.pass, message: describe };
}
2146
+
2147
+ // src/assertions/matchers/toContainToolText.ts
2148
/**
 * Matcher: asserts that a tool response's text contains the expected
 * substring(s), as decided by `validateText`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {string|string[]} expected - Substring(s) that must be present.
 * @param {{caseSensitive?: boolean}} [options] - Forwarded to `validateText`.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toContainToolText(received, expected, options = {}) {
  const result = validateText(received, expected, options);
  const describe = () => {
    // Only a negated matcher that unexpectedly matched needs its own wording.
    if (!(this.isNot && result.pass)) {
      return result.message;
    }
    const expectedStr = Array.isArray(expected)
      ? expected.map((s) => `"${s}"`).join(", ")
      : `"${expected}"`;
    return `Expected response NOT to contain ${expectedStr}, but it did`;
  };
  return { pass: result.pass, message: describe };
}
2161
+
2162
+ // src/assertions/matchers/toMatchToolPattern.ts
2163
/**
 * Matcher: asserts that a tool response's text matches the given pattern(s),
 * as decided by `validatePattern`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {string|RegExp|Array<string|RegExp>} patterns - Pattern(s) that must all match.
 * @param {{caseSensitive?: boolean}} [options] - Forwarded to `validatePattern`.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toMatchToolPattern(received, patterns, options = {}) {
  const result = validatePattern(received, patterns, options);
  const describe = () =>
    this.isNot && result.pass
      ? "Expected response NOT to match pattern(s), but it did"
      : result.message;
  return { pass: result.pass, message: describe };
}
2175
// Built-in snapshot sanitizers, keyed by name. Each entry pairs a global regex
// with the placeholder that replaces every match of it.
var BUILT_IN_PATTERNS = Object.fromEntries(
  [
    // Runs of 10-13 digits (epoch seconds / milliseconds).
    ["timestamp", /\b\d{10,13}\b/g, "[TIMESTAMP]"],
    // UUIDs, versions 1-5, matched case-insensitively.
    ["uuid", /\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/gi, "[UUID]"],
    // ISO-8601 dates with optional time, fraction and zone.
    ["iso-date", /\b\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,3})?(Z|[+-]\d{2}:?\d{2})?)?\b/g, "[ISO_DATE]"],
    // 24 hexadecimal characters.
    ["objectId", /\b[0-9a-f]{24}\b/gi, "[OBJECT_ID]"],
    // Three dot-separated base64url segments whose first two start with "eyJ".
    ["jwt", /\beyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]+\b/g, "[JWT]"]
  ].map(([name, pattern, replacement]) => [name, { pattern, replacement }])
);
2197
/**
 * Type guard for regex sanitizers.
 *
 * @param {unknown} sanitizer - Candidate sanitizer.
 * @returns {boolean} True for any non-null object that has a `pattern` property.
 */
function isRegexSanitizer(sanitizer) {
  if (sanitizer === null || typeof sanitizer !== "object") {
    return false;
  }
  return "pattern" in sanitizer;
}
2200
/**
 * Type guard for field-removal sanitizers.
 *
 * @param {unknown} sanitizer - Candidate sanitizer.
 * @returns {boolean} True for any non-null object that has a `remove` property.
 */
function isFieldRemovalSanitizer(sanitizer) {
  if (sanitizer === null || typeof sanitizer !== "object") {
    return false;
  }
  return "remove" in sanitizer;
}
2203
/**
 * Applies snapshot sanitizers to a string, in the order given.
 *
 * Supported sanitizers:
 *  - a string naming an entry of `BUILT_IN_PATTERNS` (unknown names are ignored);
 *  - `{ pattern, replacement? }`: a regex replacement; string patterns are
 *    compiled with the `g` flag, and the replacement defaults to "[SANITIZED]";
 *  - `{ remove: string[] }`: parses the text as JSON, deletes the dotted paths
 *    and re-serialises with 2-space indentation. Text that is not valid JSON
 *    is left unchanged.
 *
 * @param {string} value - Text to sanitize.
 * @param {Iterable<string|object>} sanitizers - Sanitizers to apply.
 * @returns {string} The sanitized text.
 */
function applySanitizers(value, sanitizers) {
  const applyOne = (text, sanitizer) => {
    if (typeof sanitizer === "string") {
      const builtIn = BUILT_IN_PATTERNS[sanitizer];
      return builtIn ? text.replace(builtIn.pattern, builtIn.replacement) : text;
    }
    if (isRegexSanitizer(sanitizer)) {
      const regex =
        sanitizer.pattern instanceof RegExp
          ? sanitizer.pattern
          : new RegExp(sanitizer.pattern, "g");
      return text.replace(regex, sanitizer.replacement ?? "[SANITIZED]");
    }
    if (isFieldRemovalSanitizer(sanitizer)) {
      try {
        const parsed = JSON.parse(text);
        removeFields(parsed, sanitizer.remove);
        return JSON.stringify(parsed, null, 2);
      } catch {
        // Deliberate best effort: leave the text untouched on any failure.
        return text;
      }
    }
    return text;
  };

  let sanitized = value;
  for (const sanitizer of sanitizers) {
    sanitized = applyOne(sanitized, sanitizer);
  }
  return sanitized;
}
2230
/**
 * Deletes fields from an object in place, addressed by dotted paths
 * (e.g. `"meta.requestId"` or `"items.0.id"`).
 *
 * Only OWN properties are traversed and deleted. Following inherited
 * properties would let a path such as `"__proto__.toString"` reach
 * `Object.prototype` and delete from it, affecting the whole process.
 * Paths that do not resolve are ignored.
 *
 * @param {unknown} obj - Root value; anything that is not a non-null object is a no-op.
 * @param {string[]} paths - Dotted paths of the fields to remove.
 * @returns {void}
 */
function removeFields(obj, paths) {
  if (typeof obj !== "object" || obj === null) {
    return;
  }
  const isObject = (value) => typeof value === "object" && value !== null;
  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);

  for (const dottedPath of paths) {
    const parents = dottedPath.split(".");
    // split() always yields at least one element, so lastKey is always a string.
    const lastKey = parents.pop();

    let current = obj;
    for (const key of parents) {
      if (!isObject(current) || !hasOwn(current, key)) {
        current = void 0; // path does not resolve; nothing to delete
        break;
      }
      current = current[key];
    }

    if (isObject(current) && hasOwn(current, lastKey)) {
      delete current[lastKey];
    }
  }
}
2257
/**
 * Matcher: asserts that a tool response's (optionally sanitized) text matches
 * a named snapshot.
 *
 * `pass` reports whether the snapshot matched, independent of `.not`. The
 * expect framework negates `pass` itself for `.not` assertions, so negating
 * it here as well would make `expect(x).not.toMatchToolSnapshot()` succeed
 * exactly when the snapshot DOES match. Only the message depends on `isNot`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {string} name - Snapshot name passed to `toMatchSnapshot`.
 * @param {Array<string|object>} [sanitizers=[]] - Sanitizers applied before comparison.
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toMatchToolSnapshot(received, name, sanitizers = []) {
  let content = extractText2(received);
  if (sanitizers.length > 0) {
    content = applySanitizers(content, sanitizers);
  }

  let failure;
  let matched = true;
  try {
    await test$1.expect(content).toMatchSnapshot(name);
  } catch (error) {
    matched = false;
    failure = error;
  }

  if (matched) {
    return {
      pass: true,
      message: () =>
        this.isNot
          ? `Expected response NOT to match snapshot "${name}", but it did`
          : `Response matches snapshot "${name}"`
    };
  }
  return {
    pass: false,
    message: () => {
      if (this.isNot) {
        return `Response does not match snapshot "${name}" as expected`;
      }
      return failure instanceof Error
        ? failure.message
        : `Response does not match snapshot "${name}"`;
    }
  };
}
2289
+
2290
+ // src/assertions/matchers/toBeToolError.ts
2291
/**
 * Matcher: asserts that a tool response is an error (optionally containing
 * expected text), as decided by `validateError`.
 *
 * Under `.not` the validation is run against the opposite expectation
 * (`true` / string(s) -> "must be a success", `false` -> "must be an error")
 * and the outcome is inverted once more, because the expect framework negates
 * `pass` itself. `pass` values are unchanged from the previous implementation;
 * only the `.not` messages are corrected, which used to describe a failing
 * assertion as "Response is not an error as expected".
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {boolean|string|string[]} [expected=true] - See `validateError`.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toBeToolError(received, expected = true) {
  let effectiveExpected = expected;
  if (this.isNot) {
    effectiveExpected = typeof expected === "boolean" ? !expected : false;
  }
  const result = validateError(received, effectiveExpected);

  const describe = () => {
    if (!this.isNot || expected === false) {
      // Not negated, or `.not.toBeToolError(false)` (validated as "must be an
      // error"): validateError's own wording is accurate either way.
      return result.message;
    }
    // Negated with `true` or text: validated as "must not be an error".
    if (result.pass) {
      return "Response is not an error as expected";
    }
    return expected === true
      ? "Expected response NOT to be an error, but it was"
      : result.message;
  };

  return {
    pass: this.isNot ? !result.pass : result.pass,
    message: describe
  };
}
2308
/**
 * Creates an LLM judge backed by the Claude Agent SDK.
 *
 * @param {{model?: string, maxBudgetUsd?: number, maxToolOutputSize?: number}} config
 *   `model` defaults to "claude-sonnet-4-20250514" and `maxBudgetUsd` to 0.1.
 *   When `maxToolOutputSize` is set, candidates whose UTF-8 size exceeds it
 *   are failed without calling the model.
 * @returns {{evaluate: Function}} Judge exposing
 *   `evaluate(candidate, reference, rubric)`, which resolves to the verdict
 *   (`pass`, `score`, `reasoning`), usage statistics and size information.
 *   Any failure during the model call or response parsing is rethrown as an
 *   Error prefixed with "Claude Agent judge evaluation failed: ".
 */
function createClaudeAgentJudge(config) {
  const model = config.model ?? "claude-sonnet-4-20250514";
  const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
  const { maxToolOutputSize } = config;

  // Drains the SDK's message stream and keeps the last "result" message.
  const runQuery = async (prompt) => {
    let lastResult;
    const stream = claudeAgentSdk.query({
      prompt,
      options: {
        model,
        maxBudgetUsd,
        // Use empty tools array for response-only mode
        tools: [],
        // Bypass permissions since we're not using any tools
        permissionMode: "bypassPermissions",
        allowDangerouslySkipPermissions: true,
        // Use a custom system prompt for JSON output
        systemPrompt: buildSystemPrompt(),
        // Limit to 1 turn since this is a simple evaluation
        maxTurns: 1
      }
    });
    for await (const message of stream) {
      if (message.type === "result") {
        lastResult = message;
      }
    }
    return lastResult;
  };

  return {
    async evaluate(candidate, reference, rubric) {
      const serialized =
        typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
      const candidateSizeBytes = Buffer.byteLength(serialized, "utf8");

      const tooLarge = maxToolOutputSize !== void 0 && candidateSizeBytes > maxToolOutputSize;
      if (tooLarge) {
        return {
          pass: false,
          score: 0,
          reasoning: `Tool output size (${candidateSizeBytes} bytes) exceeds maximum allowed size (${maxToolOutputSize} bytes)`,
          candidateSizeBytes,
          exceedsMaxToolOutputSize: true
        };
      }

      const prompt = buildJudgePrompt(candidate, reference, rubric);
      try {
        const resultMessage = await runQuery(prompt);
        if (!resultMessage) {
          throw new Error("No result message received from Claude Agent SDK");
        }
        if (resultMessage.subtype !== "success" && resultMessage.errors?.length) {
          throw new Error(
            `Claude Agent SDK error: ${resultMessage.errors.join(", ")}`
          );
        }

        const verdict = parseJudgeResponse(resultMessage.result ?? "");
        const sdkUsage = resultMessage.usage;
        return {
          pass: verdict.pass ?? false,
          score: verdict.score,
          reasoning: verdict.reasoning,
          usage: {
            inputTokens: sdkUsage?.input_tokens ?? 0,
            outputTokens: sdkUsage?.output_tokens ?? 0,
            totalCostUsd: resultMessage.total_cost_usd ?? 0,
            durationMs: resultMessage.duration_ms ?? 0,
            durationApiMs: resultMessage.duration_api_ms,
            cacheReadInputTokens: sdkUsage?.cache_read_input_tokens,
            cacheCreationInputTokens: sdkUsage?.cache_creation_input_tokens
          },
          candidateSizeBytes,
          exceedsMaxToolOutputSize: false
        };
      } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        throw new Error(
          `Claude Agent judge evaluation failed: ${detail}`
        );
      }
    }
  };
}
2383
/**
 * Builds the system prompt instructing the judge model to answer with a bare
 * JSON verdict.
 *
 * @returns {string} The system prompt.
 */
function buildSystemPrompt() {
  const sentences = [
    "You are an expert evaluator.",
    "Evaluate the candidate response based on the rubric provided.",
    'Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}.',
    "Do not include any other text, markdown formatting, or code blocks."
  ];
  return sentences.join(" ");
}
2386
/**
 * Assembles the user prompt for the judge: rubric, candidate, optional
 * reference, and closing instructions.
 *
 * @param {unknown} candidate - Response under evaluation; non-strings are pretty-printed as JSON.
 * @param {unknown} reference - Optional reference response; omitted when null/undefined.
 * @param {string} rubric - Evaluation criteria.
 * @returns {string} The complete prompt.
 */
function buildJudgePrompt(candidate, reference, rubric) {
  const render = (value) =>
    typeof value === "string" ? value : JSON.stringify(value, null, 2);
  const hasReference = reference !== null && reference !== void 0;

  const referenceSection = hasReference
    ? ["\n\n# Reference Response\n", render(reference)]
    : [];
  const referenceHint = hasReference
    ? "Compare it against the reference response if helpful. "
    : "";

  // Assembled with Array#join so that undefined pieces render as empty strings.
  const sections = [
    "# Evaluation Task\n",
    rubric,
    "\n\n# Candidate Response\n",
    render(candidate),
    ...referenceSection,
    "\n\n# Instructions\nEvaluate the candidate response based on the rubric. " + referenceHint + 'Respond with JSON containing "pass" (boolean), "score" (0-1), and "reasoning" (string).'
  ];
  return sections.join("");
}
2405
/**
 * Parses the judge model's reply into a verdict object.
 *
 * Markdown code fences are stripped first. If the remaining text is not valid
 * JSON, the widest `{ ... "pass" ... }` span is extracted and parsed instead.
 * Every failure, including a failed fallback parse, surfaces as the same
 * descriptive Error rather than a raw SyntaxError.
 *
 * @param {string} text - Raw model output.
 * @returns {any} The parsed JSON value (expected shape: `{pass, score, reasoning}`).
 * @throws {Error} "Failed to parse judge response as JSON: …" when nothing parseable is found.
 */
function parseJudgeResponse(text) {
  let jsonText = text.trim();
  if (jsonText.startsWith("```json")) {
    jsonText = jsonText.slice(7);
  }
  if (jsonText.startsWith("```")) {
    jsonText = jsonText.slice(3);
  }
  if (jsonText.endsWith("```")) {
    jsonText = jsonText.slice(0, -3);
  }
  jsonText = jsonText.trim();

  try {
    return JSON.parse(jsonText);
  } catch {
    // Not pure JSON; fall through and look for an embedded object.
  }

  const jsonMatch = jsonText.match(/\{[\s\S]*"pass"[\s\S]*\}/);
  if (jsonMatch) {
    try {
      return JSON.parse(jsonMatch[0]);
    } catch {
      // The embedded span is not valid JSON either; report it uniformly below.
    }
  }
  throw new Error(`Failed to parse judge response as JSON: ${text}`);
}
2427
+
2428
+ // src/judge/judgeClient.ts
2429
/**
 * Creates a judge for the configured provider.
 *
 * @param {{provider?: string}} [config] - `provider` defaults to "claude";
 *   "anthropic" is accepted as an alias. The whole config is forwarded to
 *   `createClaudeAgentJudge`.
 * @returns {object} The judge returned by `createClaudeAgentJudge`.
 * @throws {Error} For the removed providers ("openai", "custom-http") and for
 *   any unknown provider.
 */
function createJudge(config = {}) {
  const provider = config.provider ?? "claude";

  if (provider === "claude" || provider === "anthropic") {
    return createClaudeAgentJudge(config);
  }
  if (provider === "openai") {
    throw new Error(
      'OpenAI provider is no longer supported. Please use createJudge() without specifying provider, or use provider: "claude". See migration guide at https://github.com/gleanwork/mcp-server-tester/blob/main/docs/migration-v0.11.md'
    );
  }
  if (provider === "custom-http") {
    throw new Error(
      "custom-http provider is no longer supported. Please use createJudge() without specifying provider."
    );
  }
  throw new Error(`Unsupported LLM provider: ${String(provider)}`);
}
2447
+
2448
+ // src/assertions/matchers/toPassToolJudge.ts
2449
// Minimum judge score (0-1) that counts as passing when no threshold is given.
var DEFAULT_PASSING_THRESHOLD = 0.7;
// Judge configuration used when the caller supplies none (provider defaults apply).
var DEFAULT_JUDGE_CONFIG = {};

/**
 * Matcher: asserts that an LLM judge scores the response at or above a
 * threshold for the given rubric.
 *
 * `pass` reports whether the judge passed the response, independent of
 * `.not`. The expect framework negates `pass` itself for `.not` assertions,
 * so negating it here as well would make `expect(x).not.toPassToolJudge()`
 * succeed exactly when the judge PASSES. Only the message depends on `isNot`.
 *
 * A judge error always fails the assertion: `pass` is set so that the
 * framework reports a failure both with and without `.not`, instead of letting
 * an infrastructure error silently satisfy a negated expectation.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Response to evaluate.
 * @param {string} rubric - Evaluation criteria for the judge.
 * @param {{reference?: unknown, passingThreshold?: number, judgeConfig?: object}} [options]
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toPassToolJudge(received, rubric, options = {}) {
  const {
    reference = null,
    passingThreshold = DEFAULT_PASSING_THRESHOLD,
    judgeConfig = DEFAULT_JUDGE_CONFIG
  } = options;
  const judge = createJudge(judgeConfig);
  try {
    const result = await judge.evaluate(received, reference, rubric);
    // Fall back to the boolean verdict when the judge returned no score.
    const score = result.score ?? (result.pass ? 1 : 0);
    const passes = score >= passingThreshold;

    if (passes) {
      return {
        pass: true,
        message: () =>
          this.isNot
            ? `Expected judge evaluation to fail, but it passed with score ${score.toFixed(2)}`
            : `Judge evaluation passed with score ${score.toFixed(2)} (threshold: ${passingThreshold})`
      };
    }
    return {
      pass: false,
      message: () =>
        this.isNot
          ? `Judge evaluation failed as expected with score ${score.toFixed(2)}`
          : `Judge evaluation failed with score ${score.toFixed(2)} (threshold: ${passingThreshold}). Reasoning: ${result.reasoning ?? "No reasoning provided"}`
    };
  } catch (error) {
    return {
      // `true` under `.not`, `false` otherwise: a failure in both modes.
      pass: Boolean(this.isNot),
      message: () => `Judge evaluation failed with error: ${error instanceof Error ? error.message : String(error)}`
    };
  }
}
2485
+
2486
+ // src/assertions/matchers/toHaveToolResponseSize.ts
2487
/**
 * Matcher: asserts that a tool response's size lies within the given bounds,
 * as decided by `validateSize`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {{maxBytes?: number, minBytes?: number}} options - Size bounds.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toHaveToolResponseSize(received, options) {
  const result = validateSize(received, options);
  const describe = () =>
    this.isNot && result.pass
      ? "Expected response size NOT to be within bounds, but it was"
      : result.message;
  return { pass: result.pass, message: describe };
}
2499
+
2500
+ // src/assertions/matchers/toSatisfyToolPredicate.ts
2501
/**
 * Normalises a predicate's return value into `{ pass, message }` form.
 *
 * @param {boolean|{pass: boolean, message?: string}} result - Raw predicate result.
 * @returns {{pass: boolean, message?: string}} Booleans are wrapped with a
 *   default message; anything else is returned unchanged.
 */
function normalizeResult(result) {
  if (typeof result !== "boolean") {
    return result;
  }
  const message = result ? "Predicate passed" : "Predicate returned false";
  return { pass: result, message };
}
2510
/**
 * Matcher: asserts that a tool response satisfies a user-supplied predicate.
 *
 * `pass` reports whether the predicate was satisfied, independent of `.not`.
 * The expect framework negates `pass` itself for `.not` assertions, so
 * negating it here as well would make `expect(x).not.toSatisfyToolPredicate()`
 * succeed exactly when the predicate IS satisfied. Only the message depends
 * on `isNot`.
 *
 * @this {{isNot?: boolean}} Matcher context supplied by the expect framework.
 * @param {unknown} received - Actual tool response.
 * @param {(response: unknown, text: string) => (boolean|object|Promise<boolean|object>)} predicate
 *   Receives the response and its extracted text; returns a boolean or `{pass, message?}`.
 * @param {string} [description] - Human-readable predicate name used in messages.
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toSatisfyToolPredicate(received, predicate, description) {
  const predicateDescription = description ?? "custom predicate";
  try {
    const text = extractText2(received);
    const rawResult = await predicate(received, text);
    const result = normalizeResult(rawResult);

    if (this.isNot) {
      return {
        pass: result.pass,
        message: () =>
          result.pass
            ? `Expected response NOT to satisfy ${predicateDescription}`
            : `Response does not satisfy ${predicateDescription} as expected`
      };
    }
    return {
      pass: result.pass,
      message: () =>
        result.pass
          ? result.message ?? `Response satisfies ${predicateDescription}`
          : result.message ?? `Expected response to satisfy ${predicateDescription}`
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      // A throwing predicate always fails the assertion: `true` under `.not`
      // and `false` otherwise both end up as failures once the framework
      // applies its own negation.
      pass: Boolean(this.isNot),
      message: () => `Predicate threw error: ${errorMessage}`
    };
  }
}
2535
+
2536
+ // src/assertions/matchers/index.ts
2537
// Playwright `expect` extended with the MCP tool-response matchers.
var expect = test$1.expect.extend({
  // Structural / content matchers
  toMatchToolResponse: toMatchToolResponse,
  toMatchToolSchema: toMatchToolSchema,
  toContainToolText: toContainToolText,
  toMatchToolPattern: toMatchToolPattern,
  toMatchToolSnapshot: toMatchToolSnapshot,
  // Error, judge, size and custom-predicate matchers
  toBeToolError: toBeToolError,
  toPassToolJudge: toPassToolJudge,
  toHaveToolResponseSize: toHaveToolResponseSize,
  toSatisfyToolPredicate: toSatisfyToolPredicate
});
2548
+
2549
+ // src/fixtures/mcp.ts
2550
init_oauthClientProvider();
var test = test$1.test.extend({
  /**
   * Internal per-test state shared between the fixtures below; records which
   * auth type `mcpClient` ended up using so `mcp` can report it.
   */
  _mcpFixtureState: [
    // eslint-disable-next-line no-empty-pattern
    async ({}, use) => {
      await use({ resolvedAuthType: "none" });
    },
    { scope: "test" }
  ],
  /**
   * mcpClient fixture: creates and connects an MCP client, and closes it when
   * the test is done.
   *
   * Configuration comes from the project's `use.mcpConfig` setting in
   * playwright.config.ts.
   *
   * Authentication resolution:
   * 1. `auth.oauth.authStatePath` set → a PlaywrightOAuthClientProvider is created
   * 2. `auth.accessToken` set → reported as "api-token"
   * 3. HTTP transport with neither of the above → tries CLI-stored tokens
   *    (from `mcp-server-tester login`) and injects the access token if found
   */
  mcpClient: async ({ _mcpFixtureState }, use, testInfo) => {
    const { mcpConfig } = testInfo.project.use;
    if (!mcpConfig) {
      throw new Error(
        `Missing mcpConfig in project.use for project "${testInfo.project.name}". Please add mcpConfig to your project configuration in playwright.config.ts`
      );
    }

    let resolvedAuthType = "none";
    let authProvider;
    let effectiveConfig = mcpConfig;

    if (mcpConfig.auth?.oauth?.authStatePath) {
      const oauthSettings = mcpConfig.auth.oauth;
      authProvider = new exports.PlaywrightOAuthClientProvider({
        storagePath: oauthSettings.authStatePath,
        redirectUri: oauthSettings.redirectUri ?? "http://localhost:3000/oauth/callback",
        clientId: oauthSettings.clientId,
        clientSecret: oauthSettings.clientSecret
      });
      resolvedAuthType = "oauth";
    }

    // An explicit access token takes precedence in the reported auth type.
    if (mcpConfig.auth?.accessToken) {
      resolvedAuthType = "api-token";
    }

    const shouldTryCliToken =
      isHttpConfig(mcpConfig) &&
      !mcpConfig.auth?.accessToken &&
      !mcpConfig.auth?.oauth?.authStatePath;
    if (shouldTryCliToken) {
      const cliClient = new CLIOAuthClient({
        mcpServerUrl: mcpConfig.serverUrl
      });
      const storedToken = await cliClient.tryGetAccessToken();
      if (storedToken) {
        effectiveConfig = {
          ...mcpConfig,
          auth: {
            ...mcpConfig.auth,
            accessToken: storedToken.accessToken
          }
        };
        resolvedAuthType = "oauth";
      }
    }

    _mcpFixtureState.resolvedAuthType = resolvedAuthType;

    const client = await createMCPClientForConfig(effectiveConfig, {
      clientInfo: {
        name: "@gleanwork/mcp-server-tester",
        version: "0.1.0"
      },
      authProvider
    });
    try {
      await use(client);
    } finally {
      await closeMCPClient(client);
    }
  },
  /**
   * mcp fixture: high-level test API built on top of the mcpClient fixture.
   *
   * Passes `testInfo` to `createMCPFixture` so MCP operations are attached to
   * the test, tagged with the resolved auth type and project name.
   */
  mcp: async ({ mcpClient, _mcpFixtureState }, use, testInfo) => {
    const fixtureOptions = {
      authType: _mcpFixtureState.resolvedAuthType,
      project: testInfo.project.name
    };
    await use(createMCPFixture(mcpClient, testInfo, fixtureOptions));
  }
});
2642
// Zod schemas describing serialized eval datasets. They are built inside one
// factory so the small field builders can be shared without adding
// module-level names.
var {
  LLMHostConfigSchema,
  SnapshotSanitizerSchema,
  EvalExpectBlockSchema,
  EvalCaseSchema,
  EvalDatasetSchema
} = (() => {
  const { z } = zod;

  // Field builders; each call returns a fresh schema instance.
  const optionalString = () => z.string().optional();
  const optionalNumber = () => z.number().optional();
  const optionalUnknown = () => z.unknown().optional();
  const optionalRecord = () => z.record(z.unknown()).optional();
  const stringOrStringList = () => z.union([z.string(), z.array(z.string())]);

  // Settings for running a case through an LLM host.
  const llmHostConfig = z.object({
    provider: z.enum(["openai", "anthropic"]),
    apiKeyEnvVar: optionalString(),
    model: optionalString(),
    maxTokens: optionalNumber(),
    temperature: optionalNumber(),
    maxToolCalls: optionalNumber()
  });

  const snapshotSanitizer = z.union([
    // Built-in sanitizers
    z.enum(["timestamp", "uuid", "iso-date", "objectId", "jwt"]),
    // Custom regex sanitizer
    z.object({
      pattern: z.string(),
      replacement: optionalString()
    }),
    // Field removal sanitizer
    z.object({
      remove: z.array(z.string())
    })
  ]);

  // Expectations that can be attached to a single eval case.
  const evalExpectBlock = z.object({
    response: optionalUnknown(),
    schema: optionalString(),
    containsText: stringOrStringList().optional(),
    matchesPattern: stringOrStringList().optional(),
    snapshot: optionalString(),
    snapshotSanitizers: z.array(snapshotSanitizer).optional(),
    isError: z.union([z.boolean(), z.string(), z.array(z.string())]).optional(),
    passesJudge: z.object({
      rubric: z.string(),
      reference: optionalUnknown(),
      threshold: z.number().min(0).max(1).optional(),
      configId: optionalString()
    }).optional(),
    responseSize: z.object({
      maxBytes: optionalNumber(),
      minBytes: optionalNumber()
    }).optional()
  });

  const evalCase = z.object({
    id: z.string().min(1, "id must not be empty"),
    description: optionalString(),
    mode: z.enum(["direct", "llm_host"]).optional(),
    toolName: z.string().min(1, "toolName must not be empty").optional(),
    args: optionalRecord(),
    scenario: optionalString(),
    llmHostConfig: llmHostConfig.optional(),
    metadata: optionalRecord(),
    expect: evalExpectBlock.optional()
  });

  const evalDataset = z.object({
    name: z.string().min(1, "name must not be empty"),
    description: optionalString(),
    cases: z.array(evalCase).min(1, "dataset must have at least one case"),
    metadata: optionalRecord()
  });

  return {
    LLMHostConfigSchema: llmHostConfig,
    SnapshotSanitizerSchema: snapshotSanitizer,
    EvalExpectBlockSchema: evalExpectBlock,
    EvalCaseSchema: evalCase,
    EvalDatasetSchema: evalDataset
  };
})();
2699
/**
 * Validates a single eval case against `EvalCaseSchema`.
 *
 * @param {unknown} evalCase - Candidate eval case.
 * @returns {object} The parsed eval case.
 * @throws When the value does not satisfy the schema (error raised by `parse`).
 */
function validateEvalCase(evalCase) {
  const parsedCase = EvalCaseSchema.parse(evalCase);
  return parsedCase;
}
2702
/**
 * Validates a whole eval dataset against `EvalDatasetSchema`.
 *
 * @param {unknown} dataset - Candidate dataset.
 * @returns {object} The parsed dataset.
 * @throws When the value does not satisfy the schema (error raised by `parse`).
 */
function validateEvalDataset(dataset) {
  const parsedDataset = EvalDatasetSchema.parse(dataset);
  return parsedDataset;
}
2705
/**
 * Loads an eval dataset from a JSON file.
 *
 * @param {string} filePath - Path of the JSON file to read (as UTF-8).
 * @param {{schemas?: object, validate?: boolean}} [options] - `schemas` is
 *   attached to the result (default `{}`); `validate` (default `true`) runs
 *   the parsed data through `validateEvalDataset`.
 * @returns {Promise<object>} The dataset with a `schemas` property added.
 * @throws {Error} "Failed to parse JSON from …" when a SyntaxError occurs;
 *   every other error is rethrown unchanged.
 */
async function loadEvalDataset(filePath, options = {}) {
  const { schemas, validate = true } = options;
  try {
    const rawData = JSON.parse(await fs.readFile(filePath, "utf-8"));
    const serializedDataset = validate ? validateEvalDataset(rawData) : rawData;
    return {
      ...serializedDataset,
      schemas: schemas ?? {}
    };
  } catch (error) {
    if (!(error instanceof SyntaxError)) {
      throw error;
    }
    throw new Error(
      `Failed to parse JSON from ${filePath}: ${error.message}`
    );
  }
}
2725
/**
 * Builds an eval dataset from an in-memory object.
 *
 * @param {unknown} data - Serialized dataset.
 * @param {{schemas?: object, validate?: boolean}} [options] - `schemas` is
 *   attached to the result (default `{}`); `validate` (default `true`) runs
 *   `data` through `validateEvalDataset`.
 * @returns {object} The dataset with a `schemas` property added.
 */
function loadEvalDatasetFromObject(data, options = {}) {
  const { schemas, validate = true } = options;
  const serializedDataset = validate ? validateEvalDataset(data) : data;
  return { ...serializedDataset, schemas: schemas ?? {} };
}
2734
+
2735
+ // src/evals/llmHost/adapter.ts
2736
// Registry of LLM host adapter factories, keyed by provider name.
var adapters = /* @__PURE__ */ new Map();

/**
 * Registers (or replaces) the adapter factory for a provider.
 *
 * @param {string} provider - Provider name.
 * @param {() => object} factory - Zero-argument function creating an adapter.
 * @returns {void}
 */
function registerAdapter(provider, factory) {
  adapters.set(provider, factory);
}

/**
 * Creates an adapter for a provider by invoking its registered factory.
 *
 * @param {string} provider - Provider name.
 * @returns {object} Whatever the registered factory returns.
 * @throws {Error} When no factory is registered; the message lists the known providers.
 */
function getAdapter(provider) {
  const factory = adapters.get(provider);
  if (factory) {
    return factory();
  }
  const available = Array.from(adapters.keys()).join(", ");
  throw new Error(
    `No adapter registered for provider: ${provider}. Available: ${available}`
  );
}

/**
 * Reports whether a factory is registered for a provider.
 *
 * @param {string} provider - Provider name.
 * @returns {boolean} True when a factory is registered.
 */
function hasAdapter(provider) {
  return adapters.has(provider);
}
2752
+
2753
+ // src/evals/llmHost/retry.ts
2754
// Defaults used by withRetry for any option the caller leaves undefined.
var DEFAULT_OPTIONS = {
  // Total number of calls to make, including the first one.
  maxAttempts: 3,
  // Delay before the first retry, in milliseconds; doubles on each further retry.
  baseDelayMs: 1000,
  // Upper bound on any single delay, in milliseconds.
  maxDelayMs: 30000,
  // Decides whether a given error is worth retrying.
  isRetryable: isRetryableError
};
2760
/**
 * Runs an async function, retrying retryable failures with exponential
 * backoff and up to 10% jitter.
 *
 * `fn` is always called at least once. A `maxAttempts` below 1 (or NaN) is
 * treated as 1; previously the loop was skipped entirely and the call
 * rejected with `undefined` without ever invoking `fn`.
 *
 * @param {() => Promise<any>} fn - Operation to run.
 * @param {object} [options]
 * @param {number} [options.maxAttempts] - Total attempts, including the first.
 * @param {number} [options.baseDelayMs] - Delay before the first retry; doubles per retry.
 * @param {number} [options.maxDelayMs] - Upper bound for a single delay.
 * @param {(error: unknown) => boolean} [options.isRetryable] - Whether an error should be retried.
 * @param {(error: unknown, attempt: number, delayMs: number) => void} [options.onRetry]
 *   Called before each wait.
 * @returns {Promise<any>} The first successful result of `fn`.
 * @throws The last error from `fn` once attempts are exhausted or the error is not retryable.
 */
async function withRetry(fn, options = {}) {
  const {
    maxAttempts = DEFAULT_OPTIONS.maxAttempts,
    baseDelayMs = DEFAULT_OPTIONS.baseDelayMs,
    maxDelayMs = DEFAULT_OPTIONS.maxDelayMs,
    isRetryable = DEFAULT_OPTIONS.isRetryable,
    onRetry
  } = options;

  // Guarantee one attempt; the comparison is false for NaN, so NaN also becomes 1.
  const attemptLimit = maxAttempts >= 1 ? maxAttempts : 1;

  let lastError;
  for (let attempt = 1; attempt <= attemptLimit; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt >= attemptLimit || !isRetryable(error)) {
        throw error;
      }
      const exponentialDelay = baseDelayMs * Math.pow(2, attempt - 1);
      const jitter = Math.random() * 0.1 * exponentialDelay;
      const delayMs = Math.min(exponentialDelay + jitter, maxDelayMs);
      if (onRetry) {
        onRetry(error, attempt, delayMs);
      }
      await sleep(delayMs);
    }
  }
  // Reached only when a fractional limit ends the loop right after a wait.
  throw lastError;
}
2788
/**
 * Decides whether a failure looks transient and is worth retrying.
 *
 * When a numeric status code can be extracted from `error`, only that code
 * is considered (429, 500, 502, 503, 504 are retryable). Otherwise the
 * lower-cased error message is searched for known transient-failure phrases.
 *
 * @param {*} error - Thrown value of any type.
 * @returns {boolean} True when the failure should be retried.
 */
function isRetryableError(error) {
  const status = extractStatusCode(error);
  if (status === null) {
    const text = extractErrorMessage2(error).toLowerCase();
    const transientMarkers = [
      "rate limit",
      "429",
      "too many requests",
      "timeout",
      "temporarily unavailable",
      "service unavailable",
      "internal server error"
    ];
    return transientMarkers.some((marker) => text.includes(marker));
  }
  switch (status) {
    case 429:
    case 500:
    case 502:
    case 503:
    case 504:
      return true;
    default:
      return false;
  }
}
2796
/**
 * Pulls a numeric status code out of an error-like object.
 *
 * Candidates are checked in this order and the first one that is a number
 * wins: `status`, `statusCode`, `response.status` (only when `response` is a
 * truthy object), `code`.
 *
 * @param {*} error - Thrown value of any type.
 * @returns {number|null} The status code, or null when `error` is not an
 *   object or none of the candidates is a number.
 */
function extractStatusCode(error) {
  const isObject = typeof error === "object" && error !== null;
  if (!isObject) {
    return null;
  }
  const nested = error.response;
  const nestedStatus = nested && typeof nested === "object" ? nested.status : undefined;
  const candidates = [error.status, error.statusCode, nestedStatus, error.code];
  for (const candidate of candidates) {
    if (typeof candidate === "number") {
      return candidate;
    }
  }
  return null;
}
2818
/**
 * Produces a human-readable message for an arbitrary thrown value.
 *
 * - null / undefined       -> ""
 * - string                 -> the string itself
 * - Error                  -> `error.message`
 * - other object           -> string `message`, else string `error`, else
 *                             its JSON serialization
 * - number / boolean       -> `String(error)`
 * - anything else          -> "Unknown error"
 *
 * Always returns a string and never throws: when an object cannot be
 * serialized (circular reference, BigInt field) or serializes to
 * `undefined`, its `Object.prototype.toString` tag is returned instead.
 *
 * @param {*} error - Thrown value of any type.
 * @returns {string}
 */
function extractErrorMessage2(error) {
  if (error == null) {
    return "";
  }
  if (typeof error === "string") {
    return error;
  }
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "object") {
    const e = error;
    if (typeof e.message === "string") {
      return e.message;
    }
    if (typeof e.error === "string") {
      return e.error;
    }
    try {
      const serialized = JSON.stringify(error);
      // JSON.stringify yields undefined when toJSON() returns undefined.
      if (typeof serialized === "string") {
        return serialized;
      }
    } catch {
      // Not serializable; fall through to the generic tag below so the
      // original failure is not masked by a serialization error.
    }
    return Object.prototype.toString.call(error);
  }
  if (typeof error === "number" || typeof error === "boolean") {
    return String(error);
  }
  return "Unknown error";
}
2843
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds,
 * scheduled via `setTimeout`.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
2846
+
2847
+ // src/evals/llmHost/orchestrator.ts
2848
/**
 * Drives a chat loop between an LLM adapter and an MCP server.
 *
 * Each turn sends the running message list to `adapter.chat` (wrapped in
 * `withRetry`). If the reply requests tool calls, every call is executed via
 * `mcp.callTool`, its text is extracted with `extractText`, and the results
 * are appended to the message list before the next turn. The loop ends on
 * the first reply without tool calls, or after `config.maxToolCalls` turns
 * (10 when that value is falsy).
 *
 * @param {object} adapter - Provider adapter (createClient, formatTools,
 *   chat, createUserMessage, createAssistantMessage, createToolResultMessage).
 * @param {object} mcp - Object exposing `listTools()` and `callTool()`.
 * @param {string} scenario - Initial user prompt.
 * @param {object} config - LLM host configuration passed to the adapter.
 * @param {object} [options]
 * @param {object} [options.retry] - Options forwarded to `withRetry`.
 * @returns {Promise<object>} On success `{ success: true, toolCalls,
 *   response, conversationHistory }`; when anything inside the loop throws,
 *   `{ success: false, toolCalls, error, conversationHistory }`.
 */
async function runSimulation(adapter, mcp, scenario, config, options = {}) {
  const turnLimit = config.maxToolCalls || 10;
  const retrySettings = options.retry || {};
  const recordedCalls = [];
  const transcript = [];
  try {
    const llmClient = await adapter.createClient(config);
    const serverTools = await mcp.listTools();
    const toolSpecs = adapter.formatTools(serverTools);
    const thread = [adapter.createUserMessage(scenario)];
    transcript.push({ role: "user", content: scenario });
    let answer = "";
    let turn = 0;
    while (turn < turnLimit) {
      turn += 1;
      const reply = await withRetry(
        () => adapter.chat(llmClient, thread, toolSpecs, config),
        retrySettings
      );
      const requestsTools = reply.wantsToolCalls && reply.toolCalls.length > 0;
      if (!requestsTools) {
        answer = reply.textContent || "";
        transcript.push({ role: "assistant", content: answer });
        break;
      }
      thread.push(adapter.createAssistantMessage(reply));
      const toolOutputs = [];
      for (const call of reply.toolCalls) {
        recordedCalls.push(call);
        const rawResult = await mcp.callTool(call.name, call.arguments);
        const text = extractText(rawResult);
        toolOutputs.push(adapter.createToolResultMessage(call, text));
        transcript.push({ role: "tool", content: text });
      }
      if (adapter.provider === "anthropic") {
        // For this provider all tool results go into one user message.
        thread.push({ role: "user", content: toolOutputs });
      } else {
        thread.push(...toolOutputs);
      }
    }
    return {
      success: true,
      toolCalls: recordedCalls,
      response: answer,
      conversationHistory: transcript
    };
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    return {
      success: false,
      toolCalls: recordedCalls,
      error: reason,
      conversationHistory: transcript
    };
  }
}
2913
+
2914
+ // src/evals/llmHost/adapters/openai.ts
2915
/**
 * Creates the adapter that lets the simulation loop talk to OpenAI chat
 * completions.
 *
 * @returns {object} Adapter with `provider: "openai"` and the methods
 *   createClient, formatTools, chat, createUserMessage,
 *   createAssistantMessage and createToolResultMessage.
 */
function createOpenAIAdapter() {
  return {
    provider: "openai",
    /**
     * Lazily imports the `openai` package and builds a client.
     * The API key is read from the environment variable named by
     * `config.apiKeyEnvVar` (default "OPENAI_API_KEY").
     *
     * @throws {Error} When the package cannot be imported or the key is unset.
     */
    async createClient(config) {
      let OpenAI;
      try {
        const module = await import('openai');
        OpenAI = module.OpenAI;
      } catch {
        throw new Error(
          "OpenAI SDK is not installed. Install it with: npm install openai"
        );
      }
      const apiKeyEnvVar = config.apiKeyEnvVar || "OPENAI_API_KEY";
      const apiKey = process.env[apiKeyEnvVar];
      if (!apiKey) {
        throw new Error(
          `OpenAI API key not found in environment variable ${apiKeyEnvVar}`
        );
      }
      return new OpenAI({ apiKey });
    },
    /**
     * Converts MCP tool descriptors into OpenAI function-tool definitions.
     */
    formatTools(tools) {
      return tools.map((tool) => ({
        type: "function",
        function: {
          name: tool.name,
          description: tool.description || "",
          parameters: tool.inputSchema || {}
        }
      }));
    },
    /**
     * Sends one chat-completion request and normalizes the first choice.
     *
     * @returns {Promise<object>} `{ wantsToolCalls, toolCalls, textContent,
     *   rawResponse }`.
     * @throws {Error} "No response from OpenAI" when the first choice has no
     *   message.
     * @throws {SyntaxError} When a tool call carries malformed JSON arguments.
     */
    async chat(client, messages, tools, config) {
      const openai = client;
      const request = {
        model: config.model || "gpt-4o",
        messages,
        temperature: config.temperature ?? 0,
        max_tokens: config.maxTokens
      };
      // The API rejects an empty `tools` array, so the field is only sent
      // when there is at least one tool to offer.
      if (tools && tools.length > 0) {
        request.tools = tools;
      }
      const response = await openai.chat.completions.create(request);
      const message = response.choices[0]?.message;
      if (!message) {
        throw new Error("No response from OpenAI");
      }
      if (message.tool_calls && message.tool_calls.length > 0) {
        const toolCalls = message.tool_calls.map((tc) => {
          const rawArguments = tc.function.arguments;
          // Calls to parameterless tools may arrive with a blank argument
          // string, which JSON.parse would reject; treat it as "no arguments".
          const hasArguments = typeof rawArguments === "string" && rawArguments.trim() !== "";
          return {
            name: tc.function.name,
            arguments: hasArguments ? JSON.parse(rawArguments) : {},
            id: tc.id
          };
        });
        return {
          wantsToolCalls: true,
          toolCalls,
          textContent: message.content,
          rawResponse: response
        };
      }
      return {
        wantsToolCalls: false,
        toolCalls: [],
        textContent: message.content,
        rawResponse: response
      };
    },
    /** Wraps the scenario text as a user message. */
    createUserMessage(scenario) {
      return {
        role: "user",
        content: scenario
      };
    },
    /**
     * Rebuilds the assistant message (text plus the original `tool_calls`)
     * from a chat result so it can be appended to the conversation.
     */
    createAssistantMessage(chatResult) {
      const rawResponse = chatResult.rawResponse;
      return {
        role: "assistant",
        content: chatResult.textContent,
        tool_calls: rawResponse.choices[0]?.message?.tool_calls
      };
    },
    /** Builds the `tool` role message that answers `toolCall`. */
    createToolResultMessage(toolCall, result) {
      return {
        role: "tool",
        tool_call_id: toolCall.id,
        content: result
      };
    }
  };
}
3004
+
3005
+ // src/evals/llmHost/adapters/anthropic.ts
3006
/**
 * Creates the adapter that lets the simulation loop talk to the Anthropic
 * Messages API.
 *
 * @returns {object} Adapter with `provider: "anthropic"` and the methods
 *   createClient, formatTools, chat, createUserMessage,
 *   createAssistantMessage and createToolResultMessage.
 */
function createAnthropicAdapter() {
  return {
    provider: "anthropic",
    /**
     * Lazily imports `@anthropic-ai/sdk` and builds a client.
     * The API key is read from the environment variable named by
     * `config.apiKeyEnvVar` (default "ANTHROPIC_API_KEY").
     *
     * @throws {Error} When the package cannot be imported or the key is unset.
     */
    async createClient(config) {
      let Anthropic;
      try {
        const module = await import('@anthropic-ai/sdk');
        Anthropic = module.default;
      } catch {
        throw new Error(
          "Anthropic SDK is not installed. Install it with: npm install @anthropic-ai/sdk"
        );
      }
      const apiKeyEnvVar = config.apiKeyEnvVar || "ANTHROPIC_API_KEY";
      const apiKey = process.env[apiKeyEnvVar];
      if (!apiKey) {
        throw new Error(
          `Anthropic API key not found in environment variable ${apiKeyEnvVar}`
        );
      }
      return new Anthropic({ apiKey });
    },
    /**
     * Converts MCP tool descriptors into Anthropic tool definitions.
     * A tool without an input schema gets an empty object schema, because
     * a bare `{}` has no `type` and is not a valid tool input schema.
     */
    formatTools(tools) {
      return tools.map((tool) => ({
        name: tool.name,
        description: tool.description || "",
        input_schema: tool.inputSchema || { type: "object", properties: {} }
      }));
    },
    /**
     * Sends one messages request and normalizes the reply.
     *
     * `textContent` is the text of the first text block, or null when there
     * is none (or it is empty).
     *
     * @returns {Promise<object>} `{ wantsToolCalls, toolCalls, textContent,
     *   rawResponse }`.
     * @throws {Error} "Response exceeded max tokens" when the reply stopped
     *   because of the token limit.
     */
    async chat(client, messages, tools, config) {
      const anthropic = client;
      const response = await anthropic.messages.create({
        model: config.model || "claude-3-5-sonnet-20241022",
        max_tokens: config.maxTokens || 4096,
        temperature: config.temperature ?? 0,
        messages,
        tools
      });
      const blocks = response.content;
      const textBlock = blocks.find((block) => block.type === "text");
      const textContent = textBlock?.text || null;
      if (response.stop_reason === "tool_use") {
        const toolCalls = blocks
          .filter((block) => block.type === "tool_use")
          .map((block) => ({
            name: block.name,
            arguments: block.input,
            id: block.id
          }));
        return {
          wantsToolCalls: true,
          toolCalls,
          textContent,
          rawResponse: response
        };
      }
      if (response.stop_reason === "max_tokens") {
        throw new Error("Response exceeded max tokens");
      }
      return {
        wantsToolCalls: false,
        toolCalls: [],
        textContent,
        rawResponse: response
      };
    },
    /** Wraps the scenario text as a user message. */
    createUserMessage(scenario) {
      return {
        role: "user",
        content: scenario
      };
    },
    /**
     * Rebuilds the assistant message from the raw reply's content blocks so
     * it can be appended to the conversation.
     */
    createAssistantMessage(chatResult) {
      const rawResponse = chatResult.rawResponse;
      return {
        role: "assistant",
        content: rawResponse.content
      };
    },
    /** Builds the `tool_result` content block that answers `toolCall`. */
    createToolResultMessage(toolCall, result) {
      return {
        type: "tool_result",
        tool_use_id: toolCall.id,
        content: result
      };
    }
  };
}
3093
+
3094
+ // src/evals/llmHost/llmHostSimulation.ts
3095
// Register the built-in provider adapters when the module is loaded.
for (const [providerName, adapterFactory] of [
  ["openai", createOpenAIAdapter],
  ["anthropic", createAnthropicAdapter]
]) {
  registerAdapter(providerName, adapterFactory);
}

/**
 * Runs an LLM-host simulation for `scenario` using the adapter registered
 * for `config.provider`, with a fixed retry policy (3 attempts, 1s base
 * delay, 30s maximum delay).
 *
 * @param {object} mcp - MCP handle forwarded to `runSimulation`.
 * @param {string} scenario - Prompt forwarded to `runSimulation`.
 * @param {object} config - LLM host configuration; `provider` selects the adapter.
 * @returns {Promise<object>} Whatever `runSimulation` resolves with.
 */
async function simulateLLMHost(mcp, scenario, config) {
  const retry = {
    maxAttempts: 3,
    baseDelayMs: 1000,
    maxDelayMs: 30000
  };
  const adapter = getAdapter(config.provider);
  return runSimulation(adapter, mcp, scenario, config, { retry });
}
3107
/**
 * Reports whether an adapter is registered for `provider`.
 * This delegates to `hasAdapter` and performs no other check.
 */
function isProviderAvailable(provider) {
  const registered = hasAdapter(provider);
  return registered;
}
3110
/**
 * Returns the install hint shown when a provider's SDK is missing.
 *
 * @param {*} provider - Provider name ("openai" or "anthropic").
 * @returns {string} The install hint, or `Unknown provider: <value>` for
 *   any other input.
 */
function getMissingDependencyMessage(provider) {
  const installHints = new Map([
    ["openai", "OpenAI SDK is not installed. Install it with: npm install openai"],
    ["anthropic", "Anthropic SDK is not installed. Install it with: npm install @anthropic-ai/sdk"]
  ]);
  if (installHints.has(provider)) {
    return installHints.get(provider);
  }
  return `Unknown provider: ${String(provider)}`;
}
3120
+
3121
+ // src/evals/evalRunner.ts
3122
/**
 * Executes one eval case and captures either its response or its error.
 *
 * - Mode "llm_host": requires `scenario` and `llmHostConfig`, runs
 *   `simulateLLMHost`, and returns the whole simulation result as the
 *   response; an unsuccessful simulation is reported as an error.
 * - Any other mode (default "direct"): requires `toolName` and `args` and
 *   calls `mcp.callTool`. The raw result is returned when the case has an
 *   `expect.isError` expectation; otherwise `structuredContent` is
 *   returned, falling back to `content`.
 *
 * Failures never propagate: they are returned as
 * `{ response: undefined, error: <message> }`.
 *
 * @param {object} evalCase - Case definition.
 * @param {object} mcp - Object exposing `callTool(name, args)`.
 * @returns {Promise<{response: *, error?: string}>}
 */
async function executeToolCall(evalCase, mcp) {
  const usesLlmHost = (evalCase.mode || "direct") === "llm_host";
  const requireFields = (fields, modeLabel) => {
    for (const field of fields) {
      if (!evalCase[field]) {
        throw new Error(
          `Eval case ${evalCase.id}: ${field} is required for ${modeLabel} mode`
        );
      }
    }
  };
  try {
    if (usesLlmHost) {
      requireFields(["scenario", "llmHostConfig"], "llm_host");
      const simulation = await simulateLLMHost(
        mcp,
        evalCase.scenario,
        evalCase.llmHostConfig
      );
      if (simulation.success) {
        return { response: simulation };
      }
      throw new Error(simulation.error || "LLM host simulation failed");
    }
    requireFields(["toolName", "args"], "direct");
    const toolResult = await mcp.callTool(evalCase.toolName, evalCase.args);
    // Error expectations inspect the raw result rather than just its content.
    const wantsRawResult = evalCase.expect?.isError !== undefined;
    if (wantsRawResult) {
      return { response: toolResult };
    }
    return { response: toolResult.structuredContent ?? toolResult.content };
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    return { response: undefined, error: reason };
  }
}
3169
/**
 * A case passes when it produced no error and every recorded expectation
 * result is either undefined or has a truthy `pass`.
 *
 * @param {*} error - Error captured while executing the case, if any.
 * @param {object} expectations - Expectation results keyed by kind.
 * @returns {boolean}
 */
function didCasePass(error, expectations) {
  if (error) {
    return false;
  }
  for (const outcome of Object.values(expectations)) {
    if (outcome !== undefined && !outcome.pass) {
      return false;
    }
  }
  return true;
}
3174
/**
 * Evaluates every expectation present in `expectBlock` against `response`.
 *
 * Expectation field -> result key:
 *   response -> exact, schema -> schema, containsText -> textContains,
 *   matchesPattern -> regex, isError -> error, responseSize -> size,
 *   passesJudge -> judge, snapshot -> snapshot.
 * Fields that are undefined are skipped and produce no result key.
 *
 * @param {object} expectBlock - Expectations declared on the eval case.
 * @param {*} response - Response being validated.
 * @param {object} config
 * @param {object} [config.schemas] - Registry looked up by `expectBlock.schema`.
 * @param {object} [config.judgeConfigs] - Judge configs looked up by
 *   `passesJudge.configId`.
 * @param {Function} [config.playwrightExpect] - `expect` function required
 *   for snapshot expectations.
 * @returns {Promise<object>} Map of result key to `{ pass, details }`.
 */
async function runExpectBlockValidations(expectBlock, response, config) {
  const results = {};
  const toOutcome = (validation) => ({
    pass: validation.pass,
    details: validation.message
  });
  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  if (expectBlock.response !== undefined) {
    results.exact = toOutcome(validateResponse(response, expectBlock.response));
  }

  if (expectBlock.schema !== undefined) {
    const schema = config.schemas?.[expectBlock.schema];
    results.schema = schema
      ? toOutcome(validateSchema(response, schema))
      : {
          pass: false,
          details: `Schema "${expectBlock.schema}" not found in schemas registry`
        };
  }

  if (expectBlock.containsText !== undefined) {
    results.textContains = toOutcome(validateText(response, expectBlock.containsText));
  }

  if (expectBlock.matchesPattern !== undefined) {
    results.regex = toOutcome(validatePattern(response, expectBlock.matchesPattern));
  }

  if (expectBlock.isError !== undefined) {
    results.error = toOutcome(validateError(response, expectBlock.isError));
  }

  if (expectBlock.responseSize !== undefined) {
    results.size = toOutcome(validateSize(response, expectBlock.responseSize));
  }

  if (expectBlock.passesJudge !== undefined) {
    const judgeSpec = expectBlock.passesJudge;
    const threshold = judgeSpec.threshold === undefined ? 0.7 : judgeSpec.threshold;
    const judgeConfig = judgeSpec.configId
      ? config.judgeConfigs?.[judgeSpec.configId] ?? {}
      : {};
    try {
      const verdict = await createJudge(judgeConfig).evaluate(
        response,
        judgeSpec.reference ?? null,
        judgeSpec.rubric
      );
      // A judge that reports only pass/fail is mapped to a score of 1 or 0.
      const score = verdict.score ?? (verdict.pass ? 1 : 0);
      const scoreText = score.toFixed(2);
      if (score >= threshold) {
        results.judge = {
          pass: true,
          details: `Judge passed with score ${scoreText}`
        };
      } else {
        results.judge = {
          pass: false,
          details: `Judge failed with score ${scoreText} (threshold: ${threshold}). ${verdict.reasoning ?? ""}`
        };
      }
    } catch (err) {
      results.judge = {
        pass: false,
        details: `Judge evaluation error: ${describeError(err)}`
      };
    }
  }

  if (expectBlock.snapshot !== undefined) {
    if (config.playwrightExpect) {
      try {
        await config.playwrightExpect(response).toMatchToolSnapshot(
          expectBlock.snapshot,
          expectBlock.snapshotSanitizers ?? []
        );
        results.snapshot = {
          pass: true,
          details: `Matches snapshot "${expectBlock.snapshot}"`
        };
      } catch (err) {
        results.snapshot = {
          pass: false,
          details: describeError(err)
        };
      }
    } else {
      results.snapshot = {
        pass: false,
        details: "Snapshot testing requires expect in context"
      };
    }
  }

  return results;
}
3281
/**
 * Runs a single eval case and assembles its result record.
 *
 * The case is executed through `executeToolCall`; when that produced no
 * error and the case declares `expect`, the expectations are evaluated with
 * `runExpectBlockValidations`.
 *
 * @param {object} evalCase - Case definition.
 * @param {object} context - Provides `mcp` (also the source of `authType`
 *   and `project`) and optionally `expect` for snapshot assertions.
 * @param {object} [options]
 * @param {string} [options.datasetName] - Defaults to "single-case".
 * @param {object} [options.schemas] - Schema registry for schema expectations.
 * @param {object} [options.judgeConfigs] - Judge configurations.
 * @returns {Promise<object>} Result record including `pass`, `response`,
 *   `error`, `expectations` and `durationMs`.
 */
async function runEvalCase(evalCase, context, options = {}) {
  const startedAt = Date.now();
  const mode = evalCase.mode || "direct";
  const outcome = await executeToolCall(evalCase, context.mcp);
  let expectations = {};
  if (!outcome.error && evalCase.expect) {
    const validationConfig = {
      schemas: options.schemas,
      judgeConfigs: options.judgeConfigs,
      playwrightExpect: context.expect
    };
    expectations = await runExpectBlockValidations(
      evalCase.expect,
      outcome.response,
      validationConfig
    );
  }
  // The scenario text stands in for the tool name when no tool is named.
  const label = evalCase.toolName ?? evalCase.scenario ?? "unknown";
  return {
    id: evalCase.id,
    datasetName: options.datasetName ?? "single-case",
    toolName: label,
    mode,
    source: "eval",
    pass: didCasePass(outcome.error, expectations),
    response: outcome.response,
    error: outcome.error,
    expectations,
    authType: context.mcp.authType,
    project: context.mcp.project,
    durationMs: Date.now() - startedAt
  };
}
3312
/**
 * Runs every case of a dataset sequentially and summarizes the outcome.
 *
 * @param {object} options
 * @param {object} options.dataset - Dataset with `name`, `cases` and
 *   optionally `schemas`.
 * @param {object} [options.schemas] - Extra schemas; these override dataset
 *   schemas of the same name.
 * @param {object} [options.judgeConfigs] - Judge configurations passed to
 *   each case.
 * @param {boolean} [options.stopOnFailure=false] - Stop after the first
 *   failing case.
 * @param {Function} [options.onCaseComplete] - Awaited with each case result.
 * @param {object} context - Passed to `runEvalCase`; when it has `testInfo`,
 *   the case results are attached to it as JSON under "mcp-test-results".
 * @returns {Promise<object>} `{ total, passed, failed, caseResults, durationMs }`
 *   counting only the cases that were actually run.
 */
async function runEvalDataset(options, context) {
  const {
    dataset,
    schemas,
    judgeConfigs,
    stopOnFailure = false,
    onCaseComplete
  } = options;
  const startedAt = Date.now();
  const mergedSchemas = { ...dataset.schemas, ...schemas };
  const caseResults = [];
  for (const evalCase of dataset.cases) {
    const outcome = await runEvalCase(evalCase, context, {
      datasetName: dataset.name,
      schemas: mergedSchemas,
      judgeConfigs
    });
    caseResults.push(outcome);
    if (onCaseComplete) {
      await onCaseComplete(outcome);
    }
    if (stopOnFailure && !outcome.pass) {
      break;
    }
  }
  const total = caseResults.length;
  const passed = caseResults.reduce((count, outcome) => (outcome.pass ? count + 1 : count), 0);
  const summary = {
    total,
    passed,
    failed: total - passed,
    caseResults,
    durationMs: Date.now() - startedAt
  };
  if (context.testInfo) {
    const payload = Buffer.from(JSON.stringify({ caseResults }));
    await context.testInfo.attach("mcp-test-results", {
      contentType: "application/json",
      body: payload
    });
  }
  return summary;
}
3358
+
3359
+ // src/evals/llmHost/toolCallExpectation.ts
3360
/**
 * Checks that every expected argument is present in `actual` with an equal
 * value. Extra keys in `actual` are ignored (partial match).
 *
 * Values are compared by their JSON serialization with object keys sorted,
 * so nested objects match regardless of key order while array order still
 * matters.
 *
 * @param {*} actual - Arguments the tool was actually called with. A
 *   null, undefined or non-object value only matches an empty `expected`.
 * @param {object} expected - Arguments that must be present.
 * @returns {boolean}
 */
function argumentsMatch(actual, expected) {
  // Serializes with plain-object keys sorted so key order cannot cause a
  // false mismatch; arrays and primitives are left untouched.
  const canonicalize = (value) => JSON.stringify(value, (_key, nested) => {
    if (nested === null || typeof nested !== "object" || Array.isArray(nested)) {
      return nested;
    }
    const sortedEntries = Object.entries(nested).sort(([a], [b]) => {
      if (a < b) return -1;
      return a > b ? 1 : 0;
    });
    return Object.fromEntries(sortedEntries);
  });
  const expectedKeys = Object.keys(expected);
  if (expectedKeys.length === 0) {
    return true;
  }
  // The `in` operator throws on null/undefined/primitives, which can occur
  // when a tool call carries no arguments object.
  if (actual === null || typeof actual !== "object") {
    return false;
  }
  return expectedKeys.every(
    (key) => key in actual && canonicalize(actual[key]) === canonicalize(expected[key])
  );
}
3373
/**
 * Finds the first actual tool call that satisfies an expected call.
 *
 * A call matches when its name equals `expected.name` and, if
 * `expected.arguments` is truthy, `argumentsMatch` accepts its arguments.
 *
 * @param {object} expected - Expected call (`name`, optional `arguments`).
 * @param {Iterable<object>} actualCalls - Calls made during the run.
 * @returns {object|null} The matching call, or null when none matches.
 */
function findMatchingCall(expected, actualCalls) {
  for (const candidate of actualCalls) {
    const sameName = candidate.name === expected.name;
    if (
      sameName &&
      (!expected.arguments || argumentsMatch(candidate.arguments, expected.arguments))
    ) {
      return candidate;
    }
  }
  return null;
}
3387
/**
 * Creates an async validator that checks the tool calls recorded in a
 * response against `evalCase.metadata.expectedToolCalls`.
 *
 * An expected call counts as required unless its `required` property is
 * exactly `false`. The validator passes when every required expected call
 * has a match (per `findMatchingCall`) among `response.toolCalls`.
 *
 * @returns {(evalCase: object, response: *) => Promise<{pass: boolean, details: string}>}
 */
function createToolCallValidator() {
  const isRequired = (call) => call.required !== false;
  const describeCall = (call) => `${call.name}(${JSON.stringify(call.arguments || {})})`;

  return async (evalCase, response) => {
    const expectedCalls = evalCase.metadata?.expectedToolCalls;
    if (!expectedCalls || expectedCalls.length === 0) {
      return { pass: true, details: "No expected tool calls specified" };
    }

    const actualCalls = response?.toolCalls;
    if (!actualCalls || actualCalls.length === 0) {
      const requiredCount = expectedCalls.filter(isRequired).length;
      if (requiredCount === 0) {
        return { pass: true, details: "No tool calls expected or made" };
      }
      return {
        pass: false,
        details: `Expected ${requiredCount} tool call(s), but LLM made no tool calls`
      };
    }

    const missingCalls = expectedCalls.filter(
      (expectedCall) => !findMatchingCall(expectedCall, actualCalls) && isRequired(expectedCall)
    );
    if (missingCalls.length === 0) {
      return {
        pass: true,
        details: `All ${expectedCalls.length} expected tool call(s) were made correctly`
      };
    }
    const missingSummary = missingCalls.map(describeCall).join(", ");
    const actualSummary = actualCalls.map((call) => call.name).join(", ");
    return {
      pass: false,
      details: `Missing required tool call(s): ${missingSummary}. Actual calls: ${actualSummary}`
    };
  };
}
3435
+
3436
+ // src/spec/conformanceChecks.ts
3437
/**
 * Runs a series of protocol conformance checks against an MCP server.
 *
 * Checks, in order:
 *   1. server_info_present        (when `checkServerInfo`)
 *   2. capabilities_valid
 *   3. list_tools_succeeds        (a failure here ends the run early)
 *   4. required_tools_present     (when `requiredTools` is non-empty)
 *   5. tool_schemas_valid         (when `validateSchemas` and tools exist)
 *   6. list_resources_succeeds    (when `checkResources` and the server
 *                                  declares the resources capability)
 *   7. list_prompts_succeeds      (when `checkPrompts` and the server
 *                                  declares the prompts capability)
 *   8. invalid_tool_returns_error (calls the tool "__nonexistent_tool__")
 *
 * @param {object} mcp - MCP handle exposing getServerInfo(), listTools(),
 *   callTool(), `client` (getServerCapabilities, listResources,
 *   listPrompts), `authType` and `project`.
 * @param {object} [options]
 * @param {string[]} [options.requiredTools=[]] - Tool names that must exist.
 * @param {boolean} [options.validateSchemas=true]
 * @param {boolean} [options.checkServerInfo=true]
 * @param {boolean} [options.checkResources=true]
 * @param {boolean} [options.checkPrompts=true]
 * @param {object} [testInfo] - When given, a JSON summary is attached under
 *   "mcp-conformance-checks" (not on the early return after a listTools
 *   failure).
 * @returns {Promise<{pass: boolean, checks: object[], raw: object}>}
 *   `pass` is true only when every recorded check passed.
 */
async function runConformanceChecks(mcp, options = {}, testInfo) {
  const {
    requiredTools = [],
    validateSchemas = true,
    checkServerInfo = true,
    checkResources = true,
    checkPrompts = true
  } = options;
  const describeError = (error) => (error instanceof Error ? error.message : String(error));
  const checks = [];
  const raw = {
    serverInfo: null,
    capabilities: null,
    tools: [],
    resources: null,
    prompts: null
  };

  const serverInfo = mcp.getServerInfo();
  // One truthiness test drives both `pass` and the message, so a missing
  // (null or undefined) server info can never be reported as passing.
  const hasServerInfo = Boolean(serverInfo);
  if (hasServerInfo) {
    raw.serverInfo = serverInfo;
  }
  if (checkServerInfo) {
    checks.push({
      name: "server_info_present",
      pass: hasServerInfo,
      message: hasServerInfo
        ? `Server info: ${serverInfo.name ?? "unknown"} v${serverInfo.version ?? "unknown"}`
        : "Server info is missing"
    });
  }

  const capabilities = mcp.client.getServerCapabilities();
  const hasCapabilities = Boolean(capabilities);
  if (hasCapabilities) {
    raw.capabilities = capabilities;
  }
  checks.push({
    name: "capabilities_valid",
    pass: hasCapabilities,
    message: hasCapabilities
      ? `Server capabilities: ${formatCapabilities(capabilities)}`
      : "Server capabilities not available"
  });

  let tools = [];
  try {
    tools = await mcp.listTools();
    raw.tools = tools;
    checks.push({
      name: "list_tools_succeeds",
      pass: true,
      message: `listTools returned ${tools.length} tools`
    });
  } catch (error) {
    checks.push({
      name: "list_tools_succeeds",
      pass: false,
      message: `listTools failed: ${describeError(error)}`
    });
    // The remaining checks are skipped when tools cannot be listed.
    return { pass: checks.every((check) => check.pass), checks, raw };
  }

  if (requiredTools.length > 0) {
    const toolNames = new Set(tools.map((t) => t.name));
    const missingTools = requiredTools.filter((name) => !toolNames.has(name));
    checks.push({
      name: "required_tools_present",
      pass: missingTools.length === 0,
      message: missingTools.length === 0
        ? `All ${requiredTools.length} required tools are present`
        : `Missing required tools: ${missingTools.join(", ")}`
    });
  }

  if (validateSchemas && tools.length > 0) {
    const invalidTools = [];
    for (const tool of tools) {
      if (!tool.name) {
        invalidTools.push(`(unnamed tool): missing name`);
        continue;
      }
      if (!tool.inputSchema) {
        invalidTools.push(`${tool.name}: missing inputSchema`);
        continue;
      }
      if (tool.inputSchema.type !== "object") {
        invalidTools.push(
          `${tool.name}: inputSchema.type must be "object", got "${String(tool.inputSchema.type)}"`
        );
      }
    }
    checks.push({
      name: "tool_schemas_valid",
      pass: invalidTools.length === 0,
      message: invalidTools.length === 0
        ? `All ${tools.length} tools have valid schemas`
        : `Invalid tool schemas:\n${invalidTools.join("\n ")}`
    });
  }

  if (checkResources && capabilities?.resources) {
    try {
      const resourcesResult = await mcp.client.listResources();
      raw.resources = resourcesResult.resources;
      checks.push({
        name: "list_resources_succeeds",
        pass: true,
        message: `listResources returned ${resourcesResult.resources.length} resources`
      });
    } catch (error) {
      checks.push({
        name: "list_resources_succeeds",
        pass: false,
        message: `listResources failed: ${describeError(error)}`
      });
    }
  }

  if (checkPrompts && capabilities?.prompts) {
    try {
      const promptsResult = await mcp.client.listPrompts();
      raw.prompts = promptsResult.prompts;
      checks.push({
        name: "list_prompts_succeeds",
        pass: true,
        message: `listPrompts returned ${promptsResult.prompts.length} prompts`
      });
    } catch (error) {
      checks.push({
        name: "list_prompts_succeeds",
        pass: false,
        message: `listPrompts failed: ${describeError(error)}`
      });
    }
  }

  // Either an `isError: true` result or a thrown error counts as correct
  // handling of an unknown tool.
  try {
    const probe = await mcp.callTool("__nonexistent_tool__", {});
    const hasError = probe.isError === true;
    checks.push({
      name: "invalid_tool_returns_error",
      pass: hasError,
      message: hasError
        ? "Nonexistent tool correctly returned an error"
        : "Calling nonexistent tool should have returned an error"
    });
  } catch {
    checks.push({
      name: "invalid_tool_returns_error",
      pass: true,
      message: "Nonexistent tool correctly threw an error"
    });
  }

  const pass = checks.every((check) => check.pass);
  const result = { pass, checks, raw };
  if (testInfo) {
    await testInfo.attach("mcp-conformance-checks", {
      contentType: "application/json",
      body: JSON.stringify(
        {
          operation: "conformanceChecks",
          pass,
          checks,
          serverInfo: raw.serverInfo,
          capabilities: raw.capabilities,
          toolCount: raw.tools.length,
          authType: mcp.authType,
          project: mcp.project
        },
        null,
        2
      )
    });
  }
  return result;
}
3596
/**
 * Lists the names of the capabilities a server declares.
 *
 * Only these properties are considered, in this order: tools, resources,
 * prompts, logging, completions, experimental. A capability is listed when
 * its property is truthy.
 *
 * @param {object} capabilities - Server capabilities object.
 * @returns {string} Comma-separated names, or "none declared".
 */
function formatCapabilities(capabilities) {
  const knownCapabilities = [
    "tools",
    "resources",
    "prompts",
    "logging",
    "completions",
    "experimental"
  ];
  const declared = knownCapabilities.filter((name) => capabilities[name]);
  if (declared.length === 0) {
    return "none declared";
  }
  return declared.join(", ");
}
3606
+
3607
+ exports.CLIOAuthClient = CLIOAuthClient;
3608
+ exports.DiscoveryError = DiscoveryError;
3609
+ exports.ENV_VAR_NAMES = ENV_VAR_NAMES;
3610
+ exports.EvalCaseSchema = EvalCaseSchema;
3611
+ exports.EvalDatasetSchema = EvalDatasetSchema;
3612
+ exports.MCPConfigSchema = MCPConfigSchema;
3613
+ exports.MCP_PROTOCOL_VERSION = MCP_PROTOCOL_VERSION;
3614
+ exports.closeMCPClient = closeMCPClient;
3615
+ exports.createJudge = createJudge;
3616
+ exports.createMCPClientForConfig = createMCPClientForConfig;
3617
+ exports.createMCPFixture = createMCPFixture;
3618
+ exports.createTokenAuthHeaders = createTokenAuthHeaders;
3619
+ exports.createToolCallValidator = createToolCallValidator;
3620
+ exports.discoverAuthorizationServer = discoverAuthorizationServer;
3621
+ exports.discoverProtectedResource = discoverProtectedResource;
3622
+ exports.expect = expect;
3623
+ exports.extractText = extractText;
3624
+ exports.extractTextFromResponse = extractText;
3625
+ exports.getMissingDependencyMessage = getMissingDependencyMessage;
3626
+ exports.getResponseSizeBytes = getResponseSizeBytes;
3627
+ exports.hasValidTokens = hasValidTokens;
3628
+ exports.injectTokens = injectTokens;
3629
+ exports.isHttpConfig = isHttpConfig;
3630
+ exports.isProviderAvailable = isProviderAvailable;
3631
+ exports.isStdioConfig = isStdioConfig;
3632
+ exports.isTokenExpired = isTokenExpired;
3633
+ exports.isTokenExpiringSoon = isTokenExpiringSoon;
3634
+ exports.loadEvalDataset = loadEvalDataset;
3635
+ exports.loadEvalDatasetFromObject = loadEvalDatasetFromObject;
3636
+ exports.loadTokens = loadTokens;
3637
+ exports.loadTokensFromEnv = loadTokensFromEnv;
3638
+ exports.normalizeToolResponse = normalizeToolResponse;
3639
+ exports.normalizeWhitespace = normalizeWhitespace;
3640
+ exports.performOAuthSetup = performOAuthSetup;
3641
+ exports.performOAuthSetupIfNeeded = performOAuthSetupIfNeeded;
3642
+ exports.runConformanceChecks = runConformanceChecks;
3643
+ exports.runEvalCase = runEvalCase;
3644
+ exports.runEvalDataset = runEvalDataset;
3645
+ exports.simulateLLMHost = simulateLLMHost;
3646
+ exports.test = test;
3647
+ exports.validateAccessToken = validateAccessToken;
3648
+ exports.validateError = validateError;
3649
+ exports.validateEvalCase = validateEvalCase;
3650
+ exports.validateEvalDataset = validateEvalDataset;
3651
+ exports.validateMCPConfig = validateMCPConfig;
3652
+ exports.validatePattern = validatePattern;
3653
+ exports.validateResponse = validateResponse;
3654
+ exports.validateSchema = validateSchema;
3655
+ exports.validateSize = validateSize;
3656
+ exports.validateText = validateText;
3657
+ //# sourceMappingURL=index.cjs.map
3658
+ //# sourceMappingURL=index.cjs.map